diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index a9cf01467f6..451dacf904e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -50,6 +50,11 @@ static inline uint8_t ascii_toupper(uint8_t utf8_code_unit) {
                                                               : utf8_code_unit;
 }
 
+template <typename T>  // T: any integer character / code unit type
+static inline bool IsAsciiCharacter(T character) {
+  return static_cast<uint64_t>(character) < 128;  // negative signed values are non-ASCII
+}
+
 // TODO: optional ascii validation
 
 struct AsciiLength {
@@ -66,6 +71,7 @@ constexpr uint32_t kMaxCodepointLookup =
     0xffff;  // up to this codepoint is in a lookup table
 std::vector<uint32_t> lut_upper_codepoint;
 std::vector<uint32_t> lut_lower_codepoint;
+std::vector<utf8proc_category_t> lut_category;
 std::once_flag flag_case_luts;
 
 void EnsureLookupTablesFilled() {
@@ -75,6 +81,7 @@ void EnsureLookupTablesFilled() {
     for (uint32_t i = 0; i <= kMaxCodepointLookup; i++) {
       lut_upper_codepoint.push_back(utf8proc_toupper(i));
       lut_lower_codepoint.push_back(utf8proc_tolower(i));
+      lut_category.push_back(utf8proc_category(i));
     }
   });
 }
@@ -97,8 +104,8 @@ struct UTF8Transform {
   }
 
   static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    EnsureLookupTablesFilled();
     if (batch[0].kind() == Datum::ARRAY) {
-      EnsureLookupTablesFilled();
       const ArrayData& input = *batch[0].array();
       ArrayType input_boxed(batch[0].array());
       ArrayData* output = out->mutable_array();
@@ -199,6 +206,10 @@ struct UTF8Lower : UTF8Transform<Type, UTF8Lower<Type>> {
   }
 };
 
+#else
+
+void EnsureLookupTablesFilled() {}  // no-op stub: no lookup tables without utf8proc
+
 #endif  // ARROW_WITH_UTF8PROC
 
 using TransformFunc = std::function<void(const uint8_t*, int64_t, uint8_t*)>;
@@ -322,9 +333,7 @@ void StringBoolTransform(KernelContext* ctx, const ExecBatch& batch,
   } else {
     const auto& input = checked_cast<const BaseBinaryScalar&>(*batch[0].scalar());
     if (input.is_valid) {
-      auto result = checked_pointer_cast<BooleanScalar>(MakeNullScalar(out->type()));
       uint8_t result_value = 0;
-      result->is_valid = true;
       std::array<offset_type, 2> offsets{0,
                                          static_cast<offset_type>(input.value->size())};
       transform(offsets.data(), input.value->data(), 1, /*output_offset=*/0,
@@ -409,6 +418,413 @@ void AddBinaryContainsExact(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunction(std::move(func)));
 }
 
+// IsAlpha/Digit etc
+
+#ifdef ARROW_WITH_UTF8PROC
+
+static inline bool HasAnyUnicodeGeneralCategory(uint32_t codepoint, uint32_t mask) {
+  utf8proc_category_t general_category = codepoint <= kMaxCodepointLookup
+                                             ? lut_category[codepoint]
+                                             : utf8proc_category(codepoint);
+  uint32_t general_category_bit = 1u << general_category;  // unsigned: no int sign bit
+  // `mask` has bit (1u << c) set per accepted category c. Undefined (but valid)
+  // codepoints have general_category == 0 == UTF8PROC_CATEGORY_CN and never match.
+  return (general_category != UTF8PROC_CATEGORY_CN) &&
+         ((general_category_bit & mask) != 0);
+}
+
+template <typename... Categories>  // folds `category` into `mask`, then recurses
+static inline bool HasAnyUnicodeGeneralCategory(uint32_t codepoint, uint32_t mask,
+                                                utf8proc_category_t category,
+                                                Categories... categories) {
+  return HasAnyUnicodeGeneralCategory(codepoint, mask | (1u << category), categories...);
+}
+
+template <typename... Categories>  // entry point: true if codepoint is in any category
+static inline bool HasAnyUnicodeGeneralCategory(uint32_t codepoint,
+                                                utf8proc_category_t category,
+                                                Categories... categories) {
+  return HasAnyUnicodeGeneralCategory(codepoint, static_cast<uint32_t>(1u << category),
+                                      categories...);
+}
+
+static inline bool IsCasedCharacterUnicode(uint32_t codepoint) {
+  const bool is_letter = HasAnyUnicodeGeneralCategory(  // category Lu, Ll or Lt
+      codepoint, UTF8PROC_CATEGORY_LU, UTF8PROC_CATEGORY_LL, UTF8PROC_CATEGORY_LT);
+  return is_letter || static_cast<uint32_t>(utf8proc_toupper(codepoint)) != codepoint ||
+         static_cast<uint32_t>(utf8proc_tolower(codepoint)) != codepoint;
+}
+
+static inline bool IsLowerCaseCharacterUnicode(uint32_t codepoint) {
+  // Lower case: category Ll, or has an upper-case mapping but no lower-case one; never
+  // title case (Lt). Known to be incomplete for lower case, but the best we can do, see
+  // https://github.com/JuliaStrings/utf8proc/issues/195
+  return (HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LL) ||
+          ((static_cast<uint32_t>(utf8proc_toupper(codepoint)) != codepoint) &&
+           (static_cast<uint32_t>(utf8proc_tolower(codepoint)) == codepoint))) &&
+         !HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LT);
+}
+
+static inline bool IsUpperCaseCharacterUnicode(uint32_t codepoint) {
+  // Upper case: category Lu, or has a lower-case mapping but no upper-case one; never
+  // title case (Lt). Workaround for https://github.com/JuliaStrings/utf8proc/issues/195
+  return (HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LU) ||
+          ((static_cast<uint32_t>(utf8proc_toupper(codepoint)) == codepoint) &&
+           (static_cast<uint32_t>(utf8proc_tolower(codepoint)) != codepoint))) &&
+         !HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LT);
+}
+
+static inline bool IsAlphaNumericCharacterUnicode(uint32_t codepoint) {  // any L* or N*
+  return HasAnyUnicodeGeneralCategory(
+      codepoint, UTF8PROC_CATEGORY_LU, UTF8PROC_CATEGORY_LL, UTF8PROC_CATEGORY_LT,
+      UTF8PROC_CATEGORY_LM, UTF8PROC_CATEGORY_LO, UTF8PROC_CATEGORY_ND,
+      UTF8PROC_CATEGORY_NL, UTF8PROC_CATEGORY_NO);
+}
+
+static inline bool IsAlphaCharacterUnicode(uint32_t codepoint) {  // any letter (L*)
+  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_LU,
+                                      UTF8PROC_CATEGORY_LL, UTF8PROC_CATEGORY_LT,
+                                      UTF8PROC_CATEGORY_LM, UTF8PROC_CATEGORY_LO);
+}
+
+static inline bool IsDecimalCharacterUnicode(uint32_t codepoint) {  // category Nd only
+  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_ND);
+}
+
+static inline bool IsDigitCharacterUnicode(uint32_t codepoint) {
+  // Python defines a digit as Numeric_Type=Digit or Numeric_Type=Decimal. utf8proc does
+  // not expose Numeric_Type, so this approximates it with category Nd (same as decimal).
+  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_ND);
+}
+
+static inline bool IsNumericCharacterUnicode(uint32_t codepoint) {
+  // Python defines numeric as Numeric_Type=Digit, Numeric_Type=Decimal or
+  // Numeric_Type=Numeric. utf8proc cannot query numeric properties (Numeric_Value,
+  // Numeric_Type), so this is formally incorrect: it approximates them with the
+  // number categories Nd, Nl and No.
+  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_ND,
+                                      UTF8PROC_CATEGORY_NL, UTF8PROC_CATEGORY_NO);
+}
+
+static inline bool IsSpaceCharacterUnicode(uint32_t codepoint) {  // Zs or bidi WS/B/S
+  auto property = utf8proc_get_property(codepoint);  // read for its bidi class below
+  return HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_ZS) ||
+         property->bidi_class == UTF8PROC_BIDI_CLASS_WS ||
+         property->bidi_class == UTF8PROC_BIDI_CLASS_B ||
+         property->bidi_class == UTF8PROC_BIDI_CLASS_S;
+}
+
+static inline bool IsPrintableCharacterUnicode(uint32_t codepoint) {
+  uint32_t general_category = utf8proc_category(codepoint);  // direct query, no LUT
+  return (general_category != UTF8PROC_CATEGORY_CN) &&  // unassigned: not printable
+         !HasAnyUnicodeGeneralCategory(codepoint, UTF8PROC_CATEGORY_CC,
+                                       UTF8PROC_CATEGORY_CF, UTF8PROC_CATEGORY_CS,
+                                       UTF8PROC_CATEGORY_CO, UTF8PROC_CATEGORY_ZS,
+                                       UTF8PROC_CATEGORY_ZL, UTF8PROC_CATEGORY_ZP);
+}
+
+#endif
+
+static inline bool IsLowerCaseCharacterAscii(uint8_t ascii_character) {
+  return !(ascii_character < 'a' || ascii_character > 'z');  // within 'a'..'z'
+}
+
+static inline bool IsUpperCaseCharacterAscii(uint8_t ascii_character) {
+  return !(ascii_character < 'A' || ascii_character > 'Z');  // within 'A'..'Z'
+}
+
+static inline bool IsCasedCharacterAscii(uint8_t ascii_character) {
+  const bool is_upper = IsUpperCaseCharacterAscii(ascii_character);
+  return is_upper || IsLowerCaseCharacterAscii(ascii_character);  // ASCII letters only
+}
+
+static inline bool IsAlphaCharacterAscii(uint8_t ascii_character) {
+  return IsCasedCharacterAscii(ascii_character);  // every ASCII letter is cased
+}
+
+static inline bool IsAlphaNumericCharacterAscii(uint8_t ascii_character) {
+  if (ascii_character >= '0' && ascii_character <= '9') return true;  // digit
+  if (ascii_character >= 'a' && ascii_character <= 'z') return true;  // lower case
+  return (ascii_character >= 'A') && (ascii_character <= 'Z');        // upper case
+}
+
+static inline bool IsDecimalCharacterAscii(uint8_t ascii_character) {
+  return !(ascii_character < '0' || ascii_character > '9');  // within '0'..'9'
+}
+
+static inline bool IsSpaceCharacterAscii(uint8_t ascii_character) {
+  if (ascii_character == ' ') return true;
+  return (ascii_character >= 0x09) && (ascii_character <= 0x0D);  // \t \n \v \f \r
+}
+
+static inline bool IsPrintableCharacterAscii(uint8_t ascii_character) {
+  return !(ascii_character < ' ' || ascii_character > '~');  // within 0x20..0x7E
+}
+
+template <typename Derived, bool allow_empty = false>  // CRTP base of utf8_is* checks
+struct CharacterPredicateUnicode {
+  static bool Call(KernelContext* ctx, const uint8_t* input,
+                   size_t input_string_ncodeunits) {
+    if (allow_empty && input_string_ncodeunits == 0) {
+      return true;
+    }
+    bool all = false;  // overwritten by UTF8AllOf whenever decoding succeeds
+    bool any = false;
+    if (!ARROW_PREDICT_TRUE(arrow::util::UTF8AllOf(
+            input, input + input_string_ncodeunits, &all, [&any](uint32_t codepoint) {
+              any |= Derived::PredicateCharacterAny(codepoint);
+              return Derived::PredicateCharacterAll(codepoint);
+            }))) {
+      ctx->SetStatus(Status::Invalid("Invalid UTF8 sequence in input"));
+      return false;
+    }
+    return all && any;
+  }
+
+  static inline bool PredicateCharacterAny(uint32_t) {
+    return true;  // default condition makes sure there is at least 1 character
+  }
+};
+
+template <typename Derived, bool allow_empty = false>
+struct CharacterPredicateAscii {
+  static bool Call(KernelContext* ctx, const uint8_t* input,
+                   size_t input_string_ncodeunits) {
+    if (allow_empty && input_string_ncodeunits == 0) {
+      return true;
+    }
+    bool any = false;
+    // MB: A simple for loop seems 8% faster on gcc 9.3, running the IsAlphaNumericAscii
+    // benchmark. I don't consider that worth it.
+    bool all = std::all_of(input, input + input_string_ncodeunits,
+                           [&any](uint8_t ascii_character) {
+                             any |= Derived::PredicateCharacterAny(ascii_character);
+                             return Derived::PredicateCharacterAll(ascii_character);
+                           });
+    return all & any;
+  }
+
+  static inline bool PredicateCharacterAny(uint8_t) {
+    return true;  // default condition makes sure there is at least 1 character
+  }
+};
+
+#ifdef ARROW_WITH_UTF8PROC
+struct IsAlphaNumericUnicode : CharacterPredicateUnicode<IsAlphaNumericUnicode> {
+  static inline bool PredicateCharacterAll(uint32_t codepoint) {
+    return IsAlphaNumericCharacterUnicode(codepoint);  // kernel "utf8_isalnum"
+  }
+};
+#endif
+
+struct IsAlphaNumericAscii : CharacterPredicateAscii<IsAlphaNumericAscii> {
+  static inline bool PredicateCharacterAll(uint8_t ascii_character) {
+    return IsAlphaNumericCharacterAscii(ascii_character);  // kernel "ascii_isalnum"
+  }
+};
+
+#ifdef ARROW_WITH_UTF8PROC
+struct IsAlphaUnicode : CharacterPredicateUnicode<IsAlphaUnicode> {
+  static inline bool PredicateCharacterAll(uint32_t codepoint) {
+    return IsAlphaCharacterUnicode(codepoint);  // kernel "utf8_isalpha"
+  }
+};
+#endif
+
+struct IsAlphaAscii : CharacterPredicateAscii<IsAlphaAscii> {
+  static inline bool PredicateCharacterAll(uint8_t ascii_character) {
+    return IsAlphaCharacterAscii(ascii_character);  // kernel "ascii_isalpha"
+  }
+};
+
+#ifdef ARROW_WITH_UTF8PROC
+struct IsDecimalUnicode : CharacterPredicateUnicode<IsDecimalUnicode> {
+  static inline bool PredicateCharacterAll(uint32_t codepoint) {
+    return IsDecimalCharacterUnicode(codepoint);  // kernel "utf8_isdecimal"
+  }
+};
+#endif
+
+struct IsDecimalAscii : CharacterPredicateAscii<IsDecimalAscii> {
+  static inline bool PredicateCharacterAll(uint8_t ascii_character) {
+    return IsDecimalCharacterAscii(ascii_character);  // kernel "ascii_isdecimal"
+  }
+};
+
+#ifdef ARROW_WITH_UTF8PROC
+struct IsDigitUnicode : CharacterPredicateUnicode<IsDigitUnicode> {
+  static inline bool PredicateCharacterAll(uint32_t codepoint) {
+    return IsDigitCharacterUnicode(codepoint);  // kernel "utf8_isdigit"
+  }
+};
+
+struct IsNumericUnicode : CharacterPredicateUnicode<IsNumericUnicode> {
+  static inline bool PredicateCharacterAll(uint32_t codepoint) {
+    return IsNumericCharacterUnicode(codepoint);  // kernel "utf8_isnumeric"
+  }
+};
+#endif
+
+struct IsAscii {  // true iff every byte is below 128; the empty string qualifies
+  static bool Call(KernelContext* ctx, const uint8_t* input,
+                   size_t input_string_nascii_characters) {
+    const uint8_t* const end = input + input_string_nascii_characters;
+    return std::find_if_not(input, end, IsAsciiCharacter<uint8_t>) == end;
+  }
+};
+
+#ifdef ARROW_WITH_UTF8PROC
+struct IsLowerUnicode : CharacterPredicateUnicode<IsLowerUnicode> {  // "utf8_islower"
+  static inline bool PredicateCharacterAll(uint32_t codepoint) {
+    // Uncased characters are accepted; a cased character must be lower case
+    return !IsCasedCharacterUnicode(codepoint) || IsLowerCaseCharacterUnicode(codepoint);
+  }
+  static inline bool PredicateCharacterAny(uint32_t codepoint) {
+    return IsCasedCharacterUnicode(codepoint);  // at least 1 cased character
+  }
+};
+#endif
+
+struct IsLowerAscii : CharacterPredicateAscii<IsLowerAscii> {  // "ascii_islower"
+  static inline bool PredicateCharacterAll(uint8_t ascii_character) {
+    // Uncased characters are accepted; a cased character must be lower case
+    return !IsCasedCharacterAscii(ascii_character) ||
+           IsLowerCaseCharacterAscii(ascii_character);
+  }
+  static inline bool PredicateCharacterAny(uint8_t ascii_character) {
+    return IsCasedCharacterAscii(ascii_character);  // at least 1 cased character
+  }
+};
+
+#ifdef ARROW_WITH_UTF8PROC
+struct IsPrintableUnicode
+    : CharacterPredicateUnicode<IsPrintableUnicode, /*allow_empty=*/true> {
+  static inline bool PredicateCharacterAll(uint32_t codepoint) {
+    // ' ' is in category Zs, which IsPrintableCharacterUnicode rejects, so allow it here
+    return codepoint == ' ' || IsPrintableCharacterUnicode(codepoint);
+  }
+};
+#endif
+
+struct IsPrintableAscii
+    : CharacterPredicateAscii<IsPrintableAscii, /*allow_empty=*/true> {
+  static inline bool PredicateCharacterAll(uint8_t ascii_character) {
+    return IsPrintableCharacterAscii(ascii_character);  // kernel "ascii_isprintable"
+  }
+};
+
+#ifdef ARROW_WITH_UTF8PROC
+struct IsSpaceUnicode : CharacterPredicateUnicode<IsSpaceUnicode> {
+  static inline bool PredicateCharacterAll(uint32_t codepoint) {
+    return IsSpaceCharacterUnicode(codepoint);  // kernel "utf8_isspace"
+  }
+};
+#endif
+
+struct IsSpaceAscii : CharacterPredicateAscii<IsSpaceAscii> {
+  static inline bool PredicateCharacterAll(uint8_t ascii_character) {
+    return IsSpaceCharacterAscii(ascii_character);  // kernel "ascii_isspace"
+  }
+};
+
+#ifdef ARROW_WITH_UTF8PROC
+struct IsTitleUnicode {  // kernel "utf8_istitle"
+  static bool Call(KernelContext* ctx, const uint8_t* input,
+                   size_t input_string_ncodeunits) {
+    // rules:
+    // * 1: lower case follows cased
+    // * 2: upper case follows uncased
+    // * 3: at least 1 cased character (which logically should be upper/title)
+    bool rules_1_and_2 = true;  // overwritten by UTF8AllOf when decoding succeeds
+    bool previous_cased = false;  // was the previous codepoint cased?
+    bool rule_3 = false;
+    bool status =
+        arrow::util::UTF8AllOf(input, input + input_string_ncodeunits, &rules_1_and_2,
+                               [&previous_cased, &rule_3](uint32_t codepoint) {
+                                 if (IsLowerCaseCharacterUnicode(codepoint)) {
+                                   if (!previous_cased) return false;  // rule 1 broken
+                                   previous_cased = true;
+                                 } else if (IsCasedCharacterUnicode(codepoint)) {
+                                   if (previous_cased) return false;  // rule 2 broken
+                                   // next should be a lower case or uncased
+                                   previous_cased = true;
+                                   rule_3 = true;  // rule 3 obeyed
+                                 } else {
+                                   // a non-cased char, like _ or 1
+                                   // next should be upper case or more uncased
+                                   previous_cased = false;
+                                 }
+                                 return true;
+                               });
+    if (!ARROW_PREDICT_TRUE(status)) {
+      ctx->SetStatus(Status::Invalid("Invalid UTF8 sequence in input"));
+      return false;
+    }
+    return rules_1_and_2 && rule_3;
+  }
+};
+#endif
+
+struct IsTitleAscii {  // kernel "ascii_istitle"
+  static bool Call(KernelContext* ctx, const uint8_t* input,
+                   size_t input_string_ncodeunits) {
+    // rules:
+    // * 1: lower case follows cased
+    // * 2: upper case follows uncased
+    // * 3: at least 1 cased character (which logically should be upper/title)
+    bool rules_1_and_2 = true;
+    bool previous_cased = false;  // was the previous byte an ASCII letter?
+    bool rule_3 = false;
+    // we cannot rely on std::all_of because we need guaranteed order
+    for (const uint8_t* c = input; c < input + input_string_ncodeunits; ++c) {
+      if (IsLowerCaseCharacterAscii(*c)) {
+        if (!previous_cased) {
+          // rule 1 broken
+          rules_1_and_2 = false;
+          break;
+        }
+        previous_cased = true;
+      } else if (IsCasedCharacterAscii(*c)) {
+        if (previous_cased) {
+          // rule 2 broken
+          rules_1_and_2 = false;
+          break;
+        }
+        // next should be a lower case or uncased
+        previous_cased = true;
+        rule_3 = true;  // rule 3 obeyed
+      } else {
+        // a non-cased byte, like _ or 1 (this includes every non-ASCII byte)
+        // next should be upper case or more uncased
+        previous_cased = false;
+      }
+    }
+    return rules_1_and_2 & rule_3;
+  }
+};
+
+#ifdef ARROW_WITH_UTF8PROC
+struct IsUpperUnicode : CharacterPredicateUnicode<IsUpperUnicode> {  // "utf8_isupper"
+  static inline bool PredicateCharacterAll(uint32_t codepoint) {
+    // Uncased characters are accepted; a cased character must be upper case
+    return !IsCasedCharacterUnicode(codepoint) || IsUpperCaseCharacterUnicode(codepoint);
+  }
+  static inline bool PredicateCharacterAny(uint32_t codepoint) {
+    return IsCasedCharacterUnicode(codepoint);  // at least 1 cased character
+  }
+};
+#endif
+
+struct IsUpperAscii : CharacterPredicateAscii<IsUpperAscii> {  // "ascii_isupper"
+  static inline bool PredicateCharacterAll(uint8_t ascii_character) {
+    // Uncased characters are accepted; a cased character must be upper case
+    return !IsCasedCharacterAscii(ascii_character) ||
+           IsUpperCaseCharacterAscii(ascii_character);
+  }
+  static inline bool PredicateCharacterAny(uint8_t ascii_character) {
+    return IsCasedCharacterAscii(ascii_character);  // at least 1 cased character
+  }
+};
+
 // ----------------------------------------------------------------------
 // strptime string parsing
 
@@ -479,15 +895,84 @@ void MakeUnaryStringUTF8TransformKernel(std::string name, FunctionRegistry* regi
 
 #endif
 
+using StringPredicate = std::function<bool(KernelContext*, const uint8_t*, size_t)>;
+
+template <typename Type>  // Type: input string type (StringType / LargeStringType)
+void ApplyPredicate(KernelContext* ctx, const ExecBatch& batch, StringPredicate predicate,
+                    Datum* out) {
+  EnsureLookupTablesFilled();  // the Unicode predicates read lut_category
+  if (batch[0].kind() == Datum::ARRAY) {  // array input: one result bit per element
+    const ArrayData& input = *batch[0].array();
+    ArrayIterator<Type> input_it(input);
+    ArrayData* out_arr = out->mutable_array();
+    ::arrow::internal::GenerateBitsUnrolled(
+        out_arr->buffers[1]->mutable_data(), out_arr->offset, input.length,
+        [&]() -> bool {
+          util::string_view val = input_it();
+          return predicate(ctx, reinterpret_cast<const uint8_t*>(val.data()), val.size());
+        });
+  } else {  // scalar input
+    const auto& input = checked_cast<const BaseBinaryScalar&>(*batch[0].scalar());
+    if (input.is_valid) {
+      bool boolean_result =
+          predicate(ctx, input.value->data(), static_cast<size_t>(input.value->size()));
+      if (!ctx->status().ok()) {
+        // the predicate reported an error (e.g. invalid UTF8): leave `out` untouched
+        return;
+      }
+      out->value = std::make_shared<BooleanScalar>(boolean_result);
+    }
+  }
+}
+
+template <typename Predicate>  // Predicate: type with a static Call(ctx, data, size)
+void AddUnaryStringPredicate(std::string name, FunctionRegistry* registry) {
+  auto func = std::make_shared<ScalarFunction>(std::move(name), Arity::Unary());
+  auto exec_32 = [](KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    ApplyPredicate<StringType>(ctx, batch, Predicate::Call, out);
+  };
+  auto exec_64 = [](KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    ApplyPredicate<LargeStringType>(ctx, batch, Predicate::Call, out);
+  };
+  DCHECK_OK(func->AddKernel({utf8()}, boolean(), std::move(exec_32)));
+  DCHECK_OK(func->AddKernel({large_utf8()}, boolean(), std::move(exec_64)));
+  DCHECK_OK(registry->AddFunction(std::move(func)));
+}
+
 }  // namespace
 
 void RegisterScalarStringAscii(FunctionRegistry* registry) {
   MakeUnaryStringBatchKernel<AsciiUpper>("ascii_upper", registry);
   MakeUnaryStringBatchKernel<AsciiLower>("ascii_lower", registry);
+
+  AddUnaryStringPredicate<IsAscii>("binary_isascii", registry);
+
+  AddUnaryStringPredicate<IsAlphaNumericAscii>("ascii_isalnum", registry);
+  AddUnaryStringPredicate<IsAlphaAscii>("ascii_isalpha", registry);
+  AddUnaryStringPredicate<IsDecimalAscii>("ascii_isdecimal", registry);
+  // no isdigit for ascii, since it is the same as isdecimal
+  AddUnaryStringPredicate<IsLowerAscii>("ascii_islower", registry);
+  // no isnumeric for ascii, since it is the same as isdecimal
+  AddUnaryStringPredicate<IsPrintableAscii>("ascii_isprintable", registry);
+  AddUnaryStringPredicate<IsSpaceAscii>("ascii_isspace", registry);
+  AddUnaryStringPredicate<IsTitleAscii>("ascii_istitle", registry);
+  AddUnaryStringPredicate<IsUpperAscii>("ascii_isupper", registry);
 #ifdef ARROW_WITH_UTF8PROC
   MakeUnaryStringUTF8TransformKernel<UTF8Upper>("utf8_upper", registry);
   MakeUnaryStringUTF8TransformKernel<UTF8Lower>("utf8_lower", registry);
+  AddUnaryStringPredicate<IsAlphaNumericUnicode>("utf8_isalnum", registry);
+  AddUnaryStringPredicate<IsAlphaUnicode>("utf8_isalpha", registry);
+  AddUnaryStringPredicate<IsDecimalUnicode>("utf8_isdecimal", registry);
+  AddUnaryStringPredicate<IsDigitUnicode>("utf8_isdigit", registry);
+  AddUnaryStringPredicate<IsLowerUnicode>("utf8_islower", registry);
+  AddUnaryStringPredicate<IsNumericUnicode>("utf8_isnumeric", registry);
+  AddUnaryStringPredicate<IsPrintableUnicode>("utf8_isprintable", registry);
+  AddUnaryStringPredicate<IsSpaceUnicode>("utf8_isspace", registry);
+  AddUnaryStringPredicate<IsTitleUnicode>("utf8_istitle", registry);
+  AddUnaryStringPredicate<IsUpperUnicode>("utf8_isupper", registry);
+
 #endif
+
   AddAsciiLength(registry);
   AddBinaryContainsExact(registry);
   AddStrptime(registry);
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc
index 46ee129b03c..01a32c71f34 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_benchmark.cc
@@ -57,6 +57,10 @@ static void AsciiUpper(benchmark::State& state) {
   UnaryStringBenchmark(state, "ascii_upper");
 }
 
+static void IsAlphaNumericAscii(benchmark::State& state) {
+  UnaryStringBenchmark(state, "ascii_isalnum");  // runs the "ascii_isalnum" function
+}
+
 static void BinaryContainsExact(benchmark::State& state) {
   BinaryContainsExactOptions options("abac");
   UnaryStringBenchmark(state, "binary_contains_exact", &options);
@@ -70,14 +74,20 @@ static void Utf8Upper(benchmark::State& state) {
 static void Utf8Lower(benchmark::State& state) {
   UnaryStringBenchmark(state, "utf8_lower");
 }
+
+static void IsAlphaNumericUnicode(benchmark::State& state) {
+  UnaryStringBenchmark(state, "utf8_isalnum");  // runs the "utf8_isalnum" function
+}
 #endif
 
 BENCHMARK(AsciiLower);
 BENCHMARK(AsciiUpper);
+BENCHMARK(IsAlphaNumericAscii);
 BENCHMARK(BinaryContainsExact);
 #ifdef ARROW_WITH_UTF8PROC
 BENCHMARK(Utf8Lower);
 BENCHMARK(Utf8Upper);
+BENCHMARK(IsAlphaNumericUnicode);
 #endif
 
 }  // namespace compute
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index 0989401d034..88a0258ee5f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -145,8 +145,172 @@ TYPED_TEST(TestStringKernels, Utf8Lower) {
                                   CallFunction("utf8_lower", {invalid_input}));
 }
 
+TYPED_TEST(TestStringKernels, IsAlphaNumericUnicode) {
+  // U+08BE (utf8: \xE0\xA2\xBE) is undefined, but utf8proc thinks it is
+  // UTF8PROC_CATEGORY_LO
+  this->CheckUnary("utf8_isalnum", "[\"ⱭɽⱤoW123\", null, \"Ɑ2\", \"!\", \"\"]", boolean(),
+                   "[true, null, true, false, false]");
+}
+
+TYPED_TEST(TestStringKernels, IsAlphaUnicode) {
+  // U+08BE (utf8: \xE0\xA2\xBE) is undefined, but utf8proc thinks it is
+  // UTF8PROC_CATEGORY_LO
+  this->CheckUnary("utf8_isalpha", "[\"ⱭɽⱤoW\", null, \"Ɑ2\", \"!\", \"\"]", boolean(),
+                   "[true, null, false, false, false]");
+}
+
+TYPED_TEST(TestStringKernels, IsAscii) {
+  this->CheckUnary("binary_isascii", "[\"azAZ~\", null, \"Ɑ\", \"\"]", boolean(),
+                   "[true, null, false, true]");
+}
+
+TYPED_TEST(TestStringKernels, IsDecimalUnicode) {
+  // ٣ is arabic 3 (decimal), Ⅳ roman (non-decimal)
+  this->CheckUnary("utf8_isdecimal", "[\"12\", null, \"٣\", \"Ⅳ\", \"1a\", \"\"]",
+                   boolean(), "[true, null, true, false, false, false]");
+}
+
+TYPED_TEST(TestStringKernels, IsDigitUnicode) {
+  // These are digits according to Python, but we don't have the information in
+  // utf8proc for this
+  // this->CheckUnary("utf8_isdigit", "[\"²\", \"①\"]", boolean(), "[true,
+  // true]");
+}
+
+TYPED_TEST(TestStringKernels, IsNumericUnicode) {
+  // ٣ is arabic 3 (decimal), Ⅳ roman (non-decimal)
+  this->CheckUnary("utf8_isnumeric", "[\"12\", null, \"٣\", \"Ⅳ\", \"1a\", \"\"]",
+                   boolean(), "[true, null, true, true, false, false]");
+  // These are numerical according to Python, but we don't have the information in
+  // utf8proc for this
+  // this->CheckUnary("utf8_isnumeric", "[\"㐅\", \"卌\"]", boolean(),
+  //                  "[true, null, true, true, false, false]");
+}
+
+TYPED_TEST(TestStringKernels, IsLowerUnicode) {
+  // ٣ is arabic 3 (decimal), Φ capital
+  this->CheckUnary("utf8_islower",
+                   "[\"12\", null, \"٣a\", \"٣A\", \"1a\", \"Φ\", \"\", \"with space\", "
+                   "\"With space\"]",
+                   boolean(),
+                   "[false, null, true, false, true, false, false, true, false]");
+  // lower case character utf8proc does not know about
+  // this->CheckUnary("utf8_islower", "[\"ª\", \"ₕ\"]", boolean(), "[true,
+  // true]");
+}
+
+TYPED_TEST(TestStringKernels, IsPrintableUnicode) {
+  // U+2008 (utf8: \xe2\x80\x88) is punctuation space, it is NOT printable
+  // U+0378 (utf8: \xCD\xB8) is an undefined char, it has no category
+  this->CheckUnary(
+      "utf8_isprintable",
+      "[\" 123azAZ!~\", null, \"\xe2\x80\x88\", \"\", \"\\r\", \"\xCD\xB8\"]", boolean(),
+      "[true, null, false, true, false, false]");
+}
+
+TYPED_TEST(TestStringKernels, IsSpaceUnicode) {
+  // U+2008 (utf8: \xe2\x80\x88) is punctuation space
+  this->CheckUnary("utf8_isspace", "[\" \", null, \"  \", \"\\t\\r\"]", boolean(),
+                   "[true, null, true, true]");
+  this->CheckUnary("utf8_isspace", "[\" a\", null, \"a \", \"~\", \"\xe2\x80\x88\"]",
+                   boolean(), "[false, null, false, false, true]");
+}
+
+TYPED_TEST(TestStringKernels, IsTitleUnicode) {
+  // ٣ is arabic 3 (decimal), Φ capital
+  this->CheckUnary("utf8_istitle",
+                   "[\"Is\", null, \"Is Title\", \"Is٣Title\", \"Is_Ǆ\", \"Φ\", \"Ǆ\"]",
+                   boolean(), "[true, null, true, true, true, true, true]");
+  this->CheckUnary(
+      "utf8_istitle",
+      "[\"IsN\", null, \"IsNoTitle\", \"Is No T٣tle\", \"IsǄ\", \"ΦΦ\", \"ǆ\", \"_\"]",
+      boolean(), "[false, null, false, false, false, false, false, false]");
+}
+
+// Older versions of utf8proc fail
+#if !(UTF8PROC_VERSION_MAJOR <= 2 && UTF8PROC_VERSION_MINOR < 5)
+
+TYPED_TEST(TestStringKernels, IsUpperUnicode) {
+  // ٣ is arabic 3 (decimal), Φ capital
+  this->CheckUnary(
+      "utf8_isupper", "[\"12\", null, \"٣a\", \"٣A\", \"1A\", \"Φ\", \"\", \"Ⅰ\", \"Ⅿ\"]",
+      boolean(), "[false, null, false, true, true, true, false, true, true]");
+  // * Ⅰ to Ⅿ is a special case (roman capital), as well as Ⓐ to Ⓩ
+  // * ϒ - \xCF\x92 - Greek Upsilon with Hook Symbol - upper case, but has no direct lower
+  // case
+  // * U+1F88 - ᾈ - \xE1\xBE\x88 - Greek Capital Letter Alpha with Psili and Prosgegrammeni
+  // - title case
+  // * U+10400 - 𐐀 - \xF0\x90\x90\x80 - Deseret Capital Letter Long I - upper case
+  // * U+A7BA - Ꞻ - \xEA\x9E\xBA - Latin Capital Letter Glottal A -  new in unicode 13
+  // (not tested since it depends on the version of libutf8proc)
+  // * U+A7BB - ꞻ - \xEA\x9E\xBB - Latin Small Letter Glottal A - new in unicode 13
+  this->CheckUnary("utf8_isupper",
+                   "[\"Ⓐ\", \"Ⓩ\", \"ϒ\", \"ᾈ\", \"\xEA\x9E\xBA\", \"\xF0\x90\x90\x80\"]",
+                   boolean(), "[true, true, true, false, true, true]");
+}
+
+#endif  // UTF8PROC_VERSION_MINOR >= 5
+
 #endif  // ARROW_WITH_UTF8PROC
 
+TYPED_TEST(TestStringKernels, IsAlphaNumericAscii) {
+  this->CheckUnary("ascii_isalnum",
+                   "[\"ⱭɽⱤoW123\", null, \"Ɑ2\", \"!\", \"\", \"a space\", \"1 space\"]",
+                   boolean(), "[false, null, false, false, false, false, false]");
+  this->CheckUnary("ascii_isalnum", "[\"aRoW123\", null, \"a2\", \"a\", \"2\", \"\"]",
+                   boolean(), "[true, null, true, true, true, false]");
+}
+
+TYPED_TEST(TestStringKernels, IsAlphaAscii) {
+  this->CheckUnary("ascii_isalpha", "[\"ⱭɽⱤoW\", \"arrow\", null, \"a2\", \"!\", \"\"]",
+                   boolean(), "[false, true, null, false, false, false]");
+}
+
+TYPED_TEST(TestStringKernels, IsDecimalAscii) {
+  // ٣ is arabic 3
+  this->CheckUnary("ascii_isdecimal", "[\"12\", null, \"٣\", \"Ⅳ\", \"1a\", \"\"]",
+                   boolean(), "[true, null, false, false, false, false]");
+}
+
+TYPED_TEST(TestStringKernels, IsLowerAscii) {
+  // ٣ is arabic 3 (decimal), φ lower greek
+  this->CheckUnary("ascii_islower", "[\"12\", null, \"٣a\", \"٣A\", \"1a\", \"φ\", \"\"]",
+                   boolean(), "[false, null, true, false, true, false, false]");
+}
+TYPED_TEST(TestStringKernels, IsPrintableAscii) {
+  // \xe2\x80\x88 is punctuation space
+  this->CheckUnary("ascii_isprintable",
+                   "[\" 123azAZ!~\", null, \"\xe2\x80\x88\", \"\", \"\\r\"]", boolean(),
+                   "[true, null, false, true, false]");
+}
+
+TYPED_TEST(TestStringKernels, IsSpaceAscii) {
+  // \xe2\x80\x88 is punctuation space
+  // Note: for ascii version, the non-ascii chars are never seen as whitespace
+  this->CheckUnary("ascii_isspace", "[\" \", null, \"  \", \"\\t\\r\"]", boolean(),
+                   "[true, null, true, true]");
+  this->CheckUnary("ascii_isspace", "[\" a\", null, \"a \", \"~\", \"\xe2\x80\x88\"]",
+                   boolean(), "[false, null, false, false, false]");
+}
+
+TYPED_TEST(TestStringKernels, IsTitleAscii) {
+  // ٣ is arabic 3 (decimal), Φ capital
+  // Note: for ascii version, the non-ascii chars are seen as non-cased
+  this->CheckUnary("ascii_istitle",
+                   "[\"Is\", null, \"Is Title\", \"Is٣Title\", \"Is_Ǆ\", \"Φ\", \"Ǆ\"]",
+                   boolean(), "[true, null, true, true, true, false, false]");
+  this->CheckUnary(
+      "ascii_istitle",
+      "[\"IsN\", null, \"IsNoTitle\", \"Is No T٣tle\", \"IsǄ\", \"ΦΦ\", \"ǆ\", \"_\"]",
+      boolean(), "[false, null, false, false, true, false, false, false]");
+}
+
+TYPED_TEST(TestStringKernels, IsUpperAscii) {
+  // ٣ is arabic 3 (decimal), Φ capital greek
+  this->CheckUnary("ascii_isupper", "[\"12\", null, \"٣a\", \"٣A\", \"1A\", \"Φ\", \"\"]",
+                   boolean(), "[false, null, false, true, true, false, false]");
+}
+
 TYPED_TEST(TestStringKernels, BinaryContainsExact) {
   BinaryContainsExactOptions options{"ab"};
   this->CheckUnary("binary_contains_exact", "[]", boolean(), "[]", &options);
diff --git a/cpp/src/arrow/util/utf8.h b/cpp/src/arrow/util/utf8.h
index 1775b19d0fe..d5875c4590b 100644
--- a/cpp/src/arrow/util/utf8.h
+++ b/cpp/src/arrow/util/utf8.h
@@ -366,5 +366,24 @@ static inline bool UTF8Transform(const uint8_t* first, const uint8_t* last,
   return true;
 }
 
+// Sets *result to whether `predicate` holds for every codepoint decoded from
+// [first, last). Returns false, with *result untouched, if UTF8Decode fails.
+template <class UnaryPredicate>
+static inline bool UTF8AllOf(const uint8_t* first, const uint8_t* last, bool* result,
+                             UnaryPredicate&& predicate) {
+  bool all_match = true;
+  const uint8_t* cursor = first;
+  while (all_match && cursor < last) {
+    uint32_t codepoint = 0;
+    if (ARROW_PREDICT_FALSE(!UTF8Decode(&cursor, &codepoint))) {
+      return false;
+    }
+    // A rejected codepoint clears the flag, which ends the scan early.
+    if (!predicate(codepoint)) all_match = false;
+  }
+  *result = all_match;
+  return true;
+}
+
 }  // namespace util
 }  // namespace arrow
diff --git a/docker-compose.yml b/docker-compose.yml
index 62ddca0cfcd..89feef5eb21 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -319,6 +319,8 @@ services:
       ARROW_ORC: "OFF"
       ARROW_USE_ASAN: "ON"
       ARROW_USE_UBSAN: "ON"
+      # utf8proc 2.1.0 in Ubuntu Bionic has test failures
+      utf8proc_SOURCE: "BUNDLED"
     command: *cpp-command
 
   fedora-cpp:
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index ae7dae86d18..165895af7fb 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -103,6 +103,27 @@ def func(left, right):
 utf8_upper = _simple_unary_function('utf8_upper')
 utf8_lower = _simple_unary_function('utf8_lower')
 
+binary_isascii = _simple_unary_function('binary_isascii')
+# String predicates below are exposed as ascii_* / utf8_* pairs
+ascii_isalnum = _simple_unary_function('ascii_isalnum')
+utf8_isalnum = _simple_unary_function('utf8_isalnum')
+ascii_isalpha = _simple_unary_function('ascii_isalpha')
+utf8_isalpha = _simple_unary_function('utf8_isalpha')
+ascii_isdecimal = _simple_unary_function('ascii_isdecimal')
+utf8_isdecimal = _simple_unary_function('utf8_isdecimal')
+ascii_isdigit = ascii_isdecimal  # alias
+utf8_isdigit = _simple_unary_function('utf8_isdigit')
+ascii_islower = _simple_unary_function('ascii_islower')
+utf8_islower = _simple_unary_function('utf8_islower')
+ascii_isnumeric = ascii_isdecimal  # alias
+utf8_isnumeric = _simple_unary_function('utf8_isnumeric')
+ascii_isprintable = _simple_unary_function('ascii_isprintable')
+utf8_isprintable = _simple_unary_function('utf8_isprintable')
+ascii_istitle = _simple_unary_function('ascii_istitle')
+utf8_istitle = _simple_unary_function('utf8_istitle')
+ascii_isupper = _simple_unary_function('ascii_isupper')
+utf8_isupper = _simple_unary_function('utf8_isupper')
+
 is_valid = _simple_unary_function('is_valid')
 is_null = _simple_unary_function('is_null')
 
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 00486d54149..52107f70dfa 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -95,6 +95,128 @@ def test_binary_contains_exact():
     assert expected.equals(result)
 
 
+# We use isprintable to find out about codepoints that Python doesn't know but
+# utf8proc does (or, in a future version of Python, the other way around).
+# These codepoints cannot be compared between Arrow and Python.
+def _find_new_unicode_codepoints():
+    """Return codepoints whose isprintable differs in Arrow vs. Python."""
+    mismatches = set()
+    for codepoint in range(0x80, 0x11000):
+        if 0xD800 <= codepoint < 0xE000:
+            continue  # bug? pyarrow doesn't allow utf16 surrogates
+        char = chr(codepoint)
+        arrow_value = pc.utf8_isprintable(pa.array([char]))[0].as_py()
+        if arrow_value != char.isprintable():
+            mismatches.add(codepoint)
+    return mismatches
+
+
+new_unicode_codepoints = _find_new_unicode_codepoints()
+
+# Python claims these are not alpha, not sure why; they are in
+#  gc='Other Letter': https://graphemica.com/%E1%B3%B2
+unknown_issue_isalpha = {0x1cf2, 0x1cf3}
+# utf8proc does not know if codepoints are lower case
+utf8proc_issue_islower = {0xaa, 0xba, 0x2b0, 0x2b1, 0x2b2, 0x2b3, 0x2b4,
+                          0x2b5, 0x2b6, 0x2b7, 0x2b8, 0x2c0, 0x2c1, 0x2e0,
+                          0x2e1, 0x2e2, 0x2e3, 0x2e4, 0x37a, 0x1d2c, 0x1d2d,
+                          0x1d2e, 0x1d2f, 0x1d30, 0x1d31, 0x1d32, 0x1d33,
+                          0x1d34, 0x1d35, 0x1d36, 0x1d37, 0x1d38, 0x1d39,
+                          0x1d3a, 0x1d3b, 0x1d3c, 0x1d3d, 0x1d3e, 0x1d3f,
+                          0x1d40, 0x1d41, 0x1d42, 0x1d43, 0x1d44, 0x1d45,
+                          0x1d46, 0x1d47, 0x1d48, 0x1d49, 0x1d4a, 0x1d4b,
+                          0x1d4c, 0x1d4d, 0x1d4e, 0x1d4f, 0x1d50, 0x1d51,
+                          0x1d52, 0x1d53, 0x1d54, 0x1d55, 0x1d56, 0x1d57,
+                          0x1d58, 0x1d59, 0x1d5a, 0x1d5b, 0x1d5c, 0x1d5d,
+                          0x1d5e, 0x1d5f, 0x1d60, 0x1d61, 0x1d62, 0x1d63,
+                          0x1d64, 0x1d65, 0x1d66, 0x1d67, 0x1d68, 0x1d69,
+                          0x1d6a, 0x1d78, 0x1d9b, 0x1d9c, 0x1d9d, 0x1d9e,
+                          0x1d9f, 0x1da0, 0x1da1, 0x1da2, 0x1da3, 0x1da4,
+                          0x1da5, 0x1da6, 0x1da7, 0x1da8, 0x1da9, 0x1daa,
+                          0x1dab, 0x1dac, 0x1dad, 0x1dae, 0x1daf, 0x1db0,
+                          0x1db1, 0x1db2, 0x1db3, 0x1db4, 0x1db5, 0x1db6,
+                          0x1db7, 0x1db8, 0x1db9, 0x1dba, 0x1dbb, 0x1dbc,
+                          0x1dbd, 0x1dbe, 0x1dbf, 0x2071, 0x207f, 0x2090,
+                          0x2091, 0x2092, 0x2093, 0x2094, 0x2095, 0x2096,
+                          0x2097, 0x2098, 0x2099, 0x209a, 0x209b, 0x209c,
+                          0x2c7c, 0x2c7d, 0xa69c, 0xa69d, 0xa770, 0xa7f8,
+                          0xa7f9, 0xab5c, 0xab5d, 0xab5e, 0xab5f, }
+# utf8proc lacks numeric info; NOTE(review): this set is reassigned below
+numeric_info_missing = {0x3405, 0x3483, 0x382a, 0x3b4d, 0x4e00, 0x4e03,
+                        0x4e07, 0x4e09, 0x4e5d, 0x4e8c, 0x4e94, 0x4e96,
+                        0x4ebf, 0x4ec0, 0x4edf, 0x4ee8, 0x4f0d, 0x4f70,
+                        0x5104, 0x5146, 0x5169, 0x516b, 0x516d, 0x5341,
+                        0x5343, 0x5344, 0x5345, 0x534c, 0x53c1, 0x53c2,
+                        0x53c3, 0x53c4, 0x56db, 0x58f1, 0x58f9, 0x5e7a,
+                        0x5efe, 0x5eff, 0x5f0c, 0x5f0d, 0x5f0e, 0x5f10,
+                        0x62fe, 0x634c, 0x67d2, 0x6f06, 0x7396, 0x767e,
+                        0x8086, 0x842c, 0x8cae, 0x8cb3, 0x8d30, 0x9621,
+                        0x9646, 0x964c, 0x9678, 0x96f6, 0xf96b, 0xf973,
+                        0xf978, 0xf9b2, 0xf9d1, 0xf9d3, 0xf9fd, 0x10fc5,
+                        0x10fc6, 0x10fc7, 0x10fc8, 0x10fc9, 0x10fca,
+                        0x10fcb, }
+# utf8proc has no digit/numeric information
+digit_info_missing = {0xb2, 0xb3, 0xb9, 0x1369, 0x136a, 0x136b, 0x136c,
+                      0x136d, 0x136e, 0x136f, 0x1370, 0x1371, 0x19da, 0x2070,
+                      0x2074, 0x2075, 0x2076, 0x2077, 0x2078, 0x2079, 0x2080,
+                      0x2081, 0x2082, 0x2083, 0x2084, 0x2085, 0x2086, 0x2087,
+                      0x2088, 0x2089, 0x2460, 0x2461, 0x2462, 0x2463, 0x2464,
+                      0x2465, 0x2466, 0x2467, 0x2468, 0x2474, 0x2475, 0x2476,
+                      0x2477, 0x2478, 0x2479, 0x247a, 0x247b, 0x247c, 0x2488,
+                      0x2489, 0x248a, 0x248b, 0x248c, 0x248d, 0x248e, 0x248f,
+                      0x2490, 0x24ea, 0x24f5, 0x24f6, 0x24f7, 0x24f8, 0x24f9,
+                      0x24fa, 0x24fb, 0x24fc, 0x24fd, 0x24ff, 0x2776, 0x2777,
+                      0x2778, 0x2779, 0x277a, 0x277b, 0x277c, 0x277d, 0x277e,
+                      0x2780, 0x2781, 0x2782, 0x2783, 0x2784, 0x2785, 0x2786,
+                      0x2787, 0x2788, 0x278a, 0x278b, 0x278c, 0x278d, 0x278e,
+                      0x278f, 0x2790, 0x2791, 0x2792, 0x10a40, 0x10a41,
+                      0x10a42, 0x10a43, 0x10e60, 0x10e61, 0x10e62, 0x10e63,
+                      0x10e64, 0x10e65, 0x10e66, 0x10e67, 0x10e68, }
+numeric_info_missing = {0x3405, 0x3483, 0x382a, 0x3b4d, 0x4e00, 0x4e03,
+                        0x4e07, 0x4e09, 0x4e5d, 0x4e8c, 0x4e94, 0x4e96,
+                        0x4ebf, 0x4ec0, 0x4edf, 0x4ee8, 0x4f0d, 0x4f70,
+                        0x5104, 0x5146, 0x5169, 0x516b, 0x516d, 0x5341,
+                        0x5343, 0x5344, 0x5345, 0x534c, 0x53c1, 0x53c2,
+                        0x53c3, 0x53c4, 0x56db, 0x58f1, 0x58f9, 0x5e7a,
+                        0x5efe, 0x5eff, 0x5f0c, 0x5f0d, 0x5f0e, 0x5f10,
+                        0x62fe, 0x634c, 0x67d2, 0x6f06, 0x7396, 0x767e,
+                        0x8086, 0x842c, 0x8cae, 0x8cb3, 0x8d30, 0x9621,
+                        0x9646, 0x964c, 0x9678, 0x96f6, 0xf96b, 0xf973,
+                        0xf978, 0xf9b2, 0xf9d1, 0xf9d3, 0xf9fd, }
+# Codepoints skipped per predicate in test_string_py_compat_boolean
+codepoints_ignore = {
+    'isalnum': numeric_info_missing | digit_info_missing |
+    unknown_issue_isalpha,
+    'isalpha': unknown_issue_isalpha,
+    'isdigit': digit_info_missing,
+    'isnumeric': numeric_info_missing,
+    'islower': utf8proc_issue_islower
+}
+
+
+@pytest.mark.parametrize('function_name', ['isalnum', 'isalpha', 'isascii',
+                                           'isdecimal', 'isdigit', 'islower',
+                                           'isnumeric', 'isprintable',
+                                           'isspace', 'isupper', ])
+@pytest.mark.parametrize('variant', ['ascii', 'utf8'])
+def test_string_py_compat_boolean(function_name, variant):
+    """Compare each Arrow predicate with the same-named Python str method."""
+    arrow_name = variant + "_" + function_name
+    if not hasattr(pc, arrow_name):
+        return  # function not exposed by pyarrow.compute
+    arrow_func = getattr(pc, arrow_name)
+    ignore = (codepoints_ignore.get(function_name, set()) |
+              new_unicode_codepoints)
+    # `ascii` alone is the (always truthy) builtin: test the variant instead
+    for i in range(128 if variant == 'ascii' else 0x11000):
+        # skip utf16 surrogates (rejected by pyarrow) and the known issues
+        if i in range(0xD800, 0xE000) or i in ignore:
+            continue
+        c = chr(i)
+        ar = pa.array([c])
+        assert arrow_func(ar)[0].as_py() == getattr(c, function_name)()
+
+
 @pytest.mark.parametrize(('ty', 'values'), all_array_types)
 def test_take(ty, values):
     arr = pa.array(values, type=ty)