From 3c483aaeaae3201a1751613d7b9edd2f5e96664d Mon Sep 17 00:00:00 2001
From: "Maarten A. Breddels"
Date: Fri, 5 Jun 2020 16:49:40 +0200
Subject: [PATCH] ARROW-555: [C++] String algorithms

---
Review notes (text in this section is not part of the commit message):
  * AsciiLower folds 'A'-'Z' by hand instead of calling std::tolower, so the
    kernel's output does not depend on the process's C locale.
  * Line breaks, indentation and template argument lists (`<...>`) were missing
    from the copy of this patch that was reviewed and have been reconstructed;
    compare the context lines against the tree before applying.
  * The post-image blob id on the scalar_string.cc `index` line predates the
    AsciiLower change described above.

 cpp/src/arrow/compute/kernels/scalar_string.cc      | 16 ++++++++++++++++
 cpp/src/arrow/compute/kernels/scalar_string_test.cc |  5 +++++
 python/pyarrow/compute.py                           |  1 +
 3 files changed, 22 insertions(+)

diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc
index efb54e79e98..232f195c8b6 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string.cc
@@ -48,6 +48,21 @@ struct AsciiUpper {
   }
 };
 
+// Returns a copy of `val` in which the ASCII letters 'A'-'Z' are replaced by
+// 'a'-'z'. Every other byte, including the bytes of multi-byte UTF-8 sequences,
+// is copied through unchanged. The mapping is spelled out instead of using
+// std::tolower because std::tolower's result depends on the current C locale.
+struct AsciiLower {
+  template <typename... Ignored>
+  static std::string Call(KernelContext*, const util::string_view& val) {
+    std::string result = val.to_string();
+    std::transform(result.begin(), result.end(), result.begin(), [](char c) {
+      return (c >= 'A' && c <= 'Z') ? static_cast<char>(c + ('a' - 'A')) : c;
+    });
+    return result;
+  }
+};
+
 void AddAsciiLength(FunctionRegistry* registry) {
   auto func = std::make_shared<ScalarFunction>("ascii_length", Arity::Unary());
   ArrayKernelExec exec_offset_32 =
@@ -108,6 +123,7 @@ void AddStrptime(FunctionRegistry* registry) {
 
 void RegisterScalarStringAscii(FunctionRegistry* registry) {
   MakeUnaryStringToString<AsciiUpper>("ascii_upper", registry);
+  MakeUnaryStringToString<AsciiLower>("ascii_lower", registry);
   AddAsciiLength(registry);
   AddStrptime(registry);
 }
diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
index 5a4c2c0e219..2e96b222e83 100644
--- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc
@@ -61,6 +61,11 @@ TYPED_TEST(TestStringKernels, AsciiUpper) {
                    "[\"AAA&\", null, \"\", \"B\"]");
 }
 
+TYPED_TEST(TestStringKernels, AsciiLower) {
+  this->CheckUnary("ascii_lower", "[\"aAa&\", null, \"\", \"b\"]", this->string_type(),
+                   "[\"aaa&\", null, \"\", \"b\"]");
+}
+
 TYPED_TEST(TestStringKernels, Strptime) {
   std::string input1 = R"(["5/1/2020", null, "12/11/1900"])";
   std::string output1 = R"(["2020-05-01", null, "1900-12-11"])";
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 3761c91cc2a..62c58f9e9c5 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -92,6 +92,7 @@ def func(arg):
 
 ascii_length = _simple_unary_function('ascii_length')
 ascii_upper = _simple_unary_function('ascii_upper')
+ascii_lower = _simple_unary_function('ascii_lower')
 
 
 def sum(array):