From ec2c19ff814c846267edc138922990ca6ada2238 Mon Sep 17 00:00:00 2001 From: Eduardo Ponce Date: Mon, 2 Aug 2021 20:48:15 -0400 Subject: [PATCH 1/8] add ASCII capitalize kernel --- .../arrow/compute/kernels/scalar_string.cc | 27 +++++++++++++++++++ .../compute/kernels/scalar_string_test.cc | 7 +++++ docs/source/cpp/compute.rst | 2 ++ docs/source/python/api/compute.rst | 1 + 4 files changed, 37 insertions(+) diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc index 5359567fc12..250d2a223a0 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string.cc @@ -632,6 +632,24 @@ struct AsciiSwapCase { } }; +struct AsciiCapitalizeTransform : public StringTransformBase { + int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits, + uint8_t* output) { + if (input_string_ncodeunits > 0) { + output[0] = ascii_toupper(input[0]); + std::memcpy(output + 1, input + 1, input_string_ncodeunits - 1); + } + return input_string_ncodeunits; + } + + Status InvalidStatus() override { + return Status::Invalid("Invalid ASCII sequence in input"); + } +}; + +template +using AsciiCapitalize = StringTransformExec; + // ---------------------------------------------------------------------- // exact pattern detection @@ -4074,6 +4092,13 @@ const FunctionDoc ascii_swapcase_doc( "non-ASCII characters, use \"utf8_swapcase\" instead."), {"strings"}); +const FunctionDoc ascii_capitalize_doc( + "Capilatize the first character of ASCII input", + ("For each string in `strings`, return a capitalized version.\n\n" + "This function assumes the input is fully ASCII. 
If it may contain\n" + "non-ASCII characters, use \"utf8_capitalize\" instead."), + {"strings"}); + const FunctionDoc utf8_upper_doc( "Transform input to uppercase", ("For each string in `strings`, return an uppercase version."), {"strings"}); @@ -4113,6 +4138,8 @@ void RegisterScalarStringAscii(FunctionRegistry* registry) { MemAllocation::NO_PREALLOCATE); MakeUnaryStringBatchKernel( "ascii_swapcase", registry, &ascii_swapcase_doc, MemAllocation::NO_PREALLOCATE); + MakeUnaryStringBatchKernel("ascii_capitalize", registry, + &ascii_capitalize_doc); MakeUnaryStringBatchKernel("ascii_trim_whitespace", registry, &ascii_trim_whitespace_doc); MakeUnaryStringBatchKernel("ascii_ltrim_whitespace", registry, diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index 3aa6f5368d2..e0bbb27621a 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -403,6 +403,13 @@ TYPED_TEST(TestStringKernels, AsciiSwapCase) { "[\"HeLLo, wOrLD!\", \"$. 
a35?\"]"); } +TYPED_TEST(TestStringKernels, AsciiCapitalize) { + this->CheckUnary("ascii_capitalize", "[]", this->type(), "[]"); + this->CheckUnary("ascii_capitalize", + "[\"aAazZæÆ&\", null, \"\", \"bBB\", \"one word\"]", this->type(), + "[\"AAazZæÆ&\", null, \"\", \"BBB\", \"One word\"]"); +} + TYPED_TEST(TestStringKernels, AsciiReverse) { this->CheckUnary("ascii_reverse", "[]", this->type(), "[]"); this->CheckUnary("ascii_reverse", R"(["abcd", null, "", "bbb"])", this->type(), diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 01dc1d92e17..b82e6f24831 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -587,6 +587,8 @@ String transforms +-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+ | Function name | Arity | Input types | Output type | Options class | Notes | +=========================+=======+========================+========================+===================================+=======+ +| ascii_capitalize | Unary | String-like | String-like | | | ++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+ | ascii_lower | Unary | String-like | String-like | | \(1) | +-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+ | ascii_reverse | Unary | String-like | String-like | | \(2) | diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index c503cba319c..07220c2f89b 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -250,6 +250,7 @@ String Transforms .. 
autosummary:: :toctree: ../generated/ + ascii_capitalize ascii_center ascii_lpad ascii_ltrim From 6dd7710a1f4b6f0f0d20d7c1fb929558aa7177ea Mon Sep 17 00:00:00 2001 From: Eduardo Ponce Date: Tue, 3 Aug 2021 10:26:26 -0400 Subject: [PATCH 2/8] fix bugs and add utf8_capitalize --- .../arrow/compute/kernels/scalar_string.cc | 68 ++++++++++++++----- .../compute/kernels/scalar_string_test.cc | 17 ++++- docs/source/cpp/compute.rst | 2 + docs/source/python/api/compute.rst | 1 + 4 files changed, 67 insertions(+), 21 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc index 250d2a223a0..41a0cf2d595 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string.cc @@ -446,10 +446,10 @@ struct StringTransformCodepoint : public StringTransformBase { // struct CaseMappingMixin { struct CaseMappingTransform { static int64_t MaxCodeunits(int64_t ninputs, int64_t input_ncodeunits) { - // Section 5.18 of the Unicode spec claim that the number of codepoints for case + // Section 5.18 of the Unicode spec claims that the number of codepoints for case // mapping can grow by a factor of 3. This means grow by a factor of 3 in bytes // However, since we don't support all casings (SpecialCasing.txt) the growth - // in bytes iss actually only at max 3/2 (as covered by the unittest). + // in bytes is actually only at max 3/2 (as covered by the unittest). // Note that rounding down the 3/2 is ok, since only codepoints encoded by // two code units (even) can grow to 3 code units. 
return static_cast(input_ncodeunits) * 3 / 2; @@ -496,6 +496,36 @@ template using UTF8SwapCase = StringTransformExec>; +struct Utf8CapitalizeTransform : public StringTransformBase { + int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits, + uint8_t* output) { + uint8_t* output_start = output; + if (input_string_ncodeunits > 0) { + // Get number of code units in first code point + uint32_t codepoint = 0; + const uint8_t* i = input; + if (ARROW_PREDICT_FALSE(!util::UTF8Decode(&i, &codepoint))) { + return kTransformError; + } + int64_t codepoint_ncodeunits = std::min(i - input, input_string_ncodeunits); + if (ARROW_PREDICT_FALSE( + !util::UTF8Transform(input, input + codepoint_ncodeunits, &output, + UTF8UpperTransform::TransformCodepoint))) { + return kTransformError; + } + if (ARROW_PREDICT_FALSE(!util::UTF8Transform( + input + codepoint_ncodeunits, input + input_string_ncodeunits, &output, + UTF8LowerTransform::TransformCodepoint))) { + return kTransformError; + } + } + return output - output_start; + } +}; + +template +using Utf8Capitalize = StringTransformExec; + #endif // ARROW_WITH_UTF8PROC struct AsciiReverseTransform : public StringTransformBase { @@ -636,15 +666,11 @@ struct AsciiCapitalizeTransform : public StringTransformBase { int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits, uint8_t* output) { if (input_string_ncodeunits > 0) { - output[0] = ascii_toupper(input[0]); - std::memcpy(output + 1, input + 1, input_string_ncodeunits - 1); + TransformAsciiUpper(input, 1, output); + TransformAsciiLower(input + 1, input_string_ncodeunits, output + 1); } return input_string_ncodeunits; } - - Status InvalidStatus() override { - return Status::Invalid("Invalid ASCII sequence in input"); - } }; template @@ -4093,12 +4119,19 @@ const FunctionDoc ascii_swapcase_doc( {"strings"}); const FunctionDoc ascii_capitalize_doc( - "Capilatize the first character of ASCII input", + "Capitalize the first character of ASCII input", ("For each 
string in `strings`, return a capitalized version.\n\n" "This function assumes the input is fully ASCII. If it may contain\n" "non-ASCII characters, use \"utf8_capitalize\" instead."), {"strings"}); +const FunctionDoc ascii_reverse_doc( + "Reverse ASCII input", + ("For each ASCII string in `strings`, return a reversed version.\n\n" + "This function assumes the input is fully ASCII. If it may contain\n" + "non-ASCII characters, use \"utf8_reverse\" instead."), + {"strings"}); + const FunctionDoc utf8_upper_doc( "Transform input to uppercase", ("For each string in `strings`, return an uppercase version."), {"strings"}); @@ -4112,17 +4145,14 @@ const FunctionDoc utf8_swapcase_doc( "lowercase", ("For each string in `strings`, return an opposite case version."), {"strings"}); -const FunctionDoc ascii_reverse_doc( - "Reverse ASCII input", - ("For each ASCII string in `strings`, return a reversed version.\n\n" - "This function assumes the input is fully ASCII. If it may contain\n" - "non-ASCII characters, use \"utf8_reverse\" instead."), - {"strings"}); +const FunctionDoc utf8_capitalize_doc( + "Capitalize the first codepoint of UTF8 input", + ("For each UTF8 string in `strings`, return a capitalized version."), {"strings"}); const FunctionDoc utf8_reverse_doc( - "Reverse utf8 input", - ("For each utf8 string in `strings`, return a reversed version.\n\n" - "This function operates on codepoints/UTF-8 code units, not grapheme\n" + "Reverse UTF8 input", + ("For each UTF8 string in `strings`, return a reversed version.\n\n" + "This function operates on codepoints/UTF8 code units, not grapheme\n" "clusters. 
Hence, it will not correctly reverse grapheme clusters\n" "composed of multiple codepoints."), {"strings"}); @@ -4185,6 +4215,8 @@ void RegisterScalarStringAscii(FunctionRegistry* registry) { MakeUnaryStringUTF8TransformKernel("utf8_lower", registry, &utf8_lower_doc); MakeUnaryStringUTF8TransformKernel("utf8_swapcase", registry, &utf8_swapcase_doc); + MakeUnaryStringBatchKernel("utf8_capitalize", registry, + &utf8_capitalize_doc); MakeUnaryStringBatchKernel("utf8_trim_whitespace", registry, &utf8_trim_whitespace_doc); MakeUnaryStringBatchKernel("utf8_ltrim_whitespace", registry, diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index e0bbb27621a..3b1603cdc3a 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -406,8 +406,9 @@ TYPED_TEST(TestStringKernels, AsciiSwapCase) { TYPED_TEST(TestStringKernels, AsciiCapitalize) { this->CheckUnary("ascii_capitalize", "[]", this->type(), "[]"); this->CheckUnary("ascii_capitalize", - "[\"aAazZæÆ&\", null, \"\", \"bBB\", \"one word\"]", this->type(), - "[\"AAazZæÆ&\", null, \"\", \"BBB\", \"One word\"]"); + "[\"aAazZæÆ&\", null, \"\", \"bBB\", \"hEllO, WoRld!\", \"$. A3\"]", + this->type(), + "[\"AaazzæÆ&\", null, \"\", \"Bbb\", \"Hello, world!\", \"$. 
a3\"]"); } TYPED_TEST(TestStringKernels, AsciiReverse) { @@ -469,7 +470,7 @@ TYPED_TEST(TestStringKernels, Utf8Upper) { this->CheckUnary("utf8_upper", "[\"aAazZæÆ&\", null, \"\", \"b\"]", this->type(), "[\"AAAZZÆÆ&\", null, \"\", \"B\"]"); - // test varying encoding lenghts and thus changing indices/offsets + // test varying encoding lengths and thus changing indices/offsets this->CheckUnary("utf8_upper", "[\"ɑɽⱤoW\", null, \"ıI\", \"b\"]", this->type(), "[\"ⱭⱤⱤOW\", null, \"II\", \"B\"]"); @@ -528,6 +529,16 @@ TYPED_TEST(TestStringKernels, Utf8SwapCase) { CallFunction("utf8_swapcase", {invalid_input})); } +TYPED_TEST(TestStringKernels, Utf8Capitalize) { + this->CheckUnary("utf8_capitalize", "[]", this->type(), "[]"); + this->CheckUnary("utf8_capitalize", + "[\"aAazZæÆ&\", null, \"\", \"b\", \"ɑɽⱤoW\", \"ıI\", \"ⱥⱥⱥȺ\", " + "\"hEllO, WoRld!\", \"$. A3\"]", + this->type(), + "[\"Aaazzææ&\", null, \"\", \"B\", \"Ɑɽɽow\", \"Ii\", \"Ⱥⱥⱥⱥ\", " + "\"Hello, world!\", \"$. a3\"]"); +} + TYPED_TEST(TestStringKernels, IsAlphaNumericUnicode) { // U+08BE (utf8: \xE0\xA2\xBE) is undefined, but utf8proc things it is // UTF8PROC_CATEGORY_LO diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index b82e6f24831..b12d0f2efde 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -605,6 +605,8 @@ String transforms +-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+ | replace_substring_regex | Unary | String-like | String-like | :struct:`ReplaceSubstringOptions` | \(6) | +-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+ +| utf8_capitalize | Unary | String-like | String-like | | | ++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+ | utf8_length | Unary | String-like | Int32 or Int64 | | \(7) | 
+-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+ | utf8_lower | Unary | String-like | String-like | | \(8) | diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index 07220c2f89b..b3ab086899a 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -267,6 +267,7 @@ String Transforms binary_replace_slice replace_substring replace_substring_regex + utf8_capitalize utf8_center utf8_length utf8_lower From 18148ee4cb56b9122487b4025ca2728fa955082f Mon Sep 17 00:00:00 2001 From: Eduardo Ponce Date: Tue, 3 Aug 2021 10:47:45 -0400 Subject: [PATCH 3/8] add static_cast to std::min --- cpp/src/arrow/compute/kernels/scalar_string.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc index 41a0cf2d595..c75c8065b1f 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string.cc @@ -507,7 +507,8 @@ struct Utf8CapitalizeTransform : public StringTransformBase { if (ARROW_PREDICT_FALSE(!util::UTF8Decode(&i, &codepoint))) { return kTransformError; } - int64_t codepoint_ncodeunits = std::min(i - input, input_string_ncodeunits); + int64_t codepoint_ncodeunits = + std::min(static_cast(i - input), input_string_ncodeunits); if (ARROW_PREDICT_FALSE( !util::UTF8Transform(input, input + codepoint_ncodeunits, &output, UTF8UpperTransform::TransformCodepoint))) { From 01a46e6b4339c807868c1781c77e29873246a0a6 Mon Sep 17 00:00:00 2001 From: Eduardo Ponce Date: Wed, 4 Aug 2021 07:10:33 -0400 Subject: [PATCH 4/8] extend tests (non-cased/non-ws chars followed by cased char) --- cpp/src/arrow/compute/kernels/scalar_string_test.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc 
b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index 3b1603cdc3a..920197ca3c3 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -406,9 +406,11 @@ TYPED_TEST(TestStringKernels, AsciiSwapCase) { TYPED_TEST(TestStringKernels, AsciiCapitalize) { this->CheckUnary("ascii_capitalize", "[]", this->type(), "[]"); this->CheckUnary("ascii_capitalize", - "[\"aAazZæÆ&\", null, \"\", \"bBB\", \"hEllO, WoRld!\", \"$. A3\"]", + "[\"aAazZæÆ&\", null, \"\", \"bBB\", \"hEllO, WoRld!\", \"$. A3\", " + "\"!hELlo, wORLd!\"]", this->type(), - "[\"AaazzæÆ&\", null, \"\", \"Bbb\", \"Hello, world!\", \"$. a3\"]"); + "[\"AaazzæÆ&\", null, \"\", \"Bbb\", \"Hello, world!\", \"$. a3\", " + "\"!hello, world!\"]"); } TYPED_TEST(TestStringKernels, AsciiReverse) { @@ -533,10 +535,10 @@ TYPED_TEST(TestStringKernels, Utf8Capitalize) { this->CheckUnary("utf8_capitalize", "[]", this->type(), "[]"); this->CheckUnary("utf8_capitalize", "[\"aAazZæÆ&\", null, \"\", \"b\", \"ɑɽⱤoW\", \"ıI\", \"ⱥⱥⱥȺ\", " - "\"hEllO, WoRld!\", \"$. A3\"]", + "\"hEllO, WoRld!\", \"$. A3\", \"!ɑⱤⱤow\"]", this->type(), "[\"Aaazzææ&\", null, \"\", \"B\", \"Ɑɽɽow\", \"Ii\", \"Ⱥⱥⱥⱥ\", " - "\"Hello, world!\", \"$. 
a3\", \"!ɑɽɽow\"]"); } TYPED_TEST(TestStringKernels, IsAlphaNumericUnicode) { From e0e7947815418face6846222457dc090829c1c51 Mon Sep 17 00:00:00 2001 From: Eduardo Ponce Date: Wed, 4 Aug 2021 08:36:22 -0400 Subject: [PATCH 5/8] fix buffer overflow in ASCII capitalize --- cpp/src/arrow/compute/kernels/scalar_string.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc index c75c8065b1f..88d7cc6d649 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string.cc @@ -668,7 +668,7 @@ struct AsciiCapitalizeTransform : public StringTransformBase { uint8_t* output) { if (input_string_ncodeunits > 0) { TransformAsciiUpper(input, 1, output); - TransformAsciiLower(input + 1, input_string_ncodeunits, output + 1); + TransformAsciiLower(input + 1, input_string_ncodeunits - 1, output + 1); } return input_string_ncodeunits; } From 49851898b0cd7ed44ee8769fd4cb85fbf3ce6991 Mon Sep 17 00:00:00 2001 From: Eduardo Ponce Date: Wed, 4 Aug 2021 08:44:59 -0400 Subject: [PATCH 6/8] remove redundant "UTF8" from FunctionDoc --- cpp/src/arrow/compute/kernels/scalar_string.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc index 88d7cc6d649..686a00d5e5b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string.cc @@ -4147,12 +4147,12 @@ const FunctionDoc utf8_swapcase_doc( ("For each string in `strings`, return an opposite case version."), {"strings"}); const FunctionDoc utf8_capitalize_doc( - "Capitalize the first codepoint of UTF8 input", - ("For each UTF8 string in `strings`, return a capitalized version."), {"strings"}); + "Capitalize the first codepoint of input", + ("For each string in `strings`, return a capitalized version."), {"strings"}); const FunctionDoc 
utf8_reverse_doc( - "Reverse UTF8 input", - ("For each UTF8 string in `strings`, return a reversed version.\n\n" + "Reverse input", + ("For each string in `strings`, return a reversed version.\n\n" "This function operates on codepoints/UTF8 code units, not grapheme\n" "clusters. Hence, it will not correctly reverse grapheme clusters\n" "composed of multiple codepoints."), From 997919e76e8a0642ef563476cd1bf3241d04b618 Mon Sep 17 00:00:00 2001 From: Eduardo Ponce Date: Wed, 4 Aug 2021 09:41:33 -0400 Subject: [PATCH 7/8] update function Docstrings --- cpp/src/arrow/compute/kernels/scalar_string.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc index 686a00d5e5b..dddd50dc92e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string.cc @@ -4147,13 +4147,15 @@ const FunctionDoc utf8_swapcase_doc( ("For each string in `strings`, return an opposite case version."), {"strings"}); const FunctionDoc utf8_capitalize_doc( - "Capitalize the first codepoint of input", - ("For each string in `strings`, return a capitalized version."), {"strings"}); + "Capitalize the first character of input", + ("For each string in `strings`, return a capitalized version,\n" + "with the first character uppercased and the others lowercased."), + {"strings"}); const FunctionDoc utf8_reverse_doc( "Reverse input", ("For each string in `strings`, return a reversed version.\n\n" - "This function operates on codepoints/UTF8 code units, not grapheme\n" + "This function operates on Unicode codepoints, not grapheme\n" "clusters. 
Hence, it will not correctly reverse grapheme clusters\n" "composed of multiple codepoints."), {"strings"}); From 134e854f173765cd343990df1b75bc9a2af41c73 Mon Sep 17 00:00:00 2001 From: Eduardo Ponce Date: Wed, 4 Aug 2021 09:55:16 -0400 Subject: [PATCH 8/8] use ascii_toupper for first char --- cpp/src/arrow/compute/kernels/scalar_string.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_string.cc b/cpp/src/arrow/compute/kernels/scalar_string.cc index dddd50dc92e..8d815274479 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string.cc @@ -667,7 +667,7 @@ struct AsciiCapitalizeTransform : public StringTransformBase { int64_t Transform(const uint8_t* input, int64_t input_string_ncodeunits, uint8_t* output) { if (input_string_ncodeunits > 0) { - TransformAsciiUpper(input, 1, output); + *output = ascii_toupper(*input); TransformAsciiLower(input + 1, input_string_ncodeunits - 1, output + 1); } return input_string_ncodeunits;