diff --git a/src/libraries/Common/tests/Tests/System/StringTests.cs b/src/libraries/Common/tests/Tests/System/StringTests.cs
index c16a31a1f60140..3ae653f5d9dff5 100644
--- a/src/libraries/Common/tests/Tests/System/StringTests.cs
+++ b/src/libraries/Common/tests/Tests/System/StringTests.cs
@@ -5312,12 +5312,46 @@ private static void ToLower_Culture(string input, string expected, CultureInfo c
             }
         }
 
+        public static IEnumerable<object[]> ToLower_Invariant_TestData()
+        {
+            yield return new object[] { "", "" };
+            yield return new object[] { "Ab", "ab" };
+            yield return new object[] { "H-/", "h-/" };
+            yield return new object[] { "Hello", "hello" };
+            yield return new object[] { "hElLo", "hello" };
+            yield return new object[] { "AbcdAbc", "abcdabc" };
+            yield return new object[] { "AbcdAbcd", "abcdabcd" };
+            yield return new object[] { "AbcdAbcd/", "abcdabcd/" };
+            yield return new object[] { "AbcdAbcd/-", "abcdabcd/-" };
+            yield return new object[] { "AbcdAbcd/-_", "abcdabcd/-_" };
+            yield return new object[] { "AbcdAbcd-bcdAbc", "abcdabcd-bcdabc" };
+            yield return new object[] { "AbcdAbcd-bcdAbcd", "abcdabcd-bcdabcd" };
+            yield return new object[] { "AbcdAbcd-bcdAbcdA", "abcdabcd-bcdabcda" };
+            yield return new object[] { "AbcdAbcd-bcdAbcdA/", "abcdabcd-bcdabcda/" };
+            // Non-ASCII char:
+            yield return new object[] { "\u0436", "\u0436" };
+            yield return new object[] { "H\u0436/", "h\u0436/" };
+            yield return new object[] { "Hell\u0436", "hell\u0436" };
+            yield return new object[] { "hEl\u0436o", "hel\u0436o" };
+            yield return new object[] { "AbcdAb\u0436", "abcdab\u0436" };
+            yield return new object[] { "Abcd\u0436bcd", "abcd\u0436bcd" };
+            yield return new object[] { "AbcdAbc\u0436/", "abcdabc\u0436/" };
+            yield return new object[] { "AbcdAbcd/\u0436", "abcdabcd/\u0436" };
+            yield return new object[] { "AbcdAbcd/-\u0436", "abcdabcd/-\u0436" };
+            yield return new object[] { "AbcdAbc\u0436d-bcdAbc", "abcdabc\u0436d-bcdabc" };
+            yield return new object[] { "AbcdAbcd-b\u0436dAbcd", "abcdabcd-b\u0436dabcd" };
+            yield return new object[] { "AbcdAbcd-bcd\u0436bcdA", "abcdabcd-bcd\u0436bcda" };
+            yield return new object[] { "AbcdAbcd-bcdAbc\u0436A/", "abcdabcd-bcdabc\u0436a/" };
+
+            yield return new object[]
+            {
+                "\u0410\u0411\u0412\u0413\u0414\u0415\u0401\u0416\u0417\u0418\u0419\u041A\u041B\u041C\u041D\u041E\u041F\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042C\u042B\u042A\u042D\u042E\u042F",
+                "\u0430\u0431\u0432\u0433\u0434\u0435\u0451\u0436\u0437\u0438\u0439\u043A\u043B\u043C\u043D\u043E\u043F\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044C\u044B\u044A\u044D\u044E\u044F"
+            };
+        }
+
         [Theory]
-        [InlineData("hello", "hello")]
-        [InlineData("HELLO", "hello")]
-        [InlineData("hElLo", "hello")]
-        [InlineData("HeLlO", "hello")]
-        [InlineData("", "")]
+        [MemberData(nameof(ToLower_Invariant_TestData))]
         public static void ToLowerInvariant(string s, string expected)
         {
             Assert.Equal(expected, s.ToLowerInvariant());
@@ -5885,12 +5919,46 @@ public static void ToUpper_TurkishI_InvariantCulture(string s, string expected)
             }
         }
 
+        public static IEnumerable<object[]> ToUpper_Invariant_TestData()
+        {
+            yield return new object[] { "", "" };
+            yield return new object[] { "Ab", "AB" };
+            yield return new object[] { "H-/", "H-/" };
+            yield return new object[] { "Hello", "HELLO" };
+            yield return new object[] { "hElLo", "HELLO" };
+            yield return new object[] { "AbcdAbc", "ABCDABC" };
+            yield return new object[] { "AbcdAbcd", "ABCDABCD" };
+            yield return new object[] { "AbcdAbcd/", "ABCDABCD/" };
+            yield return new object[] { "AbcdAbcd/-", "ABCDABCD/-" };
+            yield return new object[] { "AbcdAbcd/-_", "ABCDABCD/-_" };
+            yield return new object[] { "AbcdAbcd-bcdAbc", "ABCDABCD-BCDABC" };
+            yield return new object[] { "AbcdAbcd-bcdAbcd", "ABCDABCD-BCDABCD" };
+            yield return new object[] { "AbcdAbcd-bcdAbcdA", "ABCDABCD-BCDABCDA" };
+            yield return new object[] { "AbcdAbcd-bcdAbcdA/", "ABCDABCD-BCDABCDA/" };
+            // Non-ASCII char:
+            yield return new object[] { "\u0436", "\u0416" };
+            yield return new object[] { "H\u0436/", "H\u0416/" };
+            yield return new object[] { "Hell\u0436", "HELL\u0416" };
+            yield return new object[] { "hEl\u0436o", "HEL\u0416O" };
+            yield return new object[] { "AbcdAb\u0436", "ABCDAB\u0416" };
+            yield return new object[] { "Abcd\u0436bcd", "ABCD\u0416BCD" };
+            yield return new object[] { "AbcdAbc\u0436/", "ABCDABC\u0416/" };
+            yield return new object[] { "AbcdAbcd/\u0436", "ABCDABCD/\u0416" };
+            yield return new object[] { "AbcdAbcd/-\u0436", "ABCDABCD/-\u0416" };
+            yield return new object[] { "AbcdAbc\u0436d-bcdAbc", "ABCDABC\u0416D-BCDABC" };
+            yield return new object[] { "AbcdAbcd-b\u0436dAbcd", "ABCDABCD-B\u0416DABCD" };
+            yield return new object[] { "AbcdAbcd-bcd\u0436bcdA", "ABCDABCD-BCD\u0416BCDA" };
+            yield return new object[] { "AbcdAbcd-bcdAbc\u0436A/", "ABCDABCD-BCDABC\u0416A/" };
+
+            yield return new object[]
+            {
+                "\u0430\u0431\u0432\u0433\u0434\u0435\u0451\u0436\u0437\u0438\u0439\u043A\u043B\u043C\u043D\u043E\u043F\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044C\u044B\u044A\u044D\u044E\u044F",
+                "\u0410\u0411\u0412\u0413\u0414\u0415\u0401\u0416\u0417\u0418\u0419\u041A\u041B\u041C\u041D\u041E\u041F\u0420\u0421\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042C\u042B\u042A\u042D\u042E\u042F"
+            };
+        }
+
         [Theory]
-        [InlineData("hello", "HELLO")]
-        [InlineData("HELLO", "HELLO")]
-        [InlineData("hElLo", "HELLO")]
-        [InlineData("HeLlO", "HELLO")]
-        [InlineData("", "")]
+        [MemberData(nameof(ToUpper_Invariant_TestData))]
         public static void ToUpperInvariant(string s, string expected)
         {
             Assert.Equal(expected, s.ToUpperInvariant());
diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs
index 8949e5530b9c19..7efe5497ef9796 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs
@@ -5,6 +5,7 @@
 using System.Diagnostics.CodeAnalysis;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
 using System.Runtime.Serialization;
 using System.Text;
 using System.Text.Unicode;
@@ -207,7 +208,93 @@ private void ChangeCaseCommon<TConversion>(ReadOnlySpan<char> source, Span<char>
             ChangeCaseCommon<TConversion>(ref MemoryMarshal.GetReference(source), ref MemoryMarshal.GetReference(destination), source.Length);
         }
 
-        private unsafe void ChangeCaseCommon<TConversion>(ref char source, ref char destination, int charCount) where TConversion : struct
+        /// <summary>
+        /// Changes the case of <paramref name="charCount"/> chars one Vector128 at a time while the data is ASCII.
+        /// When a vector contains non-ASCII chars (or IsAsciiCasingSameAsInvariant is false) the remaining chars
+        /// are handed to the scalar implementation. Requires at least <c>Vector128&lt;ushort&gt;.Count</c> chars.
+        /// </summary>
+        private unsafe void ChangeCaseCommon_Vector128<TConversion>(ref char source, ref char destination, int charCount)
+            where TConversion : struct
+        {
+            Debug.Assert(charCount >= Vector128<ushort>.Count);
+            Debug.Assert(Vector128.IsHardwareAccelerated);
+
+            // JIT will treat this as a constant in release builds
+            bool toUpper = typeof(TConversion) == typeof(ToUpperConversion);
+            nuint i = 0;
+            if (!IsAsciiCasingSameAsInvariant)
+            {
+                goto NON_ASCII;
+            }
+
+            ref ushort src = ref Unsafe.As<char, ushort>(ref source);
+            ref ushort dst = ref Unsafe.As<char, ushort>(ref destination);
+
+            nuint lengthU = (nuint)charCount;
+            nuint lengthToExamine = lengthU - (nuint)Vector128<ushort>.Count;
+            do
+            {
+                Vector128<ushort> vec = Vector128.LoadUnsafe(ref src, i);
+                if (!Utf16Utility.AllCharsInVector128AreAscii(vec))
+                {
+                    goto NON_ASCII;
+                }
+                vec = toUpper ?
+                    Utf16Utility.Vector128AsciiToUppercase(vec) :
+                    Utf16Utility.Vector128AsciiToLowercase(vec);
+                vec.StoreUnsafe(ref dst, i);
+
+                i += (nuint)Vector128<ushort>.Count;
+            } while (i <= lengthToExamine);
+
+            Debug.Assert(i <= lengthU);
+
+            // Handle trailing elements: reprocess the last full vector of the input. It overlaps chars
+            // already handled by the loop; converting them again yields the same result, and any
+            // non-ASCII char can only be at or after index i, which is where the scalar fallback resumes.
+            if (i < lengthU)
+            {
+                nuint trailingElements = lengthU - (nuint)Vector128<ushort>.Count;
+                Vector128<ushort> vec = Vector128.LoadUnsafe(ref src, trailingElements);
+                if (!Utf16Utility.AllCharsInVector128AreAscii(vec))
+                {
+                    goto NON_ASCII;
+                }
+                vec = toUpper ?
+                    Utf16Utility.Vector128AsciiToUppercase(vec) :
+                    Utf16Utility.Vector128AsciiToLowercase(vec);
+                vec.StoreUnsafe(ref dst, trailingElements);
+            }
+            return;
+
+        NON_ASCII:
+            // Either IsAsciiCasingSameAsInvariant is false or we encountered non-ASCII data, so we can't
+            // perform invariant case conversion here; fallback to ICU/NLS for everything from index i onwards
+            ChangeCaseCommon_Scalar<TConversion>(
+                ref Unsafe.Add(ref source, i),
+                ref Unsafe.Add(ref destination, i),
+                charCount - (int)i);
+        }
+
+        /// <summary>
+        /// Picks the vectorized path when Vector128 is hardware accelerated and the input holds at least one
+        /// full vector of chars; otherwise uses the scalar path.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private unsafe void ChangeCaseCommon<TConversion>(ref char source, ref char destination, int charCount)
+            where TConversion : struct
+        {
+            if (!Vector128.IsHardwareAccelerated || charCount < Vector128<ushort>.Count)
+            {
+                ChangeCaseCommon_Scalar<TConversion>(ref source, ref destination, charCount);
+            }
+            else
+            {
+                ChangeCaseCommon_Vector128<TConversion>(ref source, ref destination, charCount);
+            }
+        }
+
+        private unsafe void ChangeCaseCommon_Scalar<TConversion>(ref char source, ref char destination, int charCount) where TConversion : struct
         {
             Debug.Assert(typeof(TConversion) == typeof(ToUpperConversion) || typeof(TConversion) == typeof(ToLowerConversion));
             bool toUpper = typeof(TConversion) == typeof(ToUpperConversion); // JIT will treat this as a constant in release builds
diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs
index fdee767a6aecc2..b8dec4640195e7 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs
@@ -256,5 +256,45 @@ internal static bool Vector128OrdinalIgnoreCaseAscii(Vector128<ushort> vec1, Vec
             // Compare two lowercased vectors
             return (lcVec1 ^ lcVec2) == Vector128<ushort>.Zero;
         }
+
+        /// <summary>
+        /// Converts a Vector128 that represents 8 ASCII UTF-16 characters to lowercase.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector128<ushort> Vector128AsciiToLowercase(Vector128<ushort> vec)
+        {
+            // ASSUMPTION: Caller has validated that input values are ASCII.
+            Debug.Assert(AllCharsInVector128AreAscii(vec));
+
+            // per byte: adding (0x80 - 'A') wraps every byte >= 'A' into the negative sbyte range; 'A'..'Z' land on -128..-103
+            Vector128<sbyte> lowIndicator1 = Vector128.Create((sbyte)(0x80 - 'A')) + vec.AsSByte();
+
+            // per-byte mask (signed compare): all bits set where the byte is NOT in 'A'..'Z', zero where it is
+            Vector128<sbyte> combIndicator1 = Vector128.LessThan(
+                Vector128.Create(unchecked((sbyte)(('Z' - 'A') - 0x80))), lowIndicator1);
+
+            // AndNot(x, mask) is x & ~mask, so only A-Z get the lowercase indicator (0x20 bit) added
+            return Vector128.AndNot(Vector128.Create((sbyte)0x20), combIndicator1).AsUInt16() + vec;
+        }
+
+        /// <summary>
+        /// Converts a Vector128 that represents 8 ASCII UTF-16 characters to uppercase.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector128<ushort> Vector128AsciiToUppercase(Vector128<ushort> vec)
+        {
+            // ASSUMPTION: Caller has validated that input values are ASCII.
+            Debug.Assert(AllCharsInVector128AreAscii(vec));
+
+            // per byte: adding (0x80 - 'a') wraps every byte >= 'a' into the negative sbyte range; 'a'..'z' land on -128..-103
+            Vector128<sbyte> lowIndicator1 = Vector128.Create((sbyte)(0x80 - 'a')) + vec.AsSByte();
+
+            // per-byte mask (signed compare): all bits set where the byte is NOT in 'a'..'z', zero where it is
+            Vector128<sbyte> combIndicator1 = Vector128.LessThan(
+                Vector128.Create(unchecked((sbyte)(('z' - 'a') - 0x80))), lowIndicator1);
+
+            // AndNot(x, mask) is x & ~mask, so only a-z get the lowercase indicator (0x20 bit) dropped
+            return vec - Vector128.AndNot(Vector128.Create((sbyte)0x20), combIndicator1).AsUInt16();
+        }
     }
 }