From af2b95026a223aa05d77acdbf77adcd6f8b07da2 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Wed, 20 Jul 2022 16:22:56 -0700 Subject: [PATCH 01/46] Initial ASCII methods --- .../System.Private.CoreLib.Shared.projitems | 1 + .../src/System/Buffers/Text/Ascii.cs | 77 +++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 11ca7cec9d6988..634f7c2cdfa714 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -114,6 +114,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs new file mode 100644 index 00000000000000..67452eebaefd00 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs @@ -0,0 +1,77 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Text; + +namespace System.Buffers.Text +{ + public static partial class Ascii + { + /// + /// Returns the index of the first non-ASCII byte in a buffer. + /// + /// The buffer to scan. + /// The index in where the first non-ASCII + /// byte appears, or -1 if the buffer contains only ASCII bytes. 
+        public static unsafe int GetIndexOfFirstNonAsciiByte(ReadOnlySpan<byte> buffer)
+        {
+            // Span lengths are never negative, so going through uint is lossless.
+            nuint bufferLength = (uint)buffer.Length;
+            fixed (byte* pBuffer = &MemoryMarshal.GetReference(buffer))
+            {
+                nuint idxOfFirstNonAsciiElement = ASCIIUtility.GetIndexOfFirstNonAsciiByte(pBuffer, bufferLength);
+                Debug.Assert(idxOfFirstNonAsciiElement <= bufferLength);
+
+                // A result equal to the length means no element was flagged; report that as -1.
+                return (idxOfFirstNonAsciiElement == bufferLength) ? -1 : (int)idxOfFirstNonAsciiElement;
+            }
+        }
+
+        /// <summary>
+        /// Returns the index of the first non-ASCII char in a buffer.
+        /// </summary>
+        /// <param name="buffer">The buffer to scan.</param>
+        /// <returns>The index in <paramref name="buffer"/> where the first non-ASCII
+        /// char appears, or -1 if the buffer contains only ASCII chars.</returns>
+        public static unsafe int GetIndexOfFirstNonAsciiChar(ReadOnlySpan<char> buffer)
+        {
+            // Span lengths are never negative, so going through uint is lossless.
+            nuint bufferLength = (uint)buffer.Length;
+            fixed (char* pBuffer = &MemoryMarshal.GetReference(buffer))
+            {
+                nuint idxOfFirstNonAsciiElement = ASCIIUtility.GetIndexOfFirstNonAsciiChar(pBuffer, bufferLength);
+                Debug.Assert(idxOfFirstNonAsciiElement <= bufferLength);
+
+                // A result equal to the length means no element was flagged; report that as -1.
+                return (idxOfFirstNonAsciiElement == bufferLength) ? -1 : (int)idxOfFirstNonAsciiElement;
+            }
+        }
+
+        /// <summary>
+        /// Determines whether the provided value contains only ASCII bytes.
+        /// </summary>
+        /// <param name="value">The value to inspect.</param>
+        /// <returns>True if <paramref name="value"/> contains only ASCII bytes or is
+        /// empty; False otherwise.</returns>
+        public static unsafe bool IsAscii(ReadOnlySpan<byte> value)
+        {
+            nuint valueLength = (uint)value.Length;
+            fixed (byte* pValue = &MemoryMarshal.GetReference(value))
+            {
+                // The scan returning the full length means it never stopped on an element.
+                return ASCIIUtility.GetIndexOfFirstNonAsciiByte(pValue, valueLength) == valueLength;
+            }
+        }
+
+        /// <summary>
+        /// Determines whether the provided value contains only ASCII chars.
+        /// </summary>
+        /// <param name="value">The value to inspect.</param>
+        /// <returns>True if <paramref name="value"/> contains only ASCII chars or is
+        /// empty; False otherwise.</returns>
+        public static unsafe bool IsAscii(ReadOnlySpan<char> value)
+        {
+            // Span lengths are never negative, so going through uint is lossless.
+            nuint valueLength = (uint)value.Length;
+            fixed (char* pValue = &MemoryMarshal.GetReference(value))
+            {
+                // The scan returning the full length means it never stopped on an element.
+                return ASCIIUtility.GetIndexOfFirstNonAsciiChar(pValue, valueLength) == valueLength;
+            }
+        }
+    }
+}

From bd2b5f1cb6fd69559b5caab15d3368e84dcc9803 Mon Sep 17 00:00:00 2001
From: Levi Broderick
Date: Wed, 20 Jul 2022 16:33:33 -0700
Subject: [PATCH 02/46] Add transcoding APIs

---
 .../src/System/Buffers/Text/Ascii.cs | 74 +++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs
index 67452eebaefd00..9281eb01d56095 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs
@@ -73,5 +73,79 @@ public static unsafe bool IsAscii(ReadOnlySpan<char> value)
                 return ASCIIUtility.GetIndexOfFirstNonAsciiChar(pValue, valueLength) == valueLength;
             }
         }
+
+        /// <summary>
+        /// Copies text from a source buffer to a destination buffer, converting
+        /// from ASCII to UTF-16 during the copy.
+        /// </summary>
+        /// <param name="source">The source buffer from which ASCII text is read.</param>
+        /// <param name="destination">The destination buffer to which UTF-16 text is written.</param>
+        /// <param name="bytesConsumed">The number of bytes actually read from <paramref name="source"/>.</param>
+        /// <param name="charsWritten">The number of chars actually written to <paramref name="destination"/>.</param>
+        /// <returns>An <see cref="OperationStatus"/> describing the result of the operation.</returns>
+        public static unsafe OperationStatus ToUtf16(ReadOnlySpan<byte> source, Span<char> destination, out int bytesConsumed, out int charsWritten)
+        {
+            nuint numElementsToConvert;
+            OperationStatus statusToReturnOnSuccess;
+
+            // Only as many elements as fit in the smaller of the two buffers are attempted.
+            if (source.Length <= destination.Length)
+            {
+                numElementsToConvert = (uint)source.Length;
+                statusToReturnOnSuccess = OperationStatus.Done;
+            }
+            else
+            {
+                numElementsToConvert = (uint)destination.Length;
+                statusToReturnOnSuccess = OperationStatus.DestinationTooSmall;
+            }
+
+            fixed (byte* pSource = &MemoryMarshal.GetReference(source))
+            fixed (char* pDestination = &MemoryMarshal.GetReference(destination))
+            {
+                nuint numElementsActuallyConverted = ASCIIUtility.WidenAsciiToUtf16(pSource, pDestination, numElementsToConvert);
+                Debug.Assert(numElementsActuallyConverted <= numElementsToConvert);
+
+                // One source byte maps to one destination char, so both counts are the same.
+                bytesConsumed = (int)numElementsActuallyConverted;
+                charsWritten = (int)numElementsActuallyConverted;
+
+                // The helper converting fewer elements than requested is surfaced as InvalidData.
+                return (numElementsToConvert == numElementsActuallyConverted) ? statusToReturnOnSuccess : OperationStatus.InvalidData;
+            }
+        }
+
+        /// <summary>
+        /// Copies text from a source buffer to a destination buffer, converting
+        /// from UTF-16 to ASCII during the copy.
+        /// </summary>
+        /// <param name="source">The source buffer from which UTF-16 text is read.</param>
+        /// <param name="destination">The destination buffer to which ASCII text is written.</param>
+        /// <param name="charsConsumed">The number of chars actually read from <paramref name="source"/>.</param>
+        /// <param name="bytesWritten">The number of bytes actually written to <paramref name="destination"/>.</param>
+        /// <returns>An <see cref="OperationStatus"/> describing the result of the operation.</returns>
+        public static unsafe OperationStatus FromUtf16(ReadOnlySpan<char> source, Span<byte> destination, out int charsConsumed, out int bytesWritten)
+        {
+            nuint numElementsToConvert;
+            OperationStatus statusToReturnOnSuccess;
+
+            // Only as many elements as fit in the smaller of the two buffers are attempted.
+            if (source.Length <= destination.Length)
+            {
+                numElementsToConvert = (uint)source.Length;
+                statusToReturnOnSuccess = OperationStatus.Done;
+            }
+            else
+            {
+                numElementsToConvert = (uint)destination.Length;
+                statusToReturnOnSuccess = OperationStatus.DestinationTooSmall;
+            }
+
+            fixed (char* pSource = &MemoryMarshal.GetReference(source))
+            fixed (byte* pDestination = &MemoryMarshal.GetReference(destination))
+            {
+                nuint numElementsActuallyConverted = ASCIIUtility.NarrowUtf16ToAscii(pSource, pDestination, numElementsToConvert);
+                Debug.Assert(numElementsActuallyConverted <= numElementsToConvert);
+
+                // One source char maps to one destination byte, so both counts are the same.
+                charsConsumed = (int)numElementsActuallyConverted;
+                bytesWritten = (int)numElementsActuallyConverted;
+
+                // The helper converting fewer elements than requested is surfaced as InvalidData.
+                return (numElementsToConvert == numElementsActuallyConverted) ? statusToReturnOnSuccess : OperationStatus.InvalidData;
+            }
+        }
     }
 }

From db37d323df5477fb72f24304fbf4d6d8988125f7 Mon Sep 17 00:00:00 2001
From: Levi Broderick
Date: Wed, 20 Jul 2022 17:01:37 -0700
Subject: [PATCH 03/46] Implement Trim

---
 .../src/System/Buffers/Text/Ascii.cs | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs
index 9281eb01d56095..3516007f90c330 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
using System.Diagnostics; +using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Text; @@ -147,5 +148,52 @@ public static unsafe OperationStatus FromUtf16(ReadOnlySpan source, Span value) => TrimHelper(value, TrimType.Both); + public static Range Trim(ReadOnlySpan value) => TrimHelper(value, TrimType.Both); + public static Range TrimStart(ReadOnlySpan value) => TrimHelper(value, TrimType.Head); + public static Range TrimStart(ReadOnlySpan value) => TrimHelper(value, TrimType.Head); + public static Range TrimEnd(ReadOnlySpan value) => TrimHelper(value, TrimType.Tail); + public static Range TrimEnd(ReadOnlySpan value) => TrimHelper(value, TrimType.Tail); + + private static Range TrimHelper(ReadOnlySpan value, TrimType trimType) + where T : unmanaged, IBinaryInteger + { + const uint trimMask = + (1u << (0x09 - 1)) + | (1u << (0x0A - 1)) + | (1u << (0x0B - 1)) + | (1u << (0x0C - 1)) + | (1u << (0x0D - 1)) + | (1u << (0x20 - 1)); + + int start = 0; + if ((trimType & TrimType.Head) != 0) + { + for (; start < value.Length; start++) + { + uint elementValue = uint.CreateTruncating(value[start]); + if ((elementValue > 0x20) || ((trimMask & (1u << ((int)elementValue - 1))) == 0)) + { + break; + } + } + } + + int end = value.Length - 1; + if ((trimType & TrimType.Tail) != 0) + { + for (; start < end; end--) + { + uint elementValue = uint.CreateTruncating(value[end]); + if ((elementValue > 0x20) || ((trimMask & (1u << ((int)elementValue - 1))) == 0)) + { + break; + } + } + } + + return start..(end + 1); + } } } From 4a832ccd435e1c75e1e7c844524eaeedbb54cb66 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Thu, 21 Jul 2022 14:12:01 -0700 Subject: [PATCH 04/46] Split ASCII utilities into separate files --- .../System.Private.CoreLib.Shared.projitems | 3 + .../Buffers/Text/Ascii.CaseConversion.cs | 123 ++++++++++++++++++ .../System/Buffers/Text/Ascii.Transcoding.cs | 86 ++++++++++++ 
.../src/System/Buffers/Text/Ascii.Trimming.cs | 58 +++++++++ .../src/System/Buffers/Text/Ascii.cs | 123 ------------------ 5 files changed, 270 insertions(+), 123 deletions(-) create mode 100644 src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Transcoding.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Trimming.cs diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 634f7c2cdfa714..f84a4652ac5a79 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -115,6 +115,9 @@ + + + diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs new file mode 100644 index 00000000000000..61fd75e872c6d2 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs @@ -0,0 +1,123 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace System.Buffers.Text
+{
+    public static partial class Ascii
+    {
+        /// <summary>Copies ASCII bytes from <paramref name="source"/> to <paramref name="destination"/>, changing 'a'..'z' to uppercase.</summary>
+        /// <returns>An <see cref="OperationStatus"/> describing the result of the operation.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static OperationStatus ToUpper(ReadOnlySpan<byte> source, Span<byte> destination, out int bytesConsumed, out int bytesWritten)
+            => ChangeCase<byte, byte, ToUpperConversion>(source, destination, out bytesConsumed, out bytesWritten);
+
+        /// <summary>Copies ASCII chars from <paramref name="source"/> to <paramref name="destination"/>, changing 'a'..'z' to uppercase.</summary>
+        /// <returns>An <see cref="OperationStatus"/> describing the result of the operation.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static OperationStatus ToUpper(ReadOnlySpan<char> source, Span<char> destination, out int charsConsumed, out int charsWritten)
+            => ChangeCase<char, char, ToUpperConversion>(source, destination, out charsConsumed, out charsWritten);
+
+        /// <summary>Copies ASCII bytes from <paramref name="source"/> to chars in <paramref name="destination"/>, changing 'a'..'z' to uppercase.</summary>
+        /// <returns>An <see cref="OperationStatus"/> describing the result of the operation.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static OperationStatus ToUpper(ReadOnlySpan<byte> source, Span<char> destination, out int bytesConsumed, out int charsWritten)
+            => ChangeCase<byte, char, ToUpperConversion>(source, destination, out bytesConsumed, out charsWritten);
+
+        /// <summary>Copies ASCII chars from <paramref name="source"/> to bytes in <paramref name="destination"/>, changing 'a'..'z' to uppercase.</summary>
+        /// <returns>An <see cref="OperationStatus"/> describing the result of the operation.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static OperationStatus ToUpper(ReadOnlySpan<char> source, Span<byte> destination, out int charsConsumed, out int bytesWritten)
+            => ChangeCase<char, byte, ToUpperConversion>(source, destination, out charsConsumed, out bytesWritten);
+
+        /// <summary>Copies ASCII bytes from <paramref name="source"/> to <paramref name="destination"/>, changing 'A'..'Z' to lowercase.</summary>
+        /// <returns>An <see cref="OperationStatus"/> describing the result of the operation.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static OperationStatus ToLower(ReadOnlySpan<byte> source, Span<byte> destination, out int bytesConsumed, out int bytesWritten)
+            => ChangeCase<byte, byte, ToLowerConversion>(source, destination, out bytesConsumed, out bytesWritten);
+
+        /// <summary>Copies ASCII chars from <paramref name="source"/> to <paramref name="destination"/>, changing 'A'..'Z' to lowercase.</summary>
+        /// <returns>An <see cref="OperationStatus"/> describing the result of the operation.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static OperationStatus ToLower(ReadOnlySpan<char> source, Span<char> destination, out int charsConsumed, out int charsWritten)
+            => ChangeCase<char, char, ToLowerConversion>(source, destination, out charsConsumed, out charsWritten);
+
+        /// <summary>Copies ASCII bytes from <paramref name="source"/> to chars in <paramref name="destination"/>, changing 'A'..'Z' to lowercase.</summary>
+        /// <returns>An <see cref="OperationStatus"/> describing the result of the operation.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static OperationStatus ToLower(ReadOnlySpan<byte> source, Span<char> destination, out int bytesConsumed, out int charsWritten)
+            => ChangeCase<byte, char, ToLowerConversion>(source, destination, out bytesConsumed, out charsWritten);
+
+        /// <summary>Copies ASCII chars from <paramref name="source"/> to bytes in <paramref name="destination"/>, changing 'A'..'Z' to lowercase.</summary>
+        /// <returns>An <see cref="OperationStatus"/> describing the result of the operation.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static OperationStatus ToLower(ReadOnlySpan<char> source, Span<byte> destination, out int charsConsumed, out int bytesWritten)
+            => ChangeCase<char, byte, ToLowerConversion>(source, destination, out charsConsumed, out bytesWritten);
+
+        /// <summary>
+        /// Shared span-level entry point for every ToUpper / ToLower overload: decides how
+        /// many elements can be attempted, pins both buffers and delegates to the pointer loop.
+        /// </summary>
+        /// <returns>
+        /// <see cref="OperationStatus.Done"/> or <see cref="OperationStatus.DestinationTooSmall"/>
+        /// (depending on which buffer was shorter) when every attempted element was converted;
+        /// <see cref="OperationStatus.InvalidData"/> when the loop stopped early.
+        /// </returns>
+        /// <exception cref="InvalidOperationException">
+        /// Source and destination have the same element type and overlap in memory.
+        /// </exception>
+        private static unsafe OperationStatus ChangeCase<TFrom, TTo, TCasing>(ReadOnlySpan<TFrom> source, Span<TTo> destination, out int sourceElementsConsumed, out int destinationElementsWritten)
+            where TFrom : unmanaged, IBinaryInteger<TFrom>
+            where TTo : unmanaged, IBinaryInteger<TTo>
+            where TCasing : struct
+        {
+            // Overlap is only checked when both spans share an element type.
+            if (typeof(TFrom) == typeof(TTo) && source.Overlaps(MemoryMarshal.Cast<TTo, TFrom>(destination)))
+            {
+                throw new InvalidOperationException(SR.InvalidOperation_SpanOverlappedOperation);
+            }
+
+            nuint numElementsToConvert;
+            OperationStatus statusToReturnOnSuccess;
+
+            // Only as many elements as fit in the smaller of the two buffers are attempted.
+            if (source.Length <= destination.Length)
+            {
+                numElementsToConvert = (uint)source.Length;
+                statusToReturnOnSuccess = OperationStatus.Done;
+            }
+            else
+            {
+                numElementsToConvert = (uint)destination.Length;
+                statusToReturnOnSuccess = OperationStatus.DestinationTooSmall;
+            }
+
+            fixed (TFrom* pSource = &MemoryMarshal.GetReference(source))
+            fixed (TTo* pDestination = &MemoryMarshal.GetReference(destination))
+            {
+                nuint numElementsActuallyConverted = ChangeCase<TFrom, TTo, TCasing>(pSource, pDestination, numElementsToConvert);
+                Debug.Assert(numElementsActuallyConverted <= numElementsToConvert);
+
+                // Elements map one-to-one, so consumed and written counts are the same.
+                sourceElementsConsumed = (int)numElementsActuallyConverted;
+                destinationElementsWritten = (int)numElementsActuallyConverted;
+                return (numElementsToConvert == numElementsActuallyConverted) ? statusToReturnOnSuccess : OperationStatus.InvalidData;
+            }
+        }
+
+        /// <summary>
+        /// Element-by-element case conversion over raw pointers. Stops at the first element
+        /// that is not an ASCII code point and leaves that element unwritten.
+        /// </summary>
+        /// <returns>The number of elements converted and written to <paramref name="pDest"/>.</returns>
+        private static unsafe nuint ChangeCase<TFrom, TTo, TCasing>(TFrom* pSrc, TTo* pDest, nuint elementCount)
+            where TFrom : unmanaged, IBinaryInteger<TFrom>
+            where TTo : unmanaged, IBinaryInteger<TTo>
+            where TCasing : struct
+        {
+            Debug.Assert(typeof(TFrom) == typeof(byte) || typeof(TFrom) == typeof(char));
+            Debug.Assert(typeof(TTo) == typeof(byte) || typeof(TTo) == typeof(char));
+            Debug.Assert(typeof(TCasing) == typeof(ToUpperConversion) || typeof(TCasing) == typeof(ToLowerConversion));
+
+            bool conversionIsToUpper = (typeof(TCasing) == typeof(ToUpperConversion)); // JIT turns this into a const
+
+            nuint i = 0;
+            for (; i < elementCount; i++)
+            {
+                uint element = uint.CreateTruncating(pSrc[i]);
+                if (!UnicodeUtility.IsAsciiCodePoint(element)) { break; }
+                if (conversionIsToUpper)
+                {
+                    if (UnicodeUtility.IsInRangeInclusive(element, 'a', 'z'))
+                    {
+                        element -= 0x20u; // lowercase to uppercase
+                    }
+                }
+                else
+                {
+                    if (UnicodeUtility.IsInRangeInclusive(element, 'A', 'Z'))
+                    {
+                        element += 0x20u; // uppercase to lowercase
+                    }
+                }
+                pDest[i] = TTo.CreateTruncating(element);
+            }
+
+            return i;
+        }
+
+        // Marker types that select the conversion direction through the TCasing type argument.
+        private struct ToUpperConversion { }
+        private struct ToLowerConversion { }
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Transcoding.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Transcoding.cs
new file mode 100644
index 00000000000000..b356b4903b2d9a
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Transcoding.cs
@@ -0,0 +1,86 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace System.Buffers.Text
+{
+    public static partial class Ascii
+    {
+        /// <summary>
+        /// Copies text from a source buffer to a destination buffer, converting
+        /// from ASCII to UTF-16 during the copy.
+        /// </summary>
+        /// <param name="source">The source buffer from which ASCII text is read.</param>
+        /// <param name="destination">The destination buffer to which UTF-16 text is written.</param>
+        /// <param name="bytesConsumed">The number of bytes actually read from <paramref name="source"/>.</param>
+        /// <param name="charsWritten">The number of chars actually written to <paramref name="destination"/>.</param>
+        /// <returns>An <see cref="OperationStatus"/> describing the result of the operation.</returns>
+        public static unsafe OperationStatus ToUtf16(ReadOnlySpan<byte> source, Span<char> destination, out int bytesConsumed, out int charsWritten)
+        {
+            nuint numElementsToConvert;
+            OperationStatus statusToReturnOnSuccess;
+
+            // Only as many elements as fit in the smaller of the two buffers are attempted.
+            if (source.Length <= destination.Length)
+            {
+                numElementsToConvert = (uint)source.Length;
+                statusToReturnOnSuccess = OperationStatus.Done;
+            }
+            else
+            {
+                numElementsToConvert = (uint)destination.Length;
+                statusToReturnOnSuccess = OperationStatus.DestinationTooSmall;
+            }
+
+            fixed (byte* pSource = &MemoryMarshal.GetReference(source))
+            fixed (char* pDestination = &MemoryMarshal.GetReference(destination))
+            {
+                nuint numElementsActuallyConverted = ASCIIUtility.WidenAsciiToUtf16(pSource, pDestination, numElementsToConvert);
+                Debug.Assert(numElementsActuallyConverted <= numElementsToConvert);
+
+                // One source byte maps to one destination char, so both counts are the same.
+                bytesConsumed = (int)numElementsActuallyConverted;
+                charsWritten = (int)numElementsActuallyConverted;
+
+                // The helper converting fewer elements than requested is surfaced as InvalidData.
+                return (numElementsToConvert == numElementsActuallyConverted) ? statusToReturnOnSuccess : OperationStatus.InvalidData;
+            }
+        }
+
+        /// <summary>
+        /// Copies text from a source buffer to a destination buffer, converting
+        /// from UTF-16 to ASCII during the copy.
+        /// </summary>
+        /// <param name="source">The source buffer from which UTF-16 text is read.</param>
+        /// <param name="destination">The destination buffer to which ASCII text is written.</param>
+        /// <param name="charsConsumed">The number of chars actually read from <paramref name="source"/>.</param>
+        /// <param name="bytesWritten">The number of bytes actually written to <paramref name="destination"/>.</param>
+        /// <returns>An <see cref="OperationStatus"/> describing the result of the operation.</returns>
+        public static unsafe OperationStatus FromUtf16(ReadOnlySpan<char> source, Span<byte> destination, out int charsConsumed, out int bytesWritten)
+        {
+            nuint numElementsToConvert;
+            OperationStatus statusToReturnOnSuccess;
+
+            // Only as many elements as fit in the smaller of the two buffers are attempted.
+            if (source.Length <= destination.Length)
+            {
+                numElementsToConvert = (uint)source.Length;
+                statusToReturnOnSuccess = OperationStatus.Done;
+            }
+            else
+            {
+                numElementsToConvert = (uint)destination.Length;
+                statusToReturnOnSuccess = OperationStatus.DestinationTooSmall;
+            }
+
+            fixed (char* pSource = &MemoryMarshal.GetReference(source))
+            fixed (byte* pDestination = &MemoryMarshal.GetReference(destination))
+            {
+                nuint numElementsActuallyConverted = ASCIIUtility.NarrowUtf16ToAscii(pSource, pDestination, numElementsToConvert);
+                Debug.Assert(numElementsActuallyConverted <= numElementsToConvert);
+
+                // One source char maps to one destination byte, so both counts are the same.
+                charsConsumed = (int)numElementsActuallyConverted;
+                bytesWritten = (int)numElementsActuallyConverted;
+
+                // The helper converting fewer elements than requested is surfaced as InvalidData.
+                return (numElementsToConvert == numElementsActuallyConverted) ? statusToReturnOnSuccess : OperationStatus.InvalidData;
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Trimming.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Trimming.cs
new file mode 100644
index 00000000000000..4d59841cb46bd7
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Trimming.cs
@@ -0,0 +1,58 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Numerics;
+using System.Text;
+
+namespace System.Buffers.Text
+{
+    public static partial class Ascii
+    {
+        /// <summary>Computes the range of <paramref name="value"/> that remains after trimming with <c>TrimType.Both</c>.</summary>
+        public static Range Trim(ReadOnlySpan<byte> value) => TrimHelper(value, TrimType.Both);
+
+        /// <summary>Computes the range of <paramref name="value"/> that remains after trimming with <c>TrimType.Both</c>.</summary>
+        public static Range Trim(ReadOnlySpan<char> value) => TrimHelper(value, TrimType.Both);
+
+        /// <summary>Computes the range of <paramref name="value"/> that remains after leading ASCII whitespace is trimmed.</summary>
+        public static Range TrimStart(ReadOnlySpan<byte> value) => TrimHelper(value, TrimType.Head);
+
+        /// <summary>Computes the range of <paramref name="value"/> that remains after leading ASCII whitespace is trimmed.</summary>
+        public static Range TrimStart(ReadOnlySpan<char> value) => TrimHelper(value, TrimType.Head);
+
+        /// <summary>Computes the range of <paramref name="value"/> that remains after trailing ASCII whitespace is trimmed.</summary>
+        public static Range TrimEnd(ReadOnlySpan<byte> value) => TrimHelper(value, TrimType.Tail);
+
+        /// <summary>Computes the range of <paramref name="value"/> that remains after trailing ASCII whitespace is trimmed.</summary>
+        public static Range TrimEnd(ReadOnlySpan<char> value) => TrimHelper(value, TrimType.Tail);
+
+        /// <summary>
+        /// Shared implementation of the Trim family. Skips elements whose value is one of
+        /// 0x09..0x0D or 0x20 at the requested end(s) and returns the range that is left.
+        /// </summary>
+        /// <param name="value">The elements to inspect; nothing is copied or modified.</param>
+        /// <param name="trimType">
+        /// Flags tested against <c>TrimType.Head</c> and <c>TrimType.Tail</c>.
+        /// NOTE(review): assumes <c>TrimType.Both</c> carries both flags - the enum is declared elsewhere.
+        /// </param>
+        /// <returns>
+        /// A range into <paramref name="value"/>; it is empty (start == end) when every
+        /// inspected element was trimmable or the input was empty.
+        /// </returns>
+        private static Range TrimHelper<T>(ReadOnlySpan<T> value, TrimType trimType)
+            where T : unmanaged, IBinaryInteger<T>
+        {
+            int start = 0;
+            if ((trimType & TrimType.Head) != 0)
+            {
+                for (; start < value.Length; start++)
+                {
+                    if (!IsTrimmable(uint.CreateTruncating(value[start])))
+                    {
+                        break;
+                    }
+                }
+            }
+
+            int end = value.Length - 1;
+            if ((trimType & TrimType.Tail) != 0)
+            {
+                // "<=" so that the last remaining element is examined as well; otherwise a
+                // tail-only trim of all-whitespace input would keep one whitespace element.
+                for (; start <= end; end--)
+                {
+                    if (!IsTrimmable(uint.CreateTruncating(value[end])))
+                    {
+                        break;
+                    }
+                }
+            }
+
+            return start..(end + 1);
+
+            static bool IsTrimmable(uint elementValue)
+            {
+                // Bit (n - 1) of the mask stands for element value n.
+                const uint trimMask =
+                      (1u << (0x09 - 1))
+                    | (1u << (0x0A - 1))
+                    | (1u << (0x0B - 1))
+                    | (1u << (0x0C - 1))
+                    | (1u << (0x0D - 1))
+                    | (1u << (0x20 - 1));
+
+                // For 0 the subtraction wraps to uint.MaxValue and is rejected by the range
+                // check. Without it the shift count would be masked to 31 - the bit that
+                // represents 0x20 - and NUL would be treated as whitespace.
+                uint bitIndex = unchecked(elementValue - 1);
+                return (bitIndex < 32) && ((trimMask & (1u << (int)bitIndex)) != 0);
+            }
+        }
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs
index 3516007f90c330..08f2a1309234ba 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs
@@ -2,8 +2,6 @@
 // The .NET Foundation licenses this file to you under the MIT license.
using System.Diagnostics; -using System.Numerics; -using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Text; @@ -74,126 +72,5 @@ public static unsafe bool IsAscii(ReadOnlySpan value) return ASCIIUtility.GetIndexOfFirstNonAsciiChar(pValue, valueLength) == valueLength; } } - - /// - /// Copies text from a source buffer to a destination buffer, converting - /// from ASCII to UTF-16 during the copy. - /// - /// The source buffer from which ASCII text is read. - /// The destination buffer to which UTF-16 text is written. - /// The number of bytes actually read from . - /// The number of chars actually written to . - /// An describing the result of the operation. - public static unsafe OperationStatus ToUtf16(ReadOnlySpan source, Span destination, out int bytesConsumed, out int charsWritten) - { - nuint numElementsToConvert; - OperationStatus statusToReturnOnSuccess; - - if (source.Length <= destination.Length) - { - numElementsToConvert = (uint)source.Length; - statusToReturnOnSuccess = OperationStatus.Done; - } - else - { - numElementsToConvert = (uint)destination.Length; - statusToReturnOnSuccess = OperationStatus.DestinationTooSmall; - } - - fixed (byte* pSource = &MemoryMarshal.GetReference(source)) - fixed (char* pDestination = &MemoryMarshal.GetReference(destination)) - { - nuint numElementsActuallyConverted = ASCIIUtility.WidenAsciiToUtf16(pSource, pDestination, numElementsToConvert); - Debug.Assert(numElementsActuallyConverted <= numElementsToConvert); - - bytesConsumed = (int)numElementsActuallyConverted; - charsWritten = (int)numElementsActuallyConverted; - return (numElementsToConvert == numElementsActuallyConverted) ? statusToReturnOnSuccess : OperationStatus.InvalidData; - } - } - - /// - /// Copies text from a source buffer to a destination buffer, converting - /// from UTF-16 to ASCII during the copy. - /// - /// The source buffer from which UTF-16 text is read. 
- /// The destination buffer to which ASCII text is written. - /// The number of chars actually read from . - /// The number of bytes actually written to . - /// An describing the result of the operation. - public static unsafe OperationStatus FromUtf16(ReadOnlySpan source, Span destination, out int charsConsumed, out int bytesWritten) - { - nuint numElementsToConvert; - OperationStatus statusToReturnOnSuccess; - - if (source.Length <= destination.Length) - { - numElementsToConvert = (uint)source.Length; - statusToReturnOnSuccess = OperationStatus.Done; - } - else - { - numElementsToConvert = (uint)destination.Length; - statusToReturnOnSuccess = OperationStatus.DestinationTooSmall; - } - - fixed (char* pSource = &MemoryMarshal.GetReference(source)) - fixed (byte* pDestination = &MemoryMarshal.GetReference(destination)) - { - nuint numElementsActuallyConverted = ASCIIUtility.NarrowUtf16ToAscii(pSource, pDestination, numElementsToConvert); - Debug.Assert(numElementsActuallyConverted <= numElementsToConvert); - - charsConsumed = (int)numElementsActuallyConverted; - bytesWritten = (int)numElementsActuallyConverted; - return (numElementsToConvert == numElementsActuallyConverted) ? 
statusToReturnOnSuccess : OperationStatus.InvalidData; - } - } - - public static Range Trim(ReadOnlySpan value) => TrimHelper(value, TrimType.Both); - public static Range Trim(ReadOnlySpan value) => TrimHelper(value, TrimType.Both); - public static Range TrimStart(ReadOnlySpan value) => TrimHelper(value, TrimType.Head); - public static Range TrimStart(ReadOnlySpan value) => TrimHelper(value, TrimType.Head); - public static Range TrimEnd(ReadOnlySpan value) => TrimHelper(value, TrimType.Tail); - public static Range TrimEnd(ReadOnlySpan value) => TrimHelper(value, TrimType.Tail); - - private static Range TrimHelper(ReadOnlySpan value, TrimType trimType) - where T : unmanaged, IBinaryInteger - { - const uint trimMask = - (1u << (0x09 - 1)) - | (1u << (0x0A - 1)) - | (1u << (0x0B - 1)) - | (1u << (0x0C - 1)) - | (1u << (0x0D - 1)) - | (1u << (0x20 - 1)); - - int start = 0; - if ((trimType & TrimType.Head) != 0) - { - for (; start < value.Length; start++) - { - uint elementValue = uint.CreateTruncating(value[start]); - if ((elementValue > 0x20) || ((trimMask & (1u << ((int)elementValue - 1))) == 0)) - { - break; - } - } - } - - int end = value.Length - 1; - if ((trimType & TrimType.Tail) != 0) - { - for (; start < end; end--) - { - uint elementValue = uint.CreateTruncating(value[end]); - if ((elementValue > 0x20) || ((trimMask & (1u << ((int)elementValue - 1))) == 0)) - { - break; - } - } - } - - return start..(end + 1); - } } } From e054a01445d3c05fb2f86c9a0ac1ecbf34276bf1 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Thu, 21 Jul 2022 14:21:59 -0700 Subject: [PATCH 05/46] Add ref asm --- .../System.Runtime/ref/System.Runtime.cs | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs index f0425a2bf00c2e..79c391351841eb 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ 
-7325,6 +7325,29 @@ public enum OperationStatus } namespace System.Buffers.Text { + public static class Ascii + { + public static System.Buffers.OperationStatus FromUtf16(System.ReadOnlySpan source, System.Span destination, out int charsConsumed, out int bytesWritten) { throw null; } + public static int GetIndexOfFirstNonAsciiByte(System.ReadOnlySpan buffer) { throw null; } + public static int GetIndexOfFirstNonAsciiChar(System.ReadOnlySpan buffer) { throw null; } + public static bool IsAscii(System.ReadOnlySpan value) { throw null; } + public static bool IsAscii(System.ReadOnlySpan value) { throw null; } + public static System.Buffers.OperationStatus ToLower(System.ReadOnlySpan source, System.Span destination, out int bytesConsumed, out int bytesWritten) { throw null; } + public static System.Buffers.OperationStatus ToLower(System.ReadOnlySpan source, System.Span destination, out int charsConsumed, out int charsWritten) { throw null; } + public static System.Buffers.OperationStatus ToLower(System.ReadOnlySpan source, System.Span destination, out int bytesConsumed, out int charsWritten) { throw null; } + public static System.Buffers.OperationStatus ToLower(System.ReadOnlySpan source, System.Span destination, out int charsConsumed, out int bytesWritten) { throw null; } + public static System.Buffers.OperationStatus ToUpper(System.ReadOnlySpan source, System.Span destination, out int bytesConsumed, out int bytesWritten) { throw null; } + public static System.Buffers.OperationStatus ToUpper(System.ReadOnlySpan source, System.Span destination, out int charsConsumed, out int charsWritten) { throw null; } + public static System.Buffers.OperationStatus ToUpper(System.ReadOnlySpan source, System.Span destination, out int bytesConsumed, out int charsWritten) { throw null; } + public static System.Buffers.OperationStatus ToUpper(System.ReadOnlySpan source, System.Span destination, out int charsConsumed, out int bytesWritten) { throw null; } + public static 
System.Buffers.OperationStatus ToUtf16(System.ReadOnlySpan source, System.Span destination, out int bytesConsumed, out int charsWritten) { throw null; } + public static System.Range Trim(System.ReadOnlySpan value) { throw null; } + public static System.Range Trim(System.ReadOnlySpan value) { throw null; } + public static System.Range TrimEnd(System.ReadOnlySpan value) { throw null; } + public static System.Range TrimEnd(System.ReadOnlySpan value) { throw null; } + public static System.Range TrimStart(System.ReadOnlySpan value) { throw null; } + public static System.Range TrimStart(System.ReadOnlySpan value) { throw null; } + } public static partial class Base64 { public static System.Buffers.OperationStatus DecodeFromUtf8(System.ReadOnlySpan utf8, System.Span bytes, out int bytesConsumed, out int bytesWritten, bool isFinalBlock = true) { throw null; } From 36cbfa6b5343531ec8f4c9824be6576bcc275ca1 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Thu, 21 Jul 2022 19:20:56 -0700 Subject: [PATCH 06/46] Fun with case conversion! 
--- .../Buffers/Text/Ascii.CaseConversion.cs | 218 +++++++++++++++++- .../src/System/Text/Unicode/Utf16Utility.cs | 29 +++ 2 files changed, 245 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs index 61fd75e872c6d2..df76309cebc84f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs @@ -5,7 +5,11 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.X86; using System.Text; +using System.Text.Unicode; namespace System.Buffers.Text { @@ -84,15 +88,185 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD where TTo : unmanaged, IBinaryInteger where TCasing : struct { - Debug.Assert(typeof(TFrom) == typeof(byte) || typeof(TFrom) == typeof(char)); - Debug.Assert(typeof(TTo) == typeof(byte) || typeof(TTo) == typeof(char)); + Debug.Assert(typeof(TFrom) == typeof(byte) || typeof(TFrom) == typeof(ushort)); + Debug.Assert(typeof(TTo) == typeof(byte) || typeof(TTo) == typeof(ushort)); Debug.Assert(typeof(TCasing) == typeof(ToUpperConversion) || typeof(TCasing) == typeof(ToLowerConversion)); bool SourceIsAscii = (typeof(TFrom) == typeof(byte)); // JIT turns this into a const bool DestIsAscii = (typeof(TTo) == typeof(byte)); // JIT turns this into a const + bool ConversionIsWidening = SourceIsAscii && !DestIsAscii; // JIT turns this into a const + bool ConversionIsNarrowing = !SourceIsAscii && DestIsAscii; // JIT turns this into a const + bool ConversionIsWidthPreserving = typeof(TFrom) == typeof(TTo); // JIT turns this into a const bool ConversionIsToUpper = (typeof(TCasing) == typeof(ToUpperConversion)); // JIT turns 
this into a const + // Is there enough data to perform vectorized operations? + nuint i = 0; + + // The only situation we can't easily optimize is non-hardware-accelerated + // widening or narrowing. In this case, fall back to a naive element-by-element + // loop. + + if (!ConversionIsWidthPreserving && Vector128.IsHardwareAccelerated) + { + goto DrainRemaining; + } + + // Attempt to process 128 input bits. + + if (Vector128.IsHardwareAccelerated && elementCount >= (nuint)(16 / sizeof(TFrom))) + { + Vector128 srcVector = Vector128.LoadUnsafe(ref *pSrc); + + // First, check for non-ASCII data. If we see any, immediately + // exit the vectorized logic and fall back to the slower drain paths. + + if (VectorContainsAnyNonAsciiData(srcVector)) + { + goto Drain64; + } + + // Now find matching characters and perform case conversion. + + Vector128 searchValuesLowerExclusive = Vector128.Create(TFrom.CreateTruncating(ConversionIsToUpper ? '`' : '@')); // just before 'a' and 'A' + Vector128 searchValuesUpperExclusive = Vector128.Create(TFrom.CreateTruncating(ConversionIsToUpper ? '{' : '[')); // just after 'z' and 'Z' + Vector128 caseConversionVector = Vector128.Create(TFrom.CreateTruncating(0x20)); // works both directions + + Vector128 matches = Vector128.LessThan(srcVector, searchValuesUpperExclusive) + & Vector128.LessThan(searchValuesLowerExclusive, srcVector); + srcVector ^= (matches & caseConversionVector); + + // Now narrow or widen the vector as needed and write to the destination. 
+ + if (ConversionIsNarrowing) + { + Vector128 wide = srcVector.AsUInt16(); + Vector128 narrow = Vector128.Narrow(wide, wide); + Unsafe.WriteUnaligned(pDest, narrow.AsUInt64().ToScalar()); + } + else if (ConversionIsWidening) + { + Vector128 narrow = srcVector.AsByte(); + Vector128.WidenLower(narrow).StoreUnsafe(ref *(ushort*)pDest); + Vector128.WidenUpper(narrow).StoreUnsafe(ref *(ushort*)pDest, 8); + } + else + { + srcVector.As().StoreUnsafe(ref *pDest); + } + } + + + Drain64: + + // Attempt to process 64 input bits. + + if (IntPtr.Size >= 8 && (elementCount - i) >= (nuint)(8 / sizeof(TFrom))) + { + ulong nextBlockAsUInt64 = Unsafe.ReadUnaligned(&pSrc[i]); + if (SourceIsAscii) + { + throw new NotImplementedException(); + } + else + { + if (!Utf16Utility.AllCharsInUInt64AreAscii(nextBlockAsUInt64)) + { + goto Drain32; + } + nextBlockAsUInt64 = (ConversionIsToUpper) + ? Utf16Utility.ConvertAllAsciiCharsInUInt64ToUppercase(nextBlockAsUInt64) + : throw new NotImplementedException(); + } + + if (ConversionIsWidthPreserving) + { + Unsafe.WriteUnaligned(&pDest[i], nextBlockAsUInt64); + } + else + { + Debug.Assert(Vector128.IsHardwareAccelerated); + + Vector128 blockAsVectorOfUInt64 = Vector128.CreateScalarUnsafe(nextBlockAsUInt64); + if (ConversionIsWidening) + { + Vector128.StoreUnsafe(Vector128.WidenLower(blockAsVectorOfUInt64.AsByte()), ref *(ushort*)pDest, i); + } + else + { + Vector128 blockAsVectorOfUInt16 = blockAsVectorOfUInt64.AsUInt16(); + Vector128 narrowedBlock = Vector128.Narrow(blockAsVectorOfUInt16, blockAsVectorOfUInt16).AsUInt32(); + Unsafe.WriteUnaligned(&pDest[i], narrowedBlock.ToScalar()); + } + } + + i += (nuint)(8 / sizeof(TFrom)); + + // If vectorization is not accelerated, turn this into a while loop. + + if (!Vector128.IsHardwareAccelerated) + { + goto Drain64; + } + } + + Drain32: + + // Attempt to process 32 input bits. 
+ + if ((elementCount - i) >= (nuint)(4 / sizeof(TFrom))) + { + uint nextBlockAsUInt32 = Unsafe.ReadUnaligned(&pSrc[i]); + if (SourceIsAscii) + { + throw new NotImplementedException(); + } + else + { + if (!Utf16Utility.AllCharsInUInt32AreAscii(nextBlockAsUInt32)) + { + goto DrainRemaining; + } + nextBlockAsUInt32 = (ConversionIsToUpper) + ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(nextBlockAsUInt32) + : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(nextBlockAsUInt32); + } + + if (ConversionIsWidthPreserving) + { + Unsafe.WriteUnaligned(&pDest[i], nextBlockAsUInt32); + } + else + { + Debug.Assert(Vector128.IsHardwareAccelerated); + + Vector128 blockAsVectorOfUInt32 = Vector128.CreateScalarUnsafe(nextBlockAsUInt32); + if (ConversionIsWidening) + { + Vector128 widenedBlock = Vector128.WidenLower(blockAsVectorOfUInt32.AsByte()).AsUInt64(); + Unsafe.WriteUnaligned(&pDest[i], widenedBlock.ToScalar()); + } + else + { + Vector128 blockAsVectorOfUInt16 = blockAsVectorOfUInt32.AsUInt16(); + Vector128 narrowedBlock = Vector128.Narrow(blockAsVectorOfUInt16, blockAsVectorOfUInt16).AsUInt16(); + Unsafe.WriteUnaligned(&pDest[i], narrowedBlock.ToScalar()); + } + } + + i += (nuint)(4 / sizeof(TFrom)); + + // If vectorization is not accelerated or we're on 32-bit, + // turn this into a while loop. 
+ + if (IntPtr.Size < 8 || !Vector128.IsHardwareAccelerated) + { + goto Drain32; + } + } + + DrainRemaining: + for (; i < elementCount; i++) { uint element = uint.CreateTruncating(pSrc[i]); @@ -117,6 +291,46 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD return i; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool VectorContainsAnyNonAsciiData(Vector128 vector) + where T : unmanaged + { + if (typeof(T) == typeof(byte) || typeof(T) == typeof(sbyte)) + { + if (vector.ExtractMostSignificantBits() != 0) { return true; } + } + else if (typeof(T) == typeof(short) || typeof(T) == typeof(ushort)) + { + if (ASCIIUtility.VectorContainsNonAsciiChar(vector.AsUInt16())) { return true; } + } + else + { + Debug.Fail("Unknown types provided."); + throw new NotSupportedException(); + } + + return false; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector128 NarrowOrWidenLowerVector(Vector128 vector) + where TFrom : unmanaged + where TTo : unmanaged + { + if (typeof(TFrom) == typeof(byte) && typeof(TTo) == typeof(ushort)) + { + return Vector128.WidenLower(vector.AsByte()).As(); + } + else if (typeof(TFrom) == typeof(ushort) && typeof(TTo) == typeof(byte)) + { + return Vector128.Narrow(vector.AsUInt16(), vector.AsUInt16()).As(); + } + else + { + throw new NotSupportedException(); + } + } + private struct ToUpperConversion { } private struct ToLowerConversion { } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs index ab75f3e6789d38..11fea69cd63f3e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs @@ -85,6 +85,35 @@ internal static uint ConvertAllAsciiCharsInUInt32ToUppercase(uint value) return value ^ mask; // bit flip lowercase letters [a-z] => [A-Z] } + /// + /// Given a 
UInt64 that represents four ASCII UTF-16 characters, returns the invariant + /// uppercase representation of those characters. Requires the input value to contain + /// four ASCII UTF-16 characters in machine endianness. + /// + /// + /// This is a branchless implementation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static ulong ConvertAllAsciiCharsInUInt64ToUppercase(ulong value) + { + // ASSUMPTION: Caller has validated that input value is ASCII. + Debug.Assert(AllCharsInUInt64AreAscii(value)); + + // the 0x80 bit of each word of 'lowerIndicator' will be set iff the word has value >= 'a' + ulong lowerIndicator = value + 0x0080_0080_0080_0080ul - 0x0061_0061_0061_0061ul; + + // the 0x80 bit of each word of 'upperIndicator' will be set iff the word has value > 'z' + ulong upperIndicator = value + 0x0080_0080_0080_0080ul - 0x007B_007B_007B_007Bul; + + // the 0x80 bit of each word of 'combinedIndicator' will be set iff the word has value >= 'a' and <= 'z' + ulong combinedIndicator = (lowerIndicator ^ upperIndicator); + + // the 0x20 bit of each word of 'mask' will be set iff the word has value >= 'a' and <= 'z' + ulong mask = (combinedIndicator & 0x0080_0080_0080_0080ul) >> 2; + + return value ^ mask; // bit flip lowercase letters [a-z] => [A-Z] + } + /// /// Given a UInt32 that represents two ASCII UTF-16 characters, returns true iff /// the input contains one or more lowercase ASCII characters. From bba61f5aad2d4245ba10cb08f9f37672122626d3 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Thu, 21 Jul 2022 19:21:13 -0700 Subject: [PATCH 07/46] Fun with case conversion! 
--- .../src/System/Buffers/Text/Ascii.CaseConversion.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs index df76309cebc84f..c22c389f077e77 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs @@ -152,7 +152,7 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD } else { - srcVector.As().StoreUnsafe(ref *pDest); + srcVector.As().StoreUnsafe(ref *pDest); } } From 89331d6ab48bd0623c418abab8d74bfda2bfc58c Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Fri, 22 Jul 2022 12:13:19 -0700 Subject: [PATCH 08/46] Updates! --- .../Buffers/Text/Ascii.CaseConversion.cs | 138 +++++++++++++++--- .../src/System/Text/ASCIIUtility.cs | 14 +- 2 files changed, 127 insertions(+), 25 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs index c22c389f077e77..6807ae69be19f2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs @@ -6,7 +6,6 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; using System.Text; using System.Text.Unicode; @@ -21,15 +20,15 @@ public static OperationStatus ToUpper(ReadOnlySpan source, Span dest [MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToUpper(ReadOnlySpan source, Span destination, out int charsConsumed, out int charsWritten) - => ChangeCase(source, destination, out 
charsConsumed, out charsWritten); + => ChangeCase(MemoryMarshal.Cast(source), MemoryMarshal.Cast(destination), out charsConsumed, out charsWritten); [MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToUpper(ReadOnlySpan source, Span destination, out int bytesConsumed, out int charsWritten) - => ChangeCase(source, destination, out bytesConsumed, out charsWritten); + => ChangeCase(source, MemoryMarshal.Cast(destination), out bytesConsumed, out charsWritten); [MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToUpper(ReadOnlySpan source, Span destination, out int charsConsumed, out int bytesWritten) - => ChangeCase(source, destination, out charsConsumed, out bytesWritten); + => ChangeCase(MemoryMarshal.Cast(source), destination, out charsConsumed, out bytesWritten); [MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToLower(ReadOnlySpan source, Span destination, out int bytesConsumed, out int bytesWritten) @@ -37,15 +36,15 @@ public static OperationStatus ToLower(ReadOnlySpan source, Span dest [MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToLower(ReadOnlySpan source, Span destination, out int charsConsumed, out int charsWritten) - => ChangeCase(source, destination, out charsConsumed, out charsWritten); + => ChangeCase(MemoryMarshal.Cast(source), MemoryMarshal.Cast(destination), out charsConsumed, out charsWritten); [MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToLower(ReadOnlySpan source, Span destination, out int bytesConsumed, out int charsWritten) - => ChangeCase(source, destination, out bytesConsumed, out charsWritten); + => ChangeCase(source, MemoryMarshal.Cast(destination), out bytesConsumed, out charsWritten); [MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToLower(ReadOnlySpan source, Span destination, out int charsConsumed, out int bytesWritten) - => ChangeCase(source, 
destination, out charsConsumed, out bytesWritten); + => ChangeCase(MemoryMarshal.Cast(source), destination, out charsConsumed, out bytesWritten); private static unsafe OperationStatus ChangeCase(ReadOnlySpan source, Span destination, out int sourceElementsConsumed, out int destinationElementsWritten) where TFrom : unmanaged, IBinaryInteger @@ -112,11 +111,13 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD goto DrainRemaining; } - // Attempt to process 128 input bits. + // Attempt to process blocks of 128 input bits. if (Vector128.IsHardwareAccelerated && elementCount >= (nuint)(16 / sizeof(TFrom))) { - Vector128 srcVector = Vector128.LoadUnsafe(ref *pSrc); + // The first iteration of this loop will be unaligned. + + Vector128 srcVector = Vector128.LoadUnsafe(ref *pSrc, i); // First, check for non-ASCII data. If we see any, immediately // exit the vectorized logic and fall back to the slower drain paths. @@ -127,39 +128,77 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD } // Now find matching characters and perform case conversion. + // Basically, the (A <= value && value <= Z) check is converted to: + // (value - CONST) < (Z - A), but using signed instead of unsigned arithmetic. - Vector128 searchValuesLowerExclusive = Vector128.Create(TFrom.CreateTruncating(ConversionIsToUpper ? '`' : '@')); // just before 'a' and 'A' - Vector128 searchValuesUpperExclusive = Vector128.Create(TFrom.CreateTruncating(ConversionIsToUpper ? '{' : '[')); // just after 'z' and 'Z' + Vector128 subtractionVector = Vector128.Create(TFrom.CreateTruncating((ConversionIsToUpper ? 
'a' : 'A') + 0x80)); + Vector128 comparisionVector = Vector128.Create(TFrom.CreateTruncating(26 /* a..z or A..Z */)); Vector128 caseConversionVector = Vector128.Create(TFrom.CreateTruncating(0x20)); // works both directions - Vector128 matches = Vector128.LessThan(srcVector, searchValuesUpperExclusive) - & Vector128.LessThan(searchValuesLowerExclusive, srcVector); + Vector128 matches = SignedLessThan((srcVector - subtractionVector), comparisionVector); srcVector ^= (matches & caseConversionVector); // Now narrow or widen the vector as needed and write to the destination. if (ConversionIsNarrowing) { - Vector128 wide = srcVector.AsUInt16(); - Vector128 narrow = Vector128.Narrow(wide, wide); - Unsafe.WriteUnaligned(pDest, narrow.AsUInt64().ToScalar()); + Narrow16To8AndAndWriteTo(srcVector.AsUInt16(), (byte*)pDest, 0); } else if (ConversionIsWidening) { - Vector128 narrow = srcVector.AsByte(); - Vector128.WidenLower(narrow).StoreUnsafe(ref *(ushort*)pDest); - Vector128.WidenUpper(narrow).StoreUnsafe(ref *(ushort*)pDest, 8); + Widen8To16AndAndWriteTo(srcVector.AsByte(), (char*)pDest, 0); } else { srcVector.As().StoreUnsafe(ref *pDest); } - } + // Now that the first conversion is out of the way, calculate how + // many elements we should skip in order to have future writes be + // aligned. + + uint expectedWriteAlignment = ConversionIsNarrowing ? 8u : 16u; // JIT turns this into a const + i = expectedWriteAlignment - ((uint)pDest & (expectedWriteAlignment - 1)) / (uint)sizeof(TTo); + Debug.Assert((nuint)(&pDest[i]) % expectedWriteAlignment == 0, "Destination buffer wasn't properly aligned!"); + + // Future iterations of this loop will be aligned. + + for (; (elementCount - i) >= (nuint)(16 / sizeof(TFrom)); i += (nuint)(16 / sizeof(TFrom))) + { + // Unaligned read & check for non-ASCII data. 
+ + srcVector = Vector128.LoadUnsafe(ref *pSrc, i); + if (VectorContainsAnyNonAsciiData(srcVector)) + { + goto Drain64; + } + + // Now find matching characters and perform case conversion. + + matches = SignedLessThan((srcVector - subtractionVector), comparisionVector); + srcVector ^= (matches & caseConversionVector); + + // Now narrow or widen the vector as needed and write to the destination. + // We expect this write to be aligned. + + if (ConversionIsNarrowing) + { + Narrow16To8AndAndWriteTo(srcVector.AsUInt16(), (byte*)pDest, i); + } + else if (ConversionIsWidening) + { + Widen8To16AndAndWriteTo(srcVector.AsByte(), (char*)pDest, i); + } + else + { + srcVector.As().StoreUnsafe(ref *pDest, i); + } + } + } Drain64: - // Attempt to process 64 input bits. + // Attempt to process blocks of 64 input bits. if (IntPtr.Size >= 8 && (elementCount - i) >= (nuint)(8 / sizeof(TFrom))) { @@ -212,7 +251,7 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD Drain32: - // Attempt to process 32 input bits. + // Attempt to process blocks of 32 input bits. if ((elementCount - i) >= (nuint)(4 / sizeof(TFrom))) { @@ -267,6 +306,8 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD DrainRemaining: + // Process single elements at a time. 
+ for (; i < elementCount; i++) { uint element = uint.CreateTruncating(pSrc[i]); @@ -312,6 +353,59 @@ private static bool VectorContainsAnyNonAsciiData(Vector128 vector) return false; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe void Widen8To16AndAndWriteTo(Vector128 narrowVector, char* pDest, nuint destOffset) + { + if (Vector256.IsHardwareAccelerated) + { + Vector256 wide = Vector256.WidenLower(narrowVector.ToVector256Unsafe()); + wide.StoreUnsafe(ref *(ushort*)pDest, destOffset); + } + else + { + Vector128.WidenLower(narrowVector).StoreUnsafe(ref *(ushort*)pDest, destOffset); + Vector128.WidenUpper(narrowVector).StoreUnsafe(ref *(ushort*)pDest, destOffset + 8); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe void Narrow16To8AndAndWriteTo(Vector128 wideVector, byte* pDest, nuint destOffset) + { + Vector128 narrow = Vector128.Narrow(wideVector, wideVector); + + if (Sse2.IsSupported) + { + // MOVQ is supported even on x86, unaligned accesses allowed + Sse2.StoreScalar((ulong*)(pDest + destOffset), narrow.AsUInt64()); + } + else if (Vector64.IsHardwareAccelerated) + { + narrow.GetLower().StoreUnsafe(ref *pDest, destOffset); + } + else + { + Unsafe.WriteUnaligned(pDest + destOffset, narrow.AsUInt64().ToScalar()); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe Vector128 SignedLessThan(Vector128 left, Vector128 right) + where T : unmanaged + { + if (typeof(T) == typeof(byte) || typeof(T) == typeof(sbyte)) + { + return Vector128.LessThan(left.AsSByte(), right.AsSByte()).As(); + } + else if (typeof(T) == typeof(ushort) || typeof(T) == typeof(short)) + { + return Vector128.LessThan(left.AsInt16(), right.AsInt16()).As(); + } + else + { + throw new NotSupportedException(); + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector128 NarrowOrWidenLowerVector(Vector128 vector) where TFrom : unmanaged diff --git 
a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs index 5cba2be3143e0e..ac112b8fc3bb6b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs @@ -12,8 +12,11 @@ namespace System.Text { internal static partial class ASCIIUtility { + /// + /// Returns iff all bytes in are ASCII. + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool AllBytesInUInt64AreAscii(ulong value) + internal static bool AllBytesInUInt64AreAscii(ulong value) { // If the high bit of any byte is set, that byte is non-ASCII. @@ -1378,7 +1381,7 @@ public static unsafe nuint NarrowUtf16ToAscii(char* pUtf16Buffer, byte* pAsciiBu } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool VectorContainsNonAsciiChar(Vector128 utf16Vector) + internal static bool VectorContainsNonAsciiChar(Vector128 utf16Vector) { if (Sse2.IsSupported) { @@ -1415,8 +1418,13 @@ private static bool VectorContainsNonAsciiChar(Vector128 utf16Vector) } else { - throw new PlatformNotSupportedException(); + // Fallback: use Vector's default implementation. 
+ if (Vector128.GreaterThanOrEqualAny(utf16Vector, Vector128.Create((ushort)0x0080))) + { + return true; + } } + return false; } From 8333ec80b5faceea32bef9e9ff8878ddd98aabec Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Fri, 22 Jul 2022 12:19:02 -0700 Subject: [PATCH 09/46] Fix incorrect comparison --- .../src/System/Buffers/Text/Ascii.CaseConversion.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs index 6807ae69be19f2..39de8040d86dea 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs @@ -129,10 +129,10 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD // Now find matching characters and perform case conversion. // Basically, the (A <= value && value <= Z) check is converted to: - // (value - CONST) < (Z - A), but using signed instead of unsigned arithmetic. + // (value - CONST) <= (Z - A), but using signed instead of unsigned arithmetic. Vector128 subtractionVector = Vector128.Create(TFrom.CreateTruncating((ConversionIsToUpper ? 
'a' : 'A') + 0x80)); - Vector128 comparisionVector = Vector128.Create(TFrom.CreateTruncating(26 /* a..z or A..Z */)); + Vector128 comparisionVector = Vector128.Create(TFrom.CreateTruncating(26 /* a..z or A..Z */ - 0x80)); Vector128 caseConversionVector = Vector128.Create(TFrom.CreateTruncating(0x20)); // works both directions Vector128 matches = SignedLessThan((srcVector - subtractionVector), comparisionVector); From 841fe3c8ef07c3766e56d83d37cc9316dfbeef96 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Fri, 22 Jul 2022 14:24:14 -0700 Subject: [PATCH 10/46] Fix incorrect precondition checks --- .../Buffers/Text/Ascii.CaseConversion.cs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs index 39de8040d86dea..d499d334d26cea 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs @@ -106,7 +106,7 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD // widening or narrowing. In this case, fall back to a naive element-by-element // loop. 
- if (!ConversionIsWidthPreserving && Vector128.IsHardwareAccelerated) + if (!ConversionIsWidthPreserving && !Vector128.IsHardwareAccelerated) { goto DrainRemaining; } @@ -333,14 +333,14 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool VectorContainsAnyNonAsciiData(Vector128 vector) + private static unsafe bool VectorContainsAnyNonAsciiData(Vector128 vector) where T : unmanaged { - if (typeof(T) == typeof(byte) || typeof(T) == typeof(sbyte)) + if (sizeof(T) == 1) { if (vector.ExtractMostSignificantBits() != 0) { return true; } } - else if (typeof(T) == typeof(short) || typeof(T) == typeof(ushort)) + else if (sizeof(T) == 2) { if (ASCIIUtility.VectorContainsNonAsciiChar(vector.AsUInt16())) { return true; } } @@ -392,11 +392,11 @@ private static unsafe void Narrow16To8AndAndWriteTo(Vector128 wideVector private static unsafe Vector128 SignedLessThan(Vector128 left, Vector128 right) where T : unmanaged { - if (typeof(T) == typeof(byte) || typeof(T) == typeof(sbyte)) + if (sizeof(T) == 1) { return Vector128.LessThan(left.AsSByte(), right.AsSByte()).As(); } - else if (typeof(T) == typeof(ushort) || typeof(T) == typeof(short)) + else if (sizeof(T) == 2) { return Vector128.LessThan(left.AsInt16(), right.AsInt16()).As(); } @@ -407,15 +407,15 @@ private static unsafe Vector128 SignedLessThan(Vector128 left, Vector12 } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128 NarrowOrWidenLowerVector(Vector128 vector) + private static unsafe Vector128 NarrowOrWidenLowerVectorUnsigned(Vector128 vector) where TFrom : unmanaged where TTo : unmanaged { - if (typeof(TFrom) == typeof(byte) && typeof(TTo) == typeof(ushort)) + if (sizeof(TFrom) == 1 && sizeof(TTo) == 2) { return Vector128.WidenLower(vector.AsByte()).As(); } - else if (typeof(TFrom) == typeof(ushort) && typeof(TTo) == typeof(byte)) + else if (sizeof(TFrom) == 2 && sizeof(TTo) == 1) { return 
Vector128.Narrow(vector.AsUInt16(), vector.AsUInt16()).As(); } From dce2cae44d8a69fe13f7892dfb32b3ba0aed14d9 Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Fri, 22 Jul 2022 15:55:10 -0700 Subject: [PATCH 11/46] Update main vectorized loop --- .../Buffers/Text/Ascii.CaseConversion.cs | 110 +++++++++++------- 1 file changed, 69 insertions(+), 41 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs index d499d334d26cea..a359b60a891986 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs @@ -97,8 +97,7 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD bool ConversionIsNarrowing = !SourceIsAscii && DestIsAscii; // JIT turns this into a const bool ConversionIsWidthPreserving = typeof(TFrom) == typeof(TTo); // JIT turns this into a const bool ConversionIsToUpper = (typeof(TCasing) == typeof(ToUpperConversion)); // JIT turns this into a const - - // Is there enough data to perform vectorized operations? + uint NumInputElementsToConsumeEachVectorizedLoopIteration = (uint)(sizeof(Vector128) / sizeof(TFrom)); // JIT turns this into a const nuint i = 0; @@ -111,17 +110,13 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD goto DrainRemaining; } - // Attempt to process blocks of 128 input bits. + // Process the input as a series of 128-bit blocks. - if (Vector128.IsHardwareAccelerated && elementCount >= (nuint)(16 / sizeof(TFrom))) + if (Vector128.IsHardwareAccelerated && elementCount >= NumInputElementsToConsumeEachVectorizedLoopIteration) { - // The first iteration of this loop will be unaligned. - - Vector128 srcVector = Vector128.LoadUnsafe(ref *pSrc, i); - - // First, check for non-ASCII data. 
If we see any, immediately - // exit the vectorized logic and fall back to the slower drain paths. + // Unaligned read and check for non-ASCII data. + Vector128 srcVector = Vector128.LoadUnsafe(ref *pSrc); if (VectorContainsAnyNonAsciiData(srcVector)) { goto Drain64; @@ -131,39 +126,33 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD // Basically, the (A <= value && value <= Z) check is converted to: // (value - CONST) <= (Z - A), but using signed instead of unsigned arithmetic. - Vector128 subtractionVector = Vector128.Create(TFrom.CreateTruncating((ConversionIsToUpper ? 'a' : 'A') + 0x80)); + Vector128 subtractionVector = Vector128.Create(TFrom.CreateTruncating(ConversionIsToUpper ? ('a' + 0x80) : ('A' + 0x80))); Vector128 comparisionVector = Vector128.Create(TFrom.CreateTruncating(26 /* a..z or A..Z */ - 0x80)); Vector128 caseConversionVector = Vector128.Create(TFrom.CreateTruncating(0x20)); // works both directions Vector128 matches = SignedLessThan((srcVector - subtractionVector), comparisionVector); srcVector ^= (matches & caseConversionVector); - // Now narrow or widen the vector as needed and write to the destination. + // Now write to the destination. - if (ConversionIsNarrowing) - { - Narrow16To8AndAndWriteTo(srcVector.AsUInt16(), (byte*)pDest, 0); - } - else if (ConversionIsWidening) - { - Widen8To16AndAndWriteTo(srcVector.AsByte(), (char*)pDest, 0); - } - else - { - srcVector.As().StoreUnsafe(ref *pDest); - } + ChangeWidthAndWriteTo(srcVector, pDest, 0); // Now that the first conversion is out of the way, calculate how // many elements we should skip in order to have future writes be // aligned. uint expectedWriteAlignment = ConversionIsNarrowing ? 
8u : 16u; // JIT turns this into a const - i = expectedWriteAlignment - ((uint)pDest & (expectedWriteAlignment - 1)) / (uint)sizeof(TTo); + i = expectedWriteAlignment - ((uint)pDest & 0xFu) / (uint)sizeof(TTo); Debug.Assert((nuint)(&pDest[i]) % expectedWriteAlignment == 0, "Destination buffer wasn't properly aligned!"); - // Future iterations of this loop will be aligned. + // Future iterations of this loop will be aligned, + // except for the last iteration. + + bool finalIteration = false; + + RunLoopAgain: - for (; (elementCount - i) >= (nuint)(16 / sizeof(TFrom)); i += (nuint)(16 / sizeof(TFrom))) + for (; finalIteration || (elementCount - i) >= NumInputElementsToConsumeEachVectorizedLoopIteration; i += NumInputElementsToConsumeEachVectorizedLoopIteration) { // Unaligned read & check for non-ASCII data. @@ -178,22 +167,22 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD matches = SignedLessThan((srcVector - subtractionVector), comparisionVector); srcVector ^= (matches & caseConversionVector); - // Now narrow or widen the vector as needed and write to the destination. - // We expect this write to be aligned. + // Now write to the destination. + // We expect this write to be aligned except for the last iteration. 
- if (ConversionIsNarrowing) - { - Narrow16To8AndAndWriteTo(srcVector.AsUInt16(), (byte*)pDest, i); - } - else if (ConversionIsWidening) - { - Widen8To16AndAndWriteTo(srcVector.AsByte(), (char*)pDest, i); - } - else - { - srcVector.As().StoreUnsafe(ref *pDest, i); - } + ChangeWidthAndWriteTo(srcVector, pDest, 0); } + + Debug.Assert(i <= elementCount, "We overran a buffer."); + if (i == elementCount) + { + goto Return; + } + + Debug.Assert(!finalIteration, "We already ran the final iteration but didn't consume all elements?"); + i = elementCount - NumInputElementsToConsumeEachVectorizedLoopIteration; // we know there's enough data in the buffer to support this + finalIteration = true; + goto RunLoopAgain; } Drain64: @@ -329,6 +318,8 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD pDest[i] = TTo.CreateTruncating(element); } + Return: + return i; } @@ -388,6 +379,43 @@ private static unsafe void Narrow16To8AndAndWriteTo(Vector128 wideVector } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe void ChangeWidthAndWriteTo(Vector128 vector, TTo* pDest, nuint elementOffset) + where TFrom : unmanaged + where TTo : unmanaged + { + if (sizeof(TFrom) == sizeof(TTo)) + { + // no width change needed + Vector128.StoreUnsafe(vector.As(), ref *pDest, elementOffset); + } + else if (sizeof(TFrom) == 1 && sizeof(TTo) == 2) + { + // widening operation required + if (Vector256.IsHardwareAccelerated) + { + Vector256 wide = Vector256.WidenLower(vector.AsByte().ToVector256Unsafe()); + Vector256.StoreUnsafe(wide, ref *(ushort*)pDest, elementOffset); + } + else + { + Vector128.StoreUnsafe(Vector128.WidenLower(vector.AsByte()), ref *(ushort*)pDest, elementOffset); + Vector128.StoreUnsafe(Vector128.WidenUpper(vector.AsByte()), ref *(ushort*)pDest, elementOffset + 8); + } + } + else if (sizeof(TFrom) == 2 && sizeof(TTo) == 1) + { + // narrowing operation required + Vector128 narrow = Vector128.Narrow(vector.AsUInt16(), vector.AsUInt16()); + 
Vector128.StoreUnsafe(narrow, ref *(byte*)pDest, elementOffset); + } + else + { + Debug.Fail("Unknown types."); + throw new NotSupportedException(); + } + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe Vector128 SignedLessThan(Vector128 left, Vector128 right) where T : unmanaged From 685b3304319ecad7ef3416ecb67198e81b8dd00f Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Fri, 22 Jul 2022 17:26:55 -0700 Subject: [PATCH 12/46] Perf improvements & fix arithmetic error --- .../Buffers/Text/Ascii.CaseConversion.cs | 54 ++++++++++--------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs index a359b60a891986..ff29874305d039 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs @@ -91,8 +91,8 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD Debug.Assert(typeof(TTo) == typeof(byte) || typeof(TTo) == typeof(ushort)); Debug.Assert(typeof(TCasing) == typeof(ToUpperConversion) || typeof(TCasing) == typeof(ToLowerConversion)); - bool SourceIsAscii = (typeof(TFrom) == typeof(byte)); // JIT turns this into a const - bool DestIsAscii = (typeof(TTo) == typeof(byte)); // JIT turns this into a const + bool SourceIsAscii = (sizeof(TFrom) == 1); // JIT turns this into a const + bool DestIsAscii = (sizeof(TTo) == 1); // JIT turns this into a const bool ConversionIsWidening = SourceIsAscii && !DestIsAscii; // JIT turns this into a const bool ConversionIsNarrowing = !SourceIsAscii && DestIsAscii; // JIT turns this into a const bool ConversionIsWidthPreserving = typeof(TFrom) == typeof(TTo); // JIT turns this into a const @@ -126,8 +126,9 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD // Basically, the (A <= 
value && value <= Z) check is converted to: // (value - CONST) <= (Z - A), but using signed instead of unsigned arithmetic. - Vector128 subtractionVector = Vector128.Create(TFrom.CreateTruncating(ConversionIsToUpper ? ('a' + 0x80) : ('A' + 0x80))); - Vector128 comparisionVector = Vector128.Create(TFrom.CreateTruncating(26 /* a..z or A..Z */ - 0x80)); + TFrom SourceSignedMinValue = TFrom.CreateTruncating(1 << (8 * sizeof(TFrom) - 1)); + Vector128 subtractionVector = Vector128.Create(ConversionIsToUpper ? (SourceSignedMinValue + TFrom.CreateTruncating('a')) : (SourceSignedMinValue + TFrom.CreateTruncating('A'))); + Vector128 comparisionVector = Vector128.Create(SourceSignedMinValue + TFrom.CreateTruncating(26 /* A..Z or a..z */)); Vector128 caseConversionVector = Vector128.Create(TFrom.CreateTruncating(0x20)); // works both directions Vector128 matches = SignedLessThan((srcVector - subtractionVector), comparisionVector); @@ -141,19 +142,31 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD // many elements we should skip in order to have future writes be // aligned. - uint expectedWriteAlignment = ConversionIsNarrowing ? 8u : 16u; // JIT turns this into a const - i = expectedWriteAlignment - ((uint)pDest & 0xFu) / (uint)sizeof(TTo); + uint expectedWriteAlignment = NumInputElementsToConsumeEachVectorizedLoopIteration * (uint)sizeof(TTo); // JIT turns this into a const + i = NumInputElementsToConsumeEachVectorizedLoopIteration - ((uint)pDest % expectedWriteAlignment) / (uint)sizeof(TTo); Debug.Assert((nuint)(&pDest[i]) % expectedWriteAlignment == 0, "Destination buffer wasn't properly aligned!"); // Future iterations of this loop will be aligned, // except for the last iteration. 
- bool finalIteration = false; + while (true) + { + Debug.Assert(i <= elementCount, "We overran a buffer somewhere."); - RunLoopAgain: + if ((elementCount - i) < NumInputElementsToConsumeEachVectorizedLoopIteration) + { + // If we're about to enter the final iteration of the loop, back up so that + // we can read one unaligned block. If we've already consumed all the data, + // jump straight to the end. + + if (i == elementCount) + { + goto Return; + } + + i = elementCount - NumInputElementsToConsumeEachVectorizedLoopIteration; + } - for (; finalIteration || (elementCount - i) >= NumInputElementsToConsumeEachVectorizedLoopIteration; i += NumInputElementsToConsumeEachVectorizedLoopIteration) - { // Unaligned read & check for non-ASCII data. srcVector = Vector128.LoadUnsafe(ref *pSrc, i); @@ -168,21 +181,11 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD srcVector ^= (matches & caseConversionVector); // Now write to the destination. - // We expect this write to be aligned except for the last iteration. + // We expect this write to be aligned except for the last run through the loop. 
- ChangeWidthAndWriteTo(srcVector, pDest, 0); + ChangeWidthAndWriteTo(srcVector, pDest, i); + i += NumInputElementsToConsumeEachVectorizedLoopIteration; } - - Debug.Assert(i <= elementCount, "We overran a buffer."); - if (i == elementCount) - { - goto Return; - } - - Debug.Assert(!finalIteration, "We already ran the final iteration but didn't consume all elements?"); - i = elementCount - NumInputElementsToConsumeEachVectorizedLoopIteration; // we know there's enough data in the buffer to support this - finalIteration = true; - goto RunLoopAgain; } Drain64: @@ -406,7 +409,10 @@ private static unsafe void ChangeWidthAndWriteTo(Vector128 ve else if (sizeof(TFrom) == 2 && sizeof(TTo) == 1) { // narrowing operation required - Vector128 narrow = Vector128.Narrow(vector.AsUInt16(), vector.AsUInt16()); + // since we know data is all-ASCII, special-case SSE2 to avoid unneeded PAND in Narrow call + Vector128 narrow = (Sse2.IsSupported) + ? Sse2.PackUnsignedSaturate(vector.AsInt16(), vector.AsInt16()) + : Vector128.Narrow(vector.AsUInt16(), vector.AsUInt16()); Vector128.StoreUnsafe(narrow, ref *(byte*)pDest, elementOffset); } else From 4914c653d2caec2690ae0147a59873b1a7a4b3cb Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 29 Aug 2022 19:10:45 +0200 Subject: [PATCH 13/46] tests for Ascii.GetIndexOfFirstNonAsciiByte --- .../Ascii/GetIndexOfFirstNonAsciiByteTests.cs | 70 +++++++++++++++++++ .../tests/System.Memory.Tests.csproj | 1 + 2 files changed, 71 insertions(+) create mode 100644 src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs diff --git a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs b/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs new file mode 100644 index 00000000000000..e0bf59150962fa --- /dev/null +++ b/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs @@ -0,0 +1,70 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Linq; +using System.Runtime.Intrinsics; +using Xunit; + +namespace System.Buffers.Text.Tests +{ + public class GetIndexOfFirstNonAsciiByteTests + { + private static byte GetNextValidAsciiByte() => (byte)Random.Shared.Next(0, 127 + 1); + private static byte GetNextInvalidAsciiByte() => (byte)Random.Shared.Next(128, 255 + 1); + + [Fact] + public void EmptyInput_IndexNotFound() => Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiByte(ReadOnlySpan.Empty)); + + private static int[] BufferLengths = new[] { + 1, + Vector128.Count - 1, + Vector128.Count, + Vector128.Count + 1, + Vector256.Count - 1, + Vector256.Count, + Vector256.Count + 1 }; + + public static IEnumerable AsciiOnlyBuffers + { + get + { + yield return new object[] { new byte[] { GetNextValidAsciiByte() } }; + + foreach (int length in BufferLengths) + { + yield return new object[] { Enumerable.Repeat(GetNextValidAsciiByte(), length).ToArray() }; + } + } + } + + [Theory] + [MemberData(nameof(AsciiOnlyBuffers))] + public void AllAscii_IndexNotFound(byte[] buffer) => Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiByte(buffer)); + + public static IEnumerable ContainingNonAsciiCharactersBuffers + { + get + { + foreach (int length in BufferLengths) + { + for (int index = 0; index < length; index++) + { + yield return new object[] { index, Create(length, index) }; + } + } + + static byte[] Create(int length, int index) + { + byte[] buffer = Enumerable.Repeat(GetNextValidAsciiByte(), length).ToArray(); + buffer[index] = GetNextInvalidAsciiByte(); + return buffer; + } + } + } + + [Theory] + [MemberData(nameof(ContainingNonAsciiCharactersBuffers))] + public void NonAscii_IndexFound(int expectedIndex, byte[] buffer) => Assert.Equal(expectedIndex, Ascii.GetIndexOfFirstNonAsciiByte(buffer)); + } +} diff --git a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj 
b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj index 5e0b857be6a71c..e72e5e7e2c9eec 100644 --- a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj +++ b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj @@ -13,6 +13,7 @@ + From 7204d2d20fef11bdba94fb6ec92e807daebdaf42 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 29 Aug 2022 19:15:59 +0200 Subject: [PATCH 14/46] tests for Ascii.GetIndexOfFirstNonAsciiChar --- .../Ascii/GetIndexOfFirstNonAsciiCharTests.cs | 70 +++++++++++++++++++ .../tests/System.Memory.Tests.csproj | 1 + 2 files changed, 71 insertions(+) create mode 100644 src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs diff --git a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs b/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs new file mode 100644 index 00000000000000..c8d2efb6f92240 --- /dev/null +++ b/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs @@ -0,0 +1,70 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Collections.Generic; +using System.Linq; +using System.Runtime.Intrinsics; +using Xunit; + +namespace System.Buffers.Text.Tests +{ + public class GetIndexOfFirstNonAsciiCharTests + { + private static char GetNextValidAsciiChar() => (char)Random.Shared.Next(0, 127 + 1); + private static char GetNextInvalidAsciiChar() => (char)Random.Shared.Next(128, ushort.MaxValue + 1); + + [Fact] + public void EmptyInput_IndexNotFound() => Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiChar(ReadOnlySpan.Empty)); + + private static int[] BufferLengths = new[] { + 1, + Vector128.Count - 1, + Vector128.Count, + Vector128.Count + 1, + Vector256.Count - 1, + Vector256.Count, + Vector256.Count + 1 }; + + public static IEnumerable AsciiOnlyBuffers + { + get + { + yield return new object[] { new char[] { GetNextValidAsciiChar() } }; + + foreach (int length in BufferLengths) + { + yield return new object[] { Enumerable.Repeat(GetNextValidAsciiChar(), length).ToArray() }; + } + } + } + + [Theory] + [MemberData(nameof(AsciiOnlyBuffers))] + public void AllAscii_IndexNotFound(char[] buffer) => Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiChar(buffer)); + + public static IEnumerable ContainingNonAsciiCharactersBuffers + { + get + { + foreach (int length in BufferLengths) + { + for (int index = 0; index < length; index++) + { + yield return new object[] { index, Create(length, index) }; + } + } + + static char[] Create(int length, int index) + { + char[] buffer = Enumerable.Repeat(GetNextValidAsciiChar(), length).ToArray(); + buffer[index] = GetNextInvalidAsciiChar(); + return buffer; + } + } + } + + [Theory] + [MemberData(nameof(ContainingNonAsciiCharactersBuffers))] + public void NonAscii_IndexFound(int expectedIndex, char[] buffer) => Assert.Equal(expectedIndex, Ascii.GetIndexOfFirstNonAsciiChar(buffer)); + } +} diff --git a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj index 
e72e5e7e2c9eec..9124b9f2d22c7b 100644 --- a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj +++ b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj @@ -14,6 +14,7 @@ + From e3709b7f6b9f887ce6c11d4a4c6d0582c80edc25 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 29 Aug 2022 19:41:11 +0200 Subject: [PATCH 15/46] add tests for Ascii.IsAscii --- .../Ascii/GetIndexOfFirstNonAsciiByteTests.cs | 18 +++++++++++++++--- .../Ascii/GetIndexOfFirstNonAsciiCharTests.cs | 18 +++++++++++++++--- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs b/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs index e0bf59150962fa..d647a34dea2686 100644 --- a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs @@ -14,7 +14,11 @@ public class GetIndexOfFirstNonAsciiByteTests private static byte GetNextInvalidAsciiByte() => (byte)Random.Shared.Next(128, 255 + 1); [Fact] - public void EmptyInput_IndexNotFound() => Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiByte(ReadOnlySpan.Empty)); + public void EmptyInput_IndexNotFound() + { + Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiByte(ReadOnlySpan.Empty)); + Assert.True(Ascii.IsAscii(ReadOnlySpan.Empty)); + } private static int[] BufferLengths = new[] { 1, @@ -40,7 +44,11 @@ public static IEnumerable AsciiOnlyBuffers [Theory] [MemberData(nameof(AsciiOnlyBuffers))] - public void AllAscii_IndexNotFound(byte[] buffer) => Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiByte(buffer)); + public void AllAscii_IndexNotFound(byte[] buffer) + { + Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiByte(buffer)); + Assert.True(Ascii.IsAscii(buffer)); + } public static IEnumerable ContainingNonAsciiCharactersBuffers { @@ -65,6 +73,10 @@ static byte[] Create(int length, int index) [Theory] 
[MemberData(nameof(ContainingNonAsciiCharactersBuffers))] - public void NonAscii_IndexFound(int expectedIndex, byte[] buffer) => Assert.Equal(expectedIndex, Ascii.GetIndexOfFirstNonAsciiByte(buffer)); + public void NonAscii_IndexFound(int expectedIndex, byte[] buffer) + { + Assert.Equal(expectedIndex, Ascii.GetIndexOfFirstNonAsciiByte(buffer)); + Assert.False(Ascii.IsAscii(buffer)); + } } } diff --git a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs b/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs index c8d2efb6f92240..be1f282404c479 100644 --- a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs @@ -14,7 +14,11 @@ public class GetIndexOfFirstNonAsciiCharTests private static char GetNextInvalidAsciiChar() => (char)Random.Shared.Next(128, ushort.MaxValue + 1); [Fact] - public void EmptyInput_IndexNotFound() => Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiChar(ReadOnlySpan.Empty)); + public void EmptyInput_IndexNotFound() + { + Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiChar(ReadOnlySpan.Empty)); + Assert.True(Ascii.IsAscii(ReadOnlySpan.Empty)); + } private static int[] BufferLengths = new[] { 1, @@ -40,7 +44,11 @@ public static IEnumerable AsciiOnlyBuffers [Theory] [MemberData(nameof(AsciiOnlyBuffers))] - public void AllAscii_IndexNotFound(char[] buffer) => Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiChar(buffer)); + public void AllAscii_IndexNotFound(char[] buffer) + { + Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiChar(buffer)); + Assert.True(Ascii.IsAscii(buffer)); + } public static IEnumerable ContainingNonAsciiCharactersBuffers { @@ -65,6 +73,10 @@ static char[] Create(int length, int index) [Theory] [MemberData(nameof(ContainingNonAsciiCharactersBuffers))] - public void NonAscii_IndexFound(int expectedIndex, char[] buffer) => Assert.Equal(expectedIndex, 
Ascii.GetIndexOfFirstNonAsciiChar(buffer)); + public void NonAscii_IndexFound(int expectedIndex, char[] buffer) + { + Assert.Equal(expectedIndex, Ascii.GetIndexOfFirstNonAsciiChar(buffer)); + Assert.False(Ascii.IsAscii(buffer)); + } } } From fc6db59ed64be7ce48b369a1a094eecf11efa5fd Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 29 Aug 2022 19:52:12 +0200 Subject: [PATCH 16/46] add tests for Ascii.FromUtf16 --- .../ASCIIEncoding/ASCIIEncodingEncode.cs | 59 +++++++++++++------ 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingEncode.cs b/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingEncode.cs index b0170dfe532be6..3e304f7ab3a07d 100644 --- a/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingEncode.cs +++ b/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingEncode.cs @@ -1,6 +1,8 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Buffers; +using System.Buffers.Text; using System.Collections.Generic; using System.Linq; using Xunit; @@ -43,6 +45,21 @@ public void Encode(string source, int index, int count) // Encoding valid chars should not throw with an EncoderExceptionFallback Encoding exceptionEncoding = Encoding.GetEncoding("ascii", new EncoderExceptionFallback(), new DecoderReplacementFallback("?")); EncodingHelpers.Encode(exceptionEncoding, source, index, count, expected); + + byte[] actual = new byte[expected.Length * 2]; + Assert.Equal(OperationStatus.Done , Ascii.FromUtf16(source.AsSpan(index, count), actual, out int charsConsumed, out int bytesWritten)); + Assert.Equal(count, charsConsumed); + Assert.Equal(expected.Length, bytesWritten); + Assert.Equal(expected, actual.Take(bytesWritten).ToArray()); + + if (expected.Length > 1) + { + actual = new byte[expected.Length - 1]; + Assert.Equal(OperationStatus.DestinationTooSmall, Ascii.FromUtf16(source.AsSpan(index, count), actual, out charsConsumed, out bytesWritten)); + Assert.Equal(count - 1, charsConsumed); + Assert.Equal(expected.Length - 1, bytesWritten); + Assert.Equal(expected.Take(bytesWritten).ToArray(), actual.Take(bytesWritten).ToArray()); + } } public static IEnumerable Encode_InvalidChars_TestData() @@ -51,39 +68,39 @@ public static IEnumerable Encode_InvalidChars_TestData() for (int i = 0x80; i <= 0xFF; i++) { char b = (char)i; - yield return new object[] { b, 0, 1 }; + yield return new object[] { b, 0, 1, 0 }; } // Unicode chars - yield return new object[] { "\u1234\u2345", 0, 2 }; - yield return new object[] { "a\u1234\u2345b", 0, 4 }; + yield return new object[] { "\u1234\u2345", 0, 2, 0 }; + yield return new object[] { "a\u1234\u2345b", 0, 4, 1 }; - yield return new object[] { "\uD800\uDC00", 0, 2 }; - yield return new object[] { "a\uD800\uDC00b", 0, 2 }; + yield return new object[] { "\uD800\uDC00", 0, 2, 0 }; + yield return new object[] { "a\uD800\uDC00b", 0, 2, 1 }; - yield return new object[] { 
"\uD800\uDC00\u0061\u0CFF", 0, 4 }; + yield return new object[] { "\uD800\uDC00\u0061\u0CFF", 0, 4, 0 }; // Invalid Unicode - yield return new object[] { "\uD800", 0, 1 }; // Lone high surrogate - yield return new object[] { "\uDC00", 0, 1 }; // Lone low surrogate - yield return new object[] { "\uD800\uDC00", 0, 1 }; // Surrogate pair out of range - yield return new object[] { "\uD800\uDC00", 1, 1 }; // Surrogate pair out of range + yield return new object[] { "\uD800", 0, 1, 0 }; // Lone high surrogate + yield return new object[] { "\uDC00", 0, 1, 0 }; // Lone low surrogate + yield return new object[] { "\uD800\uDC00", 0, 1, 0 }; // Surrogate pair out of range + yield return new object[] { "\uD800\uDC00", 1, 1, 0 }; // Surrogate pair out of range - yield return new object[] { "\uD800\uD800", 0, 2 }; // High, high - yield return new object[] { "\uDC00\uD800", 0, 2 }; // Low, high - yield return new object[] { "\uDC00\uDC00", 0, 2 }; // Low, low + yield return new object[] { "\uD800\uD800", 0, 2, 0 }; // High, high + yield return new object[] { "\uDC00\uD800", 0, 2, 0 }; // Low, high + yield return new object[] { "\uDC00\uDC00", 0, 2, 0 }; // Low, low - yield return new object[] { "\u0080\u00FF\u0B71\uFFFF\uD800\uDFFF", 0, 6 }; + yield return new object[] { "\u0080\u00FF\u0B71\uFFFF\uD800\uDFFF", 0, 6, 0 }; // High BMP non-chars - yield return new object[] { "\uFFFD", 0, 1 }; - yield return new object[] { "\uFFFE", 0, 1 }; - yield return new object[] { "\uFFFF", 0, 1 }; + yield return new object[] { "\uFFFD", 0, 1, 0 }; + yield return new object[] { "\uFFFE", 0, 1, 0 }; + yield return new object[] { "\uFFFF", 0, 1, 0 }; } [Theory] [MemberData(nameof(Encode_InvalidChars_TestData))] - public void Encode_InvalidChars(string source, int index, int count) + public void Encode_InvalidChars(string source, int index, int count, int expectedCharsConsumed) { byte[] expected = GetBytes(source, index, count); EncodingHelpers.Encode(new ASCIIEncoding(), source, index, count, 
expected); @@ -91,6 +108,12 @@ public void Encode_InvalidChars(string source, int index, int count) // Encoding invalid chars should throw with an EncoderExceptionFallback Encoding exceptionEncoding = Encoding.GetEncoding("ascii", new EncoderExceptionFallback(), new DecoderReplacementFallback("?")); NegativeEncodingTests.Encode_Invalid(exceptionEncoding, source, index, count); + + byte[] actual = new byte[expected.Length * 2]; + Assert.Equal(OperationStatus.InvalidData, Ascii.FromUtf16(source.AsSpan(index, count), actual, out int charsConsumed, out int bytesWritten)); + Assert.Equal(expectedCharsConsumed, charsConsumed); + Assert.Equal(charsConsumed, bytesWritten); + Assert.Equal(expected.Take(bytesWritten).ToArray(), actual.Take(bytesWritten).ToArray()); } private static byte[] GetBytes(string source, int index, int count) From e3167895916a392112552fe1133b50dba55b76b4 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Tue, 30 Aug 2022 08:57:57 +0200 Subject: [PATCH 17/46] add tests for Ascii.ToUtf16 --- .../ASCIIEncoding/ASCIIEncodingDecode.cs | 33 ++++++++++++++++--- .../ASCIIEncoding/ASCIIEncodingEncode.cs | 4 +-- 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingDecode.cs b/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingDecode.cs index c9d12101d00a87..4c99abe12a5bb5 100644 --- a/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingDecode.cs +++ b/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingDecode.cs @@ -1,6 +1,8 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Buffers; +using System.Buffers.Text; using System.Collections.Generic; using System.Linq; using Xunit; @@ -37,6 +39,21 @@ public void Decode(byte[] bytes, int index, int count) // Decoding valid bytes should not throw with a DecoderExceptionFallback Encoding exceptionEncoding = Encoding.GetEncoding("ascii", new EncoderReplacementFallback("?"), new DecoderExceptionFallback()); EncodingHelpers.Decode(exceptionEncoding, bytes, index, count, expected); + + char[] actual = new char[expected.Length]; + Assert.Equal(OperationStatus.Done, Ascii.ToUtf16(bytes.AsSpan(index, count), actual, out int bytesConsumed, out int charsWritten)); + Assert.Equal(count, bytesConsumed); + Assert.Equal(expected.Length, charsWritten); + Assert.Equal(expected, new string(actual.AsSpan(0, charsWritten))); + + if (expected.Length > 1) + { + actual = new char[expected.Length - 1]; + Assert.Equal(OperationStatus.DestinationTooSmall, Ascii.ToUtf16(bytes.AsSpan(index, count), actual, out bytesConsumed, out charsWritten)); + Assert.Equal(count - 1, bytesConsumed); + Assert.Equal(expected.Length - 1, charsWritten); + Assert.Equal(expected.Substring(0, expected.Length - 1), new string(actual.AsSpan(0, charsWritten))); + } } public static IEnumerable Decode_InvalidBytes_TestData() @@ -45,17 +62,17 @@ public static IEnumerable Decode_InvalidBytes_TestData() for (int i = 0x80; i <= byte.MaxValue; i++) { byte b = (byte)i; - yield return new object[] { new byte[] { b }, 0, 1 }; - yield return new object[] { new byte[] { 96, b, 97 }, 1, 1 }; - yield return new object[] { new byte[] { 97, b, 97 }, 0, 3 }; + yield return new object[] { new byte[] { b }, 0, 1, 0 }; + yield return new object[] { new byte[] { 96, b, 97 }, 1, 1, 0 }; + yield return new object[] { new byte[] { 97, b, 97 }, 0, 3, 1 }; } - yield return new object[] { new byte[] { 0xC1, 0x41, 0xF0, 0x42 }, 0, 4 }; + yield return new object[] { new byte[] { 0xC1, 0x41, 0xF0, 0x42 }, 0, 4, 0 }; } [Theory] 
[MemberData(nameof(Decode_InvalidBytes_TestData))] - public void Decode_InvalidBytes(byte[] bytes, int index, int count) + public void Decode_InvalidBytes(byte[] bytes, int index, int count, int expectedBytesConsumed) { string expected = GetString(bytes, index, count); EncodingHelpers.Decode(new ASCIIEncoding(), bytes, index, count, expected); @@ -63,6 +80,12 @@ public void Decode_InvalidBytes(byte[] bytes, int index, int count) // Decoding invalid bytes should throw with a DecoderExceptionFallback Encoding exceptionEncoding = Encoding.GetEncoding("ascii", new EncoderReplacementFallback("?"), new DecoderExceptionFallback()); NegativeEncodingTests.Decode_Invalid(exceptionEncoding, bytes, index, count); + + char[] actual = new char[expected.Length]; + Assert.Equal(OperationStatus.InvalidData, Ascii.ToUtf16(bytes.AsSpan(index, count), actual, out int bytesConsumed, out int charsWritten)); + Assert.Equal(expectedBytesConsumed, bytesConsumed); + Assert.Equal(bytesConsumed, charsWritten); + Assert.Equal(expected.Take(charsWritten).ToArray(), actual.Take(charsWritten).ToArray()); } public static string GetString(byte[] bytes, int index, int count) diff --git a/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingEncode.cs b/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingEncode.cs index 3e304f7ab3a07d..c8dc4239abfb10 100644 --- a/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingEncode.cs +++ b/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingEncode.cs @@ -46,7 +46,7 @@ public void Encode(string source, int index, int count) Encoding exceptionEncoding = Encoding.GetEncoding("ascii", new EncoderExceptionFallback(), new DecoderReplacementFallback("?")); EncodingHelpers.Encode(exceptionEncoding, source, index, count, expected); - byte[] actual = new byte[expected.Length * 2]; + byte[] actual = new byte[expected.Length]; Assert.Equal(OperationStatus.Done , Ascii.FromUtf16(source.AsSpan(index, count), 
actual, out int charsConsumed, out int bytesWritten)); Assert.Equal(count, charsConsumed); Assert.Equal(expected.Length, bytesWritten); @@ -109,7 +109,7 @@ public void Encode_InvalidChars(string source, int index, int count, int expecte Encoding exceptionEncoding = Encoding.GetEncoding("ascii", new EncoderExceptionFallback(), new DecoderReplacementFallback("?")); NegativeEncodingTests.Encode_Invalid(exceptionEncoding, source, index, count); - byte[] actual = new byte[expected.Length * 2]; + byte[] actual = new byte[expected.Length]; Assert.Equal(OperationStatus.InvalidData, Ascii.FromUtf16(source.AsSpan(index, count), actual, out int charsConsumed, out int bytesWritten)); Assert.Equal(expectedCharsConsumed, charsConsumed); Assert.Equal(charsConsumed, bytesWritten); From a5f61b9ec23b4932a1216d4047007cb599318735 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Tue, 30 Aug 2022 09:56:27 +0200 Subject: [PATCH 18/46] add tests for Ascii.Trim* and fix bug they have discovered --- .../System.Memory/tests/Ascii/TrimTests.cs | 118 ++++++++++++++++++ .../tests/System.Memory.Tests.csproj | 1 + .../src/System/Buffers/Text/Ascii.Trimming.cs | 2 +- 3 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 src/libraries/System.Memory/tests/Ascii/TrimTests.cs diff --git a/src/libraries/System.Memory/tests/Ascii/TrimTests.cs b/src/libraries/System.Memory/tests/Ascii/TrimTests.cs new file mode 100644 index 00000000000000..0c1f91ad9a1a54 --- /dev/null +++ b/src/libraries/System.Memory/tests/Ascii/TrimTests.cs @@ -0,0 +1,118 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Text; +using Xunit; + +namespace System.Buffers.Text.Tests +{ + public class TrimTests + { + [Fact] + public void EmptyInput() + { + Assert.Equal(default(Range), Ascii.Trim(ReadOnlySpan.Empty)); + Assert.Equal(default(Range), Ascii.Trim(ReadOnlySpan.Empty)); + Assert.Equal(default(Range), Ascii.TrimStart(ReadOnlySpan.Empty)); + Assert.Equal(default(Range), Ascii.TrimStart(ReadOnlySpan.Empty)); + Assert.Equal(default(Range), Ascii.TrimEnd(ReadOnlySpan.Empty)); + Assert.Equal(default(Range), Ascii.TrimEnd(ReadOnlySpan.Empty)); + } + + [Theory] + [InlineData("1")] + [InlineData("abc")] + [InlineData("a\tb c\rd\ne")] + public void NothingToTrimNonEmptyInput(string text) + { + ReadOnlySpan bytes = Encoding.ASCII.GetBytes(text); + + Range expected = 0..text.Length; + Assert.Equal(expected, Ascii.Trim(bytes)); + Assert.Equal(expected, Ascii.Trim(text)); + Assert.Equal(expected, Ascii.TrimStart(bytes)); + Assert.Equal(expected, Ascii.TrimStart(text)); + Assert.Equal(expected, Ascii.TrimEnd(bytes)); + Assert.Equal(expected, Ascii.TrimEnd(text)); + } + + [Theory] + [InlineData(" ")] + [InlineData("\t")] + [InlineData("\r")] + [InlineData("\n")] + [InlineData("\r\n")] + [InlineData(" \t\r\n ")] + [InlineData("\n \t \r")] + public void OnlyWhitespaces(string text) + { + ReadOnlySpan bytes = Encoding.ASCII.GetBytes(text); + + Assert.Equal(text.Length..text.Length, Ascii.Trim(bytes)); + Assert.Equal(text.Length..text.Length, Ascii.Trim(text)); + Assert.Equal(text.Length..text.Length, Ascii.TrimStart(bytes)); + Assert.Equal(text.Length..text.Length, Ascii.TrimStart(text)); + // Special-case when the input contains all-whitespace data, since we want to + // return a zero-length slice at the *beginning* of the span, not the end of the span + Assert.Equal(0..0, Ascii.TrimEnd(bytes)); + Assert.Equal(0..0, Ascii.TrimEnd(text)); + } + + [Theory] + [InlineData(" a", 1)] + [InlineData("\tb", 1)] + [InlineData("\rc", 1)] + [InlineData("\nd", 1)] + [InlineData(" \t\r\ne", 
4)] + [InlineData(" \t\r\n\n\r\t f", 8)] + public void StartingWithWhitespace(string text, int leadingWhitespaceCount) + { + ReadOnlySpan bytes = Encoding.ASCII.GetBytes(text); + + Assert.Equal(leadingWhitespaceCount..text.Length, Ascii.TrimStart(bytes)); + Assert.Equal(leadingWhitespaceCount..text.Length, Ascii.TrimStart(text)); + Assert.Equal(leadingWhitespaceCount..text.Length, Ascii.Trim(bytes)); + Assert.Equal(leadingWhitespaceCount..text.Length, Ascii.Trim(text)); + Assert.Equal(0..text.Length, Ascii.TrimEnd(bytes)); + Assert.Equal(0..text.Length, Ascii.TrimEnd(text)); + } + + [Theory] + [InlineData("a ", 1)] + [InlineData("b\t", 1)] + [InlineData("c\r", 1)] + [InlineData("d\n", 1)] + [InlineData("e \t\r\n", 4)] + [InlineData("f \t\r\n\n\r\t ", 8)] + public void EndingWithWhitespace(string text, int trailingWhitespaceCount) + { + ReadOnlySpan bytes = Encoding.ASCII.GetBytes(text); + + Assert.Equal(0..(text.Length - trailingWhitespaceCount), Ascii.TrimEnd(bytes)); + Assert.Equal(0..(text.Length - trailingWhitespaceCount), Ascii.TrimEnd(text)); + Assert.Equal(0..(text.Length - trailingWhitespaceCount), Ascii.Trim(bytes)); + Assert.Equal(0..(text.Length - trailingWhitespaceCount), Ascii.Trim(text)); + Assert.Equal(0..text.Length, Ascii.TrimStart(bytes)); + Assert.Equal(0..text.Length, Ascii.TrimStart(text)); + } + + [Theory] + [InlineData(" a ", 1, 1)] + [InlineData("\tb\t", 1, 1)] + [InlineData("\rc\r", 1, 1)] + [InlineData("\nd\n", 1, 1)] + [InlineData(" \t\r\ne \t\r\n", 4, 4)] + [InlineData(" \t\r\n\n\r\t f \t\r\n\n\r\t ", 8, 8)] + public void StartingAndEndingWithWhitespace(string text, int leadingWhitespaceCount, int trailingWhitespaceCount) + { + ReadOnlySpan bytes = Encoding.ASCII.GetBytes(text); + + Assert.Equal(leadingWhitespaceCount..text.Length, Ascii.TrimStart(bytes)); + Assert.Equal(leadingWhitespaceCount..text.Length, Ascii.TrimStart(text)); + Assert.Equal(leadingWhitespaceCount..(text.Length - trailingWhitespaceCount), Ascii.Trim(bytes)); + 
Assert.Equal(leadingWhitespaceCount..(text.Length - trailingWhitespaceCount), Ascii.Trim(text)); + Assert.Equal(0..(text.Length - trailingWhitespaceCount), Ascii.TrimEnd(bytes)); + Assert.Equal(0..(text.Length - trailingWhitespaceCount), Ascii.TrimEnd(text)); + } + } +} diff --git a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj index 9124b9f2d22c7b..3351811c02d394 100644 --- a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj +++ b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj @@ -15,6 +15,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Trimming.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Trimming.cs index 4d59841cb46bd7..3e0b4988066770 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Trimming.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Trimming.cs @@ -42,7 +42,7 @@ private static Range TrimHelper(ReadOnlySpan value, TrimType trimType) int end = value.Length - 1; if ((trimType & TrimType.Tail) != 0) { - for (; start < end; end--) + for (; start <= end; end--) { uint elementValue = uint.CreateTruncating(value[end]); if ((elementValue > 0x20) || ((trimMask & (1u << ((int)elementValue - 1))) == 0)) From 6516ae2df949b5b580fe4540667a910fcc481e4d Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Tue, 30 Aug 2022 14:52:58 +0200 Subject: [PATCH 19/46] ToUpper & ToLower tests --- .../tests/Ascii/CaseConversionTests.cs | 220 ++++++++++++++++++ .../tests/System.Memory.Tests.csproj | 1 + 2 files changed, 221 insertions(+) create mode 100644 src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs diff --git a/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs b/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs new file mode 100644 index 00000000000000..e654fb6c8e3ee5 --- /dev/null +++ 
b/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs @@ -0,0 +1,220 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Runtime.InteropServices; +using Xunit; + +namespace System.Buffers.Text.Tests +{ + public class CaseConversionTests + { + private const byte MaxValidAsciiChar = 127; + + [Fact] + public void OverlappingBuffers_Throws() + { + byte[] byteBuffer = new byte[10]; + char[] charBuffer = new char[10]; + + // byte -> byte + Assert.Throws(() => Ascii.ToLower(byteBuffer, byteBuffer, out _, out _)); + Assert.Throws(() => Ascii.ToLower(byteBuffer.AsSpan(1, 3), byteBuffer.AsSpan(3, 5), out _, out _)); + Assert.Throws(() => Ascii.ToUpper(byteBuffer, byteBuffer, out _, out _)); + Assert.Throws(() => Ascii.ToUpper(byteBuffer.AsSpan(1, 3), byteBuffer.AsSpan(3, 5), out _, out _)); + // byte -> char + Assert.Throws(() => Ascii.ToLower(byteBuffer, MemoryMarshal.Cast(byteBuffer), out _, out _)); + Assert.Throws(() => Ascii.ToLower(byteBuffer, MemoryMarshal.Cast(byteBuffer).Slice(3, 5), out _, out _)); + Assert.Throws(() => Ascii.ToUpper(byteBuffer, MemoryMarshal.Cast(byteBuffer), out _, out _)); + Assert.Throws(() => Ascii.ToUpper(byteBuffer, MemoryMarshal.Cast(byteBuffer).Slice(3, 5), out _, out _)); + // char -> char + Assert.Throws(() => Ascii.ToLower(charBuffer, charBuffer, out _, out _)); + Assert.Throws(() => Ascii.ToLower(charBuffer.AsSpan(1, 3), charBuffer.AsSpan(3, 5), out _, out _)); + Assert.Throws(() => Ascii.ToUpper(charBuffer, charBuffer, out _, out _)); + Assert.Throws(() => Ascii.ToUpper(charBuffer.AsSpan(1, 3), charBuffer.AsSpan(3, 5), out _, out _)); + // char -> byte + Assert.Throws(() => Ascii.ToLower(charBuffer, MemoryMarshal.Cast(charBuffer), out _, out _)); + Assert.Throws(() => Ascii.ToLower(charBuffer, MemoryMarshal.Cast(charBuffer).Slice(3, 5), out _, out _)); + Assert.Throws(() => Ascii.ToUpper(charBuffer, 
MemoryMarshal.Cast(charBuffer), out _, out _)); + Assert.Throws(() => Ascii.ToUpper(charBuffer, MemoryMarshal.Cast(charBuffer).Slice(3, 5), out _, out _)); + } + + private static void VerifySingleChar(OperationStatus status, int value, T expected, T actual, int consumed, int written) + { + Assert.True(typeof(T) == typeof(char) || typeof(T) == typeof(byte)); + + if (value <= MaxValidAsciiChar) + { + Assert.Equal(OperationStatus.Done, status); + Assert.Equal(expected, actual); + Assert.Equal(1, consumed); + Assert.Equal(1, written); + } + else + { + Assert.Equal(OperationStatus.InvalidData, status); + Assert.Equal(default, actual); + Assert.Equal(0, consumed); + Assert.Equal(0, written); + } + } + + [Fact] + public void SingleByteConversion() + { + byte[] destinationByte = new byte[1]; + char[] destinationChar = new char[1]; + + for (int i = 0; i <= byte.MaxValue; i++) + { + byte expectedToLower = char.IsBetween((char)i, 'A', 'Z') ? (byte)(i - 'A' + 'a') : (byte)i; + byte expectedToUpper= char.IsBetween((char)i, 'a', 'z') ? 
(byte)(i + 'A' + 'a') : (byte)i; + + byte[] sourceByte = new byte[1] { (byte)i }; + + // byte -> byte + destinationByte[0] = default; + VerifySingleChar(Ascii.ToLower(sourceByte, destinationByte, out int consumed, out int written), i, expectedToLower, destinationByte[0], consumed, written); + destinationByte[0] = default; + VerifySingleChar(Ascii.ToUpper(sourceByte, destinationByte, out consumed, out written), i, expectedToUpper, destinationByte[0], consumed, written); + // byte -> char + destinationChar[0] = default; + VerifySingleChar(Ascii.ToLower(sourceByte, destinationChar, out consumed, out written), i, (char)expectedToLower, destinationChar[0], consumed, written); + destinationChar[0] = default; + VerifySingleChar(Ascii.ToUpper(sourceByte, destinationChar, out consumed, out written), i, (char)expectedToUpper, destinationChar[0], consumed, written); + } + } + + [Fact] + public void SingleCharConversion() + { + char[] sourceChar = new char[1], destinationChar = new char[1]; // this test is "optimized" as it performs a LOT of iterations + byte[] destinationByte = new byte[1]; + + for (int i = 0; i <= char.MaxValue; i++) + { + char expectedLower = char.IsBetween((char)i, 'A', 'Z') ? (char)(i - 'A' + 'a') : (char)i; + char expectedUpper = char.IsBetween((char)i, 'a', 'z') ? 
(char)(i + 'A' + 'a') : (char)i; + + sourceChar[0] = (char)i; + + // char -> char + destinationChar[0] = default; + VerifySingleChar(Ascii.ToLower(sourceChar, destinationChar, out int consumed, out int written), i, expectedLower, destinationChar[0], consumed, written); + destinationChar[0] = default; + VerifySingleChar(Ascii.ToUpper(sourceChar, destinationChar, out consumed, out written), i, expectedUpper, destinationChar[0], consumed, written); + // char -> byte + destinationByte[0] = default; + VerifySingleChar(Ascii.ToLower(sourceChar, destinationByte, out consumed, out written), i, (byte)expectedLower, destinationByte[0], consumed, written); + destinationByte[0] = default; + VerifySingleChar(Ascii.ToUpper(sourceChar, destinationByte, out consumed, out written), i, (byte)expectedUpper, destinationByte[0], consumed, written); + } + } + + [Theory] + [InlineData("\u00C0bCDe")] // U+00C0 is not ASCII + [InlineData("\u00E0bCDe")] // U+00E0 is not ASCII + public void InvalidCharacters(string sourceChars) + { + char[] destinationChars = new char[sourceChars.Length]; + byte[] sourceBytes = System.Text.Encoding.ASCII.GetBytes(sourceChars); + byte[] destinationBytes = new byte[sourceBytes.Length]; + + // char => char + Verify(Ascii.ToLower(sourceChars, destinationChars, out int consumed, out int written), consumed, written); + Verify(Ascii.ToUpper(sourceChars, destinationChars, out consumed, out written), consumed, written); + // char => byte + Verify(Ascii.ToLower(sourceChars, destinationBytes, out consumed, out written), consumed, written); + Verify(Ascii.ToUpper(sourceChars, destinationBytes, out consumed, out written), consumed, written); + // byte => byte + Verify(Ascii.ToLower(sourceBytes, destinationBytes, out consumed, out written), consumed, written); + Verify(Ascii.ToUpper(sourceBytes, destinationBytes, out consumed, out written), consumed, written); + // byte => char + Verify(Ascii.ToLower(sourceBytes, destinationChars, out consumed, out written), consumed, 
written); + Verify(Ascii.ToUpper(sourceBytes, destinationChars, out consumed, out written), consumed, written); + + static void Verify(OperationStatus status, int consumed, int written) + { + Assert.Equal(OperationStatus.InvalidData, status); + Assert.Equal(0, consumed); + Assert.Equal(0, written); + } + } + + [Theory] + [InlineData("", "", "")] + [InlineData("Hello", "hello", "HELLO")] + [InlineData("\rHello\n", "\rhello\n", "\rHELLO\n")] + [InlineData("\0xyz\0", "\0xyz\0", "\0XYZ\0")] + [InlineData("\0XYZ\0", "\0xyz\0", "\0XYZ\0")] + [InlineData("AbCdEFgHIJkLmNoPQRStUVwXyZ", "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ")] // should hit vectorized code path + public void MultipleValidCharacterConversion(string sourceChars, string expectedLowerChars, string expectedUpperChars) + { + Assert.Equal(sourceChars.Length, expectedLowerChars.Length); + Assert.Equal(expectedLowerChars.Length, expectedUpperChars.Length); + + byte[] sourceBytes = System.Text.Encoding.ASCII.GetBytes(sourceChars); + byte[] expectedLowerBytes = System.Text.Encoding.ASCII.GetBytes(expectedLowerChars); + byte[] expectedUpperBytes = System.Text.Encoding.ASCII.GetBytes(expectedUpperChars); + char[] destinationChars = new char[expectedLowerChars.Length]; + byte[] destinationBytes = new byte[expectedLowerChars.Length]; + + // char -> char + Verify(Ascii.ToLower(sourceChars, destinationChars, out int consumed, out int written), expectedLowerChars, destinationChars, consumed, written); + Verify(Ascii.ToUpper(sourceChars, destinationChars, out consumed, out written), expectedUpperChars, destinationChars, consumed, written); + // char -> byte + Verify(Ascii.ToLower(sourceChars, destinationBytes, out consumed, out written), expectedLowerBytes, destinationBytes, consumed, written); + Verify(Ascii.ToUpper(sourceChars, destinationBytes, out consumed, out written), expectedUpperBytes, destinationBytes, consumed, written); + // byte -> byte + Verify(Ascii.ToLower(sourceBytes, destinationBytes, out 
consumed, out written), expectedLowerBytes, destinationBytes, consumed, written); + Verify(Ascii.ToUpper(sourceBytes, destinationBytes, out consumed, out written), expectedUpperBytes, destinationBytes, consumed, written); + // byte -> char + Verify(Ascii.ToLower(sourceBytes, destinationChars, out consumed, out written), expectedLowerChars, destinationChars, consumed, written); + Verify(Ascii.ToUpper(sourceBytes, destinationChars, out consumed, out written), expectedUpperChars, destinationChars, consumed, written); + + static void Verify(OperationStatus status, ReadOnlySpan expected, ReadOnlySpan actual, int consumed, int written) + { + Assert.Equal(OperationStatus.Done, status); + Assert.Equal(expected.Length, consumed); + Assert.Equal(expected.Length, written); + Assert.Equal(expected.ToArray(), actual.ToArray()); + } + } + + [Theory] + [InlineData("Hello", 4, "hell", "HELL")] + [InlineData(" AbC ", 3, " ab", " AB")] + public void DestinationTooSmall(string sourceChars, int destinationSize, string expectedLowerChars, string expectedUpperChars) + { + Assert.NotEqual(sourceChars.Length, destinationSize); + Assert.Equal(destinationSize, expectedLowerChars.Length); + Assert.Equal(expectedLowerChars.Length, expectedUpperChars.Length); + + byte[] sourceBytes = System.Text.Encoding.ASCII.GetBytes(sourceChars); + byte[] expectedLowerBytes = System.Text.Encoding.ASCII.GetBytes(expectedLowerChars); + byte[] expectedUpperBytes = System.Text.Encoding.ASCII.GetBytes(expectedUpperChars); + char[] destinationChars = new char[destinationSize]; + byte[] destinationBytes = new byte[destinationSize]; + + // char -> char + Verify(Ascii.ToLower(sourceChars, destinationChars, out int consumed, out int written), expectedLowerChars, destinationChars, consumed, written); + Verify(Ascii.ToUpper(sourceChars, destinationChars, out consumed, out written), expectedUpperChars, destinationChars, consumed, written); + // char -> byte + Verify(Ascii.ToLower(sourceChars, destinationBytes, out 
consumed, out written), expectedLowerBytes, destinationBytes, consumed, written); + Verify(Ascii.ToUpper(sourceChars, destinationBytes, out consumed, out written), expectedUpperBytes, destinationBytes, consumed, written); + // byte -> byte + Verify(Ascii.ToLower(sourceBytes, destinationBytes, out consumed, out written), expectedLowerBytes, destinationBytes, consumed, written); + Verify(Ascii.ToUpper(sourceBytes, destinationBytes, out consumed, out written), expectedUpperBytes, destinationBytes, consumed, written); + // byte -> char + Verify(Ascii.ToLower(sourceBytes, destinationChars, out consumed, out written), expectedLowerChars, destinationChars, consumed, written); + Verify(Ascii.ToUpper(sourceBytes, destinationChars, out consumed, out written), expectedUpperChars, destinationChars, consumed, written); + + static void Verify(OperationStatus status, ReadOnlySpan expected, ReadOnlySpan actual, int consumed, int written) + { + Assert.Equal(OperationStatus.DestinationTooSmall, status); + Assert.Equal(actual.Length, consumed); + Assert.Equal(actual.Length, written); + Assert.Equal(expected.ToArray(), actual.ToArray()); + } + } + } +} diff --git a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj index 3351811c02d394..8a20516e5a50a1 100644 --- a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj +++ b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj @@ -13,6 +13,7 @@ + From 6e1ca323eb7a16ef0eacfd6baf3d04011a61548d Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Wed, 31 Aug 2022 11:39:58 +0200 Subject: [PATCH 20/46] implement the missing pieces for case conversions + fix the tests --- .../tests/Ascii/CaseConversionTests.cs | 47 +++++-- .../Buffers/Text/Ascii.CaseConversion.cs | 20 ++- .../src/System/Text/Unicode/Utf16Utility.cs | 29 ++++ .../src/System/Text/Unicode/Utf8Utility.cs | 130 ++++++++++++++++++ 4 files changed, 210 insertions(+), 16 deletions(-) diff --git 
a/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs b/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs index e654fb6c8e3ee5..442183247aa647 100644 --- a/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Collections.Generic; using System.Runtime.InteropServices; using Xunit; @@ -23,9 +24,9 @@ public void OverlappingBuffers_Throws() Assert.Throws(() => Ascii.ToUpper(byteBuffer.AsSpan(1, 3), byteBuffer.AsSpan(3, 5), out _, out _)); // byte -> char Assert.Throws(() => Ascii.ToLower(byteBuffer, MemoryMarshal.Cast(byteBuffer), out _, out _)); - Assert.Throws(() => Ascii.ToLower(byteBuffer, MemoryMarshal.Cast(byteBuffer).Slice(3, 5), out _, out _)); + Assert.Throws(() => Ascii.ToLower(byteBuffer, MemoryMarshal.Cast(byteBuffer).Slice(1, 3), out _, out _)); Assert.Throws(() => Ascii.ToUpper(byteBuffer, MemoryMarshal.Cast(byteBuffer), out _, out _)); - Assert.Throws(() => Ascii.ToUpper(byteBuffer, MemoryMarshal.Cast(byteBuffer).Slice(3, 5), out _, out _)); + Assert.Throws(() => Ascii.ToUpper(byteBuffer, MemoryMarshal.Cast(byteBuffer).Slice(1, 3), out _, out _)); // char -> char Assert.Throws(() => Ascii.ToLower(charBuffer, charBuffer, out _, out _)); Assert.Throws(() => Ascii.ToLower(charBuffer.AsSpan(1, 3), charBuffer.AsSpan(3, 5), out _, out _)); @@ -33,9 +34,9 @@ public void OverlappingBuffers_Throws() Assert.Throws(() => Ascii.ToUpper(charBuffer.AsSpan(1, 3), charBuffer.AsSpan(3, 5), out _, out _)); // char -> byte Assert.Throws(() => Ascii.ToLower(charBuffer, MemoryMarshal.Cast(charBuffer), out _, out _)); - Assert.Throws(() => Ascii.ToLower(charBuffer, MemoryMarshal.Cast(charBuffer).Slice(3, 5), out _, out _)); + Assert.Throws(() => Ascii.ToLower(charBuffer, MemoryMarshal.Cast(charBuffer).Slice(1, 
3), out _, out _)); Assert.Throws(() => Ascii.ToUpper(charBuffer, MemoryMarshal.Cast(charBuffer), out _, out _)); - Assert.Throws(() => Ascii.ToUpper(charBuffer, MemoryMarshal.Cast(charBuffer).Slice(3, 5), out _, out _)); + Assert.Throws(() => Ascii.ToUpper(charBuffer, MemoryMarshal.Cast(charBuffer).Slice(1, 3), out _, out _)); } private static void VerifySingleChar(OperationStatus status, int value, T expected, T actual, int consumed, int written) @@ -67,7 +68,7 @@ public void SingleByteConversion() for (int i = 0; i <= byte.MaxValue; i++) { byte expectedToLower = char.IsBetween((char)i, 'A', 'Z') ? (byte)(i - 'A' + 'a') : (byte)i; - byte expectedToUpper= char.IsBetween((char)i, 'a', 'z') ? (byte)(i + 'A' + 'a') : (byte)i; + byte expectedToUpper = char.IsBetween((char)i, 'a', 'z') ? (byte)(i + 'A' - 'a') : (byte)i; byte[] sourceByte = new byte[1] { (byte)i }; @@ -93,7 +94,7 @@ public void SingleCharConversion() for (int i = 0; i <= char.MaxValue; i++) { char expectedLower = char.IsBetween((char)i, 'A', 'Z') ? (char)(i - 'A' + 'a') : (char)i; - char expectedUpper = char.IsBetween((char)i, 'a', 'z') ? (char)(i + 'A' + 'a') : (char)i; + char expectedUpper = char.IsBetween((char)i, 'a', 'z') ? 
(char)(i + 'A' - 'a') : (char)i; sourceChar[0] = (char)i; @@ -119,6 +120,11 @@ public void InvalidCharacters(string sourceChars) byte[] sourceBytes = System.Text.Encoding.ASCII.GetBytes(sourceChars); byte[] destinationBytes = new byte[sourceBytes.Length]; + if (sourceBytes[0] <= MaxValidAsciiChar) + { + sourceBytes[0] = MaxValidAsciiChar + 1; // ensure the first byte is invalid (U+00C0 is mapped to valid ascii char by ASCII.GetBytes) + } + // char => char Verify(Ascii.ToLower(sourceChars, destinationChars, out int consumed, out int written), consumed, written); Verify(Ascii.ToUpper(sourceChars, destinationChars, out consumed, out written), consumed, written); @@ -140,13 +146,30 @@ static void Verify(OperationStatus status, int consumed, int written) } } + public static IEnumerable MultipleValidCharacterConversion_Arguments + { + get + { + yield return new object[] { "", "", "" }; + yield return new object[] { "Hello", "hello", "HELLO" }; + yield return new object[] { "\rHello\n", "\rhello\n", "\rHELLO\n" }; + yield return new object[] { "\0xyz\0", "\0xyz\0", "\0XYZ\0" }; + yield return new object[] { "\0XYZ\0", "\0xyz\0", "\0XYZ\0" }; + yield return new object[] { "AbCdEFgHIJkLmNoPQRStUVwXyZ", "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ" }; + + // exercise all possible code paths + for (int i = 1; i <= MaxValidAsciiChar; i++) + { + char expectedLower = char.IsBetween((char)i, 'A', 'Z') ? (char)(i - 'A' + 'a') : (char)i; + char expectedUpper = char.IsBetween((char)i, 'a', 'z') ? 
(char)(i + 'A' - 'a') : (char)i; + + yield return new object[] { new string((char)i, i), new string(expectedLower, i), new string(expectedUpper, i) }; + } + } + } + [Theory] - [InlineData("", "", "")] - [InlineData("Hello", "hello", "HELLO")] - [InlineData("\rHello\n", "\rhello\n", "\rHELLO\n")] - [InlineData("\0xyz\0", "\0xyz\0", "\0XYZ\0")] - [InlineData("\0XYZ\0", "\0xyz\0", "\0XYZ\0")] - [InlineData("AbCdEFgHIJkLmNoPQRStUVwXyZ", "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ")] // should hit vectorized code path + [MemberData(nameof(MultipleValidCharacterConversion_Arguments))] public void MultipleValidCharacterConversion(string sourceChars, string expectedLowerChars, string expectedUpperChars) { Assert.Equal(sourceChars.Length, expectedLowerChars.Length); diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs index ff29874305d039..9bcbb6a0118f8a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs @@ -51,7 +51,7 @@ private static unsafe OperationStatus ChangeCase(ReadOnlySp where TTo : unmanaged, IBinaryInteger where TCasing : struct { - if (typeof(TFrom) == typeof(TTo) && source.Overlaps(MemoryMarshal.Cast(destination))) + if ((typeof(TFrom) == typeof(TTo) || (Unsafe.SizeOf() * source.Length % Unsafe.SizeOf() == 0)) && source.Overlaps(MemoryMarshal.Cast(destination))) { throw new InvalidOperationException(SR.InvalidOperation_SpanOverlappedOperation); } @@ -197,7 +197,13 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD ulong nextBlockAsUInt64 = Unsafe.ReadUnaligned(&pSrc[i]); if (SourceIsAscii) { - throw new NotImplementedException(); + if (!Utf8Utility.AllBytesInUInt64AreAscii(nextBlockAsUInt64)) + { + goto Drain32; + } + nextBlockAsUInt64 = (ConversionIsToUpper) + ? 
Utf8Utility.ConvertAllAsciiBytesInUInt64ToUppercase(nextBlockAsUInt64) + : Utf8Utility.ConvertAllAsciiBytesInUInt64ToLowercase(nextBlockAsUInt64); } else { @@ -207,7 +213,7 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD } nextBlockAsUInt64 = (ConversionIsToUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt64ToUppercase(nextBlockAsUInt64) - : throw new NotImplementedException(); + : Utf16Utility.ConvertAllAsciiCharsInUInt64ToLowercase(nextBlockAsUInt64); } if (ConversionIsWidthPreserving) @@ -250,7 +256,13 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD uint nextBlockAsUInt32 = Unsafe.ReadUnaligned(&pSrc[i]); if (SourceIsAscii) { - throw new NotImplementedException(); + if (!Utf8Utility.AllBytesInUInt32AreAscii(nextBlockAsUInt32)) + { + goto DrainRemaining; + } + nextBlockAsUInt32 = (ConversionIsToUpper) + ? Utf8Utility.ConvertAllAsciiBytesInUInt32ToUppercase(nextBlockAsUInt32) + : Utf8Utility.ConvertAllAsciiBytesInUInt32ToLowercase(nextBlockAsUInt32); } else { diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs index 11fea69cd63f3e..0865acee66c756 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf16Utility.cs @@ -114,6 +114,35 @@ internal static ulong ConvertAllAsciiCharsInUInt64ToUppercase(ulong value) return value ^ mask; // bit flip lowercase letters [a-z] => [A-Z] } + /// + /// Given a UInt64 that represents four ASCII UTF-16 characters, returns the invariant + /// lowercase representation of those characters. Requires the input value to contain + /// four ASCII UTF-16 characters in machine endianness. + /// + /// + /// This is a branchless implementation. 
+ /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static ulong ConvertAllAsciiCharsInUInt64ToLowercase(ulong value) + { + // ASSUMPTION: Caller has validated that input value is ASCII. + Debug.Assert(AllCharsInUInt64AreAscii(value)); + + // the 0x80 bit of each word of 'lowerIndicator' will be set iff the word has value >= 'A' + ulong lowerIndicator = value + 0x0080_0080_0080_0080ul - 0x0041_0041_0041_0041ul; + + // the 0x80 bit of each word of 'upperIndicator' will be set iff the word has value > 'Z' + ulong upperIndicator = value + 0x0080_0080_0080_0080ul - 0x005B_005B_005B_005Bul; + + // the 0x80 bit of each word of 'combinedIndicator' will be set iff the word has value >= 'A' and <= 'Z' + ulong combinedIndicator = (lowerIndicator ^ upperIndicator); + + // the 0x20 bit of each word of 'mask' will be set iff the word has value >= 'A' and <= 'Z' + ulong mask = (combinedIndicator & 0x0080_0080_0080_0080ul) >> 2; + + return value ^ mask; // bit flip uppercase letters [A-Z] => [a-z] + } + /// /// Given a UInt32 that represents two ASCII UTF-16 characters, returns true iff /// the input contains one or more lowercase ASCII characters. diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs index d553441c77cdf2..4d46796d0ab8bf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -42,5 +43,134 @@ public static unsafe int GetIndexOfFirstInvalidUtf8Sequence(ReadOnlySpan u } } + + /// + /// Returns true iff the UInt32 represents four ASCII UTF-8 characters in machine endianness. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static bool AllBytesInUInt32AreAscii(uint value) => (value & ~0x7F7F_7F7Fu) == 0; + + /// + /// Returns true iff the UInt64 represents eight ASCII UTF-8 characters in machine endianness. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static bool AllBytesInUInt64AreAscii(ulong value) => (value & ~0x7F7F_7F7F_7F7F_7F7Ful) == 0; + + /// + /// Given a UInt32 that represents four ASCII UTF-8 characters, returns the invariant + /// lowercase representation of those characters. Requires the input value to contain + /// four ASCII UTF-8 characters in machine endianness. + /// + /// + /// This is a branchless implementation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static uint ConvertAllAsciiBytesInUInt32ToLowercase(uint value) + { + // ASSUMPTION: Caller has validated that input value is ASCII. 
+ Debug.Assert(AllBytesInUInt32AreAscii(value)); + + // the 0x80 bit of each byte of 'lowerIndicator' will be set iff the word has value >= 'A' + uint lowerIndicator = value + 0x8080_8080u - 0x4141_4141u; + + // the 0x80 bit of each byte of 'upperIndicator' will be set iff the word has value > 'Z' + uint upperIndicator = value + 0x8080_8080u - 0x5B5B_5B5Bu; + + // the 0x80 bit of each byte of 'combinedIndicator' will be set iff the word has value >= 'A' and <= 'Z' + uint combinedIndicator = (lowerIndicator ^ upperIndicator); + + // the 0x20 bit of each byte of 'mask' will be set iff the word has value >= 'A' and <= 'Z' + uint mask = (combinedIndicator & 0x8080_8080u) >> 2; + + return value ^ mask; // bit flip uppercase letters [A-Z] => [a-z] + } + + /// + /// Given a UInt32 that represents four ASCII UTF-8 characters, returns the invariant + /// uppercase representation of those characters. Requires the input value to contain + /// four ASCII UTF-8 characters in machine endianness. + /// + /// + /// This is a branchless implementation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static uint ConvertAllAsciiBytesInUInt32ToUppercase(uint value) + { + // Intrinsified in mono interpreter + // ASSUMPTION: Caller has validated that input value is ASCII. 
+ Debug.Assert(AllBytesInUInt32AreAscii(value)); + + // the 0x80 bit of each byte of 'lowerIndicator' will be set iff the word has value >= 'a' + uint lowerIndicator = value + 0x8080_8080u - 0x6161_6161u; + + // the 0x80 bit of each byte of 'upperIndicator' will be set iff the word has value > 'z' + uint upperIndicator = value + 0x8080_8080u - 0x7B7B_7B7Bu; + + // the 0x80 bit of each byte of 'combinedIndicator' will be set iff the word has value >= 'a' and <= 'z' + uint combinedIndicator = (lowerIndicator ^ upperIndicator); + + // the 0x20 bit of each byte of 'mask' will be set iff the word has value >= 'a' and <= 'z' + uint mask = (combinedIndicator & 0x8080_8080u) >> 2; + + return value ^ mask; // bit flip lowercase letters [a-z] => [A-Z] + } + + /// + /// Given a UInt64 that represents eight ASCII UTF-8 characters, returns the invariant + /// uppercase representation of those characters. Requires the input value to contain + /// eight ASCII UTF-8 characters in machine endianness. + /// + /// + /// This is a branchless implementation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static ulong ConvertAllAsciiBytesInUInt64ToUppercase(ulong value) + { + // ASSUMPTION: Caller has validated that input value is ASCII. 
+ Debug.Assert(AllBytesInUInt64AreAscii(value)); + + // the 0x80 bit of each byte of 'lowerIndicator' will be set iff the word has value >= 'a' + ulong lowerIndicator = value + 0x8080_8080_8080_8080ul - 0x6161_6161_6161_6161ul; + + // the 0x80 bit of each byte of 'upperIndicator' will be set iff the word has value > 'z' + ulong upperIndicator = value + 0x8080_8080_8080_8080ul - 0x7B7B_7B7B_7B7B_7B7Bul; + + // the 0x80 bit of each byte of 'combinedIndicator' will be set iff the word has value >= 'a' and <= 'z' + ulong combinedIndicator = (lowerIndicator ^ upperIndicator); + + // the 0x20 bit of each byte of 'mask' will be set iff the word has value >= 'a' and <= 'z' + ulong mask = (combinedIndicator & 0x8080_8080_8080_8080ul) >> 2; + + return value ^ mask; // bit flip lowercase letters [a-z] => [A-Z] + } + + /// + /// Given a UInt64 that represents eight ASCII UTF-8 characters, returns the invariant + /// lowercase representation of those characters. Requires the input value to contain + /// eight ASCII UTF-8 characters in machine endianness. + /// + /// + /// This is a branchless implementation. + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static ulong ConvertAllAsciiBytesInUInt64ToLowercase(ulong value) + { + // ASSUMPTION: Caller has validated that input value is ASCII. 
+ Debug.Assert(AllBytesInUInt64AreAscii(value)); + + // the 0x80 bit of each byte of 'lowerIndicator' will be set iff the word has value >= 'A' + ulong lowerIndicator = value + 0x8080_8080_8080_8080ul - 0x4141_4141_4141_4141ul; + + // the 0x80 bit of each byte of 'upperIndicator' will be set iff the word has value > 'Z' + ulong upperIndicator = value + 0x8080_8080_8080_8080ul - 0x5B5B_5B5B_5B5B_5B5Bul; + + // the 0x80 bit of each byte of 'combinedIndicator' will be set iff the word has value >= 'A' and <= 'Z' + ulong combinedIndicator = (lowerIndicator ^ upperIndicator); + + // the 0x20 bit of each byte of 'mask' will be set iff the word has value >= 'A' and <= 'Z' + ulong mask = (combinedIndicator & 0x8080_8080_8080_8080ul) >> 2; + + return value ^ mask; // bit flip uppercase letters [A-Z] => [a-z] + } } } From 4339af55311d04ed9e320c2563d20cc3ea52e86f Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Wed, 31 Aug 2022 15:16:32 +0200 Subject: [PATCH 21/46] implement TryToLowerInPlace/TryToUpperInPlace --- .../tests/Ascii/CaseConversionTests.cs | 68 ++++++++++++++----- .../Buffers/Text/Ascii.CaseConversion.cs | 30 ++++++++ .../System.Runtime/ref/System.Runtime.cs | 4 ++ 3 files changed, 84 insertions(+), 18 deletions(-) diff --git a/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs b/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs index 442183247aa647..6365f482d84818 100644 --- a/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. 
using System.Collections.Generic; +using System.Linq; using System.Runtime.InteropServices; using Xunit; @@ -126,24 +127,37 @@ public void InvalidCharacters(string sourceChars) } // char => char - Verify(Ascii.ToLower(sourceChars, destinationChars, out int consumed, out int written), consumed, written); - Verify(Ascii.ToUpper(sourceChars, destinationChars, out consumed, out written), consumed, written); + VerifyStatus(Ascii.ToLower(sourceChars, destinationChars, out int consumed, out int written), consumed, written); + VerifyStatus(Ascii.ToUpper(sourceChars, destinationChars, out consumed, out written), consumed, written); // char => byte - Verify(Ascii.ToLower(sourceChars, destinationBytes, out consumed, out written), consumed, written); - Verify(Ascii.ToUpper(sourceChars, destinationBytes, out consumed, out written), consumed, written); + VerifyStatus(Ascii.ToLower(sourceChars, destinationBytes, out consumed, out written), consumed, written); + VerifyStatus(Ascii.ToUpper(sourceChars, destinationBytes, out consumed, out written), consumed, written); // byte => byte - Verify(Ascii.ToLower(sourceBytes, destinationBytes, out consumed, out written), consumed, written); - Verify(Ascii.ToUpper(sourceBytes, destinationBytes, out consumed, out written), consumed, written); + VerifyStatus(Ascii.ToLower(sourceBytes, destinationBytes, out consumed, out written), consumed, written); + VerifyStatus(Ascii.ToUpper(sourceBytes, destinationBytes, out consumed, out written), consumed, written); // byte => char - Verify(Ascii.ToLower(sourceBytes, destinationChars, out consumed, out written), consumed, written); - Verify(Ascii.ToUpper(sourceBytes, destinationChars, out consumed, out written), consumed, written); + VerifyStatus(Ascii.ToLower(sourceBytes, destinationChars, out consumed, out written), consumed, written); + VerifyStatus(Ascii.ToUpper(sourceBytes, destinationChars, out consumed, out written), consumed, written); - static void Verify(OperationStatus status, int consumed, 
int written) + // Try(byte) + VerifyBool(Ascii.TryToLowerInPlace(sourceBytes, out int processed), processed); + VerifyBool(Ascii.TryToUpperInPlace(sourceBytes, out processed), processed); + // Try(char) + VerifyBool(Ascii.TryToLowerInPlace(sourceChars.ToCharArray(), out processed), processed); + VerifyBool(Ascii.TryToUpperInPlace(sourceChars.ToCharArray(), out processed), processed); + + static void VerifyStatus(OperationStatus status, int consumed, int written) { Assert.Equal(OperationStatus.InvalidData, status); Assert.Equal(0, consumed); Assert.Equal(0, written); } + + static void VerifyBool(bool result, int processed) + { + Assert.False(result); + Assert.Equal(0, processed); + } } public static IEnumerable MultipleValidCharacterConversion_Arguments @@ -182,25 +196,43 @@ public void MultipleValidCharacterConversion(string sourceChars, string expected byte[] destinationBytes = new byte[expectedLowerChars.Length]; // char -> char - Verify(Ascii.ToLower(sourceChars, destinationChars, out int consumed, out int written), expectedLowerChars, destinationChars, consumed, written); - Verify(Ascii.ToUpper(sourceChars, destinationChars, out consumed, out written), expectedUpperChars, destinationChars, consumed, written); + VerifyStatus(Ascii.ToLower(sourceChars, destinationChars, out int consumed, out int written), expectedLowerChars, destinationChars, consumed, written); + VerifyStatus(Ascii.ToUpper(sourceChars, destinationChars, out consumed, out written), expectedUpperChars, destinationChars, consumed, written); // char -> byte - Verify(Ascii.ToLower(sourceChars, destinationBytes, out consumed, out written), expectedLowerBytes, destinationBytes, consumed, written); - Verify(Ascii.ToUpper(sourceChars, destinationBytes, out consumed, out written), expectedUpperBytes, destinationBytes, consumed, written); + VerifyStatus(Ascii.ToLower(sourceChars, destinationBytes, out consumed, out written), expectedLowerBytes, destinationBytes, consumed, written); + 
VerifyStatus(Ascii.ToUpper(sourceChars, destinationBytes, out consumed, out written), expectedUpperBytes, destinationBytes, consumed, written); // byte -> byte - Verify(Ascii.ToLower(sourceBytes, destinationBytes, out consumed, out written), expectedLowerBytes, destinationBytes, consumed, written); - Verify(Ascii.ToUpper(sourceBytes, destinationBytes, out consumed, out written), expectedUpperBytes, destinationBytes, consumed, written); + VerifyStatus(Ascii.ToLower(sourceBytes, destinationBytes, out consumed, out written), expectedLowerBytes, destinationBytes, consumed, written); + VerifyStatus(Ascii.ToUpper(sourceBytes, destinationBytes, out consumed, out written), expectedUpperBytes, destinationBytes, consumed, written); // byte -> char - Verify(Ascii.ToLower(sourceBytes, destinationChars, out consumed, out written), expectedLowerChars, destinationChars, consumed, written); - Verify(Ascii.ToUpper(sourceBytes, destinationChars, out consumed, out written), expectedUpperChars, destinationChars, consumed, written); + VerifyStatus(Ascii.ToLower(sourceBytes, destinationChars, out consumed, out written), expectedLowerChars, destinationChars, consumed, written); + VerifyStatus(Ascii.ToUpper(sourceBytes, destinationChars, out consumed, out written), expectedUpperChars, destinationChars, consumed, written); - static void Verify(OperationStatus status, ReadOnlySpan expected, ReadOnlySpan actual, int consumed, int written) + // Try(byte) + byte[] sourceBytesCopy = sourceBytes.ToArray(); + VerifyBool(Ascii.TryToLowerInPlace(sourceBytesCopy, out int processed), processed, expectedLowerBytes, sourceBytesCopy); + sourceBytesCopy = sourceBytes.ToArray(); + VerifyBool(Ascii.TryToUpperInPlace(sourceBytesCopy, out processed), processed, expectedUpperBytes, sourceBytesCopy); + // Try(char) + char[] sourceCharsCopy = sourceChars.ToCharArray(); + VerifyBool(Ascii.TryToLowerInPlace(sourceCharsCopy, out processed), processed, expectedLowerChars.ToCharArray(), sourceCharsCopy); + 
sourceCharsCopy = sourceChars.ToCharArray(); + VerifyBool(Ascii.TryToUpperInPlace(sourceCharsCopy, out processed), processed, expectedUpperChars.ToCharArray(), sourceCharsCopy); + + static void VerifyStatus(OperationStatus status, ReadOnlySpan expected, ReadOnlySpan actual, int consumed, int written) { Assert.Equal(OperationStatus.Done, status); Assert.Equal(expected.Length, consumed); Assert.Equal(expected.Length, written); Assert.Equal(expected.ToArray(), actual.ToArray()); } + + static void VerifyBool(bool result, int processed, T[] expected, T[] actual) + { + Assert.True(result); + Assert.Equal(expected.Length, processed); + Assert.Equal(expected, actual); + } } [Theory]
diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs
index 9bcbb6a0118f8a..35e1f0a89ccfa3 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs
@@ -46,6 +46,22 @@ public static OperationStatus ToLower(ReadOnlySpan<char> source, Span<byte> dest
         public static OperationStatus ToLower(ReadOnlySpan<char> source, Span<byte> destination, out int charsConsumed, out int bytesWritten)
             => ChangeCase<ushort, byte, ToLowerConversion>(MemoryMarshal.Cast<char, ushort>(source), destination, out charsConsumed, out bytesWritten);
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool TryToLowerInPlace(Span<byte> value, out int bytesProcessed)
+            => TryChangeCase<byte, ToLowerConversion>(value, out bytesProcessed);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool TryToLowerInPlace(Span<char> value, out int charsProcessed)
+            => TryChangeCase<ushort, ToLowerConversion>(MemoryMarshal.Cast<char, ushort>(value), out charsProcessed);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool TryToUpperInPlace(Span<byte> value, out int bytesProcessed)
+            => TryChangeCase<byte, ToUpperConversion>(value, out bytesProcessed);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool TryToUpperInPlace(Span<char> value, out int charsProcessed)
+            => TryChangeCase<ushort, ToUpperConversion>(MemoryMarshal.Cast<char, ushort>(value), out charsProcessed);
+
         private static unsafe OperationStatus ChangeCase<TFrom, TTo, TCasing>(ReadOnlySpan<TFrom> source, Span<TTo> destination, out int sourceElementsConsumed, out int destinationElementsWritten)
             where TFrom : unmanaged, IBinaryInteger<TFrom>
             where TTo : unmanaged, IBinaryInteger<TTo>
@@ -82,6 +98,20 @@ private static unsafe OperationStatus ChangeCase<TFrom, TTo, TCasing>(ReadOnlySp
             }
         }
 
+        private static unsafe bool TryChangeCase<T, TCasing>(Span<T> buffer, out int elementsProcessed)
+            where T : unmanaged, IBinaryInteger<T>
+            where TCasing : struct
+        {
+            fixed (T* pBuffer = &MemoryMarshal.GetReference(buffer))
+            {
+                nuint numElementsActuallyConverted = ChangeCase<T, T, TCasing>(pBuffer, pBuffer, (nuint)buffer.Length);
+                Debug.Assert(numElementsActuallyConverted <= (nuint)buffer.Length);
+
+                elementsProcessed = (int)numElementsActuallyConverted;
+                return elementsProcessed == buffer.Length;
+            }
+        }
+
         private static unsafe nuint ChangeCase<TFrom, TTo, TCasing>(TFrom* pSrc, TTo* pDest, nuint elementCount)
             where TFrom : unmanaged, IBinaryInteger<TFrom>
             where TTo : unmanaged, IBinaryInteger<TTo>
diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs index 554f9d8581a3d7..4ea9fb4d9fb169 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ -7106,6 +7106,10 @@ public static class Ascii public static System.Buffers.OperationStatus ToUpper(System.ReadOnlySpan source, System.Span destination, out int bytesConsumed, out int charsWritten) { throw null; } public static System.Buffers.OperationStatus ToUpper(System.ReadOnlySpan source, System.Span destination, out int charsConsumed, out int bytesWritten) { throw null; } public static System.Buffers.OperationStatus ToUtf16(System.ReadOnlySpan source, System.Span destination, out int bytesConsumed, out int charsWritten) { throw null; } + public 
static bool TryToLowerInPlace(System.Span value, out int bytesProcessed) { throw null; } + public static bool TryToLowerInPlace(System.Span value, out int charsProcessed) { throw null; } + public static bool TryToUpperInPlace(System.Span value, out int bytesProcessed) { throw null; } + public static bool TryToUpperInPlace(System.Span value, out int charsProcessed) { throw null; } public static System.Range Trim(System.ReadOnlySpan value) { throw null; } public static System.Range Trim(System.ReadOnlySpan value) { throw null; } public static System.Range TrimEnd(System.ReadOnlySpan value) { throw null; } From cc3be10e32914f17b43cdf5084bd9b6a2f314af1 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Wed, 31 Aug 2022 16:17:42 +0200 Subject: [PATCH 22/46] implement Ascii.StartsWith* and EndsWith* methods --- .../tests/Ascii/StartsEndsWithTests.cs | 188 +++++++++ .../tests/System.Memory.Tests.csproj | 1 + .../System.Private.CoreLib.Shared.projitems | 1 + .../System/Buffers/Text/Ascii.Searching.cs | 384 ++++++++++++++++++ .../System.Runtime/ref/System.Runtime.cs | 12 + 5 files changed, 586 insertions(+) create mode 100644 src/libraries/System.Memory/tests/Ascii/StartsEndsWithTests.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs
diff --git a/src/libraries/System.Memory/tests/Ascii/StartsEndsWithTests.cs b/src/libraries/System.Memory/tests/Ascii/StartsEndsWithTests.cs
new file mode 100644
index 00000000000000..0d5bd41c1e465d
--- /dev/null
+++ b/src/libraries/System.Memory/tests/Ascii/StartsEndsWithTests.cs
@@ -0,0 +1,188 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.Linq;
+using System.Runtime.Intrinsics;
+using System.Text;
+using Xunit;
+
+namespace System.Buffers.Text.Tests
+{
+    public class StartsEndsWithTests
+    {
+        [Fact]
+        public void InvalidCharactersInValueThrows()
+        {
+            Assert.Throws<ArgumentException>(() => Ascii.StartsWith("aaaa"u8, "\u00C0")); // non-vectorized code path
+            Assert.Throws<ArgumentException>(() => Ascii.StartsWith("aaaaaaaaaaaaaaaaaaaaaaaaa"u8, "aaaaaaaaaaaaaaaaaaaaaaaa\u00C0")); // vectorized code path
+            Assert.Throws<ArgumentException>(() => Ascii.StartsWith("aaaa", new byte[] { 128 }));
+            Assert.Throws<ArgumentException>(() => Ascii.StartsWith(new string('a', 50), Enumerable.Repeat((byte)'a', 49).Concat(new byte[] { 128 }).ToArray()));
+            Assert.Throws<ArgumentException>(() => Ascii.StartsWithIgnoreCase("aaaa"u8, "\u00C0"));
+            Assert.Throws<ArgumentException>(() => Ascii.StartsWithIgnoreCase("aaaa", "\u00C0"));
+            Assert.Throws<ArgumentException>(() => Ascii.StartsWithIgnoreCase("aaaa"u8, new byte[] { 128 }));
+            Assert.Throws<ArgumentException>(() => Ascii.StartsWithIgnoreCase("aaaa", new byte[] { 128 }));
+
+            Assert.Throws<ArgumentException>(() => Ascii.EndsWith("aaaa"u8, "\u00C0")); // non-vectorized code path
+            Assert.Throws<ArgumentException>(() => Ascii.EndsWith("aaaaaaaaaaaaaaaaaaaaaaaaa"u8, "aaaaaaaaaaaaaaaaaaaaaaaa\u00C0")); // vectorized code path
+            Assert.Throws<ArgumentException>(() => Ascii.EndsWith("aaaa", new byte[] { 128 }));
+            Assert.Throws<ArgumentException>(() => Ascii.EndsWith(new string('a', 50), Enumerable.Repeat((byte)'a', 49).Concat(new byte[] { 128 }).ToArray()));
+            Assert.Throws<ArgumentException>(() => Ascii.EndsWithIgnoreCase("aaaa"u8, "\u00C0"));
+            Assert.Throws<ArgumentException>(() => Ascii.EndsWithIgnoreCase("aaaa", "\u00C0"));
+            Assert.Throws<ArgumentException>(() => Ascii.EndsWithIgnoreCase("aaaa"u8, new byte[] { 128 }));
+            Assert.Throws<ArgumentException>(() => Ascii.EndsWithIgnoreCase("aaaa", new byte[] { 128 }));
+        }
+
+        public static IEnumerable<object[]> ExactMatchFound_TestData
+        {
+            get
+            {
+                yield return new object[] { "test", "test" };
+                yield return new object[] { "test", "t" };
+                yield return new object[] { "test", "" };
+
+                for (int textLength = 1; textLength <= Vector128<byte>.Count * 4 + 1; textLength++)
+                {
+                    for (int valueLength = 0; valueLength <= textLength; valueLength++)
+                    {
+                        char ascii = (char)(textLength % 128);
+                        yield return new object[] { new string(ascii, textLength), new string(ascii, valueLength) };
+                    }
+                }
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(ExactMatchFound_TestData))]
+        public void MatchFound(string text, string value)
+        {
+            Assert.True(Ascii.StartsWith(text, Encoding.ASCII.GetBytes(value)));
+            Assert.True(Ascii.StartsWith(Encoding.ASCII.GetBytes(text), value));
+            Assert.True(Ascii.StartsWithIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value)));
+            Assert.True(Ascii.StartsWithIgnoreCase(text, value));
+            Assert.True(Ascii.StartsWithIgnoreCase(Encoding.ASCII.GetBytes(text), value));
+            Assert.True(Ascii.StartsWithIgnoreCase(text, Encoding.ASCII.GetBytes(value)));
+
+            Assert.True(Ascii.EndsWith(text, Encoding.ASCII.GetBytes(value)));
+            Assert.True(Ascii.EndsWith(Encoding.ASCII.GetBytes(text), value));
+            Assert.True(Ascii.EndsWithIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value)));
+            Assert.True(Ascii.EndsWithIgnoreCase(text, value));
+            Assert.True(Ascii.EndsWithIgnoreCase(Encoding.ASCII.GetBytes(text), value));
+            Assert.True(Ascii.EndsWithIgnoreCase(text, Encoding.ASCII.GetBytes(value)));
+        }
+
+        public static IEnumerable<object[]> IgnoreCaseMatchFound_TestData
+        {
+            get
+            {
+                yield return new object[] { "test", "TEST" };
+                yield return new object[] { "test", "T" };
+                yield return new object[] { "test", "" };
+
+                for (int textLength = 1; textLength <= Vector128<byte>.Count * 4 + 1; textLength++)
+                {
+                    for (int valueLength = 0; valueLength <= textLength; valueLength++)
+                    {
+                        char t = (char)(textLength % 128);
+                        char v = char.IsAsciiLetterUpper(t) ? char.ToLower(t) : char.IsAsciiLetterLower(t) ? char.ToUpper(t) : t;
+                        yield return new object[] { new string(t, textLength), new string(v, valueLength) };
+                    }
+                }
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(IgnoreCaseMatchFound_TestData))]
+        public void IgnoreCaseMatchFound(string text, string value)
+        {
+            Assert.True(Ascii.StartsWithIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value)));
+            Assert.True(Ascii.StartsWithIgnoreCase(text, value));
+            Assert.True(Ascii.StartsWithIgnoreCase(Encoding.ASCII.GetBytes(text), value));
+            Assert.True(Ascii.StartsWithIgnoreCase(text, Encoding.ASCII.GetBytes(value)));
+
+            Assert.True(Ascii.EndsWithIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value)));
+            Assert.True(Ascii.EndsWithIgnoreCase(text, value));
+            Assert.True(Ascii.EndsWithIgnoreCase(Encoding.ASCII.GetBytes(text), value));
+            Assert.True(Ascii.EndsWithIgnoreCase(text, Encoding.ASCII.GetBytes(value)));
+        }
+
+        public static IEnumerable<object[]> ExactMatchNotFound_TestData
+        {
+            get
+            {
+                yield return new object[] { "test", "tesT" };
+                yield return new object[] { "test", "Test" };
+                yield return new object[] { "test", "T" };
+                yield return new object[] { "test", "!" };
+
+                for (int textLength = 1; textLength <= Vector128<byte>.Count * 4 + 1; textLength++)
+                {
+                    yield return new object[] { new string('a', textLength), new string('b', 1) };
+
+                    for (int valueLength = 1; valueLength <= textLength; valueLength++)
+                    {
+                        yield return new object[] { new string('a', textLength), string.Create(valueLength, valueLength / 2, (destination, index) =>
+                        {
+                            destination.Fill('a');
+                            destination[index] = 'b';
+                        })};
+                    }
+                }
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(ExactMatchNotFound_TestData))]
+        public void ExactMatchNotFound(string text, string value)
+        {
+            Assert.False(Ascii.StartsWith(text, Encoding.ASCII.GetBytes(value)));
+            Assert.False(Ascii.StartsWith(Encoding.ASCII.GetBytes(text), value));
+
+            Assert.False(Ascii.EndsWith(text, Encoding.ASCII.GetBytes(value)));
+            Assert.False(Ascii.EndsWith(Encoding.ASCII.GetBytes(text), value));
+        }
+
+        public static IEnumerable<object[]> IgnoreCaseMatchNotFound_TestData
+        {
+            get
+            {
+                yield return new object[] { "test", "tes#" };
+                yield return new object[] { "test", "T2st" };
+                yield return new object[] { "test", "1" };
+                yield return new object[] { "test", "#" };
+
+                for (int textLength = 1; textLength <= Vector128<byte>.Count * 4 + 1; textLength++)
+                {
+                    yield return new object[] { new string('a', textLength), new string('b', 1) };
+
+                    for (int valueLength = 1; valueLength <= textLength; valueLength++)
+                    {
+                        char t = (char)(textLength % 128);
+                        char v = (char)(t != 127 ? t + 1 : 126);
+
+                        yield return new object[] { new string(t, textLength), string.Create(valueLength, (t, v), (destination, chars) =>
+                        {
+                            destination.Fill(chars.t);
+                            destination[destination.Length / 2] = chars.v;
+                        })};
+                    }
+                }
+            }
+        }
+
+        [Theory]
+        [MemberData(nameof(IgnoreCaseMatchNotFound_TestData))]
+        public void IgnoreCaseMatchNotFound(string text, string value)
+        {
+            Assert.False(Ascii.StartsWithIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value)));
+            Assert.False(Ascii.StartsWithIgnoreCase(text, value));
+            Assert.False(Ascii.StartsWithIgnoreCase(Encoding.ASCII.GetBytes(text), value));
+            Assert.False(Ascii.StartsWithIgnoreCase(text, Encoding.ASCII.GetBytes(value)));
+
+            Assert.False(Ascii.EndsWithIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value)));
+            Assert.False(Ascii.EndsWithIgnoreCase(text, value));
+            Assert.False(Ascii.EndsWithIgnoreCase(Encoding.ASCII.GetBytes(text), value));
+            Assert.False(Ascii.EndsWithIgnoreCase(text, Encoding.ASCII.GetBytes(value)));
+        }
+    }
+}
diff --git a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj index 8a20516e5a50a1..07ec6a2adb83bb 100644 --- a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj +++ b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj @@ -16,6 +16,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 5c3779e99bc7c4..23bd397abaf7bb 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -116,6 +116,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs new file mode 100644 index 
00000000000000..97471fc0c37d65
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs
@@ -0,0 +1,384 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using System.Globalization;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using System.Text;
+using System.Text.Unicode;
+
+namespace System.Buffers.Text
+{
+    public static partial class Ascii
+    {
+        public static unsafe bool StartsWith(ReadOnlySpan<byte> text, ReadOnlySpan<char> value)
+            => value.IsEmpty || (text.Length >= value.Length && Map(Equals<char>(value, text.Slice(0, value.Length))));
+
+        public static unsafe bool EndsWith(ReadOnlySpan<byte> text, ReadOnlySpan<char> value)
+            => value.IsEmpty || (text.Length >= value.Length && Map(Equals<char>(value, text.Slice(text.Length - value.Length))));
+
+        public static unsafe bool StartsWith(ReadOnlySpan<char> text, ReadOnlySpan<byte> value)
+            => value.IsEmpty || (text.Length >= value.Length && Map(Equals<byte>(text.Slice(0, value.Length), value)));
+
+        public static unsafe bool EndsWith(ReadOnlySpan<char> text, ReadOnlySpan<byte> value)
+            => value.IsEmpty || (text.Length >= value.Length && Map(Equals<byte>(text.Slice(text.Length - value.Length), value)));
+
+        public static bool StartsWithIgnoreCase(ReadOnlySpan<byte> text, ReadOnlySpan<byte> value)
+            => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(0, value.Length), value)));
+
+        public static bool EndsWithIgnoreCase(ReadOnlySpan<byte> text, ReadOnlySpan<byte> value)
+            => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(text.Length - value.Length), value)));
+
+        // TODO adsitnik: discuss whether this overload should exist, as the only difference from ROS.StartsWith(ROS, StringComparison.OrdinalIgnoreCase)
+        // is that it throws an exception when non-ASCII characters are found in value
+        public static bool StartsWithIgnoreCase(ReadOnlySpan<char> text, ReadOnlySpan<char> value)
+        {
+            if (value.IsEmpty)
+            {
+                return true;
+            }
+            else if (!IsAscii(value))
+            {
+                ThrowNonAsciiFound();
+            }
+            else if (value.Length > text.Length)
+            {
+                return false;
+            }
+
+            return Ordinal.EqualsIgnoreCase(ref MemoryMarshal.GetReference(text), ref MemoryMarshal.GetReference(value), value.Length);
+        }
+
+        public static bool EndsWithIgnoreCase(ReadOnlySpan<char> text, ReadOnlySpan<char> value)
+        {
+            if (value.IsEmpty)
+            {
+                return true;
+            }
+            else if (!IsAscii(value))
+            {
+                ThrowNonAsciiFound();
+            }
+            else if (value.Length > text.Length)
+            {
+                return false;
+            }
+
+            return Ordinal.EqualsIgnoreCase(ref MemoryMarshal.GetReference(text.Slice(text.Length - value.Length)), ref MemoryMarshal.GetReference(value), value.Length);
+        }
+
+        public static unsafe bool StartsWithIgnoreCase(ReadOnlySpan<byte> text, ReadOnlySpan<char> value)
+            => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase<char>(value, text.Slice(0, value.Length))));
+
+        public static unsafe bool EndsWithIgnoreCase(ReadOnlySpan<byte> text, ReadOnlySpan<char> value)
+            => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase<char>(value, text.Slice(text.Length - value.Length))));
+
+        public static unsafe bool StartsWithIgnoreCase(ReadOnlySpan<char> text, ReadOnlySpan<byte> value)
+            => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase<byte>(text.Slice(0, value.Length), value)));
+
+        public static unsafe bool EndsWithIgnoreCase(ReadOnlySpan<char> text, ReadOnlySpan<byte> value)
+            => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase<byte>(text.Slice(text.Length - value.Length), value)));
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static bool Map(EqualsResult equalsResult)
+            => equalsResult switch
+            {
+                EqualsResult.NonAsciiFound => ThrowNonAsciiFound(),
+                EqualsResult.Match => true,
+                _ => false
+            };
+
+        [DoesNotReturn]
+        private static bool ThrowNonAsciiFound() => throw new ArgumentException("TODO adsitnik", "value");
+
+        private static EqualsResult Equals<TCheck>(ReadOnlySpan<char> chars, ReadOnlySpan<byte> bytes) where TCheck : struct
+        {
+            Debug.Assert(typeof(TCheck) == typeof(byte) || typeof(TCheck) == typeof(char));
+            Debug.Assert(chars.Length == bytes.Length);
+
+            if (!Vector128.IsHardwareAccelerated || chars.Length < Vector128<ushort>.Count)
+            {
+                for (int i = 0; i < chars.Length; i++)
+                {
+                    char c = chars[i];
+                    byte b = bytes[i];
+
+                    if (typeof(TCheck) == typeof(char))
+                    {
+                        if (!UnicodeUtility.IsAsciiCodePoint(c))
+                        {
+                            return EqualsResult.NonAsciiFound;
+                        }
+                    }
+                    else if (typeof(TCheck) == typeof(byte))
+                    {
+                        if (!UnicodeUtility.IsAsciiCodePoint(b))
+                        {
+                            return EqualsResult.NonAsciiFound;
+                        }
+                    }
+
+                    if (c != b)
+                    {
+                        return EqualsResult.NoMatch;
+                    }
+                }
+            }
+            else if (Vector256.IsHardwareAccelerated && chars.Length >= Vector256<ushort>.Count)
+            {
+                ref ushort currentCharsSearchSpace = ref Unsafe.As<char, ushort>(ref MemoryMarshal.GetReference(chars));
+                ref ushort oneVectorAwayFromCharsEnd = ref Unsafe.Add(ref currentCharsSearchSpace, chars.Length - Vector256<ushort>.Count);
+                ref byte currentBytesSearchSpace = ref MemoryMarshal.GetReference(bytes);
+                ref byte oneVectorAwayFromBytesEnd = ref Unsafe.Add(ref currentBytesSearchSpace, bytes.Length - Vector128<byte>.Count);
+
+                Vector128<byte> byteValues;
+                Vector256<ushort> charValues;
+
+                // Loop until either we've finished all elements or there's less than a vector's-worth remaining.
+                do
+                {
+                    charValues = Vector256.LoadUnsafe(ref currentCharsSearchSpace);
+                    byteValues = Vector128.LoadUnsafe(ref currentBytesSearchSpace);
+
+                    if (typeof(TCheck) == typeof(char))
+                    {
+                        if ((charValues & Vector256.Create((ushort)0xFF80)) != Vector256<ushort>.Zero) // any bit above 0x7F set => non-ASCII char (covers e.g. U+0100, whose bytes both have a clear top bit)
+                        {
+                            return EqualsResult.NonAsciiFound;
+                        }
+                    }
+                    else if (typeof(TCheck) == typeof(byte))
+                    {
+                        if (byteValues.ExtractMostSignificantBits() != 0)
+                        {
+                            return EqualsResult.NonAsciiFound;
+                        }
+                    }
+
+                    // it's OK to widen the bytes, it's NOT OK to narrow the chars (we could lose some information)
+                    if (Vector256.Equals(Widen(byteValues), charValues) != Vector256<ushort>.AllBitsSet)
+                    {
+                        return EqualsResult.NoMatch;
+                    }
+
+                    currentCharsSearchSpace = ref Unsafe.Add(ref currentCharsSearchSpace, Vector256<ushort>.Count);
+                    currentBytesSearchSpace = ref Unsafe.Add(ref currentBytesSearchSpace, Vector128<byte>.Count);
+                }
+                while (!Unsafe.IsAddressGreaterThan(ref currentCharsSearchSpace, ref oneVectorAwayFromCharsEnd));
+
+                // If any elements remain, process the last vector in the search space (it may overlap elements already checked).
+                if ((uint)chars.Length % Vector256<ushort>.Count != 0)
+                {
+                    charValues = Vector256.LoadUnsafe(ref oneVectorAwayFromCharsEnd);
+                    byteValues = Vector128.LoadUnsafe(ref oneVectorAwayFromBytesEnd);
+
+                    if (typeof(TCheck) == typeof(char))
+                    {
+                        if ((charValues & Vector256.Create((ushort)0xFF80)) != Vector256<ushort>.Zero) // any bit above 0x7F set => non-ASCII char
+                        {
+                            return EqualsResult.NonAsciiFound;
+                        }
+                    }
+                    else if (typeof(TCheck) == typeof(byte))
+                    {
+                        if (byteValues.ExtractMostSignificantBits() != 0)
+                        {
+                            return EqualsResult.NonAsciiFound;
+                        }
+                    }
+
+                    // it's OK to widen the bytes, it's NOT OK to narrow the chars (we could lose some information)
+                    if (Vector256.Equals(Widen(byteValues), charValues) != Vector256<ushort>.AllBitsSet)
+                    {
+                        return EqualsResult.NoMatch;
+                    }
+                }
+            }
+            else
+            {
+                ref ushort currentCharsSearchSpace = ref Unsafe.As<char, ushort>(ref MemoryMarshal.GetReference(chars));
+                ref ushort oneVectorAwayFromCharsEnd = ref Unsafe.Add(ref currentCharsSearchSpace, chars.Length - Vector128<ushort>.Count);
+                ref byte currentBytesSearchSpace = ref MemoryMarshal.GetReference(bytes);
+                ref byte oneVectorAwayFromBytesEnd = ref Unsafe.Add(ref currentBytesSearchSpace, bytes.Length - Vector64<byte>.Count);
+
+                Vector64<byte> byteValues;
+                Vector128<ushort> charValues;
+
+                // Loop until either we've finished all elements or there's less than a vector's-worth remaining.
+                do
+                {
+                    charValues = Vector128.LoadUnsafe(ref currentCharsSearchSpace);
+                    byteValues = Vector64.LoadUnsafe(ref currentBytesSearchSpace);
+
+                    if (typeof(TCheck) == typeof(char))
+                    {
+                        if (ASCIIUtility.VectorContainsNonAsciiChar(charValues))
+                        {
+                            return EqualsResult.NonAsciiFound;
+                        }
+                    }
+                    else if (typeof(TCheck) == typeof(byte))
+                    {
+                        if (VectorContainsNonAsciiChar(byteValues))
+                        {
+                            return EqualsResult.NonAsciiFound;
+                        }
+                    }
+
+                    // it's OK to widen the bytes, it's NOT OK to narrow the chars (we could lose some information)
+                    if (Vector128.Equals(Widen(byteValues), charValues) != Vector128<ushort>.AllBitsSet)
+                    {
+                        return EqualsResult.NoMatch;
+                    }
+
+                    currentCharsSearchSpace = ref Unsafe.Add(ref currentCharsSearchSpace, Vector128<ushort>.Count);
+                    currentBytesSearchSpace = ref Unsafe.Add(ref currentBytesSearchSpace, Vector64<byte>.Count);
+                }
+                while (!Unsafe.IsAddressGreaterThan(ref currentCharsSearchSpace, ref oneVectorAwayFromCharsEnd));
+
+                // If any elements remain, process the last vector in the search space (it may overlap elements already checked).
+                if ((uint)chars.Length % Vector128<ushort>.Count != 0)
+                {
+                    charValues = Vector128.LoadUnsafe(ref oneVectorAwayFromCharsEnd);
+                    byteValues = Vector64.LoadUnsafe(ref oneVectorAwayFromBytesEnd);
+
+                    if (typeof(TCheck) == typeof(char))
+                    {
+                        if (ASCIIUtility.VectorContainsNonAsciiChar(charValues))
+                        {
+                            return EqualsResult.NonAsciiFound;
+                        }
+                    }
+                    else if (typeof(TCheck) == typeof(byte))
+                    {
+                        if (VectorContainsNonAsciiChar(byteValues))
+                        {
+                            return EqualsResult.NonAsciiFound;
+                        }
+                    }
+
+                    // it's OK to widen the bytes, it's NOT OK to narrow the chars (we could lose some information)
+                    if (Vector128.Equals(Widen(byteValues), charValues) != Vector128<ushort>.AllBitsSet)
+                    {
+                        return EqualsResult.NoMatch;
+                    }
+                }
+            }
+
+            return EqualsResult.Match;
+        }
+
+        private static EqualsResult EqualsIgnoreCase<TCheck>(ReadOnlySpan<char> chars, ReadOnlySpan<byte> bytes) where TCheck : struct
+        {
+            Debug.Assert(chars.Length == bytes.Length);
+
+            for (int i = 0; i < chars.Length; i++)
+            {
+                uint valueA = chars[i];
+                uint valueB = bytes[i];
+
+                if (typeof(TCheck) == typeof(char))
+                {
+                    if (!UnicodeUtility.IsAsciiCodePoint(valueA))
+                    {
+                        return EqualsResult.NonAsciiFound;
+                    }
+                }
+                else if (typeof(TCheck) == typeof(byte))
+                {
+                    if (!UnicodeUtility.IsAsciiCodePoint(valueB))
+                    {
+                        return EqualsResult.NonAsciiFound;
+                    }
+                }
+
+                if (valueA == valueB)
+                {
+                    continue; // exact match
+                }
+
+                valueA |= 0x20u;
+                if ((uint)(valueA - 'a') > (uint)('z' - 'a'))
+                {
+                    return EqualsResult.NoMatch; // not exact match, and first input isn't in [A-Za-z]
+                }
+
+                if (valueA != (valueB | 0x20u))
+                {
+                    return EqualsResult.NoMatch;
+                }
+            }
+
+            return EqualsResult.Match;
+        }
+
+        private static EqualsResult EqualsIgnoreCase(ReadOnlySpan<byte> text, ReadOnlySpan<byte> value)
+        {
+            Debug.Assert(text.Length == value.Length);
+
+            for (int i = 0; i < text.Length; i++)
+            {
+                uint valueA = text[i];
+                uint valueB = value[i];
+
+                if (!UnicodeUtility.IsAsciiCodePoint(valueB))
+                {
+                    return EqualsResult.NonAsciiFound; // value must not contain non-ASCII characters
+                }
+
+                if (valueA == valueB)
+                {
+                    continue; // exact match
+                }
+
+                valueA |= 0x20u;
+                if ((uint)(valueA - 'a') > (uint)('z' - 'a'))
+                {
+                    return EqualsResult.NoMatch; // not exact match, and first input isn't in [A-Za-z]
+                }
+
+                if (valueA != (valueB | 0x20u))
+                {
+                    return EqualsResult.NoMatch;
+                }
+            }
+
+            return EqualsResult.Match;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector128<ushort> Widen(Vector64<byte> bytes)
+        {
+            if (AdvSimd.IsSupported)
+            {
+                return AdvSimd.ZeroExtendWideningLower(bytes);
+            }
+            else
+            {
+                (Vector64<ushort> lower, Vector64<ushort> upper) = Vector64.Widen(bytes);
+                return Vector128.Create(lower, upper);
+            }
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector256<ushort> Widen(Vector128<byte> bytes)
+        {
+            (Vector128<ushort> lower, Vector128<ushort> upper) = Vector128.Widen(bytes);
+            return Vector256.Create(lower, upper);
+        }
+
+        private static bool VectorContainsNonAsciiChar(Vector64<byte> bytes)
+            => !Utf8Utility.AllBytesInUInt64AreAscii(bytes.AsUInt64().ToScalar());
+
+        private enum EqualsResult
+        {
+            NoMatch,
+            Match,
+            NonAsciiFound
+        }
+    }
+}
diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs index 4ea9fb4d9fb169..a521464a349cab 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ -7097,6 +7097,18 @@ public static class Ascii public static int GetIndexOfFirstNonAsciiChar(System.ReadOnlySpan buffer) { throw null; } public static bool IsAscii(System.ReadOnlySpan value) { throw null; } public static bool IsAscii(System.ReadOnlySpan value) { throw null; } + public static bool EndsWith(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static bool EndsWith(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static bool EndsWithIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static bool 
EndsWithIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static bool EndsWithIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static bool EndsWithIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static bool StartsWith(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static bool StartsWith(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static bool StartsWithIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static bool StartsWithIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static bool StartsWithIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static bool StartsWithIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } public static System.Buffers.OperationStatus ToLower(System.ReadOnlySpan source, System.Span destination, out int bytesConsumed, out int bytesWritten) { throw null; } public static System.Buffers.OperationStatus ToLower(System.ReadOnlySpan source, System.Span destination, out int charsConsumed, out int charsWritten) { throw null; } public static System.Buffers.OperationStatus ToLower(System.ReadOnlySpan source, System.Span destination, out int bytesConsumed, out int charsWritten) { throw null; } From ad4d90b26c8fa38a08e653e6993c6be2f876edd7 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Thu, 1 Sep 2022 16:42:43 +0200 Subject: [PATCH 23/46] implement Ascii.Equals* methods --- .../System.Memory/tests/Ascii/EqualsTests.cs | 105 ++++++++++++++++++ .../System/Buffers/Text/Ascii.Searching.cs | 27 ++++- .../System.Runtime/ref/System.Runtime.cs | 4 + 3 files changed, 130 insertions(+), 6 deletions(-) create mode 100644 src/libraries/System.Memory/tests/Ascii/EqualsTests.cs diff --git 
a/src/libraries/System.Memory/tests/Ascii/EqualsTests.cs b/src/libraries/System.Memory/tests/Ascii/EqualsTests.cs new file mode 100644 index 00000000000000..437ce7fe10b099 --- /dev/null +++ b/src/libraries/System.Memory/tests/Ascii/EqualsTests.cs @@ -0,0 +1,105 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Linq; +using System.Text; +using Xunit; + +namespace System.Buffers.Text.Tests +{ + public class EqualsTests + { + [Fact] + public void InvalidCharacters_DoesNotThrow() + { + Assert.False(Ascii.Equals(Enumerable.Repeat((byte)128, "valid".Length).ToArray(), "valid")); + Assert.False(Ascii.Equals("valid"u8, "aa\u00C0aa")); + + Assert.False(Ascii.EqualsIgnoreCase(new byte[] { 127 }, new byte[] { 128 })); + Assert.True(Ascii.EqualsIgnoreCase(new byte[] { 128 }, new byte[] { 128 })); + Assert.False(Ascii.EqualsIgnoreCase(new byte[] { 128 }, new byte[] { 127 })); + + Assert.False(Ascii.EqualsIgnoreCase(Enumerable.Repeat((byte)128, "valid".Length).ToArray(), "valid")); + Assert.False(Ascii.EqualsIgnoreCase("valid"u8, "aa\u00C0aa")); + } + + public static IEnumerable ExactMatch_TestData + { + get + { + yield return new object[] { "test", "test" }; + + for (char textLength = (char)0; textLength <= 127; textLength++) + { + yield return new object[] { new string(textLength, textLength), new string(textLength, textLength) }; + } + } + } + + [Theory] + [MemberData(nameof(ExactMatch_TestData))] + public void ExactMatchFound(string left, string right) + { + Assert.True(Ascii.Equals(Encoding.ASCII.GetBytes(left), right)); + + Assert.True(Ascii.EqualsIgnoreCase(Encoding.ASCII.GetBytes(left), Encoding.ASCII.GetBytes(right))); + Assert.True(Ascii.EqualsIgnoreCase(left, right)); + Assert.True(Ascii.EqualsIgnoreCase(Encoding.ASCII.GetBytes(left), right)); + } + + public static IEnumerable ExactMatchNotFound_TestData + { + get + { + yield 
return new object[] { "tak", "nie" }; + + for (char i = (char)0; i <= 127; i++) + { + yield return new object[] { new string(i, i), string.Create(i, i, (destination, iteration) => + { + destination.Fill((char)iteration) + destination[iteration / 2] = 128; + })}; + } + } + } + + [Theory] + [MemberData(nameof(ExactMatchNotFound_TestData))] + public void ExactMatchNotFound(string left, string right) + { + Assert.False(Ascii.Equals(Encoding.ASCII.GetBytes(left), right)); + + Assert.False(Ascii.EqualsIgnoreCase(Encoding.ASCII.GetBytes(left), Encoding.ASCII.GetBytes(right))); + Assert.False(Ascii.EqualsIgnoreCase(left, right)); + Assert.False(Ascii.EqualsIgnoreCase(Encoding.ASCII.GetBytes(left), right)); + } + + public static IEnumerable IgnoreCaseMatch_TestData + { + get + { + yield return new object[] { "aBc", "AbC" }; + + for (char i = (char)0; i <= 127; i++) + { + char left = (char)i; + char right = char.IsAsciiLetterUpper(left) ? char.ToLower(left) : char.IsAsciiLetterLower(left) ? char.ToUpper(left) : left; + yield return new object[] { new string(left, i), new string(right, i) }; + } + } + } + + [Theory] + [MemberData(nameof(IgnoreCaseMatch_TestData))] + public void IgnoreCaseMatchFound(string left, string right) + { + Assert.True(Ascii.Equals(Encoding.ASCII.GetBytes(left), right)); + + Assert.True(Ascii.EqualsIgnoreCase(Encoding.ASCII.GetBytes(left), Encoding.ASCII.GetBytes(right))); + Assert.True(Ascii.EqualsIgnoreCase(left, right)); + Assert.True(Ascii.EqualsIgnoreCase(Encoding.ASCII.GetBytes(left), right)); + } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs index 97471fc0c37d65..5702b3c15ca876 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs @@ -15,6 +15,18 @@ namespace System.Buffers.Text { public 
static partial class Ascii { + public static bool Equals(System.ReadOnlySpan left, ReadOnlySpan right) + => left.Length == right.Length && Equals(right, left) == EqualsResult.Match; + + public static bool EqualsIgnoreCase(ReadOnlySpan left, ReadOnlySpan right) + => left.Length == right.Length && EqualsIgnoreCase(left, right) == EqualsResult.Match; + + public static bool EqualsIgnoreCase(ReadOnlySpan left, ReadOnlySpan right) + => left.Length == right.Length && Ordinal.EqualsIgnoreCase(ref MemoryMarshal.GetReference(left), ref MemoryMarshal.GetReference(right), left.Length); + + public static bool EqualsIgnoreCase(ReadOnlySpan left, ReadOnlySpan right) + => left.Length == right.Length && Equals(right, left) == EqualsResult.Match; + public static unsafe bool StartsWith(ReadOnlySpan text, ReadOnlySpan value) => value.IsEmpty || (text.Length >= value.Length && Map(Equals(value, text.Slice(0, value.Length)))); @@ -28,10 +40,10 @@ public static unsafe bool EndsWith(ReadOnlySpan text, ReadOnlySpan v => value.IsEmpty || (text.Length >= value.Length && Map(Equals(text.Slice(text.Length - value.Length), value))); public static bool StartsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(0, value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(0, value.Length), value))); public static bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(text.Length - value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(text.Length - value.Length), value))); // TODO adsitnik: discuss whether this overload should exists, as the only difference with ROS.StartsWith(ROS, StringComparison.OrdinalIgnoreCase) // is throwing an exception for non-ASCII characters found in value @@ -97,7 +109,7 @@ private 
static bool Map(EqualsResult equalsResult) private static EqualsResult Equals(ReadOnlySpan chars, ReadOnlySpan bytes) where TCheck : struct { - Debug.Assert(typeof(TCheck) == typeof(byte) || typeof(TCheck) == typeof(char)); + Debug.Assert(typeof(TCheck) == typeof(byte) || typeof(TCheck) == typeof(char) || typeof(TCheck) == typeof(bool)); Debug.Assert(chars.Length == bytes.Length); if (!Vector128.IsHardwareAccelerated || chars.Length < Vector128.Count) @@ -316,7 +328,7 @@ private static EqualsResult EqualsIgnoreCase(ReadOnlySpan chars, R return EqualsResult.Match; } - private static EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) + private static EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) where TCheck : struct { Debug.Assert(text.Length == value.Length); @@ -325,9 +337,12 @@ private static EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySp uint valueA = text[i]; uint valueB = value[i]; - if (!UnicodeUtility.IsAsciiCodePoint(valueB)) + if (typeof(TCheck) == typeof(byte)) { - return EqualsResult.NonAsciiFound; // value must not contain non-ASCII characters + if (!UnicodeUtility.IsAsciiCodePoint(valueB)) + { + return EqualsResult.NonAsciiFound; // value must not contain non-ASCII characters + } } if (valueA == valueB) diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs index a521464a349cab..e27933e0bdebac 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ -7097,6 +7097,10 @@ public static class Ascii public static int GetIndexOfFirstNonAsciiChar(System.ReadOnlySpan buffer) { throw null; } public static bool IsAscii(System.ReadOnlySpan value) { throw null; } public static bool IsAscii(System.ReadOnlySpan value) { throw null; } + public static bool Equals(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } + public static bool EqualsIgnoreCase(System.ReadOnlySpan 
left, System.ReadOnlySpan right) { throw null; } + public static bool EqualsIgnoreCase(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } + public static bool EqualsIgnoreCase(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } public static bool EndsWith(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } public static bool EndsWith(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } public static bool EndsWithIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } From adc2f531b3e54af831b266a4ab65072b9ed71a4d Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Thu, 1 Sep 2022 17:15:44 +0200 Subject: [PATCH 24/46] use self-describing names at a cost of using pragma disable ;) --- .../System/Buffers/Text/Ascii.Searching.cs | 62 ++++++++++--------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs index 5702b3c15ca876..f82bee9f168845 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs @@ -11,39 +11,44 @@ using System.Text; using System.Text.Unicode; +#pragma warning disable SA1121 // Use built-in type alias +using SkipChecks = System.Boolean; +using CheckBytes = System.Byte; +using CheckChars = System.Char; + namespace System.Buffers.Text { public static partial class Ascii { - public static bool Equals(System.ReadOnlySpan left, ReadOnlySpan right) - => left.Length == right.Length && Equals(right, left) == EqualsResult.Match; + public static bool Equals(ReadOnlySpan left, ReadOnlySpan right) + => left.Length == right.Length && Equals(right, left) == EqualsResult.Match; public static bool EqualsIgnoreCase(ReadOnlySpan left, ReadOnlySpan right) - => left.Length == right.Length && 
EqualsIgnoreCase(left, right) == EqualsResult.Match; + => left.Length == right.Length && EqualsIgnoreCase(left, right) == EqualsResult.Match; public static bool EqualsIgnoreCase(ReadOnlySpan left, ReadOnlySpan right) => left.Length == right.Length && Ordinal.EqualsIgnoreCase(ref MemoryMarshal.GetReference(left), ref MemoryMarshal.GetReference(right), left.Length); public static bool EqualsIgnoreCase(ReadOnlySpan left, ReadOnlySpan right) - => left.Length == right.Length && Equals(right, left) == EqualsResult.Match; + => left.Length == right.Length && Equals(right, left) == EqualsResult.Match; public static unsafe bool StartsWith(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(Equals(value, text.Slice(0, value.Length)))); + => value.IsEmpty || (text.Length >= value.Length && Map(Equals(value, text.Slice(0, value.Length)))); public static unsafe bool EndsWith(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(Equals(value, text.Slice(text.Length - value.Length)))); + => value.IsEmpty || (text.Length >= value.Length && Map(Equals(value, text.Slice(text.Length - value.Length)))); public static unsafe bool StartsWith(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(Equals(text.Slice(0, value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(Equals(text.Slice(0, value.Length), value))); public static unsafe bool EndsWith(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(Equals(text.Slice(text.Length - value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(Equals(text.Slice(text.Length - value.Length), value))); public static bool StartsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(0, value.Length), value))); + => value.IsEmpty || 
(text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(0, value.Length), value))); public static bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(text.Length - value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(text.Length - value.Length), value))); // TODO adsitnik: discuss whether this overload should exists, as the only difference with ROS.StartsWith(ROS, StringComparison.OrdinalIgnoreCase) // is throwing an exception for non-ASCII characters found in value @@ -84,16 +89,16 @@ public static bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(value, text.Slice(0, value.Length)))); + => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(value, text.Slice(0, value.Length)))); public static unsafe bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(value, text.Slice(text.Length - value.Length)))); + => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(value, text.Slice(text.Length - value.Length)))); public static unsafe bool StartsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(0, value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(0, value.Length), value))); public static unsafe bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(text.Length - value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(text.Length - value.Length), value))); 
[MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool Map(EqualsResult equalsResult) @@ -109,7 +114,7 @@ private static bool Map(EqualsResult equalsResult) private static EqualsResult Equals(ReadOnlySpan chars, ReadOnlySpan bytes) where TCheck : struct { - Debug.Assert(typeof(TCheck) == typeof(byte) || typeof(TCheck) == typeof(char) || typeof(TCheck) == typeof(bool)); + Debug.Assert(typeof(TCheck) == typeof(CheckBytes) || typeof(TCheck) == typeof(CheckChars) || typeof(TCheck) == typeof(SkipChecks)); Debug.Assert(chars.Length == bytes.Length); if (!Vector128.IsHardwareAccelerated || chars.Length < Vector128.Count) @@ -119,14 +124,14 @@ private static EqualsResult Equals(ReadOnlySpan chars, ReadOnlySpa char c = chars[i]; byte b = bytes[i]; - if (typeof(TCheck) == typeof(char)) + if (typeof(TCheck) == typeof(CheckChars)) { if (!UnicodeUtility.IsAsciiCodePoint(c)) { return EqualsResult.NonAsciiFound; } } - else if (typeof(TCheck) == typeof(byte)) + else if (typeof(TCheck) == typeof(CheckBytes)) { if (!UnicodeUtility.IsAsciiCodePoint(b)) { @@ -156,14 +161,14 @@ private static EqualsResult Equals(ReadOnlySpan chars, ReadOnlySpa charValues = Vector256.LoadUnsafe(ref currentCharsSearchSpace); byteValues = Vector128.LoadUnsafe(ref currentBytesSearchSpace); - if (typeof(TCheck) == typeof(char)) + if (typeof(TCheck) == typeof(CheckChars)) { if (charValues.AsByte().ExtractMostSignificantBits() != 0) { return EqualsResult.NonAsciiFound; } } - else if (typeof(TCheck) == typeof(byte)) + else if (typeof(TCheck) == typeof(CheckBytes)) { if (byteValues.ExtractMostSignificantBits() != 0) { @@ -188,14 +193,14 @@ private static EqualsResult Equals(ReadOnlySpan chars, ReadOnlySpa charValues = Vector256.LoadUnsafe(ref oneVectorAwayFromCharsEnd); byteValues = Vector128.LoadUnsafe(ref oneVectorAwayFromBytesEnd); - if (typeof(TCheck) == typeof(char)) + if (typeof(TCheck) == typeof(CheckChars)) { if (charValues.AsByte().ExtractMostSignificantBits() != 0) { return 
EqualsResult.NonAsciiFound; } } - else if (typeof(TCheck) == typeof(byte)) + else if (typeof(TCheck) == typeof(CheckBytes)) { if (byteValues.ExtractMostSignificantBits() != 0) { @@ -226,14 +231,14 @@ private static EqualsResult Equals(ReadOnlySpan chars, ReadOnlySpa charValues = Vector128.LoadUnsafe(ref currentCharsSearchSpace); byteValues = Vector64.LoadUnsafe(ref currentBytesSearchSpace); - if (typeof(TCheck) == typeof(char)) + if (typeof(TCheck) == typeof(CheckChars)) { if (ASCIIUtility.VectorContainsNonAsciiChar(charValues)) { return EqualsResult.NonAsciiFound; } } - else if (typeof(TCheck) == typeof(byte)) + else if (typeof(TCheck) == typeof(CheckBytes)) { if (VectorContainsNonAsciiChar(byteValues)) { @@ -258,14 +263,14 @@ private static EqualsResult Equals(ReadOnlySpan chars, ReadOnlySpa charValues = Vector128.LoadUnsafe(ref oneVectorAwayFromCharsEnd); byteValues = Vector64.LoadUnsafe(ref oneVectorAwayFromBytesEnd); - if (typeof(TCheck) == typeof(char)) + if (typeof(TCheck) == typeof(CheckChars)) { if (ASCIIUtility.VectorContainsNonAsciiChar(charValues)) { return EqualsResult.NonAsciiFound; } } - else if (typeof(TCheck) == typeof(byte)) + else if (typeof(TCheck) == typeof(CheckBytes)) { if (VectorContainsNonAsciiChar(byteValues)) { @@ -293,14 +298,14 @@ private static EqualsResult EqualsIgnoreCase(ReadOnlySpan chars, R uint valueA = chars[i]; uint valueB = bytes[i]; - if (typeof(TCheck) == typeof(char)) + if (typeof(TCheck) == typeof(CheckChars)) { if (!UnicodeUtility.IsAsciiCodePoint(valueA)) { return EqualsResult.NonAsciiFound; } } - else if (typeof(TCheck) == typeof(byte)) + else if (typeof(TCheck) == typeof(CheckBytes)) { if (!UnicodeUtility.IsAsciiCodePoint(valueB)) { @@ -337,7 +342,7 @@ private static EqualsResult EqualsIgnoreCase(ReadOnlySpan text, Re uint valueA = text[i]; uint valueB = value[i]; - if (typeof(TCheck) == typeof(byte)) + if (typeof(TCheck) == typeof(CheckBytes)) { if (!UnicodeUtility.IsAsciiCodePoint(valueB)) { @@ -397,3 +402,4 @@ 
private enum EqualsResult } } } +#pragma warning restore SA1121 // Use built-in type alias From aad125ace97371d798b03c3204aea7a3f35b707e Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Fri, 2 Sep 2022 11:00:08 +0200 Subject: [PATCH 25/46] throw ArgumentException with meaningful error message --- .../System.Private.CoreLib/src/Resources/Strings.resx | 3 +++ .../src/System/Buffers/Text/Ascii.Searching.cs | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/Resources/Strings.resx b/src/libraries/System.Private.CoreLib/src/Resources/Strings.resx index 9b8eee748e7132..4a6e819086d186 100644 --- a/src/libraries/System.Private.CoreLib/src/Resources/Strings.resx +++ b/src/libraries/System.Private.CoreLib/src/Resources/Strings.resx @@ -247,6 +247,9 @@ Only one of the following binding flags can be set: BindingFlags.SetProperty, BindingFlags.PutDispProperty, BindingFlags.PutRefDispProperty. + + Text must not contain non-ASCII characters. + Cannot specify both CreateInstance and another access type. 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs index f82bee9f168845..c9696315cc07b7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs @@ -110,7 +110,7 @@ private static bool Map(EqualsResult equalsResult) }; [DoesNotReturn] - private static bool ThrowNonAsciiFound() => throw new ArgumentException("TODO adsitnik", "value"); + private static bool ThrowNonAsciiFound() => throw new ArgumentException(SR.Arg_ContainsNonAscii, "value"); private static EqualsResult Equals(ReadOnlySpan chars, ReadOnlySpan bytes) where TCheck : struct { From f8f98edfb4a3714e7d72969392bd41b0467d15e6 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Fri, 2 Sep 2022 16:17:48 +0200 Subject: [PATCH 26/46] rename files --- .../src/System.Private.CoreLib.Shared.projitems | 2 +- .../Buffers/Text/{Ascii.Searching.cs => Ascii.Comparison.cs} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/libraries/System.Private.CoreLib/src/System/Buffers/Text/{Ascii.Searching.cs => Ascii.Comparison.cs} (100%) diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 23bd397abaf7bb..f109fa29fb2fef 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -116,7 +116,7 @@ - + diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs similarity index 100% rename from src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs rename to 
src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs From 2b2bcd101c9f8c477efad2e99ed941146e7c99f1 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Fri, 2 Sep 2022 16:32:53 +0200 Subject: [PATCH 27/46] Implement IndexOf and LastIndexOf using narrowing and widening --- .../System.Memory/tests/Ascii/IndexOfTests.cs | 74 ++++++++++ .../tests/System.Memory.Tests.csproj | 1 + .../System.Private.CoreLib.Shared.projitems | 1 + .../System/Buffers/Text/Ascii.Searching.cs | 139 ++++++++++++++++++ .../System.Runtime/ref/System.Runtime.cs | 4 + 5 files changed, 219 insertions(+) create mode 100644 src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs diff --git a/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs b/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs new file mode 100644 index 00000000000000..218ed53654fb02 --- /dev/null +++ b/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs @@ -0,0 +1,74 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Collections.Generic; +using System.Linq; +using System.Text; +using Xunit; + +namespace System.Buffers.Text.Tests +{ + public class IndexOfTests + { + [Fact] + public void InvalidCharactersInValueThrows() + { + Assert.Throws(() => Ascii.IndexOf("aaaa"u8, "\u00C0")); + Assert.Throws(() => Ascii.IndexOf("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"u8, "aaaaaaaaaaaaa\u00C0")); + Assert.Throws(() => Ascii.IndexOf("aaaa", new byte[] { 128 })); + Assert.Throws(() => Ascii.IndexOf(new string('a', 50), Enumerable.Repeat((byte)'a', 20).Concat(new byte[] { 128 }).ToArray())); + + Assert.Throws(() => Ascii.LastIndexOf("aaaa"u8, "\u00C0")); + Assert.Throws(() => Ascii.LastIndexOf("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"u8, "aaaaaaaaaaaaa\u00C0")); + Assert.Throws(() => Ascii.LastIndexOf("aaaa", new byte[] { 128 })); + Assert.Throws(() => Ascii.LastIndexOf(new string('a', 50), Enumerable.Repeat((byte)'a', 20).Concat(new byte[] { 128 }).ToArray())); + } + + public static IEnumerable ExactMatchFound_TestData + { + get + { + yield return new object[] { "test", "", 0, 4 }; + yield return new object[] { "test", "test", 0, 0 }; + yield return new object[] { "abcdefghijk", "cde", 2, 2 }; + yield return new object[] { "abcdabcdabcd" , "abcd", 0, 8 }; + yield return new object[] { "test0test1test2test3test4test5test6", "test3test4test5test6", 15, 15 }; + yield return new object[] { "This is not a very complex test case", "complex test", 19, 19 }; + } + } + + [Theory] + [MemberData(nameof(ExactMatchFound_TestData))] + public void ExactMatchFound(string text, string value, int expectedFirstIndex, int expectedLastIndex) + { + Assert.Equal(expectedFirstIndex, Ascii.IndexOf(text, Encoding.ASCII.GetBytes(value))); + Assert.Equal(expectedFirstIndex, Ascii.IndexOf(Encoding.ASCII.GetBytes(text), value)); + + Assert.Equal(expectedLastIndex, Ascii.LastIndexOf(text, Encoding.ASCII.GetBytes(value))); + Assert.Equal(expectedLastIndex, Ascii.LastIndexOf(Encoding.ASCII.GetBytes(text), value)); + } + + 
public static IEnumerable ExactMatchNotFound_TestData + { + get + { + yield return new object[] { "test", "TEST" }; + yield return new object[] { "abcdefghijk", "xyz" }; + yield return new object[] { "abcdabcdabcd", "abcD" }; + yield return new object[] { "test0test1test2test3test4test5test6", "test8" }; + yield return new object[] { "This is not a very complex test case", "benchmark" }; + } + } + + [Theory] + [MemberData(nameof(ExactMatchNotFound_TestData))] + public void ExactMatchNotFound(string text, string value) + { + Assert.Equal(-1, Ascii.IndexOf(text, Encoding.ASCII.GetBytes(value))); + Assert.Equal(-1, Ascii.IndexOf(Encoding.ASCII.GetBytes(text), value)); + + Assert.Equal(-1, Ascii.LastIndexOf(text, Encoding.ASCII.GetBytes(value))); + Assert.Equal(-1, Ascii.LastIndexOf(Encoding.ASCII.GetBytes(text), value)); + } + } +} diff --git a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj index 07ec6a2adb83bb..02f5c5101c150b 100644 --- a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj +++ b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj @@ -16,6 +16,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index f109fa29fb2fef..595b1bb543a7b3 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -117,6 +117,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs new file mode 100644 index 00000000000000..9d244a5faed4ad --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs @@ -0,0 +1,139 @@ +// Licensed to the .NET Foundation under one or more 
agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Text; + +namespace System.Buffers.Text +{ + public static partial class Ascii + { + private const int StackallocBytesLimit = 512; + + public static int IndexOf(ReadOnlySpan text, ReadOnlySpan value) + => IndexOf(text, value); + + public static int LastIndexOf(ReadOnlySpan text, ReadOnlySpan value) + => LastIndexOf(text, value); + + public static int IndexOf(ReadOnlySpan text, ReadOnlySpan value) + => IndexOf(text, value); + + public static int LastIndexOf(ReadOnlySpan text, ReadOnlySpan value) + => LastIndexOf(text, value); + + private static int IndexOf(ReadOnlySpan text, ReadOnlySpan value) + where TText : unmanaged, IEquatable? + where TValue : unmanaged, IEquatable? + where TConverter : struct, IConverter + { + if (value.IsEmpty) + { + return 0; + } + else if (value.Length > text.Length) + { + return -1; + } + + TText[]? rented = null; + Span converted = value.Length <= (StackallocBytesLimit / Unsafe.SizeOf()) + ? stackalloc TText[StackallocBytesLimit / Unsafe.SizeOf()] + : (rented = ArrayPool.Shared.Rent(value.Length)); + + try + { + TConverter.Convert(value, converted); + + return MemoryExtensions.IndexOf(text, converted.Slice(0, value.Length)); + } + finally + { + if (rented is not null) + { + ArrayPool.Shared.Return(rented); + } + } + } + + private static int LastIndexOf(ReadOnlySpan text, ReadOnlySpan value) + where TText : unmanaged, IEquatable? + where TValue : unmanaged, IEquatable? + where TConverter : struct, IConverter + { + if (value.IsEmpty) + { + return text.Length; + } + else if (value.Length > text.Length) + { + return -1; + } + + TText[]? rented = null; + Span converted = value.Length <= (StackallocBytesLimit / Unsafe.SizeOf()) + ? 
stackalloc TText[StackallocBytesLimit / Unsafe.SizeOf()] + : (rented = ArrayPool.Shared.Rent(value.Length)); + + try + { + TConverter.Convert(value, converted); + + return MemoryExtensions.LastIndexOf(text, converted.Slice(0, value.Length)); + } + finally + { + if (rented is not null) + { + ArrayPool.Shared.Return(rented); + } + } + } + + private interface IConverter + where TFrom : unmanaged + where TTo : unmanaged + { + static abstract void Convert(ReadOnlySpan source, Span destination); + } + + private readonly struct NarrowUtf16ToAscii : IConverter + { + public static unsafe void Convert(ReadOnlySpan source, Span destination) + { + nuint asciiCharCount = 0; + + fixed (char* pValue = &MemoryMarshal.GetReference(source)) + fixed (byte* pNarrowed = &MemoryMarshal.GetReference(destination)) + { + asciiCharCount = ASCIIUtility.NarrowUtf16ToAscii(pValue, pNarrowed, (nuint)source.Length); + } + + if (asciiCharCount != (nuint)source.Length) + { + ThrowNonAsciiFound(); + } + } + } + + private readonly struct WidenAsciiToUtf16 : IConverter + { + public static unsafe void Convert(ReadOnlySpan source, Span destination) + { + nuint asciiCharCount = 0; + + fixed (byte* pValue = &MemoryMarshal.GetReference(source)) + fixed (char* pWidened = &MemoryMarshal.GetReference(destination)) + { + asciiCharCount = ASCIIUtility.WidenAsciiToUtf16(pValue, pWidened, (nuint)source.Length); + } + + if (asciiCharCount != (nuint)source.Length) + { + ThrowNonAsciiFound(); + } + } + } + } +} diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs index e27933e0bdebac..ce86500bfc8b07 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ -7095,6 +7095,10 @@ public static class Ascii public static System.Buffers.OperationStatus FromUtf16(System.ReadOnlySpan source, System.Span destination, out int charsConsumed, out int bytesWritten) { throw null; } public static int 
GetIndexOfFirstNonAsciiByte(System.ReadOnlySpan buffer) { throw null; } public static int GetIndexOfFirstNonAsciiChar(System.ReadOnlySpan buffer) { throw null; } + public static int IndexOf(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static int IndexOf(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static int LastIndexOf(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static int LastIndexOf(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } public static bool IsAscii(System.ReadOnlySpan value) { throw null; } public static bool IsAscii(System.ReadOnlySpan value) { throw null; } public static bool Equals(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } From 2d9105e5816205d5420e8de6e6ca7bfe33f5a961 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Fri, 2 Sep 2022 19:26:36 +0200 Subject: [PATCH 28/46] Implement IndexOfIgnoreCase and LastIndexOfIgnoreCase --- .../System.Memory/tests/Ascii/IndexOfTests.cs | 84 ++++++++ .../System/Buffers/Text/Ascii.Comparison.cs | 71 +++---- .../System/Buffers/Text/Ascii.Searching.cs | 181 ++++++++++++++++++ .../System.Runtime/ref/System.Runtime.cs | 8 + 4 files changed, 312 insertions(+), 32 deletions(-) diff --git a/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs b/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs index 218ed53654fb02..e560521a484cd5 100644 --- a/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs @@ -18,10 +18,28 @@ public void InvalidCharactersInValueThrows() Assert.Throws(() => Ascii.IndexOf("aaaa", new byte[] { 128 })); Assert.Throws(() => Ascii.IndexOf(new string('a', 50), Enumerable.Repeat((byte)'a', 20).Concat(new byte[] { 128 }).ToArray())); + Assert.Throws(() => Ascii.IndexOfIgnoreCase("aaaa"u8, new byte[] { 128 })); + Assert.Throws(() => Ascii.IndexOfIgnoreCase("aaaa"u8, new byte[] { (byte)'a', 128 
})); + Assert.Throws(() => Ascii.IndexOfIgnoreCase("aaaa", "\u00C0")); + Assert.Throws(() => Ascii.IndexOfIgnoreCase("aaaa", "a\u00C0")); + Assert.Throws(() => Ascii.IndexOfIgnoreCase("aaaa"u8, "\u00C0")); + Assert.Throws(() => Ascii.IndexOfIgnoreCase("aaaa"u8, "a\u00C0")); + Assert.Throws(() => Ascii.IndexOfIgnoreCase("aaaa", new byte[] { 128 })); + Assert.Throws(() => Ascii.IndexOfIgnoreCase("aaaa", new byte[] { (byte)'a', 128 })); + Assert.Throws(() => Ascii.LastIndexOf("aaaa"u8, "\u00C0")); Assert.Throws(() => Ascii.LastIndexOf("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"u8, "aaaaaaaaaaaaa\u00C0")); Assert.Throws(() => Ascii.LastIndexOf("aaaa", new byte[] { 128 })); Assert.Throws(() => Ascii.LastIndexOf(new string('a', 50), Enumerable.Repeat((byte)'a', 20).Concat(new byte[] { 128 }).ToArray())); + + Assert.Throws(() => Ascii.LastIndexOfIgnoreCase("aaaa"u8, new byte[] { 128 })); + Assert.Throws(() => Ascii.LastIndexOfIgnoreCase("aaaa"u8, new byte[] { (byte)'a', 128 })); + Assert.Throws(() => Ascii.LastIndexOfIgnoreCase("aaaa", "\u00C0")); + Assert.Throws(() => Ascii.LastIndexOfIgnoreCase("aaaa", "a\u00C0")); + Assert.Throws(() => Ascii.LastIndexOfIgnoreCase("aaaa"u8, "\u00C0")); + Assert.Throws(() => Ascii.LastIndexOfIgnoreCase("aaaa"u8, "a\u00C0")); + Assert.Throws(() => Ascii.LastIndexOfIgnoreCase("aaaa", new byte[] { 128 })); + Assert.Throws(() => Ascii.LastIndexOfIgnoreCase("aaaa", new byte[] { (byte)'a', 128 })); } public static IEnumerable ExactMatchFound_TestData @@ -44,8 +62,18 @@ public void ExactMatchFound(string text, string value, int expectedFirstIndex, i Assert.Equal(expectedFirstIndex, Ascii.IndexOf(text, Encoding.ASCII.GetBytes(value))); Assert.Equal(expectedFirstIndex, Ascii.IndexOf(Encoding.ASCII.GetBytes(text), value)); + Assert.Equal(expectedFirstIndex, Ascii.IndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); + Assert.Equal(expectedFirstIndex, Ascii.IndexOfIgnoreCase(text, value)); + Assert.Equal(expectedFirstIndex, 
Ascii.IndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), value)); + Assert.Equal(expectedFirstIndex, Ascii.IndexOfIgnoreCase(text, Encoding.ASCII.GetBytes(value))); + Assert.Equal(expectedLastIndex, Ascii.LastIndexOf(text, Encoding.ASCII.GetBytes(value))); Assert.Equal(expectedLastIndex, Ascii.LastIndexOf(Encoding.ASCII.GetBytes(text), value)); + + Assert.Equal(expectedLastIndex, Ascii.LastIndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); + Assert.Equal(expectedLastIndex, Ascii.LastIndexOfIgnoreCase(text, value)); + Assert.Equal(expectedLastIndex, Ascii.LastIndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), value)); + Assert.Equal(expectedLastIndex, Ascii.LastIndexOfIgnoreCase(text, Encoding.ASCII.GetBytes(value))); } public static IEnumerable ExactMatchNotFound_TestData @@ -70,5 +98,61 @@ public void ExactMatchNotFound(string text, string value) Assert.Equal(-1, Ascii.LastIndexOf(text, Encoding.ASCII.GetBytes(value))); Assert.Equal(-1, Ascii.LastIndexOf(Encoding.ASCII.GetBytes(text), value)); } + + public static IEnumerable IgnoreCaseMatchFound_TestData + { + get + { + yield return new object[] { "test", "", 0, 4 }; + yield return new object[] { "tESt", "TesT", 0, 0 }; + yield return new object[] { "abcdefghijk", "CdE", 2, 2 }; + yield return new object[] { "abcdabcdabcd", "ABcD", 0, 8 }; + yield return new object[] { "test0test1test2test3test4test5test6", "TeSt3tEst4TeSt5tEsT6", 15, 15 }; + yield return new object[] { "This is not a VERY COMPLEX test case", "COMplex tEst", 19, 19 }; + } + } + + [Theory] + [MemberData(nameof(IgnoreCaseMatchFound_TestData))] + public void IgnoreCaseMatchFound(string text, string value, int expectedFirstIndex, int expectedLastIndex) + { + Assert.Equal(expectedFirstIndex, Ascii.IndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); + Assert.Equal(expectedFirstIndex, Ascii.IndexOfIgnoreCase(text, value)); + Assert.Equal(expectedFirstIndex, 
Ascii.IndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), value)); + Assert.Equal(expectedFirstIndex, Ascii.IndexOfIgnoreCase(text, Encoding.ASCII.GetBytes(value))); + + Assert.Equal(expectedLastIndex, Ascii.LastIndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); + Assert.Equal(expectedLastIndex, Ascii.LastIndexOfIgnoreCase(text, value)); + Assert.Equal(expectedLastIndex, Ascii.LastIndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), value)); + Assert.Equal(expectedLastIndex, Ascii.LastIndexOfIgnoreCase(text, Encoding.ASCII.GetBytes(value))); + } + + public static IEnumerable IgnoreCaseMatchNotFound_TestData + { + get + { + yield return new object[] { "test", "!" }; + yield return new object[] { "tESt", "TosT" }; + yield return new object[] { "abcdefghijk", "xyz" }; + yield return new object[] { "abcdabcdabcd", "EfGh" }; + yield return new object[] { "test0test1test2test3test4test5test6", "tESt8" }; + yield return new object[] { "This is not a VERY COMPLEX test case", "SiMplE" }; + } + } + + [Theory] + [MemberData(nameof(IgnoreCaseMatchNotFound_TestData))] + public void IgnoreCaseMatchNotFound(string text, string value) + { + Assert.Equal(-1, Ascii.IndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); + Assert.Equal(-1, Ascii.IndexOfIgnoreCase(text, value)); + Assert.Equal(-1, Ascii.IndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), value)); + Assert.Equal(-1, Ascii.IndexOfIgnoreCase(text, Encoding.ASCII.GetBytes(value))); + + Assert.Equal(-1, Ascii.LastIndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); + Assert.Equal(-1, Ascii.LastIndexOfIgnoreCase(text, value)); + Assert.Equal(-1, Ascii.LastIndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), value)); + Assert.Equal(-1, Ascii.LastIndexOfIgnoreCase(text, Encoding.ASCII.GetBytes(value))); + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs 
b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs index c9696315cc07b7..e5392c9d2151c4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs @@ -53,40 +53,10 @@ public static bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan text, ReadOnlySpan value) - { - if (value.IsEmpty) - { - return true; - } - else if (!IsAscii(value)) - { - ThrowNonAsciiFound(); - } - else if (value.Length > text.Length) - { - return false; - } - - return Ordinal.EqualsIgnoreCase(ref MemoryMarshal.GetReference(text), ref MemoryMarshal.GetReference(value), value.Length); - } + => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(0, value.Length), value))); public static bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - { - if (value.IsEmpty) - { - return true; - } - else if (!IsAscii(value)) - { - ThrowNonAsciiFound(); - } - else if (value.Length > text.Length) - { - return false; - } - - return Ordinal.EqualsIgnoreCase(ref MemoryMarshal.GetReference(text.Slice(text.Length - value.Length)), ref MemoryMarshal.GetReference(value), value.Length); - } + => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(text.Length - value.Length), value))); public static unsafe bool StartsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(value, text.Slice(0, value.Length)))); @@ -333,6 +303,43 @@ private static EqualsResult EqualsIgnoreCase(ReadOnlySpan chars, R return EqualsResult.Match; } + private static EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) where TCheck : struct + { + Debug.Assert(text.Length == value.Length); + + for (int i = 0; i < text.Length; i++) + { + uint valueA = text[i]; + uint valueB = value[i]; + + if (typeof(TCheck) == typeof(CheckChars)) + { + if 
(!UnicodeUtility.IsAsciiCodePoint(valueB)) + { + return EqualsResult.NonAsciiFound; // value must not contain non-ASCII characters + } + } + + if (valueA == valueB) + { + continue; // exact match + } + + valueA |= 0x20u; + if ((uint)(valueA - 'a') > (uint)('z' - 'a')) + { + return EqualsResult.NoMatch; // not exact match, and first input isn't in [A-Za-z] + } + + if (valueA != (valueB | 0x20u)) + { + return EqualsResult.NoMatch; + } + } + + return EqualsResult.Match; + } + private static EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) where TCheck : struct { Debug.Assert(text.Length == value.Length); diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs index 9d244a5faed4ad..587249c626831b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs @@ -23,6 +23,30 @@ public static int IndexOf(ReadOnlySpan text, ReadOnlySpan value) public static int LastIndexOf(ReadOnlySpan text, ReadOnlySpan value) => LastIndexOf(text, value); + public static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) + => IndexOfIgnoreCase(text, value); + + public static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) + => IndexOfIgnoreCase(text, value); + + public static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) + => IndexOfIgnoreCase(text, value); + + public static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) + => IndexOfIgnoreCase(text, value); + + public static int LastIndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) + => LastIndexOfIgnoreCase(text, value); + + public static int LastIndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) + => LastIndexOfIgnoreCase(text, value); + + public static int LastIndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) + => 
LastIndexOfIgnoreCase(text, value); + + public static int LastIndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) + => LastIndexOfIgnoreCase(text, value); + private static int IndexOf(ReadOnlySpan text, ReadOnlySpan value) where TText : unmanaged, IEquatable? where TValue : unmanaged, IEquatable? @@ -91,6 +115,116 @@ private static int LastIndexOf(ReadOnlySpan te } } + private static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) + where TText : unmanaged, IEquatable? + where TValue : unmanaged, IEquatable? + where TComparer : struct, IComparer + { + if (value.IsEmpty) + { + return 0; + } + else if (value.Length > text.Length) + { + return -1; + } + + TValue firstValue = value[0]; + if (!TComparer.IsAscii(firstValue)) + { + ThrowNonAsciiFound(); + } + TText valueHead = Unsafe.As(ref firstValue); + TText valueHeadDifferentCase = TComparer.GetDifferentCaseOrSame(firstValue); + + int valueTailLength = value.Length - 1; + if (valueTailLength == 0) + { + return MemoryExtensions.IndexOfAny(text, valueHead, valueHeadDifferentCase); // for single-byte values use plain IndexOf + } + + int searchSpaceMinusValueTailLength = text.Length - valueTailLength; + int offset = 0; + int remainingSearchSpaceLength = searchSpaceMinusValueTailLength; + + while (remainingSearchSpaceLength > 0) + { + // Do a quick search for the first element of "value". + int relativeIndex = MemoryExtensions.IndexOfAny(text.Slice(offset), valueHead, valueHeadDifferentCase); + if (relativeIndex < 0) + break; + + remainingSearchSpaceLength -= relativeIndex; + offset += relativeIndex; + + if (remainingSearchSpaceLength <= 0) + break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. + + // Found the first element of "value". See if the tail matches. + if (Map(TComparer.EqualsIgnoreCase(text.Slice(offset + 1, value.Length - 1), value.Slice(1)))) // Map throws if non-ASCII char is found in value + return offset; // The tail matched. 
Return a successful find. + + remainingSearchSpaceLength--; + offset++; + } + + return -1; + } + + private static int LastIndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) + where TText : unmanaged, IEquatable? + where TValue : unmanaged, IEquatable? + where TComparer : struct, IComparer + { + if (value.IsEmpty) + { + return text.Length; + } + else if (value.Length > text.Length) + { + return -1; + } + + TValue firstValue = value[0]; + if (!TComparer.IsAscii(firstValue)) + { + ThrowNonAsciiFound(); + } + TText valueHead = Unsafe.As(ref firstValue); + TText valueHeadDifferentCase = TComparer.GetDifferentCaseOrSame(firstValue); + + int valueTailLength = value.Length - 1; + if (valueTailLength == 0) + { + return MemoryExtensions.LastIndexOfAny(text, valueHead, valueHeadDifferentCase); // for single-byte values use plain IndexOf + } + + int offset = 0; + + while (true) + { + int remainingSearchSpaceLength = text.Length - offset - valueTailLength; + if (remainingSearchSpaceLength <= 0) + break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. + + // Do a quick search for the first element of "value". + int relativeIndex = MemoryExtensions.LastIndexOfAny(text.Slice(0, remainingSearchSpaceLength), valueHead, valueHeadDifferentCase); + if (relativeIndex < 0) + break; + + // Found the first element of "value". See if the tail matches. + if (Map(TComparer.EqualsIgnoreCase(text.Slice(relativeIndex + 1, value.Length - 1), value.Slice(1)))) + return relativeIndex; // The tail matched. Return a successful find. + + offset += remainingSearchSpaceLength - relativeIndex; + } + + return -1; + } + + private static char GetDifferentCaseOrSame(char c) + => char.IsAsciiLetterLower(c) ? (char)(c + 'A' - 'a') : char.IsAsciiLetterUpper(c) ? 
(char)(c - 'A' + 'a') : c; + private interface IConverter where TFrom : unmanaged where TTo : unmanaged @@ -135,5 +269,52 @@ public static unsafe void Convert(ReadOnlySpan source, Span destinat } } } + + private interface IComparer + where TText : unmanaged + where TValue : unmanaged + { + static abstract bool IsAscii(TValue value); + + static abstract TText GetDifferentCaseOrSame(TValue value); + + static abstract EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySpan value); + } + + private readonly struct ByteByteComparer : IComparer + { + public static bool IsAscii(byte value) => value <= 127; + + public static byte GetDifferentCaseOrSame(byte value) => (byte)Ascii.GetDifferentCaseOrSame((char)value); + + public static EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) => EqualsIgnoreCase(text, value); + } + + private readonly struct ByteCharComparer : IComparer + { + public static bool IsAscii(char value) => value <= 127; + + public static byte GetDifferentCaseOrSame(char value) => (byte)Ascii.GetDifferentCaseOrSame(value); + + public static EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) => EqualsIgnoreCase(value, text); + } + + private readonly struct CharCharComparer : IComparer + { + public static bool IsAscii(char value) => value <= 127; + + public static char GetDifferentCaseOrSame(char value) => Ascii.GetDifferentCaseOrSame(value); + + public static EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) => EqualsIgnoreCase(text, value); + } + + private readonly struct CharByteComparer : IComparer + { + public static bool IsAscii(byte value) => value <= 127; + + public static char GetDifferentCaseOrSame(byte value) => Ascii.GetDifferentCaseOrSame((char)value); + + public static EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) => EqualsIgnoreCase(text, value); + } } } diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs 
b/src/libraries/System.Runtime/ref/System.Runtime.cs index ce86500bfc8b07..04650f56b2a495 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ -7097,8 +7097,16 @@ public static class Ascii public static int GetIndexOfFirstNonAsciiChar(System.ReadOnlySpan buffer) { throw null; } public static int IndexOf(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } public static int IndexOf(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static int IndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static int IndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static int IndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static int IndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } public static int LastIndexOf(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } public static int LastIndexOf(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static int LastIndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static int LastIndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static int LastIndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } + public static int LastIndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } public static bool IsAscii(System.ReadOnlySpan value) { throw null; } public static bool IsAscii(System.ReadOnlySpan value) { throw null; } public static bool Equals(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } From cc603c7d631ff4d9da037930b8fd72dd37cbd65e Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 5 Sep 2022 10:41:49 +0200 Subject: [PATCH 29/46] 
refactoring --- .../System/Buffers/Text/Ascii.Comparison.cs | 114 +++--------------- .../System/Buffers/Text/Ascii.Searching.cs | 96 +++++---------- 2 files changed, 46 insertions(+), 164 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs index e5392c9d2151c4..64cba8aa48c683 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs @@ -10,11 +10,13 @@ using System.Runtime.Intrinsics.Arm; using System.Text; using System.Text.Unicode; +using System.Numerics; #pragma warning disable SA1121 // Use built-in type alias using SkipChecks = System.Boolean; using CheckBytes = System.Byte; using CheckChars = System.Char; +using CheckNonAscii = System.Byte; namespace System.Buffers.Text { @@ -24,7 +26,7 @@ public static bool Equals(ReadOnlySpan left, ReadOnlySpan right) => left.Length == right.Length && Equals(right, left) == EqualsResult.Match; public static bool EqualsIgnoreCase(ReadOnlySpan left, ReadOnlySpan right) - => left.Length == right.Length && EqualsIgnoreCase(left, right) == EqualsResult.Match; + => left.Length == right.Length && SequenceEqualIgnoreCase(left, right) == EqualsResult.Match; public static bool EqualsIgnoreCase(ReadOnlySpan left, ReadOnlySpan right) => left.Length == right.Length && Ordinal.EqualsIgnoreCase(ref MemoryMarshal.GetReference(left), ref MemoryMarshal.GetReference(right), left.Length); @@ -45,30 +47,30 @@ public static unsafe bool EndsWith(ReadOnlySpan text, ReadOnlySpan v => value.IsEmpty || (text.Length >= value.Length && Map(Equals(text.Slice(text.Length - value.Length), value))); public static bool StartsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(0, value.Length), value))); + => 
value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(0, value.Length), value))); public static bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(text.Length - value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(text.Length - value.Length), value))); // TODO adsitnik: discuss whether this overload should exists, as the only difference with ROS.StartsWith(ROS, StringComparison.OrdinalIgnoreCase) // is throwing an exception for non-ASCII characters found in value public static bool StartsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(0, value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(0, value.Length), value))); public static bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(text.Length - value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(text.Length - value.Length), value))); public static unsafe bool StartsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(value, text.Slice(0, value.Length)))); + => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(0, value.Length), value))); public static unsafe bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(value, text.Slice(text.Length - value.Length)))); + => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(text.Length - value.Length), value))); public static unsafe 
bool StartsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(0, value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(0, value.Length), value))); public static unsafe bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(EqualsIgnoreCase(text.Slice(text.Length - value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(text.Length - value.Length), value))); [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool Map(EqualsResult equalsResult) @@ -259,101 +261,23 @@ private static EqualsResult Equals(ReadOnlySpan chars, ReadOnlySpa return EqualsResult.Match; } - private static EqualsResult EqualsIgnoreCase(ReadOnlySpan chars, ReadOnlySpan bytes) where TCheck : struct - { - Debug.Assert(chars.Length == bytes.Length); - - for (int i = 0; i < chars.Length; i++) - { - uint valueA = chars[i]; - uint valueB = bytes[i]; - - if (typeof(TCheck) == typeof(CheckChars)) - { - if (!UnicodeUtility.IsAsciiCodePoint(valueA)) - { - return EqualsResult.NonAsciiFound; - } - } - else if (typeof(TCheck) == typeof(CheckBytes)) - { - if (!UnicodeUtility.IsAsciiCodePoint(valueB)) - { - return EqualsResult.NonAsciiFound; - } - } - - if (valueA == valueB) - { - continue; // exact match - } - - valueA |= 0x20u; - if ((uint)(valueA - 'a') > (uint)('z' - 'a')) - { - return EqualsResult.NoMatch; // not exact match, and first input isn't in [A-Za-z] - } - - if (valueA != (valueB | 0x20u)) - { - return EqualsResult.NoMatch; - } - } - - return EqualsResult.Match; - } - - private static EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) where TCheck : struct + private static EqualsResult SequenceEqualIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) + where TText : unmanaged, INumberBase + 
where TValue : unmanaged, INumberBase + where TCheck : struct { Debug.Assert(text.Length == value.Length); for (int i = 0; i < text.Length; i++) { - uint valueA = text[i]; - uint valueB = value[i]; + uint valueA = uint.CreateTruncating(text[i]); + uint valueB = uint.CreateTruncating(value[i]); - if (typeof(TCheck) == typeof(CheckChars)) + if (typeof(TCheck) != typeof(SkipChecks)) { if (!UnicodeUtility.IsAsciiCodePoint(valueB)) { - return EqualsResult.NonAsciiFound; // value must not contain non-ASCII characters - } - } - - if (valueA == valueB) - { - continue; // exact match - } - - valueA |= 0x20u; - if ((uint)(valueA - 'a') > (uint)('z' - 'a')) - { - return EqualsResult.NoMatch; // not exact match, and first input isn't in [A-Za-z] - } - - if (valueA != (valueB | 0x20u)) - { - return EqualsResult.NoMatch; - } - } - - return EqualsResult.Match; - } - - private static EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) where TCheck : struct - { - Debug.Assert(text.Length == value.Length); - - for (int i = 0; i < text.Length; i++) - { - uint valueA = text[i]; - uint valueB = value[i]; - - if (typeof(TCheck) == typeof(CheckBytes)) - { - if (!UnicodeUtility.IsAsciiCodePoint(valueB)) - { - return EqualsResult.NonAsciiFound; // value must not contain non-ASCII characters + return EqualsResult.NonAsciiFound; } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs index 587249c626831b..92fdcf81094766 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs @@ -1,10 +1,14 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Text; +#pragma warning disable SA1121 // Use built-in type alias +using CheckNonAscii = System.Byte; + namespace System.Buffers.Text { public static partial class Ascii @@ -24,28 +28,28 @@ public static int LastIndexOf(ReadOnlySpan text, ReadOnlySpan value) => LastIndexOf(text, value); public static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => IndexOfIgnoreCase(text, value); + => IndexOfIgnoreCase(text, value); public static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => IndexOfIgnoreCase(text, value); + => IndexOfIgnoreCase(text, value); public static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => IndexOfIgnoreCase(text, value); + => IndexOfIgnoreCase(text, value); public static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => IndexOfIgnoreCase(text, value); + => IndexOfIgnoreCase(text, value); public static int LastIndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => LastIndexOfIgnoreCase(text, value); + => LastIndexOfIgnoreCase(text, value); public static int LastIndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => LastIndexOfIgnoreCase(text, value); + => LastIndexOfIgnoreCase(text, value); public static int LastIndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => LastIndexOfIgnoreCase(text, value); + => LastIndexOfIgnoreCase(text, value); public static int LastIndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => LastIndexOfIgnoreCase(text, value); + => LastIndexOfIgnoreCase(text, value); private static int IndexOf(ReadOnlySpan text, ReadOnlySpan value) where TText : unmanaged, IEquatable? @@ -115,10 +119,9 @@ private static int LastIndexOf(ReadOnlySpan te } } - private static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - where TText : unmanaged, IEquatable? - where TValue : unmanaged, IEquatable? 
- where TComparer : struct, IComparer + private static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) + where TText : unmanaged, IEquatable?, INumberBase + where TValue : unmanaged, IEquatable?, INumberBase { if (value.IsEmpty) { @@ -130,12 +133,13 @@ private static int IndexOfIgnoreCase(ReadOnlySpan(ref firstValue); - TText valueHeadDifferentCase = TComparer.GetDifferentCaseOrSame(firstValue); + char differentCase = GetDifferentCaseOrSame(Unsafe.As(ref firstValue)); + TText valueHeadDifferentCase = Unsafe.As(ref differentCase); int valueTailLength = value.Length - 1; if (valueTailLength == 0) @@ -161,7 +165,7 @@ private static int IndexOfIgnoreCase(ReadOnlySpan(text.Slice(offset + 1, value.Length - 1), value.Slice(1)))) // Map throws if non-ASCII char is found in value return offset; // The tail matched. Return a successful find. remainingSearchSpaceLength--; @@ -171,10 +175,9 @@ private static int IndexOfIgnoreCase(ReadOnlySpan(ReadOnlySpan text, ReadOnlySpan value) - where TText : unmanaged, IEquatable? - where TValue : unmanaged, IEquatable? 
- where TComparer : struct, IComparer + private static int LastIndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) + where TText : unmanaged, IEquatable?, INumberBase + where TValue : unmanaged, IEquatable?, INumberBase { if (value.IsEmpty) { @@ -186,12 +189,13 @@ private static int LastIndexOfIgnoreCase(ReadOnlySpan< } TValue firstValue = value[0]; - if (!TComparer.IsAscii(firstValue)) + if (!UnicodeUtility.IsAsciiCodePoint(uint.CreateTruncating(firstValue))) { ThrowNonAsciiFound(); } TText valueHead = Unsafe.As(ref firstValue); - TText valueHeadDifferentCase = TComparer.GetDifferentCaseOrSame(firstValue); + char differentCase = GetDifferentCaseOrSame(Unsafe.As(ref firstValue)); + TText valueHeadDifferentCase = Unsafe.As(ref differentCase); int valueTailLength = value.Length - 1; if (valueTailLength == 0) @@ -213,7 +217,7 @@ private static int LastIndexOfIgnoreCase(ReadOnlySpan< break; // Found the first element of "value". See if the tail matches. - if (Map(TComparer.EqualsIgnoreCase(text.Slice(relativeIndex + 1, value.Length - 1), value.Slice(1)))) + if (Map(SequenceEqualIgnoreCase(text.Slice(relativeIndex + 1, value.Length - 1), value.Slice(1)))) return relativeIndex; // The tail matched. Return a successful find. 
offset += remainingSearchSpaceLength - relativeIndex; @@ -269,52 +273,6 @@ public static unsafe void Convert(ReadOnlySpan source, Span destinat } } } - - private interface IComparer - where TText : unmanaged - where TValue : unmanaged - { - static abstract bool IsAscii(TValue value); - - static abstract TText GetDifferentCaseOrSame(TValue value); - - static abstract EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySpan value); - } - - private readonly struct ByteByteComparer : IComparer - { - public static bool IsAscii(byte value) => value <= 127; - - public static byte GetDifferentCaseOrSame(byte value) => (byte)Ascii.GetDifferentCaseOrSame((char)value); - - public static EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) => EqualsIgnoreCase(text, value); - } - - private readonly struct ByteCharComparer : IComparer - { - public static bool IsAscii(char value) => value <= 127; - - public static byte GetDifferentCaseOrSame(char value) => (byte)Ascii.GetDifferentCaseOrSame(value); - - public static EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) => EqualsIgnoreCase(value, text); - } - - private readonly struct CharCharComparer : IComparer - { - public static bool IsAscii(char value) => value <= 127; - - public static char GetDifferentCaseOrSame(char value) => Ascii.GetDifferentCaseOrSame(value); - - public static EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) => EqualsIgnoreCase(text, value); - } - - private readonly struct CharByteComparer : IComparer - { - public static bool IsAscii(byte value) => value <= 127; - - public static char GetDifferentCaseOrSame(byte value) => Ascii.GetDifferentCaseOrSame((char)value); - - public static EqualsResult EqualsIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) => EqualsIgnoreCase(text, value); - } } } +#pragma warning restore SA1121 // Use built-in type alias From aff7e6e27f4ba67f75d5cde60084db51ed6ce7ca Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 5 
Sep 2022 11:12:29 +0200 Subject: [PATCH 30/46] IsAscii methods --- .../Ascii/GetIndexOfFirstNonAsciiByteTests.cs | 6 ++++++ .../Ascii/GetIndexOfFirstNonAsciiCharTests.cs | 6 ++++++ .../src/System/Buffers/Text/Ascii.cs | 14 ++++++++++++++ src/libraries/System.Runtime/ref/System.Runtime.cs | 2 ++ 4 files changed, 28 insertions(+) diff --git a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs b/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs index d647a34dea2686..6955f04c4e736f 100644 --- a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs @@ -48,6 +48,7 @@ public void AllAscii_IndexNotFound(byte[] buffer) { Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiByte(buffer)); Assert.True(Ascii.IsAscii(buffer)); + Assert.All(buffer, character => Assert.True(Ascii.IsAscii(character))); } public static IEnumerable ContainingNonAsciiCharactersBuffers @@ -77,6 +78,11 @@ public void NonAscii_IndexFound(int expectedIndex, byte[] buffer) { Assert.Equal(expectedIndex, Ascii.GetIndexOfFirstNonAsciiByte(buffer)); Assert.False(Ascii.IsAscii(buffer)); + + for (int i = 0; i < buffer.Length; i++) + { + Assert.Equal(i != expectedIndex, Ascii.IsAscii(buffer[i])); + } } } } diff --git a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs b/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs index be1f282404c479..903f016ad180e8 100644 --- a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs @@ -48,6 +48,7 @@ public void AllAscii_IndexNotFound(char[] buffer) { Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiChar(buffer)); Assert.True(Ascii.IsAscii(buffer)); + Assert.All(buffer, character => Assert.True(Ascii.IsAscii(character))); } public static IEnumerable 
ContainingNonAsciiCharactersBuffers @@ -77,6 +78,11 @@ public void NonAscii_IndexFound(int expectedIndex, char[] buffer) { Assert.Equal(expectedIndex, Ascii.GetIndexOfFirstNonAsciiChar(buffer)); Assert.False(Ascii.IsAscii(buffer)); + + for (int i = 0; i < buffer.Length; i++) + { + Assert.Equal(i != expectedIndex, Ascii.IsAscii(buffer[i])); + } } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs index 08f2a1309234ba..98e7d602beb13d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs @@ -72,5 +72,19 @@ public static unsafe bool IsAscii(ReadOnlySpan value) return ASCIIUtility.GetIndexOfFirstNonAsciiChar(pValue, valueLength) == valueLength; } } + + /// + /// Determines whether the provided value is ASCII byte. + /// + /// The value to inspect. + /// True if is ASCII, False otherwise. + public static unsafe bool IsAscii(byte value) => value <= 127; + + /// + /// Determines whether the provided value is ASCII char. + /// + /// The value to inspect. + /// True if is ASCII, False otherwise. 
+ public static unsafe bool IsAscii(char value) => value <= 127; } } diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs index 04650f56b2a495..fae99e5f08b7c1 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ -7109,6 +7109,8 @@ public static class Ascii public static int LastIndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } public static bool IsAscii(System.ReadOnlySpan value) { throw null; } public static bool IsAscii(System.ReadOnlySpan value) { throw null; } + public static bool IsAscii(byte value) { throw null; } + public static bool IsAscii(char value) { throw null; } public static bool Equals(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } public static bool EqualsIgnoreCase(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } public static bool EqualsIgnoreCase(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } From bf5d70963e131b67d3e167743ba2fc8ebdb23fd9 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 5 Sep 2022 12:04:04 +0200 Subject: [PATCH 31/46] *GetHashCode(chars) --- .../tests/Ascii/GetHashCodeCharTests.cs | 59 +++++++++++++++++++ .../tests/System.Memory.Tests.csproj | 1 + .../System/Buffers/Text/Ascii.Comparison.cs | 38 ++++++++++++ .../RandomizedStringEqualityComparer.cs | 2 +- .../src/System/Marvin.OrdinalIgnoreCase.cs | 12 +++- .../src/System/String.Comparison.cs | 4 +- .../System.Runtime/ref/System.Runtime.cs | 4 ++ 7 files changed, 116 insertions(+), 4 deletions(-) create mode 100644 src/libraries/System.Memory/tests/Ascii/GetHashCodeCharTests.cs diff --git a/src/libraries/System.Memory/tests/Ascii/GetHashCodeCharTests.cs b/src/libraries/System.Memory/tests/Ascii/GetHashCodeCharTests.cs new file mode 100644 index 00000000000000..d1aa6167b51b45 --- /dev/null +++ 
b/src/libraries/System.Memory/tests/Ascii/GetHashCodeCharTests.cs @@ -0,0 +1,59 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Linq; +using System.Text; +using Xunit; + +namespace System.Buffers.Text.Tests +{ + public class GetHashCodeCharTests + { + [Theory] + [InlineData("\u00C0")] + [InlineData("aaa\u00C0bbb")] + public void InvalidCharactersInValueThrowsOrReturnsFalse(string value) + { + Assert.Throws(() => Ascii.GetHashCode(value)); + Assert.Throws(() => Ascii.GetHashCodeIgnoreCase(value)); + + Assert.False(Ascii.TryGetHashCode(value, out int hashCode)); + Assert.Equal(default(int), hashCode); + Assert.False(Ascii.TryGetHashCodeIgnoreCase(value, out hashCode)); + Assert.Equal(default(int), hashCode); + } + + public IEnumerable ValidInputValidOutput_TestData + { + get + { + yield return new object[] { "test" }; + yield return new object[] { "tESt" }; + yield return new object[] { "!@#$%^&*()" }; + yield return new object[] { "0123456789" }; + yield return new object[] { " \t\r\n" }; + yield return new object[] { new string(Enumerable.Range(0, 127).Select(i => (char)i).ToArray()) }; + } + } + + [Theory] + [InlineData(nameof(ValidInputValidOutput_TestData))] + public void ValidInputValidOutput(string input) + { + // The contract makes it clear that hash code is randomized and is not guaranteed to match string.GetHashCode. + // But.. re-using same types used internally by string.GetHashCode was the simplest way to get good hashing implementation. + // So this test verifies implementation detail. 
+ + int expectedHashCode = input.GetHashCode(); + Assert.Equal(expectedHashCode, Ascii.GetHashCode(input)); + Assert.True(Ascii.TryGetHashCode(input, out int actualHashCode)); + Assert.Equal(expectedHashCode, actualHashCode); + + expectedHashCode = input.GetHashCode(StringComparison.OrdinalIgnoreCase); + Assert.Equal(expectedHashCode, Ascii.GetHashCodeIgnoreCase(input)); + Assert.True(Ascii.TryGetHashCodeIgnoreCase(input, out actualHashCode)); + Assert.Equal(expectedHashCode, actualHashCode); + } + } +} diff --git a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj index 02f5c5101c150b..77b65f1a5a0ad8 100644 --- a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj +++ b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj @@ -14,6 +14,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs index 64cba8aa48c683..3d0f26b9204335 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs @@ -22,6 +22,44 @@ namespace System.Buffers.Text { public static partial class Ascii { + public static bool TryGetHashCode(ReadOnlySpan value, out int hashCode) + { + if (!IsAscii(value)) + { + hashCode = 0; + return false; + } + + ulong seed = Marvin.DefaultSeed; + hashCode = Marvin.ComputeHash32(ref Unsafe.As(ref MemoryMarshal.GetReference(value)), (uint)value.Length * 2, (uint)seed, (uint)(seed >> 32)); + return true; + } + + public static bool TryGetHashCodeIgnoreCase(ReadOnlySpan value, out int hashCode) + { + ulong seed = Marvin.DefaultSeed; + hashCode = Marvin.ComputeHash32OrdinalIgnoreCase(ref MemoryMarshal.GetReference(value), value.Length, (uint)seed, (uint)(seed >> 32), out bool nonAsciiFound, stopOnNonAscii: true); + return 
!nonAsciiFound; + } + + public static int GetHashCode(ReadOnlySpan value) + { + if (!TryGetHashCode(value, out int hashCode)) + { + ThrowNonAsciiFound(); + } + return hashCode; + } + + public static int GetHashCodeIgnoreCase(ReadOnlySpan value) + { + if (!TryGetHashCodeIgnoreCase(value, out int hashCode)) + { + ThrowNonAsciiFound(); + } + return hashCode; + } + public static bool Equals(ReadOnlySpan left, ReadOnlySpan right) => left.Length == right.Length && Equals(right, left) == EqualsResult.Match; diff --git a/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/RandomizedStringEqualityComparer.cs b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/RandomizedStringEqualityComparer.cs index 45fd297e5af275..e29cfca31390c7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/RandomizedStringEqualityComparer.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/RandomizedStringEqualityComparer.cs @@ -92,7 +92,7 @@ public override int GetHashCode(string? obj) return Marvin.ComputeHash32OrdinalIgnoreCase( ref obj.GetRawStringData(), obj.Length, - _seed.p0, _seed.p1); + _seed.p0, _seed.p1, out _); } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Marvin.OrdinalIgnoreCase.cs b/src/libraries/System.Private.CoreLib/src/System/Marvin.OrdinalIgnoreCase.cs index 1888a5f0c612a3..e0dbf350709b8f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Marvin.OrdinalIgnoreCase.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Marvin.OrdinalIgnoreCase.cs @@ -15,7 +15,9 @@ internal static partial class Marvin /// Compute a Marvin OrdinalIgnoreCase hash and collapse it into a 32-bit hash. /// n.b. is specified as char count, not byte count. 
/// - public static int ComputeHash32OrdinalIgnoreCase(ref char data, int count, uint p0, uint p1) + /// Additional is needed as it's impossible to distinguish + /// whether method returned 0 because it found some non-ASCII char or whether it calculated such hashcode. + public static int ComputeHash32OrdinalIgnoreCase(ref char data, int count, uint p0, uint p1, out bool nonAsciiFound, bool stopOnNonAscii = false) { uint ucount = (uint)count; // in chars nuint byteOffset = 0; // in bytes @@ -71,10 +73,18 @@ public static int ComputeHash32OrdinalIgnoreCase(ref char data, int count, uint Block(ref p0, ref p1); Block(ref p0, ref p1); + nonAsciiFound = false; return (int)(p1 ^ p0); NotAscii: Debug.Assert(ucount <= int.MaxValue); // this should fit into a signed int + + nonAsciiFound = true; + if (stopOnNonAscii) + { + return 0; + } + return ComputeHash32OrdinalIgnoreCaseSlow(ref Unsafe.AddByteOffset(ref data, byteOffset), (int)ucount, p0, p1); } diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 4469a15bab5226..a31f60b91e3c34 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -770,7 +770,7 @@ public override int GetHashCode() internal int GetHashCodeOrdinalIgnoreCase() { ulong seed = Marvin.DefaultSeed; - return Marvin.ComputeHash32OrdinalIgnoreCase(ref _firstChar, _stringLength /* in chars, not bytes */, (uint)seed, (uint)(seed >> 32)); + return Marvin.ComputeHash32OrdinalIgnoreCase(ref _firstChar, _stringLength /* in chars, not bytes */, (uint)seed, (uint)(seed >> 32), out _); } // A span-based equivalent of String.GetHashCode(). Computes an ordinal hash code. 
@@ -813,7 +813,7 @@ public static int GetHashCode(ReadOnlySpan value, StringComparison compari internal static int GetHashCodeOrdinalIgnoreCase(ReadOnlySpan value) { ulong seed = Marvin.DefaultSeed; - return Marvin.ComputeHash32OrdinalIgnoreCase(ref MemoryMarshal.GetReference(value), value.Length /* in chars, not bytes */, (uint)seed, (uint)(seed >> 32)); + return Marvin.ComputeHash32OrdinalIgnoreCase(ref MemoryMarshal.GetReference(value), value.Length /* in chars, not bytes */, (uint)seed, (uint)(seed >> 32), out _); } // Use this if and only if 'Denial of Service' attacks are not a concern (i.e. never used for free-form user input), diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs index fae99e5f08b7c1..4bbd9a1eeda5e8 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ -7111,6 +7111,10 @@ public static class Ascii public static bool IsAscii(System.ReadOnlySpan value) { throw null; } public static bool IsAscii(byte value) { throw null; } public static bool IsAscii(char value) { throw null; } + public static bool TryGetHashCode(System.ReadOnlySpan value, out int hashCode) { throw null; } + public static bool TryGetHashCodeIgnoreCase(System.ReadOnlySpan value, out int hashCode) { throw null; } + public static int GetHashCode(System.ReadOnlySpan value) { throw null; } + public static int GetHashCodeIgnoreCase(System.ReadOnlySpan value) { throw null; } public static bool Equals(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } public static bool EqualsIgnoreCase(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } public static bool EqualsIgnoreCase(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } From ba1102d49b297003db6c71699f7c93a8d55234b5 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 5 Sep 2022 13:10:13 +0200 Subject: [PATCH 32/46] *GetHashCode(bytes) --- 
.../tests/Ascii/GetHashCodeByteTests.cs | 74 +++++++++++++++++++ .../tests/System.Memory.Tests.csproj | 1 + .../System/Buffers/Text/Ascii.Comparison.cs | 37 ++++++++++ .../src/System/Marvin.OrdinalIgnoreCase.cs | 70 ++++++++++++++++++ .../System.Runtime/ref/System.Runtime.cs | 4 + 5 files changed, 186 insertions(+) create mode 100644 src/libraries/System.Memory/tests/Ascii/GetHashCodeByteTests.cs diff --git a/src/libraries/System.Memory/tests/Ascii/GetHashCodeByteTests.cs b/src/libraries/System.Memory/tests/Ascii/GetHashCodeByteTests.cs new file mode 100644 index 00000000000000..500c2714fa6344 --- /dev/null +++ b/src/libraries/System.Memory/tests/Ascii/GetHashCodeByteTests.cs @@ -0,0 +1,74 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Linq; +using System.Runtime.InteropServices; +using System.Text; +using Xunit; + +namespace System.Buffers.Text.Tests +{ + public class GetHashCodeByteTests + { + [Theory] + [InlineData(new byte[] { 128 })] + [InlineData(new byte[] { 91, 91, 128, 91 })] // >= 4 chars can execute a different code path + public void InvalidCharactersInValueThrowsOrReturnsFalse(byte[] value) + { + Assert.Throws(() => Ascii.GetHashCode(value)); + Assert.Throws(() => Ascii.GetHashCodeIgnoreCase(value)); + + Assert.False(Ascii.TryGetHashCode(value, out int hashCode)); + Assert.Equal(default(int), hashCode); + Assert.False(Ascii.TryGetHashCodeIgnoreCase(value, out hashCode)); + Assert.Equal(default(int), hashCode); + } + + public IEnumerable ValidInputValidOutput_TestData + { + get + { + yield return new object[] { "test" }; + yield return new object[] { "tESt" }; + yield return new object[] { "!@#$%^&*()" }; + yield return new object[] { "0123456789" }; + yield return new object[] { " \t\r\n" }; + yield return new object[] { new string(Enumerable.Range(0, 127).Select(i => (char)i).ToArray()) }; + } + } + + 
[Theory] + [InlineData(nameof(ValidInputValidOutput_TestData))] + public void ValidInputValidOutput(string input) + { + // The contract makes it clear that hash code is randomized and is not guaranteed to match string.GetHashCode. + // But.. re-using same types used internally by string.GetHashCode was the simplest way to get good hashing implementation. + // So this test verifies implementation detail. + + // string.GetHashCode treats string as buffer of bytes + // this is why this test casts ROS to ROS, rather than doing actual encoding conversion (this would narrow the bytes) + ReadOnlySpan bytes = MemoryMarshal.AsBytes(input.AsSpan()); + + int expectedHashCode = input.GetHashCode(); + Assert.Equal(expectedHashCode, Ascii.GetHashCode(bytes)); + Assert.True(Ascii.TryGetHashCode(input, out int actualHashCode)); + Assert.Equal(expectedHashCode, actualHashCode); + + // Ascii.*GetHashCodeIgnoreCase(bytes) processes four ASCII bytes at a time + // rather than two ascii chars as string.GetHashCode(StringComparison.OrdinalIgnoreCase) does. + // This is why they might produce different outputs and their results are not checked for equality. 
+ + bytes = Encoding.ASCII.GetBytes(input); + expectedHashCode = Ascii.GetHashCodeIgnoreCase(bytes); + + // just verify that the output is the same for multiple invocations + for (int i = 0; i < 10; i++) + { + Assert.Equal(expectedHashCode, Ascii.GetHashCodeIgnoreCase(bytes)); + Assert.True(Ascii.TryGetHashCodeIgnoreCase(bytes, out actualHashCode)); + Assert.Equal(expectedHashCode, actualHashCode); + } + } + } +} diff --git a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj index 77b65f1a5a0ad8..d652422e051baf 100644 --- a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj +++ b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj @@ -14,6 +14,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs index 3d0f26b9204335..2b7e0c2ac34f93 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs @@ -22,6 +22,19 @@ namespace System.Buffers.Text { public static partial class Ascii { + public static bool TryGetHashCode(ReadOnlySpan value, out int hashCode) + { + if (!IsAscii(value)) + { + hashCode = 0; + return false; + } + + ulong seed = Marvin.DefaultSeed; + hashCode = Marvin.ComputeHash32(ref MemoryMarshal.GetReference(value), (uint)value.Length, (uint)seed, (uint)(seed >> 32)); + return true; + } + public static bool TryGetHashCode(ReadOnlySpan value, out int hashCode) { if (!IsAscii(value)) @@ -35,6 +48,12 @@ public static bool TryGetHashCode(ReadOnlySpan value, out int hashCode) return true; } + public static bool TryGetHashCodeIgnoreCase(ReadOnlySpan value, out int hashCode) + { + ulong seed = Marvin.DefaultSeed; + return Marvin.TryComputeHash32ForAsciiIgnoreCase(ref MemoryMarshal.GetReference(value), value.Length, (uint)seed, 
(uint)(seed >> 32), out hashCode); + } + public static bool TryGetHashCodeIgnoreCase(ReadOnlySpan value, out int hashCode) { ulong seed = Marvin.DefaultSeed; @@ -42,6 +61,15 @@ public static bool TryGetHashCodeIgnoreCase(ReadOnlySpan value, out int ha return !nonAsciiFound; } + public static int GetHashCode(ReadOnlySpan value) + { + if (!TryGetHashCode(value, out int hashCode)) + { + ThrowNonAsciiFound(); + } + return hashCode; + } + public static int GetHashCode(ReadOnlySpan value) { if (!TryGetHashCode(value, out int hashCode)) @@ -51,6 +79,15 @@ public static int GetHashCode(ReadOnlySpan value) return hashCode; } + public static int GetHashCodeIgnoreCase(ReadOnlySpan value) + { + if (!TryGetHashCodeIgnoreCase(value, out int hashCode)) + { + ThrowNonAsciiFound(); + } + return hashCode; + } + public static int GetHashCodeIgnoreCase(ReadOnlySpan value) { if (!TryGetHashCodeIgnoreCase(value, out int hashCode)) diff --git a/src/libraries/System.Private.CoreLib/src/System/Marvin.OrdinalIgnoreCase.cs b/src/libraries/System.Private.CoreLib/src/System/Marvin.OrdinalIgnoreCase.cs index e0dbf350709b8f..96c7b00ebca3c1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Marvin.OrdinalIgnoreCase.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Marvin.OrdinalIgnoreCase.cs @@ -110,5 +110,75 @@ private static int ComputeHash32OrdinalIgnoreCaseSlow(ref char data, int count, return hash; } + + /// + /// Compute a Marvin OrdinalIgnoreCase hash and collapse it into a 32-bit hash. + /// n.b. is specified as byte count. + /// + /// True if all bytes were ASCII, false otherwise + internal static bool TryComputeHash32ForAsciiIgnoreCase(ref byte data, int count, uint p0, uint p1, out int hashCode) + { + uint ucount = (uint)count; // in bytes + nuint byteOffset = 0; // in bytes + uint tempValue; + + // We operate on 32-bit integers (four bytes) at a time. 
+ + while (ucount >= 4) + { + tempValue = Unsafe.ReadUnaligned(ref Unsafe.AddByteOffset(ref data, byteOffset)); + if (!Utf8Utility.AllBytesInUInt32AreAscii(tempValue)) + { + goto NotAscii; + } + p0 += Utf8Utility.ConvertAllAsciiBytesInUInt32ToUppercase(tempValue); + Block(ref p0, ref p1); + + byteOffset += 4; + ucount -= 4; + } + + while (ucount > 0) + { + tempValue = Unsafe.AddByteOffset(ref data, byteOffset); + if (tempValue > 0x7Fu) + { + goto NotAscii; + } + + if (BitConverter.IsLittleEndian) + { + // addition is written with -0x80u to allow fall-through to next statement rather than jmp past it + p0 += Utf8Utility.ConvertAllAsciiBytesInUInt32ToUppercase(tempValue) + (0x800000u - 0x80u); + } + else + { + // as above, addition is modified to allow fall-through to next statement rather than jmp past it + p0 += (Utf8Utility.ConvertAllAsciiBytesInUInt32ToUppercase(tempValue) << 16) + 0x8000u - 0x80000000u; + } + + byteOffset += 1; + ucount -= 1; + } + if (BitConverter.IsLittleEndian) + { + p0 += 0x80u; + } + else + { + p0 += 0x80000000u; + } + + Block(ref p0, ref p1); + Block(ref p0, ref p1); + + hashCode = (int)(p1 ^ p0); + return true; + + NotAscii: + Debug.Assert(ucount <= int.MaxValue); // this should fit into a signed int + hashCode = 0; + return false; + } } } diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs index 4bbd9a1eeda5e8..947e00d23abf48 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ -7111,9 +7111,13 @@ public static class Ascii public static bool IsAscii(System.ReadOnlySpan value) { throw null; } public static bool IsAscii(byte value) { throw null; } public static bool IsAscii(char value) { throw null; } + public static bool TryGetHashCode(System.ReadOnlySpan value, out int hashCode) { throw null; } public static bool TryGetHashCode(System.ReadOnlySpan value, out int hashCode) { throw null; } + public static 
bool TryGetHashCodeIgnoreCase(System.ReadOnlySpan value, out int hashCode) { throw null; } public static bool TryGetHashCodeIgnoreCase(System.ReadOnlySpan value, out int hashCode) { throw null; } + public static int GetHashCode(System.ReadOnlySpan value) { throw null; } public static int GetHashCode(System.ReadOnlySpan value) { throw null; } + public static int GetHashCodeIgnoreCase(System.ReadOnlySpan value) { throw null; } public static int GetHashCodeIgnoreCase(System.ReadOnlySpan value) { throw null; } public static bool Equals(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } public static bool EqualsIgnoreCase(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } From 0ffd3ee574dd5c34e49baea7416ac1a2c3116e32 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Tue, 6 Sep 2022 08:30:34 +0200 Subject: [PATCH 33/46] solve buffer overrun: 8 chars need to be narrowed to 8 (not 16 like before) bytes in case of ASCII case conversion --- .../src/System/Buffers/Text/Ascii.CaseConversion.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs index 35e1f0a89ccfa3..a70544afa4e7fe 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs @@ -455,7 +455,7 @@ private static unsafe void ChangeWidthAndWriteTo(Vector128 ve Vector128 narrow = (Sse2.IsSupported) ? 
Sse2.PackUnsignedSaturate(vector.AsInt16(), vector.AsInt16()) : Vector128.Narrow(vector.AsUInt16(), vector.AsUInt16()); - Vector128.StoreUnsafe(narrow, ref *(byte*)pDest, elementOffset); + narrow.GetLower().StoreUnsafe(ref *(byte*)pDest, elementOffset); } else { From 6f96b8be36e982bc0f1ab4e7ad93fe1ea3c7f09c Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Tue, 6 Sep 2022 15:39:05 +0200 Subject: [PATCH 34/46] disable the tests that are failing due to Mono bug --- src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs b/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs index e560521a484cd5..bb07b58b6c725c 100644 --- a/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs @@ -8,6 +8,7 @@ namespace System.Buffers.Text.Tests { + [ActiveIssue("https://github.com/dotnet/runtime/issues/75125", TestRuntimes.Mono)] public class IndexOfTests { [Fact] From 3d5df19564dec9e76029af670def0b917585f8f1 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Wed, 7 Sep 2022 12:45:22 +0200 Subject: [PATCH 35/46] fix a bug (tests that are not compiled are always passing) --- .../System.Memory/tests/Ascii/EqualsTests.cs | 17 +++++++++-------- .../tests/System.Memory.Tests.csproj | 1 + .../src/System/Buffers/Text/Ascii.Comparison.cs | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/libraries/System.Memory/tests/Ascii/EqualsTests.cs b/src/libraries/System.Memory/tests/Ascii/EqualsTests.cs index 437ce7fe10b099..512c8274334cdb 100644 --- a/src/libraries/System.Memory/tests/Ascii/EqualsTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/EqualsTests.cs @@ -54,13 +54,16 @@ public static IEnumerable ExactMatchNotFound_TestData { yield return new object[] { "tak", "nie" }; - for (char i = (char)0; i <= 127; i++) + for (char i = (char)1; i <= 127; i++) { - yield return new object[] { new string(i, i), 
string.Create(i, i, (destination, iteration) => + if (i != '?') // ASCIIEncoding maps invalid ASCII to ? { - destination.Fill((char)iteration) - destination[iteration / 2] = 128; - })}; + yield return new object[] { new string(i, i), string.Create(i, i, (destination, iteration) => + { + destination.Fill((char)iteration); + destination[iteration / 2] = (char)128; + })}; + } } } } @@ -84,7 +87,7 @@ public static IEnumerable IgnoreCaseMatch_TestData for (char i = (char)0; i <= 127; i++) { - char left = (char)i; + char left = i; char right = char.IsAsciiLetterUpper(left) ? char.ToLower(left) : char.IsAsciiLetterLower(left) ? char.ToUpper(left) : left; yield return new object[] { new string(left, i), new string(right, i) }; } @@ -95,8 +98,6 @@ public static IEnumerable IgnoreCaseMatch_TestData [MemberData(nameof(IgnoreCaseMatch_TestData))] public void IgnoreCaseMatchFound(string left, string right) { - Assert.True(Ascii.Equals(Encoding.ASCII.GetBytes(left), right)); - Assert.True(Ascii.EqualsIgnoreCase(Encoding.ASCII.GetBytes(left), Encoding.ASCII.GetBytes(right))); Assert.True(Ascii.EqualsIgnoreCase(left, right)); Assert.True(Ascii.EqualsIgnoreCase(Encoding.ASCII.GetBytes(left), right)); diff --git a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj index d652422e051baf..928f526de0073c 100644 --- a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj +++ b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj @@ -14,6 +14,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs index 2b7e0c2ac34f93..016e97d5dffc31 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs @@ -107,7 +107,7 @@ public static bool 
EqualsIgnoreCase(ReadOnlySpan left, ReadOnlySpan => left.Length == right.Length && Ordinal.EqualsIgnoreCase(ref MemoryMarshal.GetReference(left), ref MemoryMarshal.GetReference(right), left.Length); public static bool EqualsIgnoreCase(ReadOnlySpan left, ReadOnlySpan right) - => left.Length == right.Length && Equals(right, left) == EqualsResult.Match; + => left.Length == right.Length && SequenceEqualIgnoreCase(right, left) == EqualsResult.Match; public static unsafe bool StartsWith(ReadOnlySpan text, ReadOnlySpan value) => value.IsEmpty || (text.Length >= value.Length && Map(Equals(value, text.Slice(0, value.Length)))); From 518ef059ae823c8a8a45733556eaec36eefa0430 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Wed, 7 Sep 2022 12:48:24 +0200 Subject: [PATCH 36/46] use new APIs across BCL: use Trim and EqualsIgnoreCase in CharArrayHelpers (used by System.Net.Http and System.Net.Http.WinHttpHandler) use Equals in various System.Net.Http places use EqualsIgnoreCase in CaseInsensitiveAscii (used by System.Net.WebHeaderCollection), force its inlining use TryToLowerInPlace in System.Net.HttpListener use ToLower in QPackEncoder (used by System.Net.Http) use IndexOf in Http2LoopbackConnection (used by System.Net.Http* test projects) use IsAscii and FromUtf16 in PInvokeMarshal (NativeAOT) --- .../Runtime/InteropServices/PInvokeMarshal.cs | 48 +------ .../Common/src/System/CharArrayHelpers.cs | 14 ++ .../src/System/Net/CaseInsensitiveAscii.cs | 29 +--- .../aspnetcore/Http2/Hpack/HPackEncoder.cs | 12 +- .../aspnetcore/Http3/QPack/QPackEncoder.cs | 30 +---- .../Net/Http/Http2LoopbackConnection.cs | 4 + .../src/System.Net.Http.csproj | 1 - .../src/System/Net/Http/ByteArrayHelpers.cs | 62 --------- .../Headers/ContentDispositionHeaderValue.cs | 3 +- .../Net/Http/Headers/HeaderDescriptor.cs | 5 +- .../Net/Http/Headers/HeaderUtilities.cs | 14 -- .../System/Net/Http/Headers/KnownHeaders.cs | 3 +- .../AuthenticationHelper.Digest.cs | 3 +-
.../Http/SocketsHttpHandler/HttpConnection.cs | 24 ++-- .../SocketsHttpHandler/HttpConnectionBase.cs | 3 +- .../System.Net.Http.Unit.Tests.csproj | 2 - .../src/System.Net.HttpListener.csproj | 2 - .../src/System/Net/HttpListener.cs | 65 ++++++--- .../System/Net/Mail/DomainLiteralReader.cs | 3 +- .../src/System/Net/Mail/DotAtomReader.cs | 3 +- .../src/System/Net/Mail/MailBnfHelper.cs | 10 +- .../src/System/Net/Mail/QuotedPairReader.cs | 3 +- .../Net/Mail/QuotedStringFormatReader.cs | 3 +- .../src/System/Net/Mail/SmtpClient.cs | 3 +- .../src/System/Net/Mail/WhitespaceReader.cs | 3 +- .../src/System/Net/Mime/MimeBasePart.cs | 14 +- .../src/System/Net/WebClient.cs | 8 +- .../System/Buffers/Text/Ascii.Comparison.cs | 1 + .../src/System/Globalization/TextInfo.cs | 124 ++++-------------- .../src/System/Text/ASCIIEncoding.cs | 7 +- .../src/System/UriHelper.cs | 5 +- .../Internal/Utilities/MemoryBlock.cs | 4 + .../Text/RegularExpressions/RegexCharClass.cs | 6 +- 33 files changed, 174 insertions(+), 347 deletions(-) delete mode 100644 src/libraries/System.Net.Http/src/System/Net/Http/ByteArrayHelpers.cs diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs index 3676a0348ef7b2..17fd037a4b1d68 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs @@ -10,6 +10,8 @@ using Internal.Runtime.Augments; using Internal.Runtime.CompilerHelpers; using Internal.Runtime.CompilerServices; +using System.Buffers.Text; +using System.Buffers; namespace System.Runtime.InteropServices { @@ -496,17 +498,7 @@ public static unsafe char AnsiCharToWideChar(byte nativeValue) internal static unsafe byte* StringToAnsiString(char* pManaged, int lenUnicode, byte* pNative, bool 
terminateWithNull, bool bestFit, bool throwOnUnmappableChar) { - bool allAscii = true; - - for (int i = 0; i < lenUnicode; i++) - { - if (pManaged[i] >= 128) - { - allAscii = false; - break; - } - } - + bool allAscii = Ascii.IsAscii(new ReadOnlySpan(pManaged, lenUnicode)); int length; if (allAscii) // If all ASCII, map one UNICODE character to one ANSI char @@ -524,17 +516,8 @@ public static unsafe char AnsiCharToWideChar(byte nativeValue) } if (allAscii) // ASCII conversion { - byte* pDst = pNative; - char* pSrc = pManaged; - - while (lenUnicode > 0) - { - unchecked - { - *pDst++ = (byte)(*pSrc++); - lenUnicode--; - } - } + OperationStatus conversionStatus = Ascii.FromUtf16(new ReadOnlySpan(pManaged, length), new Span(pNative, length), out _, out _); + Debug.Assert(conversionStatus == OperationStatus.Done); } else // Let OS convert { @@ -560,26 +543,9 @@ public static unsafe char AnsiCharToWideChar(byte nativeValue) /// private static unsafe bool CalculateStringLength(byte* pchBuffer, out int ansiBufferLen, out int unicodeBufferLen) { - ansiBufferLen = 0; + ansiBufferLen = SpanHelpers.IndexOfNullByte(ref *pchBuffer); - bool allAscii = true; - - { - byte* p = pchBuffer; - byte b = *p++; - - while (b != 0) - { - if (b >= 128) - { - allAscii = false; - } - - ansiBufferLen++; - - b = *p++; - } - } + bool allAscii = Ascii.IsAscii(new ReadOnlySpan(pchBuffer, ansiBufferLen)); if (allAscii) { diff --git a/src/libraries/Common/src/System/CharArrayHelpers.cs b/src/libraries/Common/src/System/CharArrayHelpers.cs index 371754c4386a36..24f1d388cb5fb3 100644 --- a/src/libraries/Common/src/System/CharArrayHelpers.cs +++ b/src/libraries/Common/src/System/CharArrayHelpers.cs @@ -2,11 +2,18 @@ // The .NET Foundation licenses this file to you under the MIT license. 
using System.Diagnostics; +using System.Runtime.CompilerServices; namespace System { internal static class CharArrayHelpers { + +#if NET7_0_OR_GREATER + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static bool EqualsOrdinalAsciiIgnoreCase(string left, char[] right, int rightStartIndex, int rightLength) + => Buffers.Text.Ascii.EndsWithIgnoreCase(left, right.AsSpan(rightStartIndex, rightLength)); +#else // used by System.Net.Http.WinHttpHandler which targets older TFMs internal static bool EqualsOrdinalAsciiIgnoreCase(string left, char[] right, int rightStartIndex, int rightLength) { Debug.Assert(left != null, "Expected non-null string"); @@ -37,11 +44,17 @@ internal static bool EqualsOrdinalAsciiIgnoreCase(string left, char[] right, int return true; } +#endif internal static void Trim(char[] array, ref int startIndex, ref int length) { DebugAssertArrayInputs(array, startIndex, length); +#if NET7_0_OR_GREATER + Range range = Buffers.Text.Ascii.Trim(new ReadOnlySpan(array, startIndex, length)); + startIndex += range.Start.Value; + length = range.End.Value - range.Start.Value; +#else int offset = 0; while (offset < length && char.IsWhiteSpace(array[startIndex + offset])) { @@ -56,6 +69,7 @@ internal static void Trim(char[] array, ref int startIndex, ref int length) startIndex += offset; length = end - offset + 1; +#endif } [Conditional("DEBUG")] diff --git a/src/libraries/Common/src/System/Net/CaseInsensitiveAscii.cs b/src/libraries/Common/src/System/Net/CaseInsensitiveAscii.cs index 4bfdf23ca7cd84..29bdfadfbaea5f 100644 --- a/src/libraries/Common/src/System/Net/CaseInsensitiveAscii.cs +++ b/src/libraries/Common/src/System/Net/CaseInsensitiveAscii.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Text; using System.Collections; namespace System.Net @@ -84,17 +85,6 @@ public int Compare(object? firstObject, object? 
secondObject) return result; } - // ASCII string case insensitive hash function - private static int FastGetHashCode(string myString) - { - int myHashCode = myString.Length; - if (myHashCode != 0) - { - myHashCode ^= AsciiToLower[(byte)myString[0]] << 24 ^ AsciiToLower[(byte)myString[myHashCode - 1]] << 16; - } - return myHashCode; - } - // ASCII string case insensitive comparer public new bool Equals(object? firstObject, object? secondObject) { @@ -106,22 +96,7 @@ private static int FastGetHashCode(string myString) } if (secondString != null) { - int index = firstString.Length; - if (index == secondString.Length) - { - if (FastGetHashCode(firstString) == FastGetHashCode(secondString)) - { - while (index > 0) - { - index--; - if (AsciiToLower[firstString[index]] != AsciiToLower[secondString[index]]) - { - return false; - } - } - return true; - } - } + return Ascii.EqualsIgnoreCase(firstString, secondString); } return false; } diff --git a/src/libraries/Common/src/System/Net/Http/aspnetcore/Http2/Hpack/HPackEncoder.cs b/src/libraries/Common/src/System/Net/Http/aspnetcore/Http2/Hpack/HPackEncoder.cs index 780e7433c32afb..c2bfd14a38af16 100644 --- a/src/libraries/Common/src/System/Net/Http/aspnetcore/Http2/Hpack/HPackEncoder.cs +++ b/src/libraries/Common/src/System/Net/Http/aspnetcore/Http2/Hpack/HPackEncoder.cs @@ -2,6 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. 
#nullable enable +using System.Buffers; +using System.Buffers.Text; using System.Collections.Generic; using System.Diagnostics; using System.Text; @@ -400,15 +402,9 @@ private static void EncodeValueStringPart(string value, Span destination) { Debug.Assert(destination.Length >= value.Length); - for (int i = 0; i < value.Length; i++) + if (Ascii.FromUtf16(value, destination, out _, out _) == OperationStatus.InvalidData) { - char c = value[i]; - if ((c & 0xFF80) != 0) - { - throw new HttpRequestException(SR.net_http_request_invalid_char_encoding); - } - - destination[i] = (byte)c; + throw new HttpRequestException(SR.net_http_request_invalid_char_encoding); } } diff --git a/src/libraries/Common/src/System/Net/Http/aspnetcore/Http3/QPack/QPackEncoder.cs b/src/libraries/Common/src/System/Net/Http/aspnetcore/Http3/QPack/QPackEncoder.cs index 53ecff40a399f8..5cb195c46fcb5a 100644 --- a/src/libraries/Common/src/System/Net/Http/aspnetcore/Http3/QPack/QPackEncoder.cs +++ b/src/libraries/Common/src/System/Net/Http/aspnetcore/Http3/QPack/QPackEncoder.cs @@ -2,7 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. 
#nullable enable -using System.Collections.Generic; +using System.Buffers; +using System.Buffers.Text; using System.Diagnostics; using System.Net.Http.HPack; using System.Text; @@ -311,23 +312,14 @@ private static void EncodeValueStringPart(string s, Span buffer) { Debug.Assert(buffer.Length >= s.Length); - for (int i = 0; i < s.Length; ++i) + if (Ascii.FromUtf16(s, buffer, out _, out _) == OperationStatus.InvalidData) { - char ch = s[i]; - - if (ch > 127) - { - throw new QPackEncodingException(SR.net_http_request_invalid_char_encoding); - } - - buffer[i] = (byte)ch; + throw new HttpRequestException(SR.net_http_request_invalid_char_encoding); } } private static bool EncodeNameString(string s, Span buffer, out int length) { - const int toLowerMask = 0x20; - if (buffer.Length != 0) { buffer[0] = 0x30; @@ -338,18 +330,8 @@ private static bool EncodeNameString(string s, Span buffer, out int length if (buffer.Length >= s.Length) { - for (int i = 0; i < s.Length; ++i) - { - int ch = s[i]; - Debug.Assert(ch <= 127, "HttpHeaders prevents adding non-ASCII header names."); - - if ((uint)(ch - 'A') <= 'Z' - 'A') - { - ch |= toLowerMask; - } - - buffer[i] = (byte)ch; - } + OperationStatus toLowerStatus = Ascii.ToLower(s, buffer, out _, out _); + Debug.Assert(toLowerStatus == OperationStatus.Done, "HttpHeaders prevents adding non-ASCII header names."); length = nameLength + s.Length; return true; diff --git a/src/libraries/Common/tests/System/Net/Http/Http2LoopbackConnection.cs b/src/libraries/Common/tests/System/Net/Http/Http2LoopbackConnection.cs index 6263483807c76a..919333c987447f 100644 --- a/src/libraries/Common/tests/System/Net/Http/Http2LoopbackConnection.cs +++ b/src/libraries/Common/tests/System/Net/Http/Http2LoopbackConnection.cs @@ -91,7 +91,11 @@ private async Task ReadPrefixAsync() throw new Exception("Connection stream closed while attempting to read connection preface."); } +#if NETFRAMEWORK if (Text.Encoding.ASCII.GetString(_prefix).Contains("HTTP/1.1")) +#else 
+ if (Buffers.Text.Ascii.IndexOf(_prefix, "HTTP/1.1") >= 0) +#endif { // Tests that use HttpAgnosticLoopbackServer will attempt to send an HTTP/1.1 request to an HTTP/2 server. // This is invalid and we should terminate the connection. diff --git a/src/libraries/System.Net.Http/src/System.Net.Http.csproj b/src/libraries/System.Net.Http/src/System.Net.Http.csproj index fadb5f1bd51ec4..f61dabff43412a 100644 --- a/src/libraries/System.Net.Http/src/System.Net.Http.csproj +++ b/src/libraries/System.Net.Http/src/System.Net.Http.csproj @@ -29,7 +29,6 @@ - diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/ByteArrayHelpers.cs b/src/libraries/System.Net.Http/src/System/Net/Http/ByteArrayHelpers.cs deleted file mode 100644 index d6299477cf8d71..00000000000000 --- a/src/libraries/System.Net.Http/src/System/Net/Http/ByteArrayHelpers.cs +++ /dev/null @@ -1,62 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Diagnostics; - -namespace System -{ - internal static class ByteArrayHelpers - { - // TODO: https://github.com/dotnet/runtime/issues/28230 - // Use Ascii.Equals* when it's available. - - internal static bool EqualsOrdinalAsciiIgnoreCase(string left, ReadOnlySpan right) - { - Debug.Assert(left != null, "Expected non-null string"); - - if (left.Length != right.Length) - { - return false; - } - - for (int i = 0; i < left.Length; i++) - { - uint charA = left[i]; - uint charB = right[i]; - - // We're only interested in ASCII characters here. 
- if ((charA - 'a') <= ('z' - 'a')) - charA -= ('a' - 'A'); - if ((charB - 'a') <= ('z' - 'a')) - charB -= ('a' - 'A'); - - if (charA != charB) - { - return false; - } - } - - return true; - } - - internal static bool EqualsOrdinalAscii(string left, ReadOnlySpan right) - { - Debug.Assert(left != null, "Expected non-null string"); - - if (left.Length != right.Length) - { - return false; - } - - for (int i = 0; i < left.Length; i++) - { - if (left[i] != right[i]) - { - return false; - } - } - - return true; - } - } -} diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/ContentDispositionHeaderValue.cs b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/ContentDispositionHeaderValue.cs index 94d50128c5d330..85860b8c693829 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/ContentDispositionHeaderValue.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/ContentDispositionHeaderValue.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Text; using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; @@ -422,7 +423,7 @@ private static string EncodeAndQuoteMime(string input) throw new ArgumentException(SR.Format(CultureInfo.InvariantCulture, SR.net_http_headers_invalid_value, input)); } - else if (HeaderUtilities.ContainsNonAscii(result)) + else if (!Ascii.IsAscii(result)) { needsQuotes = true; // Encoded data must always be quoted, the equals signs are invalid in tokens. 
result = EncodeMime(result); // =?utf-8?B?asdfasdfaesdf?= diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderDescriptor.cs b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderDescriptor.cs index d04c9f4877e48e..74a0e56b03511f 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderDescriptor.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderDescriptor.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Buffers; +using System.Buffers.Text; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Text; @@ -143,7 +144,7 @@ public string GetHeaderValue(ReadOnlySpan headerValue, Encoding? valueEnco { for (int i = 0; i < knownValues.Length; i++) { - if (ByteArrayHelpers.EqualsOrdinalAscii(knownValues[i], headerValue)) + if (Ascii.Equals(headerValue, knownValues[i])) { return knownValues[i]; } @@ -251,7 +252,7 @@ public string GetHeaderValue(ReadOnlySpan headerValue, Encoding? valueEnco Debug.Assert(candidate is null || candidate.Length == contentTypeValue.Length); - return candidate != null && ByteArrayHelpers.EqualsOrdinalAscii(candidate, contentTypeValue) ? + return candidate != null && Ascii.Equals(contentTypeValue, candidate) ? candidate : null; } diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderUtilities.cs b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderUtilities.cs index 14b1075ec1f775..f597051f5afba2 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderUtilities.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderUtilities.cs @@ -58,20 +58,6 @@ internal static void SetQuality(UnvalidatedObjectCollection 0x7f) - { - return true; - } - } - return false; - } - // Encode a string using RFC 5987 encoding. 
// encoding'lang'PercentEncodedSpecials internal static string Encode5987(string input) diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/KnownHeaders.cs b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/KnownHeaders.cs index d1b624d06260ea..f040d7d66310db 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/KnownHeaders.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/KnownHeaders.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Text; using System.Net.Http.HPack; using System.Net.Http.QPack; using System.Runtime.InteropServices; @@ -429,7 +430,7 @@ public BytePtrAccessor(byte* p, int length) fixed (byte* p = &MemoryMarshal.GetReference(name)) { KnownHeader? candidate = GetCandidate(new BytePtrAccessor(p, name.Length)); - if (candidate != null && ByteArrayHelpers.EqualsOrdinalAsciiIgnoreCase(candidate.Name, name)) + if (candidate != null && Ascii.EqualsIgnoreCase(name, candidate.Name)) { return candidate; } diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/AuthenticationHelper.Digest.cs b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/AuthenticationHelper.Digest.cs index 5e5a7c741aa735..023df8bbcbb1cc 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/AuthenticationHelper.Digest.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/AuthenticationHelper.Digest.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Buffers.Text; using System.Collections.Generic; using System.Diagnostics; using System.IO; @@ -88,7 +89,7 @@ internal static partial class AuthenticationHelper } else { - if (HeaderUtilities.ContainsNonAscii(credential.UserName)) + if (!Ascii.IsAscii(credential.UserName)) { string usernameStar = HeaderUtilities.Encode5987(credential.UserName); sb.AppendKeyValue(UsernameStar, usernameStar, includeQuotes: false); diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnection.cs b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnection.cs index d0bef3bc27a23a..18d332abea017f 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnection.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnection.cs @@ -1023,7 +1023,7 @@ private static void ParseStatusLine(ReadOnlySpan line, HttpResponseMessage { ReadOnlySpan reasonBytes = line.Slice(MinStatusLineLength + 1); string? 
knownReasonPhrase = HttpStatusDescription.Get(response.StatusCode); - if (knownReasonPhrase != null && ByteArrayHelpers.EqualsOrdinalAscii(knownReasonPhrase, reasonBytes)) + if (knownReasonPhrase != null && Ascii.Equals(reasonBytes, knownReasonPhrase)) { response.SetReasonPhraseWithoutValidation(knownReasonPhrase); } @@ -1448,12 +1448,10 @@ private Task WriteAsciiStringAsync(string s, bool async) int offset = _writeOffset; if (s.Length <= _writeBuffer.Length - offset) { - byte[] writeBuffer = _writeBuffer; - foreach (char c in s) - { - writeBuffer[offset++] = (byte)c; - } - _writeOffset = offset; + OperationStatus operationStatus = Ascii.FromUtf16(s, _writeBuffer.AsSpan(offset), out _, out int bytesWritten); + Debug.Assert(operationStatus == OperationStatus.Done); + _writeOffset = offset + bytesWritten; + return Task.CompletedTask; } @@ -1464,14 +1462,14 @@ private Task WriteAsciiStringAsync(string s, bool async) private async Task WriteStringAsyncSlow(string s, bool async) { + if (!Ascii.IsAscii(s)) + { + throw new HttpRequestException(SR.net_http_request_invalid_char_encoding); + } + for (int i = 0; i < s.Length; i++) { - char c = s[i]; - if ((c & 0xFF80) != 0) - { - throw new HttpRequestException(SR.net_http_request_invalid_char_encoding); - } - await WriteByteAsync((byte)c, async).ConfigureAwait(false); + await WriteByteAsync((byte)s[i], async).ConfigureAwait(false); } } diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnectionBase.cs b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnectionBase.cs index 45eae0fa57649f..b982d1774bf1e1 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnectionBase.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnectionBase.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Buffers.Text; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.IO; @@ -31,7 +32,7 @@ public string GetResponseHeaderValueWithCaching(HeaderDescriptor descriptor, Rea static string GetOrAddCachedValue([NotNull] ref string? cache, HeaderDescriptor descriptor, ReadOnlySpan value, Encoding? encoding) { string? lastValue = cache; - if (lastValue is null || !ByteArrayHelpers.EqualsOrdinalAscii(lastValue, value)) + if (lastValue is null || !Ascii.Equals(value, lastValue)) { cache = lastValue = descriptor.GetHeaderValue(value, encoding); } diff --git a/src/libraries/System.Net.Http/tests/UnitTests/System.Net.Http.Unit.Tests.csproj b/src/libraries/System.Net.Http/tests/UnitTests/System.Net.Http.Unit.Tests.csproj index 85139c5391ff85..f03f0d76d40085 100644 --- a/src/libraries/System.Net.Http/tests/UnitTests/System.Net.Http.Unit.Tests.csproj +++ b/src/libraries/System.Net.Http/tests/UnitTests/System.Net.Http.Unit.Tests.csproj @@ -72,8 +72,6 @@ Link="ProductionCode\System\Net\Http\DelegatingHandler.cs" /> - - + { + if (state.uriPrefix[state.j] == ':') + { + state.uriPrefix.CopyTo(destination); + } + else + { + int indexOfNextCopy = state.j; + state.uriPrefix.AsSpan(0, indexOfNextCopy).CopyTo(destination); + + if (state.i == 7) + { + ":80".CopyTo(destination.Slice(indexOfNextCopy)); + indexOfNextCopy += 3; + } + else + { + ":443".CopyTo(destination.Slice(indexOfNextCopy)); + indexOfNextCopy += 4; + } + + state.uriPrefix.AsSpan(state.j).CopyTo(destination.Slice(indexOfNextCopy)); + } + + int toLowerLength = destination.IndexOf(':'); + if (toLowerLength == -1) + { + toLowerLength = destination.Length; + } + + if (!Ascii.TryToLowerInPlace(destination.Slice(0, toLowerLength), out _)) + { + throw new IndexOutOfRangeException(); // backward compat for non-ASCII characters + } + }); + } } internal bool ContainsPrefix(string uriPrefix) => _uriPrefixes.Contains(uriPrefix); diff --git 
a/src/libraries/System.Net.Mail/src/System/Net/Mail/DomainLiteralReader.cs b/src/libraries/System.Net.Mail/src/System/Net/Mail/DomainLiteralReader.cs index 2f03cf45b458c8..ab58bd34868cbb 100644 --- a/src/libraries/System.Net.Mail/src/System/Net/Mail/DomainLiteralReader.cs +++ b/src/libraries/System.Net.Mail/src/System/Net/Mail/DomainLiteralReader.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Text; using System.Diagnostics; using System.Net.Mime; @@ -70,7 +71,7 @@ internal static bool TryReadReverse(string data, int index, out int outIndex, bo return true; } // Check for invalid characters - else if (data[index] > MailBnfHelper.Ascii7bitMaxValue || !MailBnfHelper.Dtext[data[index]]) + else if (!Ascii.IsAscii(data[index]) || !MailBnfHelper.Dtext[data[index]]) { if (throwExceptionIfFail) { diff --git a/src/libraries/System.Net.Mail/src/System/Net/Mail/DotAtomReader.cs b/src/libraries/System.Net.Mail/src/System/Net/Mail/DotAtomReader.cs index f50b9946f4fe15..3a0f50aff1967b 100644 --- a/src/libraries/System.Net.Mail/src/System/Net/Mail/DotAtomReader.cs +++ b/src/libraries/System.Net.Mail/src/System/Net/Mail/DotAtomReader.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
 +using System.Buffers.Text; using System.Diagnostics; using System.Net.Mime; @@ -43,7 +44,7 @@ internal static bool TryReadReverse(string data, int index, out int outIndex, bo // Scan for the first invalid chars (including whitespace) for (; 0 <= index; index--) { - if (data[index] <= MailBnfHelper.Ascii7bitMaxValue // Any Unicode allowed + if (Ascii.IsAscii(data[index]) // Any Unicode allowed && (data[index] != MailBnfHelper.Dot && !MailBnfHelper.Atext[data[index]])) // Invalid char { break; diff --git a/src/libraries/System.Net.Mail/src/System/Net/Mail/MailBnfHelper.cs b/src/libraries/System.Net.Mail/src/System/Net/Mail/MailBnfHelper.cs index b932fa90f0667c..3bc33882d7e832 100644 --- a/src/libraries/System.Net.Mail/src/System/Net/Mail/MailBnfHelper.cs +++ b/src/libraries/System.Net.Mail/src/System/Net/Mail/MailBnfHelper.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Text; using System.Diagnostics; using System.Text; @@ -26,7 +27,6 @@ internal static class MailBnfHelper // characters allowed inside of comments internal static readonly bool[] Ctext = CreateCharactersAllowedInComments(); - internal const int Ascii7bitMaxValue = 127; internal const char Quote = '\"'; internal const char Space = ' '; internal const char Tab = '\t'; @@ -226,11 +226,11 @@ internal static void ValidateHeaderName(string data) { //if data contains Unicode and Unicode is permitted, then //it is valid in a quoted string in a header. 
- if (data[offset] <= Ascii7bitMaxValue && !Qtext[data[offset]]) + if (Ascii.IsAscii(data[offset]) && !Qtext[data[offset]]) throw new FormatException(SR.Format(SR.MailHeaderFieldInvalidCharacter, data[offset])); } //not permitting Unicode, in which case Unicode is a formatting error - else if (data[offset] > Ascii7bitMaxValue || !Qtext[data[offset]]) + else if (!Ascii.IsAscii(data[offset]) || !Qtext[data[offset]]) { throw new FormatException(SR.Format(SR.MailHeaderFieldInvalidCharacter, data[offset])); } @@ -256,7 +256,7 @@ internal static string ReadToken(string data, ref int offset, StringBuilder? bui int start = offset; for (; offset < data.Length; offset++) { - if (data[offset] > Ascii7bitMaxValue) + if (!Ascii.IsAscii(data[offset])) { throw new FormatException(SR.Format(SR.MailHeaderFieldInvalidCharacter, data[offset])); } @@ -367,7 +367,7 @@ internal static void GetTokenOrQuotedString(string data, StringBuilder builder, private static bool CheckForUnicode(char ch, bool allowUnicode) { - if (ch < Ascii7bitMaxValue) + if (Ascii.IsAscii(ch)) { return false; } diff --git a/src/libraries/System.Net.Mail/src/System/Net/Mail/QuotedPairReader.cs b/src/libraries/System.Net.Mail/src/System/Net/Mail/QuotedPairReader.cs index 34079edf51c8f7..e2c0422775f1ef 100644 --- a/src/libraries/System.Net.Mail/src/System/Net/Mail/QuotedPairReader.cs +++ b/src/libraries/System.Net.Mail/src/System/Net/Mail/QuotedPairReader.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Buffers.Text; using System.Diagnostics; using System.Net.Mime; @@ -52,7 +53,7 @@ internal static bool TryCountQuotedChars(string data, int index, bool permitUnic } else { - if (!permitUnicodeEscaping && data[index] > MailBnfHelper.Ascii7bitMaxValue) + if (!permitUnicodeEscaping && !Ascii.IsAscii(data[index])) { if (throwExceptionIfFail) { diff --git a/src/libraries/System.Net.Mail/src/System/Net/Mail/QuotedStringFormatReader.cs b/src/libraries/System.Net.Mail/src/System/Net/Mail/QuotedStringFormatReader.cs index e12d731640074d..aa59ce553f2d3a 100644 --- a/src/libraries/System.Net.Mail/src/System/Net/Mail/QuotedStringFormatReader.cs +++ b/src/libraries/System.Net.Mail/src/System/Net/Mail/QuotedStringFormatReader.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Text; using System.Diagnostics; using System.Net.Mime; @@ -185,7 +186,7 @@ internal static bool TryReadReverseUnQuoted(string data, int index, bool permitU // non-whitespace control characters as well as all remaining ASCII chars except backslash and double quote. private static bool IsValidQtext(bool allowUnicode, char ch) { - if (ch > MailBnfHelper.Ascii7bitMaxValue) + if (!Ascii.IsAscii(ch)) { return allowUnicode; } diff --git a/src/libraries/System.Net.Mail/src/System/Net/Mail/SmtpClient.cs b/src/libraries/System.Net.Mail/src/System/Net/Mail/SmtpClient.cs index 45c3664531ba51..52d8a55b12f86b 100644 --- a/src/libraries/System.Net.Mail/src/System/Net/Mail/SmtpClient.cs +++ b/src/libraries/System.Net.Mail/src/System/Net/Mail/SmtpClient.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Buffers.Text; using System.ComponentModel; using System.Diagnostics.CodeAnalysis; using System.Globalization; @@ -143,7 +144,7 @@ private void Initialize() for (int i = 0; i < clientDomainRaw.Length; i++) { ch = clientDomainRaw[i]; - if ((ushort)ch <= 0x7F) + if (Ascii.IsAscii(ch)) sb.Append(ch); } if (sb.Length > 0) diff --git a/src/libraries/System.Net.Mail/src/System/Net/Mail/WhitespaceReader.cs b/src/libraries/System.Net.Mail/src/System/Net/Mail/WhitespaceReader.cs index f8b4c0a27d5c34..9bd4e0cb62e48c 100644 --- a/src/libraries/System.Net.Mail/src/System/Net/Mail/WhitespaceReader.cs +++ b/src/libraries/System.Net.Mail/src/System/Net/Mail/WhitespaceReader.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Text; using System.Diagnostics; using System.Net.Mime; @@ -166,7 +167,7 @@ internal static bool TryReadCfwsReverse(string data, int index, out int outIndex } // Check for valid characters within comments. Allow Unicode, as we won't transmit any comments. 
else if (commentDepth > 0 - && (data[index] > MailBnfHelper.Ascii7bitMaxValue || MailBnfHelper.Ctext[data[index]])) + && (!Ascii.IsAscii(data[index]) || MailBnfHelper.Ctext[data[index]])) { index--; } diff --git a/src/libraries/System.Net.Mail/src/System/Net/Mime/MimeBasePart.cs b/src/libraries/System.Net.Mail/src/System/Net/Mime/MimeBasePart.cs index 50f1d368f591ce..ef893a91059d40 100644 --- a/src/libraries/System.Net.Mail/src/System/Net/Mime/MimeBasePart.cs +++ b/src/libraries/System.Net.Mail/src/System/Net/Mime/MimeBasePart.cs @@ -4,6 +4,7 @@ using System.Collections.Specialized; using System.Text; using System.Net.Mail; +using System.Buffers.Text; namespace System.Net.Mime { @@ -111,18 +112,7 @@ internal static bool IsAscii(string value, bool permitCROrLF) { ArgumentNullException.ThrowIfNull(value); - foreach (char c in value) - { - if (c > 0x7f) - { - return false; - } - if (!permitCROrLF && (c == '\r' || c == '\n')) - { - return false; - } - } - return true; + return Ascii.IsAscii(value) && (permitCROrLF || value.AsSpan().IndexOfAny('\r', '\n') == -1); } internal string? ContentID diff --git a/src/libraries/System.Net.WebClient/src/System/Net/WebClient.cs b/src/libraries/System.Net.WebClient/src/System/Net/WebClient.cs index 89a8f5375e2a19..0c7fe9e106dac9 100644 --- a/src/libraries/System.Net.WebClient/src/System/Net/WebClient.cs +++ b/src/libraries/System.Net.WebClient/src/System/Net/WebClient.cs @@ -1,6 +1,8 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Buffers; +using System.Buffers.Text; using System.Collections.Specialized; using System.ComponentModel; using System.Diagnostics; @@ -507,7 +509,11 @@ private void OpenFileInternal( "Content-Type: " + contentType + "\r\n" + "\r\n"; formHeaderBytes = Encoding.UTF8.GetBytes(formHeader); - boundaryBytes = Encoding.ASCII.GetBytes("\r\n--" + boundary + "--\r\n"); + + string boundaryBytesText = "\r\n--" + boundary + "--\r\n"; + boundaryBytes = new byte[boundaryBytesText.Length]; + OperationStatus conversionStatus = Ascii.FromUtf16(boundaryBytesText, boundaryBytes, out _, out _); + Debug.Assert(conversionStatus == OperationStatus.Done); } else { diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs index 016e97d5dffc31..274861f601a2cf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs @@ -103,6 +103,7 @@ public static bool Equals(ReadOnlySpan left, ReadOnlySpan right) public static bool EqualsIgnoreCase(ReadOnlySpan left, ReadOnlySpan right) => left.Length == right.Length && SequenceEqualIgnoreCase(left, right) == EqualsResult.Match; + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool EqualsIgnoreCase(ReadOnlySpan left, ReadOnlySpan right) => left.Length == right.Length && Ordinal.EqualsIgnoreCase(ref MemoryMarshal.GetReference(left), ref MemoryMarshal.GetReference(right), left.Length); diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs index 547601d74d7def..4b4317327ae2b7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs @@ -1,6 +1,8 @@ // Licensed to the .NET 
Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers; +using System.Buffers.Text; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; @@ -190,128 +192,48 @@ private unsafe char ChangeCase(char c, bool toUpper) internal void ChangeCaseToLower(ReadOnlySpan source, Span destination) { Debug.Assert(destination.Length >= source.Length); - ChangeCaseCommon(ref MemoryMarshal.GetReference(source), ref MemoryMarshal.GetReference(destination), source.Length); + ChangeCaseCommon(source, destination); } [MethodImpl(MethodImplOptions.AggressiveInlining)] internal void ChangeCaseToUpper(ReadOnlySpan source, Span destination) { Debug.Assert(destination.Length >= source.Length); - ChangeCaseCommon(ref MemoryMarshal.GetReference(source), ref MemoryMarshal.GetReference(destination), source.Length); + ChangeCaseCommon(source, destination); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private void ChangeCaseCommon(ReadOnlySpan source, Span destination) where TConversion : struct + private unsafe void ChangeCaseCommon(ReadOnlySpan source, Span destination) where TConversion : struct { - Debug.Assert(destination.Length >= source.Length); - ChangeCaseCommon(ref MemoryMarshal.GetReference(source), ref MemoryMarshal.GetReference(destination), source.Length); - } - - private unsafe void ChangeCaseCommon(ref char source, ref char destination, int charCount) where TConversion : struct - { - Debug.Assert(typeof(TConversion) == typeof(ToUpperConversion) || typeof(TConversion) == typeof(ToLowerConversion)); - bool toUpper = typeof(TConversion) == typeof(ToUpperConversion); // JIT will treat this as a constant in release builds - Debug.Assert(!GlobalizationMode.Invariant); - Debug.Assert(charCount >= 0); + Debug.Assert(typeof(TConversion) == typeof(ToUpperConversion) || typeof(TConversion) == typeof(ToLowerConversion)); - if (charCount == 0) + if 
(source.IsEmpty) { - goto Return; + return; } - fixed (char* pSource = &source) - fixed (char* pDestination = &destination) + bool toUpper = typeof(TConversion) == typeof(ToUpperConversion); // JIT will treat this as a constant in release builds + int charsConsumed = 0; + + if (IsAsciiCasingSameAsInvariant) { - nuint currIdx = 0; // in chars + OperationStatus operationStatus = toUpper + ? Ascii.ToUpper(source, destination, out charsConsumed, out _) + : Ascii.ToLower(source, destination, out charsConsumed, out _); - if (IsAsciiCasingSameAsInvariant) + if (operationStatus != OperationStatus.InvalidData) { - // Read 4 chars (two 32-bit integers) at a time - - if (charCount >= 4) - { - nuint lastIndexWhereCanReadFourChars = (uint)charCount - 4; - do - { - // This is a mostly branchless case change routine. Generally speaking, we assume that the majority - // of input is ASCII, so the 'if' checks below should normally evaluate to false. However, within - // the ASCII data, we expect that characters of either case might be about equally distributed, so - // we want the case change operation itself to be branchless. This gives optimal performance in the - // common case. We also expect that developers aren't passing very long (16+ character) strings into - // this method, so we won't bother vectorizing until data shows us that it's worthwhile to do so. - - uint tempValue = Unsafe.ReadUnaligned(pSource + currIdx); - if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue)) - { - goto NonAscii; - } - tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue); - Unsafe.WriteUnaligned(pDestination + currIdx, tempValue); - - tempValue = Unsafe.ReadUnaligned(pSource + currIdx + 2); - if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue)) - { - goto NonAsciiSkipTwoChars; - } - tempValue = (toUpper) ? 
Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue); - Unsafe.WriteUnaligned(pDestination + currIdx + 2, tempValue); - currIdx += 4; - } while (currIdx <= lastIndexWhereCanReadFourChars); - - // At this point, there are fewer than 4 characters remaining to convert. - Debug.Assert((uint)charCount - currIdx < 4); - } - - // If there are 2 or 3 characters left to convert, we'll convert 2 of them now. - if ((charCount & 2) != 0) - { - uint tempValue = Unsafe.ReadUnaligned(pSource + currIdx); - if (!Utf16Utility.AllCharsInUInt32AreAscii(tempValue)) - { - goto NonAscii; - } - tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue); - Unsafe.WriteUnaligned(pDestination + currIdx, tempValue); - currIdx += 2; - } - - // If there's a single character left to convert, do it now. - if ((charCount & 1) != 0) - { - uint tempValue = pSource[currIdx]; - if (tempValue > 0x7Fu) - { - goto NonAscii; - } - tempValue = (toUpper) ? Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(tempValue) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(tempValue); - pDestination[currIdx] = (char)tempValue; - } - - // And we're finished! - - goto Return; - - // If we reached this point, we found non-ASCII data. - // Fall back down the p/invoke code path. - - NonAsciiSkipTwoChars: - currIdx += 2; - - NonAscii: - Debug.Assert(currIdx < (uint)charCount, "We somehow read past the end of the buffer."); - charCount -= (int)currIdx; + Debug.Assert(operationStatus == OperationStatus.Done); + return; } - - // We encountered non-ASCII data and therefore can't perform invariant case conversion; or the requested culture - // has a case conversion that's different from the invariant culture, even for ASCII data (e.g., tr-TR converts - // 'i' (U+0069) to Latin Capital Letter I With Dot Above (U+0130)). 
- - ChangeCaseCore(pSource + currIdx, charCount, pDestination + currIdx, charCount, toUpper); } - Return: - return; + fixed (char* pSource = &MemoryMarshal.GetReference(source)) + fixed (char* pDestination = &MemoryMarshal.GetReference(destination)) + { + ChangeCaseCore(pSource + charsConsumed, source.Length - charsConsumed, pDestination + charsConsumed, destination.Length - charsConsumed, toUpper); + } } private unsafe string ChangeCaseCommon(string source) where TConversion : struct diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIEncoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIEncoding.cs index ebf07d971271d9..b875e589f05ec0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIEncoding.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIEncoding.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Buffers; +using System.Buffers.Text; using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -188,7 +189,7 @@ private protected sealed override unsafe int GetByteCountFast(char* pChars, int if (!(fallback is EncoderReplacementFallback replacementFallback && replacementFallback.MaxCharCount == 1 - && replacementFallback.DefaultString[0] <= 0x7F)) + && Ascii.IsAscii(replacementFallback.DefaultString[0]))) { // Unrecognized fallback mechanism - count chars manually. 
@@ -367,7 +368,7 @@ private protected sealed override unsafe int GetBytesWithFallback(ReadOnlySpan byte if (!bytes.IsEmpty) { byte b = bytes[0]; - if (b <= 0x7F) + if (Ascii.IsAscii(b)) { // ASCII byte diff --git a/src/libraries/System.Private.Uri/src/System/UriHelper.cs b/src/libraries/System.Private.Uri/src/System/UriHelper.cs index 4a07e754a4d070..4d57da7ce27a6c 100644 --- a/src/libraries/System.Private.Uri/src/System/UriHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/UriHelper.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Text; using System.Text; using System.Diagnostics; using System.Runtime.InteropServices; @@ -137,7 +138,7 @@ internal static string EscapeString( Debug.Assert(!noEscape['%'], "Need to treat % specially; it should be part of any escaped set"); int i = 0; char c; - for (; i < stringToEscape.Length && (c = stringToEscape[i]) <= 0x7F && noEscape[c]; i++) ; + for (; i < stringToEscape.Length && Ascii.IsAscii(c = stringToEscape[i]) && noEscape[c]; i++) ; if (i == stringToEscape.Length) { return stringToEscape; @@ -176,7 +177,7 @@ internal static unsafe void EscapeString(ReadOnlySpan stringToEscape, ref Debug.Assert(!noEscape['%'], "Need to treat % specially in case checkExistingEscaped is true"); int i = 0; char c; - for (; i < stringToEscape.Length && (c = stringToEscape[i]) <= 0x7F && noEscape[c]; i++) ; + for (; i < stringToEscape.Length && Ascii.IsAscii(c = stringToEscape[i]) && noEscape[c]; i++) ; if (i == stringToEscape.Length) { dest.Append(stringToEscape); diff --git a/src/libraries/System.Reflection.Metadata/src/System/Reflection/Internal/Utilities/MemoryBlock.cs b/src/libraries/System.Reflection.Metadata/src/System/Reflection/Internal/Utilities/MemoryBlock.cs index c205f5875f9c29..28a652eb129a4f 100644 --- 
a/src/libraries/System.Reflection.Metadata/src/System/Reflection/Internal/Utilities/MemoryBlock.cs +++ b/src/libraries/System.Reflection.Metadata/src/System/Reflection/Internal/Utilities/MemoryBlock.cs @@ -474,6 +474,9 @@ internal bool Utf8NullTerminatedStringStartsWithAsciiPrefix(int offset, string a CheckBounds(offset, 0); +#if NET7_0_OR_GREATER + return Buffers.Text.Ascii.StartsWith(new ReadOnlySpan(Pointer + offset, Length - offset), asciiPrefix); +#else // Make sure that we won't read beyond the block even if the block doesn't end with 0 byte. if (asciiPrefix.Length > Length - offset) { @@ -495,6 +498,7 @@ internal bool Utf8NullTerminatedStringStartsWithAsciiPrefix(int offset, string a } return true; +#endif } internal int CompareUtf8NullTerminatedStringWithAsciiString(int offset, string asciiString) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 393ca384c1fe64..3437a0b958607b 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -977,8 +977,9 @@ public static bool ParticipatesInCaseConversion(ReadOnlySpan s) } /// Gets whether the specified span contains only ASCII. - public static bool IsAscii(ReadOnlySpan s) // TODO https://github.com/dotnet/runtime/issues/28230: Replace once Ascii is available + public static bool IsAscii(ReadOnlySpan s) { +#if NETSTANDARD foreach (char c in s) { if (c >= 128) @@ -988,6 +989,9 @@ public static bool IsAscii(ReadOnlySpan s) // TODO https://github.com/dotn } return true; +#else + return Buffers.Text.Ascii.IsAscii(s); +#endif } /// Gets whether the specified character is an ASCII letter. 
From ab12f70dc589183cb3c0527c13382e193efdc5e2 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Thu, 8 Sep 2022 14:47:38 +0200 Subject: [PATCH 37/46] Apply suggestions from code review Co-authored-by: Stephen Toub --- src/libraries/Common/src/System/CharArrayHelpers.cs | 1 - .../System.Net.HttpListener/src/System/Net/HttpListener.cs | 2 +- .../System.Net.Mail/src/System/Net/Mime/MimeBasePart.cs | 2 +- .../src/System/Buffers/Text/Ascii.Searching.cs | 1 - 4 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/libraries/Common/src/System/CharArrayHelpers.cs b/src/libraries/Common/src/System/CharArrayHelpers.cs index 24f1d388cb5fb3..e01d281e1aa9b8 100644 --- a/src/libraries/Common/src/System/CharArrayHelpers.cs +++ b/src/libraries/Common/src/System/CharArrayHelpers.cs @@ -8,7 +8,6 @@ namespace System { internal static class CharArrayHelpers { - #if NET7_0_OR_GREATER [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static bool EqualsOrdinalAsciiIgnoreCase(string left, char[] right, int rightStartIndex, int rightLength) diff --git a/src/libraries/System.Net.HttpListener/src/System/Net/HttpListener.cs b/src/libraries/System.Net.HttpListener/src/System/Net/HttpListener.cs index bd927b91270d90..d554d4dd64d9e8 100644 --- a/src/libraries/System.Net.HttpListener/src/System/Net/HttpListener.cs +++ b/src/libraries/System.Net.HttpListener/src/System/Net/HttpListener.cs @@ -201,7 +201,7 @@ static string CreateRegisteredPrefix(string uriPrefix, int j, int i) } int toLowerLength = destination.IndexOf(':'); - if (toLowerLength == -1) + if (toLowerLength < 0) { toLowerLength = destination.Length; } diff --git a/src/libraries/System.Net.Mail/src/System/Net/Mime/MimeBasePart.cs b/src/libraries/System.Net.Mail/src/System/Net/Mime/MimeBasePart.cs index ef893a91059d40..9ed1b48341155c 100644 --- a/src/libraries/System.Net.Mail/src/System/Net/Mime/MimeBasePart.cs +++ b/src/libraries/System.Net.Mail/src/System/Net/Mime/MimeBasePart.cs @@ -112,7 +112,7 @@ internal static 
bool IsAscii(string value, bool permitCROrLF) { ArgumentNullException.ThrowIfNull(value); - return Ascii.IsAscii(value) && (permitCROrLF || value.AsSpan().IndexOfAny('\r', '\n') == -1); + return Ascii.IsAscii(value) && (permitCROrLF || value.AsSpan().IndexOfAny('\r', '\n') < 0); } internal string? ContentID diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs index 92fdcf81094766..ee876bdd9f0bd3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs @@ -275,4 +275,3 @@ public static unsafe void Convert(ReadOnlySpan source, Span destinat } } } -#pragma warning restore SA1121 // Use built-in type alias From 29b38f6620462841a159babb373ae686634eea82 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Thu, 8 Sep 2022 14:49:44 +0200 Subject: [PATCH 38/46] address code review feedback --- .../Runtime/InteropServices/PInvokeMarshal.cs | 6 +- .../Common/src/System/CharArrayHelpers.cs | 16 ++--- .../aspnetcore/Http2/Hpack/HPackEncoder.cs | 12 ++-- .../aspnetcore/Http3/QPack/QPackEncoder.cs | 30 ++++++++-- .../Net/Http/Http2LoopbackConnection.cs | 6 +- .../src/System.Net.WebClient.csproj | 1 + .../src/System/Net/WebClient.cs | 8 +-- .../Buffers/Text/Ascii.CaseConversion.cs | 60 ++++++++++--------- .../System/Buffers/Text/Ascii.Comparison.cs | 32 +++++----- .../System/Buffers/Text/Ascii.Searching.cs | 7 ++- .../src/System/Buffers/Text/Ascii.Trimming.cs | 6 +- .../Text/RegularExpressions/RegexCharClass.cs | 2 +- 12 files changed, 103 insertions(+), 83 deletions(-) diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs index 17fd037a4b1d68..6692249ea5ade8 100644 --- 
a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs @@ -543,9 +543,9 @@ public static unsafe char AnsiCharToWideChar(byte nativeValue) /// private static unsafe bool CalculateStringLength(byte* pchBuffer, out int ansiBufferLen, out int unicodeBufferLen) { - ansiBufferLen = SpanHelpers.IndexOfNullByte(ref *pchBuffer); - - bool allAscii = Ascii.IsAscii(new ReadOnlySpan(pchBuffer, ansiBufferLen)); + ReadOnlySpan span = MemoryMarshal.CreateReadOnlySpanFromNullTerminated(pchBuffer); + ansiBufferLen = span.Length; + bool allAscii = Ascii.IsAscii(span); if (allAscii) { diff --git a/src/libraries/Common/src/System/CharArrayHelpers.cs b/src/libraries/Common/src/System/CharArrayHelpers.cs index e01d281e1aa9b8..73abc7f0984dc7 100644 --- a/src/libraries/Common/src/System/CharArrayHelpers.cs +++ b/src/libraries/Common/src/System/CharArrayHelpers.cs @@ -8,16 +8,14 @@ namespace System { internal static class CharArrayHelpers { -#if NET7_0_OR_GREATER - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool EqualsOrdinalAsciiIgnoreCase(string left, char[] right, int rightStartIndex, int rightLength) - => Buffers.Text.Ascii.EndsWithIgnoreCase(left, right.AsSpan(rightStartIndex, rightLength)); -#else // used by System.Net.Http.WinHttpHandler which targets older TFMs internal static bool EqualsOrdinalAsciiIgnoreCase(string left, char[] right, int rightStartIndex, int rightLength) { Debug.Assert(left != null, "Expected non-null string"); DebugAssertArrayInputs(right, rightStartIndex, rightLength); +#if NET7_0_OR_GREATER + return Buffers.Text.Ascii.EndsWithIgnoreCase(left, right.AsSpan(rightStartIndex, rightLength)); +#else // used by System.Net.Http.WinHttpHandler which targets older TFMs if (left.Length != rightLength) { return false; @@ -42,18 +40,13 @@ internal static bool EqualsOrdinalAsciiIgnoreCase(string left, 
char[] right, int } return true; - } #endif + } internal static void Trim(char[] array, ref int startIndex, ref int length) { DebugAssertArrayInputs(array, startIndex, length); -#if NET7_0_OR_GREATER - Range range = Buffers.Text.Ascii.Trim(new ReadOnlySpan(array, startIndex, length)); - startIndex += range.Start.Value; - length = range.End.Value - range.Start.Value; -#else int offset = 0; while (offset < length && char.IsWhiteSpace(array[startIndex + offset])) { @@ -68,7 +61,6 @@ internal static void Trim(char[] array, ref int startIndex, ref int length) startIndex += offset; length = end - offset + 1; -#endif } [Conditional("DEBUG")] diff --git a/src/libraries/Common/src/System/Net/Http/aspnetcore/Http2/Hpack/HPackEncoder.cs b/src/libraries/Common/src/System/Net/Http/aspnetcore/Http2/Hpack/HPackEncoder.cs index c2bfd14a38af16..780e7433c32afb 100644 --- a/src/libraries/Common/src/System/Net/Http/aspnetcore/Http2/Hpack/HPackEncoder.cs +++ b/src/libraries/Common/src/System/Net/Http/aspnetcore/Http2/Hpack/HPackEncoder.cs @@ -2,8 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. 
#nullable enable -using System.Buffers; -using System.Buffers.Text; using System.Collections.Generic; using System.Diagnostics; using System.Text; @@ -402,9 +400,15 @@ private static void EncodeValueStringPart(string value, Span destination) { Debug.Assert(destination.Length >= value.Length); - if (Ascii.FromUtf16(value, destination, out _, out _) == OperationStatus.InvalidData) + for (int i = 0; i < value.Length; i++) { - throw new HttpRequestException(SR.net_http_request_invalid_char_encoding); + char c = value[i]; + if ((c & 0xFF80) != 0) + { + throw new HttpRequestException(SR.net_http_request_invalid_char_encoding); + } + + destination[i] = (byte)c; } } diff --git a/src/libraries/Common/src/System/Net/Http/aspnetcore/Http3/QPack/QPackEncoder.cs b/src/libraries/Common/src/System/Net/Http/aspnetcore/Http3/QPack/QPackEncoder.cs index 5cb195c46fcb5a..53ecff40a399f8 100644 --- a/src/libraries/Common/src/System/Net/Http/aspnetcore/Http3/QPack/QPackEncoder.cs +++ b/src/libraries/Common/src/System/Net/Http/aspnetcore/Http3/QPack/QPackEncoder.cs @@ -2,8 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. 
#nullable enable -using System.Buffers; -using System.Buffers.Text; +using System.Collections.Generic; using System.Diagnostics; using System.Net.Http.HPack; using System.Text; @@ -312,14 +311,23 @@ private static void EncodeValueStringPart(string s, Span buffer) { Debug.Assert(buffer.Length >= s.Length); - if (Ascii.FromUtf16(s, buffer, out _, out _) == OperationStatus.InvalidData) + for (int i = 0; i < s.Length; ++i) { - throw new HttpRequestException(SR.net_http_request_invalid_char_encoding); + char ch = s[i]; + + if (ch > 127) + { + throw new QPackEncodingException(SR.net_http_request_invalid_char_encoding); + } + + buffer[i] = (byte)ch; } } private static bool EncodeNameString(string s, Span buffer, out int length) { + const int toLowerMask = 0x20; + if (buffer.Length != 0) { buffer[0] = 0x30; @@ -330,8 +338,18 @@ private static bool EncodeNameString(string s, Span buffer, out int length if (buffer.Length >= s.Length) { - OperationStatus toLowerStatus = Ascii.ToLower(s, buffer, out _, out _); - Debug.Assert(toLowerStatus == OperationStatus.Done, "HttpHeaders prevents adding non-ASCII header names."); + for (int i = 0; i < s.Length; ++i) + { + int ch = s[i]; + Debug.Assert(ch <= 127, "HttpHeaders prevents adding non-ASCII header names."); + + if ((uint)(ch - 'A') <= 'Z' - 'A') + { + ch |= toLowerMask; + } + + buffer[i] = (byte)ch; + } length = nameLength + s.Length; return true; diff --git a/src/libraries/Common/tests/System/Net/Http/Http2LoopbackConnection.cs b/src/libraries/Common/tests/System/Net/Http/Http2LoopbackConnection.cs index 919333c987447f..ed5af827da852a 100644 --- a/src/libraries/Common/tests/System/Net/Http/Http2LoopbackConnection.cs +++ b/src/libraries/Common/tests/System/Net/Http/Http2LoopbackConnection.cs @@ -91,11 +91,7 @@ private async Task ReadPrefixAsync() throw new Exception("Connection stream closed while attempting to read connection preface."); } -#if NETFRAMEWORK - if (Text.Encoding.ASCII.GetString(_prefix).Contains("HTTP/1.1")) 
-#else - if (Buffers.Text.Ascii.IndexOf(_prefix, "HTTP/1.1") >= 0) -#endif + if (_prefix.AsSpan().IndexOf("HTTP/1.1"u8) >= 0) { // Tests that use HttpAgnosticLoopbackServer will attempt to send an HTTP/1.1 request to an HTTP/2 server. // This is invalid and we should terminate the connection. diff --git a/src/libraries/System.Net.WebClient/src/System.Net.WebClient.csproj b/src/libraries/System.Net.WebClient/src/System.Net.WebClient.csproj index 949fea2f4d4076..4d14e69747feb7 100644 --- a/src/libraries/System.Net.WebClient/src/System.Net.WebClient.csproj +++ b/src/libraries/System.Net.WebClient/src/System.Net.WebClient.csproj @@ -32,6 +32,7 @@ + diff --git a/src/libraries/System.Net.WebClient/src/System/Net/WebClient.cs b/src/libraries/System.Net.WebClient/src/System/Net/WebClient.cs index 0c7fe9e106dac9..7921fbf870e40b 100644 --- a/src/libraries/System.Net.WebClient/src/System/Net/WebClient.cs +++ b/src/libraries/System.Net.WebClient/src/System/Net/WebClient.cs @@ -7,7 +7,6 @@ using System.ComponentModel; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; -using System.Globalization; using System.IO; using System.Net.Cache; using System.Security; @@ -510,9 +509,10 @@ private void OpenFileInternal( "\r\n"; formHeaderBytes = Encoding.UTF8.GetBytes(formHeader); - string boundaryBytesText = "\r\n--" + boundary + "--\r\n"; - boundaryBytes = new byte[boundaryBytesText.Length]; - OperationStatus conversionStatus = Ascii.FromUtf16(boundaryBytesText, boundaryBytes, out _, out _); + boundaryBytes = new byte["\r\n--".Length + boundary.Length + "--\r\n".Length]; + "\r\n--"u8.CopyTo(boundaryBytes); + "--\r\n"u8.CopyTo(boundaryBytes.AsSpan("\r\n--".Length + boundary.Length)); + OperationStatus conversionStatus = Ascii.FromUtf16(boundary, boundaryBytes.AsSpan("\r\n--".Length), out _, out _); Debug.Assert(conversionStatus == OperationStatus.Done); } else diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs 
b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs index a70544afa4e7fe..8b15d9bc5ee5ef 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs @@ -67,7 +67,7 @@ private static unsafe OperationStatus ChangeCase(ReadOnlySp where TTo : unmanaged, IBinaryInteger where TCasing : struct { - if ((typeof(TFrom) == typeof(TTo) || (Unsafe.SizeOf() * source.Length % Unsafe.SizeOf() == 0)) && source.Overlaps(MemoryMarshal.Cast(destination))) + if (MemoryMarshal.AsBytes(source).Overlaps(MemoryMarshal.AsBytes(destination))) { throw new InvalidOperationException(SR.InvalidOperation_SpanOverlappedOperation); } @@ -121,13 +121,13 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD Debug.Assert(typeof(TTo) == typeof(byte) || typeof(TTo) == typeof(ushort)); Debug.Assert(typeof(TCasing) == typeof(ToUpperConversion) || typeof(TCasing) == typeof(ToLowerConversion)); - bool SourceIsAscii = (sizeof(TFrom) == 1); // JIT turns this into a const - bool DestIsAscii = (sizeof(TTo) == 1); // JIT turns this into a const - bool ConversionIsWidening = SourceIsAscii && !DestIsAscii; // JIT turns this into a const - bool ConversionIsNarrowing = !SourceIsAscii && DestIsAscii; // JIT turns this into a const - bool ConversionIsWidthPreserving = typeof(TFrom) == typeof(TTo); // JIT turns this into a const - bool ConversionIsToUpper = (typeof(TCasing) == typeof(ToUpperConversion)); // JIT turns this into a const - uint NumInputElementsToConsumeEachVectorizedLoopIteration = (uint)(sizeof(Vector128) / sizeof(TFrom)); // JIT turns this into a const + bool sourceIsAscii = (sizeof(TFrom) == 1); // JIT turns this into a const + bool destIsAscii = (sizeof(TTo) == 1); // JIT turns this into a const + bool conversionIsWidening = sourceIsAscii && !destIsAscii; // JIT turns this into a const + bool conversionIsNarrowing = !sourceIsAscii 
&& destIsAscii; // JIT turns this into a const + bool conversionIsWidthPreserving = typeof(TFrom) == typeof(TTo); // JIT turns this into a const + bool conversionIsToUpper = (typeof(TCasing) == typeof(ToUpperConversion)); // JIT turns this into a const + uint numInputElementsToConsumeEachVectorizedLoopIteration = (uint)(sizeof(Vector128) / sizeof(TFrom)); // JIT turns this into a const nuint i = 0; @@ -135,14 +135,14 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD // widening or narrowing. In this case, fall back to a naive element-by-element // loop. - if (!ConversionIsWidthPreserving && !Vector128.IsHardwareAccelerated) + if (!conversionIsWidthPreserving && !Vector128.IsHardwareAccelerated) { goto DrainRemaining; } // Process the input as a series of 128-bit blocks. - if (Vector128.IsHardwareAccelerated && elementCount >= NumInputElementsToConsumeEachVectorizedLoopIteration) + if (Vector128.IsHardwareAccelerated && elementCount >= numInputElementsToConsumeEachVectorizedLoopIteration) { // Unaligned read and check for non-ASCII data. @@ -157,7 +157,7 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD // (value - CONST) <= (Z - A), but using signed instead of unsigned arithmetic. TFrom SourceSignedMinValue = TFrom.CreateTruncating(1 << (8 * sizeof(TFrom) - 1)); - Vector128 subtractionVector = Vector128.Create(ConversionIsToUpper ? (SourceSignedMinValue + TFrom.CreateTruncating('a')) : (SourceSignedMinValue + TFrom.CreateTruncating('A'))); + Vector128 subtractionVector = Vector128.Create(conversionIsToUpper ? 
(SourceSignedMinValue + TFrom.CreateTruncating('a')) : (SourceSignedMinValue + TFrom.CreateTruncating('A'))); Vector128 comparisionVector = Vector128.Create(SourceSignedMinValue + TFrom.CreateTruncating(26 /* A..Z or a..z */)); Vector128 caseConversionVector = Vector128.Create(TFrom.CreateTruncating(0x20)); // works both directions @@ -172,8 +172,8 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD // many elements we should skip in order to have future writes be // aligned. - uint expectedWriteAlignment = NumInputElementsToConsumeEachVectorizedLoopIteration * (uint)sizeof(TTo); // JIT turns this into a const - i = NumInputElementsToConsumeEachVectorizedLoopIteration - ((uint)pDest % expectedWriteAlignment) / (uint)sizeof(TTo); + uint expectedWriteAlignment = numInputElementsToConsumeEachVectorizedLoopIteration * (uint)sizeof(TTo); // JIT turns this into a const + i = numInputElementsToConsumeEachVectorizedLoopIteration - ((uint)pDest % expectedWriteAlignment) / (uint)sizeof(TTo); Debug.Assert((nuint)(&pDest[i]) % expectedWriteAlignment == 0, "Destination buffer wasn't properly aligned!"); // Future iterations of this loop will be aligned, @@ -183,7 +183,7 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD { Debug.Assert(i <= elementCount, "We overran a buffer somewhere."); - if ((elementCount - i) < NumInputElementsToConsumeEachVectorizedLoopIteration) + if ((elementCount - i) < numInputElementsToConsumeEachVectorizedLoopIteration) { // If we're about to enter the final iteration of the loop, back up so that // we can read one unaligned block. If we've already consumed all the data, @@ -194,7 +194,7 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD goto Return; } - i = elementCount - NumInputElementsToConsumeEachVectorizedLoopIteration; + i = elementCount - numInputElementsToConsumeEachVectorizedLoopIteration; } // Unaligned read & check for non-ASCII data. 
@@ -214,7 +214,7 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD // We expect this write to be aligned except for the last run through the loop. ChangeWidthAndWriteTo(srcVector, pDest, i); - i += NumInputElementsToConsumeEachVectorizedLoopIteration; + i += numInputElementsToConsumeEachVectorizedLoopIteration; } } @@ -225,13 +225,13 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD if (IntPtr.Size >= 8 && (elementCount - i) >= (nuint)(8 / sizeof(TFrom))) { ulong nextBlockAsUInt64 = Unsafe.ReadUnaligned(&pSrc[i]); - if (SourceIsAscii) + if (sourceIsAscii) { if (!Utf8Utility.AllBytesInUInt64AreAscii(nextBlockAsUInt64)) { goto Drain32; } - nextBlockAsUInt64 = (ConversionIsToUpper) + nextBlockAsUInt64 = (conversionIsToUpper) ? Utf8Utility.ConvertAllAsciiBytesInUInt64ToUppercase(nextBlockAsUInt64) : Utf8Utility.ConvertAllAsciiBytesInUInt64ToLowercase(nextBlockAsUInt64); } @@ -241,12 +241,12 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD { goto Drain32; } - nextBlockAsUInt64 = (ConversionIsToUpper) + nextBlockAsUInt64 = (conversionIsToUpper) ? 
Utf16Utility.ConvertAllAsciiCharsInUInt64ToUppercase(nextBlockAsUInt64) : Utf16Utility.ConvertAllAsciiCharsInUInt64ToLowercase(nextBlockAsUInt64); } - if (ConversionIsWidthPreserving) + if (conversionIsWidthPreserving) { Unsafe.WriteUnaligned(&pDest[i], nextBlockAsUInt64); } @@ -255,7 +255,7 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD Debug.Assert(Vector128.IsHardwareAccelerated); Vector128 blockAsVectorOfUInt64 = Vector128.CreateScalarUnsafe(nextBlockAsUInt64); - if (ConversionIsWidening) + if (conversionIsWidening) { Vector128.StoreUnsafe(Vector128.WidenLower(blockAsVectorOfUInt64.AsByte()), ref *(ushort*)pDest, i); } @@ -284,13 +284,13 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD if ((elementCount - i) >= (nuint)(4 / sizeof(TFrom))) { uint nextBlockAsUInt32 = Unsafe.ReadUnaligned(&pSrc[i]); - if (SourceIsAscii) + if (sourceIsAscii) { if (!Utf8Utility.AllBytesInUInt32AreAscii(nextBlockAsUInt32)) { goto DrainRemaining; } - nextBlockAsUInt32 = (ConversionIsToUpper) + nextBlockAsUInt32 = (conversionIsToUpper) ? Utf8Utility.ConvertAllAsciiBytesInUInt32ToUppercase(nextBlockAsUInt32) : Utf8Utility.ConvertAllAsciiBytesInUInt32ToLowercase(nextBlockAsUInt32); } @@ -300,12 +300,12 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD { goto DrainRemaining; } - nextBlockAsUInt32 = (ConversionIsToUpper) + nextBlockAsUInt32 = (conversionIsToUpper) ? 
Utf16Utility.ConvertAllAsciiCharsInUInt32ToUppercase(nextBlockAsUInt32) : Utf16Utility.ConvertAllAsciiCharsInUInt32ToLowercase(nextBlockAsUInt32); } - if (ConversionIsWidthPreserving) + if (conversionIsWidthPreserving) { Unsafe.WriteUnaligned(&pDest[i], nextBlockAsUInt32); } @@ -314,7 +314,7 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD Debug.Assert(Vector128.IsHardwareAccelerated); Vector128 blockAsVectorOfUInt32 = Vector128.CreateScalarUnsafe(nextBlockAsUInt32); - if (ConversionIsWidening) + if (conversionIsWidening) { Vector128 widenedBlock = Vector128.WidenLower(blockAsVectorOfUInt32.AsByte()).AsUInt64(); Unsafe.WriteUnaligned(&pDest[i], widenedBlock.ToScalar()); @@ -345,8 +345,12 @@ private static unsafe nuint ChangeCase(TFrom* pSrc, TTo* pD for (; i < elementCount; i++) { uint element = uint.CreateTruncating(pSrc[i]); - if (!UnicodeUtility.IsAsciiCodePoint(element)) { break; } - if (ConversionIsToUpper) + if (!UnicodeUtility.IsAsciiCodePoint(element)) + { + break; + } + + if (conversionIsToUpper) { if (UnicodeUtility.IsInRangeInclusive(element, 'a', 'z')) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs index 274861f601a2cf..f0a04207b6348e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs @@ -13,10 +13,15 @@ using System.Numerics; #pragma warning disable SA1121 // Use built-in type alias -using SkipChecks = System.Boolean; +// used to express: from the two provided char and byte buffers, check byte buffer for non-ASCII bytes +// as it's the value ("needle") that must not contain non-ASCII characters. Used by StartsWith and EndsWith.
using CheckBytes = System.Byte; +// same as above, but for chars using CheckChars = System.Char; -using CheckNonAscii = System.Byte; +// don't check for non-ASCII (used by Equals which does not throw for non-ASCII bytes) +using SkipChecks = System.Boolean; +// used to express: check value for non-ASCII bytes/chars +using CheckValue = System.SByte; namespace System.Buffers.Text { @@ -123,30 +128,30 @@ public static unsafe bool EndsWith(ReadOnlySpan text, ReadOnlySpan v => value.IsEmpty || (text.Length >= value.Length && Map(Equals(text.Slice(text.Length - value.Length), value))); public static bool StartsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(0, value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(0, value.Length), value))); public static bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(text.Length - value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(text.Length - value.Length), value))); // TODO adsitnik: discuss whether this overload should exists, as the only difference with ROS.StartsWith(ROS, StringComparison.OrdinalIgnoreCase) // is throwing an exception for non-ASCII characters found in value public static bool StartsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(0, value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(0, value.Length), value))); public static bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(text.Length - value.Length), value))); + => 
value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(text.Length - value.Length), value))); public static unsafe bool StartsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(0, value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(0, value.Length), value))); public static unsafe bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(text.Length - value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(text.Length - value.Length), value))); public static unsafe bool StartsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(0, value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(0, value.Length), value))); public static unsafe bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(text.Length - value.Length), value))); + => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(text.Length - value.Length), value))); [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool Map(EqualsResult equalsResult) @@ -235,7 +240,7 @@ private static EqualsResult Equals(ReadOnlySpan chars, ReadOnlySpa } while (!Unsafe.IsAddressGreaterThan(ref currentCharsSearchSpace, ref oneVectorAwayFromCharsEnd)); - // If any elements remain, process the first vector in the search space. + // If any elements remain, process the last vector in the search space. 
if ((uint)chars.Length % Vector256.Count != 0) { charValues = Vector256.LoadUnsafe(ref oneVectorAwayFromCharsEnd); @@ -305,7 +310,7 @@ private static EqualsResult Equals(ReadOnlySpan chars, ReadOnlySpa } while (!Unsafe.IsAddressGreaterThan(ref currentCharsSearchSpace, ref oneVectorAwayFromCharsEnd)); - // If any elements remain, process the first vector in the search space. + // If any elements remain, process the last vector in the search space. if ((uint)chars.Length % Vector128.Count != 0) { charValues = Vector128.LoadUnsafe(ref oneVectorAwayFromCharsEnd); @@ -349,7 +354,7 @@ private static EqualsResult SequenceEqualIgnoreCase(ReadO uint valueA = uint.CreateTruncating(text[i]); uint valueB = uint.CreateTruncating(value[i]); - if (typeof(TCheck) != typeof(SkipChecks)) + if (typeof(TCheck) == typeof(CheckValue)) { if (!UnicodeUtility.IsAsciiCodePoint(valueB)) { @@ -409,4 +414,3 @@ private enum EqualsResult } } } -#pragma warning restore SA1121 // Use built-in type alias diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs index ee876bdd9f0bd3..40f77377ba0e29 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs @@ -7,7 +7,8 @@ using System.Text; #pragma warning disable SA1121 // Use built-in type alias -using CheckNonAscii = System.Byte; +// used to express: check value for non-ASCII bytes/chars +using CheckValue = System.SByte; namespace System.Buffers.Text { @@ -165,7 +166,7 @@ private static int IndexOfIgnoreCase(ReadOnlySpan text, Re break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. // Found the first element of "value". See if the tail matches. 
- if (Map(SequenceEqualIgnoreCase(text.Slice(offset + 1, value.Length - 1), value.Slice(1)))) // Map throws if non-ASCII char is found in value + if (Map(SequenceEqualIgnoreCase(text.Slice(offset + 1, value.Length - 1), value.Slice(1)))) // Map throws if non-ASCII char is found in value return offset; // The tail matched. Return a successful find. remainingSearchSpaceLength--; @@ -217,7 +218,7 @@ private static int LastIndexOfIgnoreCase(ReadOnlySpan text break; // Found the first element of "value". See if the tail matches. - if (Map(SequenceEqualIgnoreCase(text.Slice(relativeIndex + 1, value.Length - 1), value.Slice(1)))) + if (Map(SequenceEqualIgnoreCase(text.Slice(relativeIndex + 1, value.Length - 1), value.Slice(1)))) return relativeIndex; // The tail matched. Return a successful find. offset += remainingSearchSpaceLength - relativeIndex; diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Trimming.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Trimming.cs index 3e0b4988066770..0874637687b551 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Trimming.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Trimming.cs @@ -18,7 +18,7 @@ public static partial class Ascii private static Range TrimHelper(ReadOnlySpan value, TrimType trimType) where T : unmanaged, IBinaryInteger { - const uint trimMask = + const uint TrimMask = (1u << (0x09 - 1)) | (1u << (0x0A - 1)) | (1u << (0x0B - 1)) @@ -32,7 +32,7 @@ private static Range TrimHelper(ReadOnlySpan value, TrimType trimType) for (; start < value.Length; start++) { uint elementValue = uint.CreateTruncating(value[start]); - if ((elementValue > 0x20) || ((trimMask & (1u << ((int)elementValue - 1))) == 0)) + if ((elementValue > 0x20) || ((TrimMask & (1u << ((int)elementValue - 1))) == 0)) { break; } @@ -45,7 +45,7 @@ private static Range TrimHelper(ReadOnlySpan value, TrimType trimType) for (; start <= end; end--) { 
uint elementValue = uint.CreateTruncating(value[end]); - if ((elementValue > 0x20) || ((trimMask & (1u << ((int)elementValue - 1))) == 0)) + if ((elementValue > 0x20) || ((TrimMask & (1u << ((int)elementValue - 1))) == 0)) { break; } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 3437a0b958607b..d31915af61f99c 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -979,7 +979,7 @@ public static bool ParticipatesInCaseConversion(ReadOnlySpan s) /// Gets whether the specified span contains only ASCII. public static bool IsAscii(ReadOnlySpan s) { -#if NETSTANDARD +#if REGEXGENERATOR foreach (char c in s) { if (c >= 128) From 1edc812f7f9f655d4ac35bd58484f20dfc16c877 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Fri, 9 Sep 2022 10:21:55 +0200 Subject: [PATCH 39/46] fold ASCIIUtility into Ascii, use public APIs --- .../System.Private.CoreLib.Shared.projitems | 4 +- .../Buffers/Text/Ascii.CaseConversion.cs | 5 +- .../System/Buffers/Text/Ascii.Comparison.cs | 4 +- .../System/Buffers/Text/Ascii.Searching.cs | 16 ++--- .../System/Buffers/Text/Ascii.Transcoding.cs | 4 +- .../Text/Ascii.Utility.Helpers.cs} | 5 +- .../Text/Ascii.Utility.cs} | 18 +++--- .../src/System/Buffers/Text/Ascii.cs | 33 +++++----- .../src/System/Convert.cs | 4 +- .../src/System/String.cs | 13 +--- .../src/System/Text/ASCIIEncoding.cs | 54 +++++++---------- .../Text/Unicode/Utf16Utility.Validation.cs | 4 +- .../Text/Unicode/Utf8Utility.Transcoding.cs | 60 +++++++++---------- .../Text/Unicode/Utf8Utility.Validation.cs | 39 ++++++------ .../src/System/DomainNameHelper.cs | 11 +--- 15 files changed, 123 insertions(+), 151 deletions(-) rename 
src/libraries/System.Private.CoreLib/src/System/{Text/ASCIIUtility.Helpers.cs => Buffers/Text/Ascii.Utility.Helpers.cs} (96%) rename src/libraries/System.Private.CoreLib/src/System/{Text/ASCIIUtility.cs => Buffers/Text/Ascii.Utility.cs} (99%) diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index a4dd02dc83bea4..0c0e9839b4607f 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -121,6 +121,8 @@ + + @@ -1022,8 +1024,6 @@ - - diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs index 8b15d9bc5ee5ef..f1e548b5be8730 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs @@ -382,7 +382,10 @@ private static unsafe bool VectorContainsAnyNonAsciiData(Vector128 vector) } else if (sizeof(T) == 2) { - if (ASCIIUtility.VectorContainsNonAsciiChar(vector.AsUInt16())) { return true; } + if (VectorContainsNonAsciiChar(vector.AsUInt16())) + { + return true; + } } else { diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs index f0a04207b6348e..f48194d1ed1381 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs @@ -286,7 +286,7 @@ private static EqualsResult Equals(ReadOnlySpan chars, ReadOnlySpa if (typeof(TCheck) == typeof(CheckChars)) { - if (ASCIIUtility.VectorContainsNonAsciiChar(charValues)) + if 
(VectorContainsNonAsciiChar(charValues)) { return EqualsResult.NonAsciiFound; } @@ -318,7 +318,7 @@ private static EqualsResult Equals(ReadOnlySpan chars, ReadOnlySpa if (typeof(TCheck) == typeof(CheckChars)) { - if (ASCIIUtility.VectorContainsNonAsciiChar(charValues)) + if (VectorContainsNonAsciiChar(charValues)) { return EqualsResult.NonAsciiFound; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs index 40f77377ba0e29..f4289b77ab5703 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs @@ -17,16 +17,16 @@ public static partial class Ascii private const int StackallocBytesLimit = 512; public static int IndexOf(ReadOnlySpan text, ReadOnlySpan value) - => IndexOf(text, value); + => IndexOf(text, value); public static int LastIndexOf(ReadOnlySpan text, ReadOnlySpan value) - => LastIndexOf(text, value); + => LastIndexOf(text, value); public static int IndexOf(ReadOnlySpan text, ReadOnlySpan value) - => IndexOf(text, value); + => IndexOf(text, value); public static int LastIndexOf(ReadOnlySpan text, ReadOnlySpan value) - => LastIndexOf(text, value); + => LastIndexOf(text, value); public static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) => IndexOfIgnoreCase(text, value); @@ -237,7 +237,7 @@ private interface IConverter static abstract void Convert(ReadOnlySpan source, Span destination); } - private readonly struct NarrowUtf16ToAscii : IConverter + private readonly struct NarrowConverter : IConverter { public static unsafe void Convert(ReadOnlySpan source, Span destination) { @@ -246,7 +246,7 @@ public static unsafe void Convert(ReadOnlySpan source, Span destinat fixed (char* pValue = &MemoryMarshal.GetReference(source)) fixed (byte* pNarrowed = &MemoryMarshal.GetReference(destination)) { - asciiCharCount 
= ASCIIUtility.NarrowUtf16ToAscii(pValue, pNarrowed, (nuint)source.Length); + asciiCharCount = NarrowUtf16ToAscii(pValue, pNarrowed, (nuint)source.Length); } if (asciiCharCount != (nuint)source.Length) @@ -256,7 +256,7 @@ public static unsafe void Convert(ReadOnlySpan source, Span destinat } } - private readonly struct WidenAsciiToUtf16 : IConverter + private readonly struct WidenConverter : IConverter { public static unsafe void Convert(ReadOnlySpan source, Span destination) { @@ -265,7 +265,7 @@ public static unsafe void Convert(ReadOnlySpan source, Span destinat fixed (byte* pValue = &MemoryMarshal.GetReference(source)) fixed (char* pWidened = &MemoryMarshal.GetReference(destination)) { - asciiCharCount = ASCIIUtility.WidenAsciiToUtf16(pValue, pWidened, (nuint)source.Length); + asciiCharCount = WidenAsciiToUtf16(pValue, pWidened, (nuint)source.Length); } if (asciiCharCount != (nuint)source.Length) diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Transcoding.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Transcoding.cs index b356b4903b2d9a..42537ed372c070 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Transcoding.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Transcoding.cs @@ -37,7 +37,7 @@ public static unsafe OperationStatus ToUtf16(ReadOnlySpan source, Span source, Span /// A mask which selects only the high bit of each byte of the given . 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Utility.cs similarity index 99% rename from src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs rename to src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Utility.cs index cfce1de6fde362..52702cd4356cf2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Utility.cs @@ -8,15 +8,15 @@ using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; -namespace System.Text +namespace System.Buffers.Text { - internal static partial class ASCIIUtility + public static partial class Ascii { /// /// Returns iff all bytes in are ASCII. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool AllBytesInUInt64AreAscii(ulong value) + private static bool AllBytesInUInt64AreAscii(ulong value) { // If the high bit of any byte is set, that byte is non-ASCII. @@ -81,7 +81,7 @@ private static bool FirstCharInUInt32IsAscii(uint value) /// /// An ASCII byte is defined as 0x00 - 0x7F, inclusive. [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint bufferLength) + private static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint bufferLength) { // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized // code below. This has two benefits: (a) we can take advantage of specific instructions like @@ -617,7 +617,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Intrinsified(byte* pBuff /// /// An ASCII char is defined as 0x0000 - 0x007F, inclusive. 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe nuint GetIndexOfFirstNonAsciiChar(char* pBuffer, nuint bufferLength /* in chars */) + internal static unsafe nuint GetIndexOfFirstNonAsciiChar(char* pBuffer, nuint bufferLength /* in chars */) { // If SSE2/ASIMD is supported, use those specific intrinsics instead of the generic vectorized // code below. This has two benefits: (a) we can take advantage of specific instructions like @@ -1152,7 +1152,7 @@ private static void NarrowTwoUtf16CharsToAsciiAndWriteToBuffer(ref byte outputBu /// or once elements have been converted. Returns the total number /// of elements that were able to be converted. /// - public static unsafe nuint NarrowUtf16ToAscii(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount) + private static unsafe nuint NarrowUtf16ToAscii(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount) { nuint currentOffset = 0; @@ -1374,7 +1374,7 @@ public static unsafe nuint NarrowUtf16ToAscii(char* pUtf16Buffer, byte* pAsciiBu } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool VectorContainsNonAsciiChar(Vector128 asciiVector) + private static bool VectorContainsNonAsciiChar(Vector128 asciiVector) { // max ASCII character is 0b_0111_1111, so the most significant bit (0x80) tells whether it contains non ascii @@ -1395,7 +1395,7 @@ internal static bool VectorContainsNonAsciiChar(Vector128 asciiVector) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool VectorContainsNonAsciiChar(Vector128 utf16Vector) + private static bool VectorContainsNonAsciiChar(Vector128 utf16Vector) { // prefer architecture specific intrinsic as they offer better perf if (Sse2.IsSupported) @@ -1576,7 +1576,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified(char* pUtf16Buffer, /// or once elements have been converted. Returns the total number /// of elements that were able to be converted. 
/// - public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount) + private static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount) { // Intrinsified in mono interpreter nuint currentOffset = 0; diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs index 98e7d602beb13d..6aff402e3a3d91 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.cs @@ -3,7 +3,6 @@ using System.Diagnostics; using System.Runtime.InteropServices; -using System.Text; namespace System.Buffers.Text { @@ -17,10 +16,15 @@ public static partial class Ascii /// byte appears, or -1 if the buffer contains only ASCII bytes. public static unsafe int GetIndexOfFirstNonAsciiByte(ReadOnlySpan buffer) { + if (buffer.IsEmpty) + { + return -1; + } + nuint bufferLength = (uint)buffer.Length; fixed (byte* pBuffer = &MemoryMarshal.GetReference(buffer)) { - nuint idxOfFirstNonAsciiElement = ASCIIUtility.GetIndexOfFirstNonAsciiByte(pBuffer, bufferLength); + nuint idxOfFirstNonAsciiElement = GetIndexOfFirstNonAsciiByte(pBuffer, bufferLength); Debug.Assert(idxOfFirstNonAsciiElement <= bufferLength); return (idxOfFirstNonAsciiElement == bufferLength) ? -1 : (int)idxOfFirstNonAsciiElement; } @@ -34,10 +38,15 @@ public static unsafe int GetIndexOfFirstNonAsciiByte(ReadOnlySpan buffer) /// char appears, or -1 if the buffer contains only ASCII char. 
public static unsafe int GetIndexOfFirstNonAsciiChar(ReadOnlySpan buffer) { + if (buffer.IsEmpty) + { + return -1; + } + nuint bufferLength = (uint)buffer.Length; fixed (char* pBuffer = &MemoryMarshal.GetReference(buffer)) { - nuint idxOfFirstNonAsciiElement = ASCIIUtility.GetIndexOfFirstNonAsciiChar(pBuffer, bufferLength); + nuint idxOfFirstNonAsciiElement = GetIndexOfFirstNonAsciiChar(pBuffer, bufferLength); Debug.Assert(idxOfFirstNonAsciiElement <= bufferLength); return (idxOfFirstNonAsciiElement == bufferLength) ? -1 : (int)idxOfFirstNonAsciiElement; } @@ -49,14 +58,7 @@ public static unsafe int GetIndexOfFirstNonAsciiChar(ReadOnlySpan buffer) /// The value to inspect. /// True if contains only ASCII bytes or is /// empty; False otherwise. - public static unsafe bool IsAscii(ReadOnlySpan value) - { - nuint valueLength = (uint)value.Length; - fixed (byte* pValue = &MemoryMarshal.GetReference(value)) - { - return ASCIIUtility.GetIndexOfFirstNonAsciiByte(pValue, valueLength) == valueLength; - } - } + public static unsafe bool IsAscii(ReadOnlySpan value) => value.IsEmpty || GetIndexOfFirstNonAsciiByte(value) < 0; /// /// Determines whether the provided value contains only ASCII chars. @@ -64,14 +66,7 @@ public static unsafe bool IsAscii(ReadOnlySpan value) /// The value to inspect. /// True if contains only ASCII chars or is /// empty; False otherwise. - public static unsafe bool IsAscii(ReadOnlySpan value) - { - nuint valueLength = (uint)value.Length; - fixed (char* pValue = &MemoryMarshal.GetReference(value)) - { - return ASCIIUtility.GetIndexOfFirstNonAsciiChar(pValue, valueLength) == valueLength; - } - } + public static unsafe bool IsAscii(ReadOnlySpan value) => value.IsEmpty || GetIndexOfFirstNonAsciiChar(value) < 0; /// /// Determines whether the provided value is ASCII byte. 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Convert.cs b/src/libraries/System.Private.CoreLib/src/System/Convert.cs index ba85c62448b56d..14ec78f0f1e0b6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Convert.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Convert.cs @@ -2471,7 +2471,7 @@ private static unsafe void ToBase64CharsLargeNoLineBreaks(ReadOnlySpan byt OperationStatus status = Base64.EncodeToUtf8(bytes, MemoryMarshal.AsBytes(chars), out _, out int bytesWritten); Debug.Assert(status == OperationStatus.Done && charLengthRequired == bytesWritten); - // Now widen the ASCII bytes in-place to chars (if the vectorized ASCIIUtility.WidenAsciiToUtf16 is ever updated + // Now widen the ASCII bytes in-place to chars (if the vectorized Ascii.WidenAsciiToUtf16 is ever updated // to support in-place updates, it should be used here instead). Since the base64 bytes are all valid ASCII, the byte // data is guaranteed to be 1/2 as long as the char data, and we can widen in-place. ref ushort dest = ref Unsafe.As(ref MemoryMarshal.GetReference(chars)); @@ -2522,7 +2522,7 @@ private static unsafe void ToBase64CharsLargeNoLineBreaks(ReadOnlySpan byt { dest = ref Unsafe.Subtract(ref dest, 4); src = ref Unsafe.Subtract(ref src, 4); - ASCIIUtility.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref Unsafe.As(ref dest), Unsafe.ReadUnaligned(ref src)); + Ascii.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref Unsafe.As(ref dest), Unsafe.ReadUnaligned(ref src)); } // The length produced by Base64 encoding is always a multiple of 4, so we don't need to handle diff --git a/src/libraries/System.Private.CoreLib/src/System/String.cs b/src/libraries/System.Private.CoreLib/src/System/String.cs index 39d9153f1d1725..75319d3adcc61a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. 
using System.Buffers; +using System.Buffers.Text; using System.Collections; using System.Collections.Generic; using System.ComponentModel; @@ -691,7 +692,7 @@ public bool IsNormalized() public bool IsNormalized(NormalizationForm normalizationForm) { - if (this.IsAscii()) + if (Ascii.IsAscii(this)) { // If its ASCII && one of the 4 main forms, then its already normalized if (normalizationForm == NormalizationForm.FormC || @@ -710,7 +711,7 @@ public string Normalize() public string Normalize(NormalizationForm normalizationForm) { - if (this.IsAscii()) + if (Ascii.IsAscii(this)) { // If its ASCII && one of the 4 main forms, then its already normalized if (normalizationForm == NormalizationForm.FormC || @@ -722,14 +723,6 @@ public string Normalize(NormalizationForm normalizationForm) return Normalization.Normalize(this, normalizationForm); } - private unsafe bool IsAscii() - { - fixed (char* str = &_firstChar) - { - return ASCIIUtility.GetIndexOfFirstNonAsciiChar(str, (uint)Length) == (uint)Length; - } - } - // Gets the character at a specified position. // [IndexerName("Chars")] diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIEncoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIEncoding.cs index b875e589f05ec0..ce0f0d5ee23dfc 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIEncoding.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIEncoding.cs @@ -193,7 +193,8 @@ private protected sealed override unsafe int GetByteCountFast(char* pChars, int { // Unrecognized fallback mechanism - count chars manually. - byteCount = (int)ASCIIUtility.GetIndexOfFirstNonAsciiChar(pChars, (uint)charsLength); + int firstNonAsciiIndex = Ascii.GetIndexOfFirstNonAsciiChar(new ReadOnlySpan(pChars, charsLength)); + byteCount = firstNonAsciiIndex < 0 ? 
charsLength : firstNonAsciiIndex; } charsConsumed = byteCount; @@ -354,9 +355,7 @@ private unsafe int GetBytesCommon(char* pChars, int charCount, byte* pBytes, int [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetBytesCommon private protected sealed override unsafe int GetBytesFast(char* pChars, int charsLength, byte* pBytes, int bytesLength, out int charsConsumed) { - int bytesWritten = (int)ASCIIUtility.NarrowUtf16ToAscii(pChars, pBytes, (uint)Math.Min(charsLength, bytesLength)); - - charsConsumed = bytesWritten; + Ascii.FromUtf16(new ReadOnlySpan(pChars, charsLength), new Span(pBytes, bytesLength), out charsConsumed, out int bytesWritten); return bytesWritten; } @@ -375,22 +374,19 @@ private protected sealed override unsafe int GetBytesWithFallback(ReadOnlySpan(pBytes, bytesLength)); + charCount = indexOfFirstNonAscii < 0 ? bytesLength : indexOfFirstNonAscii; } bytesConsumed = charCount; @@ -630,9 +627,7 @@ private unsafe int GetCharsCommon(byte* pBytes, int byteCount, char* pChars, int [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetCharsCommon private protected sealed override unsafe int GetCharsFast(byte* pBytes, int bytesLength, char* pChars, int charsLength, out int bytesConsumed) { - int charsWritten = (int)ASCIIUtility.WidenAsciiToUtf16(pBytes, pChars, (uint)Math.Min(bytesLength, charsLength)); - - bytesConsumed = charsWritten; + Ascii.ToUtf16(new ReadOnlySpan(pBytes, bytesLength), new Span(pChars, charsLength), out bytesConsumed, out int charsWritten); return charsWritten; } @@ -650,22 +645,19 @@ private protected sealed override unsafe int GetCharsWithFallback(ReadOnlySpan(pInputBuffer, inputLength)); + int numAsciiCharsConsumedJustNow = firstNonAsciiIndex < 0 ? 
inputLength : firstNonAsciiIndex; Debug.Assert(0 <= numAsciiCharsConsumedJustNow && numAsciiCharsConsumedJustNow <= inputLength); pInputBuffer += (uint)numAsciiCharsConsumedJustNow; diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs index 77866f028ea0c6..3d571437d91e8d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Buffers; +using System.Buffers.Text; using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; @@ -25,27 +26,24 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng Debug.Assert(pOutputBuffer != null || outputCharsRemaining == 0, "Destination length must be zero if destination buffer pointer is null."); // First, try vectorized conversion. + OperationStatus status = Ascii.ToUtf16(new ReadOnlySpan(pInputBuffer, inputLength), new Span(pOutputBuffer, outputCharsRemaining), out int bytesConsumed, out _); - { - nuint numElementsConverted = ASCIIUtility.WidenAsciiToUtf16(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputCharsRemaining)); - - pInputBuffer += numElementsConverted; - pOutputBuffer += numElementsConverted; - - // Quick check - did we just end up consuming the entire input buffer? - // If so, short-circuit the remainder of the method. + pInputBuffer += bytesConsumed; + pOutputBuffer += bytesConsumed; - if ((int)numElementsConverted == inputLength) - { - pInputBufferRemaining = pInputBuffer; - pOutputBufferRemaining = pOutputBuffer; - return OperationStatus.Done; - } + // Quick check - did we just end up consuming the entire input buffer? + // If so, short-circuit the remainder of the method. 
- inputLength -= (int)numElementsConverted; - outputCharsRemaining -= (int)numElementsConverted; + if (status == OperationStatus.Done) + { + pInputBufferRemaining = pInputBuffer; + pOutputBufferRemaining = pOutputBuffer; + return OperationStatus.Done; } + inputLength -= bytesConsumed; + outputCharsRemaining -= bytesConsumed; + if (inputLength < sizeof(uint)) { goto ProcessInputOfLessThanDWordSize; @@ -74,7 +72,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng #endif // First, check for the common case of all-ASCII bytes. - if (ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)) + if (Ascii.AllBytesInUInt32AreAscii(thisDWord)) { // We read an all-ASCII sequence. @@ -83,7 +81,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng goto ProcessRemainingBytesSlow; // running out of space, but may be able to write some data } - ASCIIUtility.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref *pOutputBuffer, thisDWord); + Ascii.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref *pOutputBuffer, thisDWord); pInputBuffer += 4; pOutputBuffer += 4; outputCharsRemaining -= 4; @@ -102,15 +100,15 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng thisDWord = Unsafe.ReadUnaligned(pInputBuffer); secondDWord = Unsafe.ReadUnaligned(pInputBuffer + sizeof(uint)); - if (!ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord | secondDWord)) + if (!Ascii.AllBytesInUInt32AreAscii(thisDWord | secondDWord)) { goto LoopTerminatedEarlyDueToNonAsciiData; } pInputBuffer += 8; - ASCIIUtility.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref pOutputBuffer[0], thisDWord); - ASCIIUtility.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref pOutputBuffer[4], secondDWord); + Ascii.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref pOutputBuffer[0], thisDWord); + Ascii.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref pOutputBuffer[4], secondDWord); pOutputBuffer += 8; } @@ -121,15 +119,15 @@ public static OperationStatus 
TranscodeToUtf16(byte* pInputBuffer, int inputLeng LoopTerminatedEarlyDueToNonAsciiData: - if (ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)) + if (Ascii.AllBytesInUInt32AreAscii(thisDWord)) { // The first DWORD contained all-ASCII bytes, so expand it. - ASCIIUtility.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref *pOutputBuffer, thisDWord); + Ascii.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref *pOutputBuffer, thisDWord); // continue the outer loop from the second DWORD - Debug.Assert(!ASCIIUtility.AllBytesInUInt32AreAscii(secondDWord)); + Debug.Assert(!Ascii.AllBytesInUInt32AreAscii(secondDWord)); thisDWord = secondDWord; pInputBuffer += 4; @@ -147,7 +145,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng AfterReadDWordSkipAllBytesAsciiCheck: - Debug.Assert(!ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)); // this should have been handled earlier + Debug.Assert(!Ascii.AllBytesInUInt32AreAscii(thisDWord)); // this should have been handled earlier // Next, try stripping off ASCII bytes one at a time. // We only handle up to three ASCII bytes here since we handled the four ASCII byte case above. @@ -848,23 +846,23 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt // First, try vectorized conversion. { - nuint numElementsConverted = ASCIIUtility.NarrowUtf16ToAscii(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputBytesRemaining)); + OperationStatus status = Ascii.FromUtf16(new ReadOnlySpan(pInputBuffer, inputLength), new Span(pOutputBuffer, outputBytesRemaining), out int charsConsumed, out _); - pInputBuffer += numElementsConverted; - pOutputBuffer += numElementsConverted; + pInputBuffer += charsConsumed; + pOutputBuffer += charsConsumed; // Quick check - did we just end up consuming the entire input buffer? // If so, short-circuit the remainder of the method. 
- if ((int)numElementsConverted == inputLength) + if (status == OperationStatus.Done) { pInputBufferRemaining = pInputBuffer; pOutputBufferRemaining = pOutputBuffer; return OperationStatus.Done; } - inputLength -= (int)numElementsConverted; - outputBytesRemaining -= (int)numElementsConverted; + inputLength -= charsConsumed; + outputBytesRemaining -= charsConsumed; } if (inputLength < CharsPerDWord) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs index 5784cfa136430a..8e08a4a3bdae10 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Text; using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; @@ -26,23 +27,19 @@ internal static unsafe partial class Utf8Utility Debug.Assert(pInputBuffer != null || inputLength == 0, "Input length must be zero if input buffer pointer is null."); // First, try to drain off as many ASCII bytes as we can from the beginning. - + int indexOfFirstNonAscii = Ascii.GetIndexOfFirstNonAsciiByte(new ReadOnlySpan(pInputBuffer, inputLength)); + // Quick check - did we just end up consuming the entire input buffer? + // If so, short-circuit the remainder of the method. + if (indexOfFirstNonAscii < 0) { - nuint numAsciiBytesCounted = ASCIIUtility.GetIndexOfFirstNonAsciiByte(pInputBuffer, (uint)inputLength); - pInputBuffer += numAsciiBytesCounted; - - // Quick check - did we just end up consuming the entire input buffer? - // If so, short-circuit the remainder of the method. 
- - inputLength -= (int)numAsciiBytesCounted; - if (inputLength == 0) - { - utf16CodeUnitCountAdjustment = 0; - scalarCountAdjustment = 0; - return pInputBuffer; - } + utf16CodeUnitCountAdjustment = 0; + scalarCountAdjustment = 0; + return pInputBuffer + inputLength; } + pInputBuffer += indexOfFirstNonAscii; + inputLength -= indexOfFirstNonAscii; + #if DEBUG // Keep these around for final validation at the end of the method. byte* pOriginalInputBuffer = pInputBuffer; @@ -82,7 +79,7 @@ internal static unsafe partial class Utf8Utility // First, check for the common case of all-ASCII bytes. - if (ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)) + if (Ascii.AllBytesInUInt32AreAscii(thisDWord)) { // We read an all-ASCII sequence. @@ -102,7 +99,7 @@ internal static unsafe partial class Utf8Utility // the read pointer up to the next aligned address. thisDWord = Unsafe.ReadUnaligned(pInputBuffer); - if (!ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)) + if (!Ascii.AllBytesInUInt32AreAscii(thisDWord)) { goto AfterReadDWordSkipAllBytesAsciiCheck; } @@ -156,12 +153,12 @@ internal static unsafe partial class Utf8Utility } else { - if (!ASCIIUtility.AllBytesInUInt32AreAscii(((uint*)pInputBuffer)[0] | ((uint*)pInputBuffer)[1])) + if (!Ascii.AllBytesInUInt32AreAscii(((uint*)pInputBuffer)[0] | ((uint*)pInputBuffer)[1])) { goto LoopTerminatedEarlyDueToNonAsciiDataInFirstPair; } - if (!ASCIIUtility.AllBytesInUInt32AreAscii(((uint*)pInputBuffer)[2] | ((uint*)pInputBuffer)[3])) + if (!Ascii.AllBytesInUInt32AreAscii(((uint*)pInputBuffer)[2] | ((uint*)pInputBuffer)[3])) { goto LoopTerminatedEarlyDueToNonAsciiDataInSecondPair; } @@ -206,7 +203,7 @@ internal static unsafe partial class Utf8Utility // Let's perform a quick check here to bypass the logic at the beginning of the main loop. 
thisDWord = *(uint*)pInputBuffer; // still aligned here - if (ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)) + if (Ascii.AllBytesInUInt32AreAscii(thisDWord)) { pInputBuffer += sizeof(uint); // consumed 1 more DWORD thisDWord = *(uint*)pInputBuffer; // still aligned here @@ -220,13 +217,13 @@ internal static unsafe partial class Utf8Utility AfterReadDWordSkipAllBytesAsciiCheck: - Debug.Assert(!ASCIIUtility.AllBytesInUInt32AreAscii(thisDWord)); // this should have been handled earlier + Debug.Assert(!Ascii.AllBytesInUInt32AreAscii(thisDWord)); // this should have been handled earlier // Next, try stripping off ASCII bytes one at a time. // We only handle up to three ASCII bytes here since we handled the four ASCII byte case above. { - uint numLeadingAsciiBytes = ASCIIUtility.CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiData(thisDWord); + uint numLeadingAsciiBytes = Ascii.CountNumberOfLeadingAsciiBytesFromUInt32WithSomeNonAsciiData(thisDWord); pInputBuffer += numLeadingAsciiBytes; if (pFinalPosWhereCanReadDWordFromInputBuffer < pInputBuffer) diff --git a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs index c2fb9889093938..a1ec8bf6ac4834 100644 --- a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Buffers.Text; using System.Diagnostics; using System.Globalization; using System.Runtime.CompilerServices; @@ -211,15 +212,7 @@ internal static string IdnEquivalent(string hostname) // check if only ascii chars // special case since idnmapping will not lowercase if only ascii present - bool allAscii = true; - foreach (char c in hostname) - { - if (c > 0x7F) - { - allAscii = false; - break; - } - } + bool allAscii = Ascii.IsAscii(hostname); if (allAscii) { From e2d71058cd79404ad771832c195409170d9cce4f Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Fri, 9 Sep 2022 18:21:36 +0200 Subject: [PATCH 40/46] fix tests that were relying on reflection so far (and I did not know about their existence) --- .../tests/Ascii/CaseConversionTests.cs | 14 +- .../System.Memory/tests/Ascii/EqualsTests.cs | 10 +- .../tests/Ascii/FromUtf16Tests.cs | 105 ++++ .../tests/Ascii/GetHashCodeByteTests.cs | 8 +- .../tests/Ascii/GetHashCodeCharTests.cs | 9 +- .../Ascii/GetIndexOfFirstNonAsciiByteTests.cs | 72 ++- .../Ascii/GetIndexOfFirstNonAsciiCharTests.cs | 80 ++- .../System.Memory/tests/Ascii/IndexOfTests.cs | 12 +- .../tests/Ascii/StartsEndsWithTests.cs | 12 +- .../System.Memory/tests/Ascii/ToUtf16Tests.cs | 108 ++++ .../System.Memory/tests/Ascii/TrimTests.cs | 14 +- .../tests/System.Memory.Tests.csproj | 2 + .../src/System/Buffers/Text/Ascii.Utility.cs | 2 +- .../tests/System.Runtime.Tests.csproj | 1 - .../tests/System/Text/ASCIIUtilityTests.cs | 461 ------------------ 15 files changed, 399 insertions(+), 511 deletions(-) create mode 100644 src/libraries/System.Memory/tests/Ascii/FromUtf16Tests.cs create mode 100644 src/libraries/System.Memory/tests/Ascii/ToUtf16Tests.cs delete mode 100644 src/libraries/System.Runtime/tests/System/Text/ASCIIUtilityTests.cs diff --git a/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs b/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs index 6365f482d84818..d902e4a7e774cb 100644 --- 
a/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs @@ -8,12 +8,12 @@ namespace System.Buffers.Text.Tests { - public class CaseConversionTests + public static class CaseConversionTests { private const byte MaxValidAsciiChar = 127; [Fact] - public void OverlappingBuffers_Throws() + public static void OverlappingBuffers_Throws() { byte[] byteBuffer = new byte[10]; char[] charBuffer = new char[10]; @@ -61,7 +61,7 @@ private static void VerifySingleChar(OperationStatus status, int value, T exp } [Fact] - public void SingleByteConversion() + public static void SingleByteConversion() { byte[] destinationByte = new byte[1]; char[] destinationChar = new char[1]; @@ -87,7 +87,7 @@ public void SingleByteConversion() } [Fact] - public void SingleCharConversion() + public static void SingleCharConversion() { char[] sourceChar = new char[1], destinationChar = new char[1]; // this test is "optimized" as it performs a LOT of iterations byte[] destinationByte = new byte[1]; @@ -115,7 +115,7 @@ public void SingleCharConversion() [Theory] [InlineData("\u00C0bCDe")] // U+00C0 is not ASCII [InlineData("\u00E0bCDe")] // U+00E0 is not ASCII - public void InvalidCharacters(string sourceChars) + public static void InvalidCharacters(string sourceChars) { char[] destinationChars = new char[sourceChars.Length]; byte[] sourceBytes = System.Text.Encoding.ASCII.GetBytes(sourceChars); @@ -184,7 +184,7 @@ public static IEnumerable MultipleValidCharacterConversion_Arguments [Theory] [MemberData(nameof(MultipleValidCharacterConversion_Arguments))] - public void MultipleValidCharacterConversion(string sourceChars, string expectedLowerChars, string expectedUpperChars) + public static void MultipleValidCharacterConversion(string sourceChars, string expectedLowerChars, string expectedUpperChars) { Assert.Equal(sourceChars.Length, expectedLowerChars.Length); Assert.Equal(expectedLowerChars.Length, 
expectedUpperChars.Length); @@ -238,7 +238,7 @@ static void VerifyBool(bool result, int processed, T[] expected, T[] actual) [Theory] [InlineData("Hello", 4, "hell", "HELL")] [InlineData(" AbC ", 3, " ab", " AB")] - public void DestinationTooSmall(string sourceChars, int destinationSize, string expectedLowerChars, string expectedUpperChars) + public static void DestinationTooSmall(string sourceChars, int destinationSize, string expectedLowerChars, string expectedUpperChars) { Assert.NotEqual(sourceChars.Length, destinationSize); Assert.Equal(destinationSize, expectedLowerChars.Length); diff --git a/src/libraries/System.Memory/tests/Ascii/EqualsTests.cs b/src/libraries/System.Memory/tests/Ascii/EqualsTests.cs index 512c8274334cdb..056d932f222ea2 100644 --- a/src/libraries/System.Memory/tests/Ascii/EqualsTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/EqualsTests.cs @@ -8,10 +8,10 @@ namespace System.Buffers.Text.Tests { - public class EqualsTests + public static class EqualsTests { [Fact] - public void InvalidCharacters_DoesNotThrow() + public static void InvalidCharacters_DoesNotThrow() { Assert.False(Ascii.Equals(Enumerable.Repeat((byte)128, "valid".Length).ToArray(), "valid")); Assert.False(Ascii.Equals("valid"u8, "aa\u00C0aa")); @@ -39,7 +39,7 @@ public static IEnumerable ExactMatch_TestData [Theory] [MemberData(nameof(ExactMatch_TestData))] - public void ExactMatchFound(string left, string right) + public static void ExactMatchFound(string left, string right) { Assert.True(Ascii.Equals(Encoding.ASCII.GetBytes(left), right)); @@ -70,7 +70,7 @@ public static IEnumerable ExactMatchNotFound_TestData [Theory] [MemberData(nameof(ExactMatchNotFound_TestData))] - public void ExactMatchNotFound(string left, string right) + public static void ExactMatchNotFound(string left, string right) { Assert.False(Ascii.Equals(Encoding.ASCII.GetBytes(left), right)); @@ -96,7 +96,7 @@ public static IEnumerable IgnoreCaseMatch_TestData [Theory] 
[MemberData(nameof(IgnoreCaseMatch_TestData))] - public void IgnoreCaseMatchFound(string left, string right) + public static void IgnoreCaseMatchFound(string left, string right) { Assert.True(Ascii.EqualsIgnoreCase(Encoding.ASCII.GetBytes(left), Encoding.ASCII.GetBytes(right))); Assert.True(Ascii.EqualsIgnoreCase(left, right)); diff --git a/src/libraries/System.Memory/tests/Ascii/FromUtf16Tests.cs b/src/libraries/System.Memory/tests/Ascii/FromUtf16Tests.cs new file mode 100644 index 00000000000000..b1e221f3195c1d --- /dev/null +++ b/src/libraries/System.Memory/tests/Ascii/FromUtf16Tests.cs @@ -0,0 +1,105 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Security.Cryptography; +using Xunit; + +namespace System.Buffers.Text.Tests +{ + public static class FromUtf16Tests + { + [Fact] + public static unsafe void EmptyInputs() + { + Assert.Equal(OperationStatus.Done, Ascii.FromUtf16(ReadOnlySpan.Empty, Span.Empty, out int charsConsumed, out int bytesWritten)); + Assert.Equal(0, charsConsumed); + Assert.Equal(charsConsumed, bytesWritten); + } + + [Fact] + public static void AllAsciiInput() + { + using BoundedMemory utf16Mem = BoundedMemory.Allocate(128); + using BoundedMemory asciiMem = BoundedMemory.Allocate(128); + + // Fill source with 00 .. 7F. + + Span utf16Span = utf16Mem.Span; + for (int i = 0; i < utf16Span.Length; i++) + { + utf16Span[i] = (char)i; + } + utf16Mem.MakeReadonly(); + + // We'll write to the ASCII span. + // We test with a variety of span lengths to test alignment and fallthrough code paths. + + Span asciiSpan = asciiMem.Span; + + for (int i = 0; i < utf16Span.Length; i++) + { + asciiSpan.Clear(); // remove any data from previous iteration + + // First, validate that the workhorse saw the incoming data as all-ASCII. 
+ Assert.Equal(OperationStatus.Done, Ascii.FromUtf16(utf16Span.Slice(i), asciiSpan.Slice(i), out int charsConsumed, out int bytesWritten)); + Assert.Equal(128 - i, charsConsumed); + Assert.Equal(charsConsumed, bytesWritten); + + // Then, validate that the data was transcoded properly. + + for (int j = i; j < 128; j++) + { + Assert.Equal((ushort)utf16Span[j], (ushort)asciiSpan[j]); + } + } + } + + [Fact] + public static void SomeNonAsciiInput() + { + using BoundedMemory utf16Mem = BoundedMemory.Allocate(128); + using BoundedMemory asciiMem = BoundedMemory.Allocate(128); + + // Fill source with 00 .. 7F. + + Span utf16Span = utf16Mem.Span; + for (int i = 0; i < utf16Span.Length; i++) + { + utf16Span[i] = (char)i; + } + + // We'll write to the ASCII span. + + Span asciiSpan = asciiMem.Span; + + for (int i = utf16Span.Length - 1; i >= 0; i--) + { + RandomNumberGenerator.Fill(asciiSpan); // fill with garbage + + // First, keep track of the garbage we wrote to the destination. + // We want to ensure it wasn't overwritten. + + byte[] expectedTrailingData = asciiSpan.Slice(i).ToArray(); + + // Then, set the desired char as non-ASCII, then check that the workhorse + // correctly saw the data as non-ASCII. + + utf16Span[i] = '\u0123'; // use U+0123 instead of U+0080 since it catches inappropriate pmovmskb usage + Assert.Equal(OperationStatus.InvalidData, Ascii.FromUtf16(utf16Span, asciiSpan, out int charsConsumed, out int bytesWritten)); + Assert.Equal(i, charsConsumed); + Assert.Equal(charsConsumed, bytesWritten); + + // Next, validate that the ASCII data was transcoded properly. + + for (int j = 0; j < i; j++) + { + Assert.Equal((ushort)utf16Span[j], (ushort)asciiSpan[j]); + } + + // Finally, validate that the trailing data wasn't overwritten with non-ASCII data.
+ + Assert.Equal(expectedTrailingData, asciiSpan.Slice(i).ToArray()); + } + } + } +} diff --git a/src/libraries/System.Memory/tests/Ascii/GetHashCodeByteTests.cs b/src/libraries/System.Memory/tests/Ascii/GetHashCodeByteTests.cs index 500c2714fa6344..40101f539edf26 100644 --- a/src/libraries/System.Memory/tests/Ascii/GetHashCodeByteTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/GetHashCodeByteTests.cs @@ -9,12 +9,12 @@ namespace System.Buffers.Text.Tests { - public class GetHashCodeByteTests + public static class GetHashCodeByteTests { [Theory] [InlineData(new byte[] { 128 })] [InlineData(new byte[] { 91, 91, 128, 91 })] // >= 4 chars can execute a different code path - public void InvalidCharactersInValueThrowsOrReturnsFalse(byte[] value) + public static void InvalidCharactersInValueThrowsOrReturnsFalse(byte[] value) { Assert.Throws(() => Ascii.GetHashCode(value)); Assert.Throws(() => Ascii.GetHashCodeIgnoreCase(value)); @@ -25,7 +25,7 @@ public void InvalidCharactersInValueThrowsOrReturnsFalse(byte[] value) Assert.Equal(default(int), hashCode); } - public IEnumerable ValidInputValidOutput_TestData + public static IEnumerable ValidInputValidOutput_TestData { get { @@ -40,7 +40,7 @@ public IEnumerable ValidInputValidOutput_TestData [Theory] [InlineData(nameof(ValidInputValidOutput_TestData))] - public void ValidInputValidOutput(string input) + public static void ValidInputValidOutput(string input) { // The contract makes it clear that hash code is randomized and is not guaranteed to match string.GetHashCode. // But.. re-using same types used internally by string.GetHashCode was the simplest way to get good hashing implementaiton. 
diff --git a/src/libraries/System.Memory/tests/Ascii/GetHashCodeCharTests.cs b/src/libraries/System.Memory/tests/Ascii/GetHashCodeCharTests.cs index d1aa6167b51b45..c2d7926a8380a0 100644 --- a/src/libraries/System.Memory/tests/Ascii/GetHashCodeCharTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/GetHashCodeCharTests.cs @@ -3,17 +3,16 @@ using System.Collections.Generic; using System.Linq; -using System.Text; using Xunit; namespace System.Buffers.Text.Tests { - public class GetHashCodeCharTests + public static class GetHashCodeCharTests { [Theory] [InlineData("\u00C0")] [InlineData("aaa\u00C0bbb")] - public void InvalidCharactersInValueThrowsOrReturnsFalse(string value) + public static void InvalidCharactersInValueThrowsOrReturnsFalse(string value) { Assert.Throws(() => Ascii.GetHashCode(value)); Assert.Throws(() => Ascii.GetHashCodeIgnoreCase(value)); @@ -24,7 +23,7 @@ public void InvalidCharactersInValueThrowsOrReturnsFalse(string value) Assert.Equal(default(int), hashCode); } - public IEnumerable ValidInputValidOutput_TestData + public static IEnumerable ValidInputValidOutput_TestData { get { @@ -39,7 +38,7 @@ public IEnumerable ValidInputValidOutput_TestData [Theory] [InlineData(nameof(ValidInputValidOutput_TestData))] - public void ValidInputValidOutput(string input) + public static void ValidInputValidOutput(string input) { // The contract makes it clear that hash code is randomized and is not guaranteed to match string.GetHashCode. // But.. re-using same types used internally by string.GetHashCode was the simplest way to get good hashing implementaiton. 
diff --git a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs b/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs index 6955f04c4e736f..00eb0bcbffa60b 100644 --- a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs @@ -3,18 +3,19 @@ using System.Collections.Generic; using System.Linq; +using System.Numerics; using System.Runtime.Intrinsics; using Xunit; namespace System.Buffers.Text.Tests { - public class GetIndexOfFirstNonAsciiByteTests + public static class GetIndexOfFirstNonAsciiByteTests { private static byte GetNextValidAsciiByte() => (byte)Random.Shared.Next(0, 127 + 1); private static byte GetNextInvalidAsciiByte() => (byte)Random.Shared.Next(128, 255 + 1); [Fact] - public void EmptyInput_IndexNotFound() + public static void EmptyInput_IndexNotFound() { Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiByte(ReadOnlySpan.Empty)); Assert.True(Ascii.IsAscii(ReadOnlySpan.Empty)); @@ -44,7 +45,7 @@ public static IEnumerable AsciiOnlyBuffers [Theory] [MemberData(nameof(AsciiOnlyBuffers))] - public void AllAscii_IndexNotFound(byte[] buffer) + public static void AllAscii_IndexNotFound(byte[] buffer) { Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiByte(buffer)); Assert.True(Ascii.IsAscii(buffer)); @@ -74,7 +75,7 @@ static byte[] Create(int length, int index) [Theory] [MemberData(nameof(ContainingNonAsciiCharactersBuffers))] - public void NonAscii_IndexFound(int expectedIndex, byte[] buffer) + public static void NonAscii_IndexFound(int expectedIndex, byte[] buffer) { Assert.Equal(expectedIndex, Ascii.GetIndexOfFirstNonAsciiByte(buffer)); Assert.False(Ascii.IsAscii(buffer)); @@ -84,5 +85,68 @@ public void NonAscii_IndexFound(int expectedIndex, byte[] buffer) Assert.Equal(i != expectedIndex, Ascii.IsAscii(buffer[i])); } } + + [Fact] + public static void Vector128InnerLoop() + { + // The purpose of this test is to 
make sure we're identifying the correct + // vector (of the two that we're reading simultaneously) when performing + // the final ASCII drain at the end of the method once we've broken out + // of the inner loop. + + using (BoundedMemory mem = BoundedMemory.Allocate(1024)) + { + Span bytes = mem.Span; + + for (int i = 0; i < bytes.Length; i++) + { + bytes[i] &= 0x7F; // make sure each byte (of the pre-populated random data) is ASCII + } + + // Two vectors have offsets 0 .. 31. We'll go backward to avoid having to + // re-clear the vector every time. + + for (int i = 2 * Vector128.Count - 1; i >= 0; i--) + { + bytes[100 + i * 13] = 0x80; // 13 is relatively prime to 32, so it ensures all possible positions are hit + Assert.Equal(100 + i * 13, Ascii.GetIndexOfFirstNonAsciiByte(bytes)); + } + } + } + + [Fact] + public static void Boundaries() + { + // The purpose of this test is to make sure we're hitting all of the vectorized + // and draining logic correctly both in the SSE2 and in the non-SSE2 enlightened + // code paths. We shouldn't be reading beyond the boundaries we were given. + + // The 5 * Vector test should make sure that we're exercising all possible + // code paths across both implementations. + using (BoundedMemory mem = BoundedMemory.Allocate(5 * Vector.Count)) + { + Span bytes = mem.Span; + + // First, try it with all-ASCII buffers. + + for (int i = 0; i < bytes.Length; i++) + { + bytes[i] &= 0x7F; // make sure each byte (of the pre-populated random data) is ASCII + } + + for (int i = bytes.Length; i >= 0; i--) + { + Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiByte(bytes.Slice(0, i))); + } + + // Then, try it with non-ASCII bytes. 
+ + for (int i = bytes.Length; i >= 1; i--) + { + bytes[i - 1] = 0x80; // set non-ASCII + Assert.Equal(i - 1, Ascii.GetIndexOfFirstNonAsciiByte(bytes.Slice(0, i))); + } + } + } } } diff --git a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs b/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs index 903f016ad180e8..549294acf77ca9 100644 --- a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs @@ -3,18 +3,19 @@ using System.Collections.Generic; using System.Linq; +using System.Numerics; using System.Runtime.Intrinsics; using Xunit; namespace System.Buffers.Text.Tests { - public class GetIndexOfFirstNonAsciiCharTests + public static class GetIndexOfFirstNonAsciiCharTests { private static char GetNextValidAsciiChar() => (char)Random.Shared.Next(0, 127 + 1); private static char GetNextInvalidAsciiChar() => (char)Random.Shared.Next(128, ushort.MaxValue + 1); [Fact] - public void EmptyInput_IndexNotFound() + public static void EmptyInput_IndexNotFound() { Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiChar(ReadOnlySpan.Empty)); Assert.True(Ascii.IsAscii(ReadOnlySpan.Empty)); @@ -44,7 +45,7 @@ public static IEnumerable AsciiOnlyBuffers [Theory] [MemberData(nameof(AsciiOnlyBuffers))] - public void AllAscii_IndexNotFound(char[] buffer) + public static void AllAscii_IndexNotFound(char[] buffer) { Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiChar(buffer)); Assert.True(Ascii.IsAscii(buffer)); @@ -74,7 +75,7 @@ static char[] Create(int length, int index) [Theory] [MemberData(nameof(ContainingNonAsciiCharactersBuffers))] - public void NonAscii_IndexFound(int expectedIndex, char[] buffer) + public static void NonAscii_IndexFound(int expectedIndex, char[] buffer) { Assert.Equal(expectedIndex, Ascii.GetIndexOfFirstNonAsciiChar(buffer)); Assert.False(Ascii.IsAscii(buffer)); @@ -84,5 +85,76 @@ public void 
NonAscii_IndexFound(int expectedIndex, char[] buffer) Assert.Equal(i != expectedIndex, Ascii.IsAscii(buffer[i])); } } + + [Fact] + public static void Vector128InnerLoop() + { + // The purpose of this test is to make sure we're identifying the correct + // vector (of the two that we're reading simultaneously) when performing + // the final ASCII drain at the end of the method once we've broken out + // of the inner loop. + // + // Use U+0123 instead of U+0080 for this test because if our implementation + // uses pminuw / pmovmskb incorrectly, U+0123 will incorrectly show up as ASCII, + // causing our test to produce a false negative. + + using (BoundedMemory mem = BoundedMemory.Allocate(1024)) + { + Span chars = mem.Span; + + for (int i = 0; i < chars.Length; i++) + { + chars[i] &= '\u007F'; // make sure each char (of the pre-populated random data) is ASCII + } + + // Two vectors have offsets 0 .. 31. We'll go backward to avoid having to + // re-clear the vector every time. + + for (int i = 2 * Vector128.Count - 1; i >= 0; i--) + { + chars[100 + i * 13] = '\u0123'; // 13 is relatively prime to 32, so it ensures all possible positions are hit + Assert.Equal(100 + i * 13, Ascii.GetIndexOfFirstNonAsciiChar(chars)); + } + } + } + + [Fact] + public static void Boundaries() + { + // The purpose of this test is to make sure we're hitting all of the vectorized + // and draining logic correctly both in the SSE2 and in the non-SSE2 enlightened + // code paths. We shouldn't be reading beyond the boundaries we were given. + // + // The 5 * Vector test should make sure that we're exercising all possible + // code paths across both implementations. The sizeof(char) is because we're + // specifying element count, but underlying implementation reinterpret casts to bytes. 
+ // + // Use U+0123 instead of U+0080 for this test because if our implementation + // uses pminuw / pmovmskb incorrectly, U+0123 will incorrectly show up as ASCII, + // causing our test to produce a false negative. + + using (BoundedMemory mem = BoundedMemory.Allocate(5 * Vector.Count / sizeof(char))) + { + Span chars = mem.Span; + + for (int i = 0; i < chars.Length; i++) + { + chars[i] &= '\u007F'; // make sure each char (of the pre-populated random data) is ASCII + } + + for (int i = chars.Length; i >= 0; i--) + { + Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiChar(chars.Slice(0, i))); + } + + // Then, try it with non-ASCII bytes. + + for (int i = chars.Length; i >= 1; i--) + { + chars[i - 1] = '\u0123'; // set non-ASCII + Assert.Equal(i - 1, Ascii.GetIndexOfFirstNonAsciiChar(chars.Slice(0, i))); + } + } + } } } diff --git a/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs b/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs index bb07b58b6c725c..b68caff2c098f8 100644 --- a/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs @@ -9,10 +9,10 @@ namespace System.Buffers.Text.Tests { [ActiveIssue("https://github.com/dotnet/runtime/issues/75125", TestRuntimes.Mono)] - public class IndexOfTests + public static class IndexOfTests { [Fact] - public void InvalidCharactersInValueThrows() + public static void InvalidCharactersInValueThrows() { Assert.Throws(() => Ascii.IndexOf("aaaa"u8, "\u00C0")); Assert.Throws(() => Ascii.IndexOf("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"u8, "aaaaaaaaaaaaa\u00C0")); @@ -58,7 +58,7 @@ public static IEnumerable ExactMatchFound_TestData [Theory] [MemberData(nameof(ExactMatchFound_TestData))] - public void ExactMatchFound(string text, string value, int expectedFirstIndex, int expectedLastIndex) + public static void ExactMatchFound(string text, string value, int expectedFirstIndex, int expectedLastIndex) { Assert.Equal(expectedFirstIndex, Ascii.IndexOf(text, 
Encoding.ASCII.GetBytes(value))); Assert.Equal(expectedFirstIndex, Ascii.IndexOf(Encoding.ASCII.GetBytes(text), value)); @@ -91,7 +91,7 @@ public static IEnumerable ExactMatchNotFound_TestData [Theory] [MemberData(nameof(ExactMatchNotFound_TestData))] - public void ExactMatchNotFound(string text, string value) + public static void ExactMatchNotFound(string text, string value) { Assert.Equal(-1, Ascii.IndexOf(text, Encoding.ASCII.GetBytes(value))); Assert.Equal(-1, Ascii.IndexOf(Encoding.ASCII.GetBytes(text), value)); @@ -115,7 +115,7 @@ public static IEnumerable IgnoreCaseMatchFound_TestData [Theory] [MemberData(nameof(IgnoreCaseMatchFound_TestData))] - public void IgnoreCaseMatchFound(string text, string value, int expectedFirstIndex, int expectedLastIndex) + public static void IgnoreCaseMatchFound(string text, string value, int expectedFirstIndex, int expectedLastIndex) { Assert.Equal(expectedFirstIndex, Ascii.IndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); Assert.Equal(expectedFirstIndex, Ascii.IndexOfIgnoreCase(text, value)); @@ -143,7 +143,7 @@ public static IEnumerable IgnoreCaseMatchNotFound_TestData [Theory] [MemberData(nameof(IgnoreCaseMatchNotFound_TestData))] - public void IgnoreCaseMatchNotFound(string text, string value) + public static void IgnoreCaseMatchNotFound(string text, string value) { Assert.Equal(-1, Ascii.IndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); Assert.Equal(-1, Ascii.IndexOfIgnoreCase(text, value)); diff --git a/src/libraries/System.Memory/tests/Ascii/StartsEndsWithTests.cs b/src/libraries/System.Memory/tests/Ascii/StartsEndsWithTests.cs index 0d5bd41c1e465d..9ee12d8c13080c 100644 --- a/src/libraries/System.Memory/tests/Ascii/StartsEndsWithTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/StartsEndsWithTests.cs @@ -9,10 +9,10 @@ namespace System.Buffers.Text.Tests { - public class StartsEndsWithTests + public static class StartsEndsWithTests { [Fact] - public 
void InvalidCharactersInValueThrows() + public static void InvalidCharactersInValueThrows() { Assert.Throws(() => Ascii.StartsWith("aaaa"u8, "\u00C0")); // non-vectorized code path Assert.Throws(() => Ascii.StartsWith("aaaaaaaaaaaaaaaaaaaaaaaaa"u8, "aaaaaaaaaaaaaaaaaaaaaaaa\u00C0")); // vectorized code path @@ -54,7 +54,7 @@ public static IEnumerable ExactMatchFound_TestData [Theory] [MemberData(nameof(ExactMatchFound_TestData))] - public void MatchFound(string text, string value) + public static void MatchFound(string text, string value) { Assert.True(Ascii.StartsWith(text, Encoding.ASCII.GetBytes(value))); Assert.True(Ascii.StartsWith(Encoding.ASCII.GetBytes(text), value)); @@ -93,7 +93,7 @@ public static IEnumerable IgnoreCaseMatchFound_TestData [Theory] [MemberData(nameof(IgnoreCaseMatchFound_TestData))] - public void IgnoreCaseMatchFound(string text, string value) + public static void IgnoreCaseMatchFound(string text, string value) { Assert.True(Ascii.StartsWithIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); Assert.True(Ascii.StartsWithIgnoreCase(text, value)); @@ -133,7 +133,7 @@ public static IEnumerable ExactMatchNotFound_TestData [Theory] [MemberData(nameof(ExactMatchNotFound_TestData))] - public void ExactMatchNotFound(string text, string value) + public static void ExactMatchNotFound(string text, string value) { Assert.False(Ascii.StartsWith(text, Encoding.ASCII.GetBytes(value))); Assert.False(Ascii.StartsWith(Encoding.ASCII.GetBytes(text), value)); @@ -172,7 +172,7 @@ public static IEnumerable IgnoreCaseMatchNotFound_TestData [Theory] [MemberData(nameof(IgnoreCaseMatchNotFound_TestData))] - public void IgnoreCaseMatchNotFound(string text, string value) + public static void IgnoreCaseMatchNotFound(string text, string value) { Assert.False(Ascii.StartsWithIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); Assert.False(Ascii.StartsWithIgnoreCase(text, value)); diff --git 
a/src/libraries/System.Memory/tests/Ascii/ToUtf16Tests.cs b/src/libraries/System.Memory/tests/Ascii/ToUtf16Tests.cs new file mode 100644 index 00000000000000..81646e2145619f --- /dev/null +++ b/src/libraries/System.Memory/tests/Ascii/ToUtf16Tests.cs @@ -0,0 +1,108 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Runtime.InteropServices; +using System.Security.Cryptography; +using Xunit; + +namespace System.Buffers.Text.Tests +{ + public static class ToUtf16Tests + { + [Fact] + public static void EmptyInputs() + { + Assert.Equal(OperationStatus.Done, Ascii.ToUtf16(ReadOnlySpan.Empty, Span.Empty, out int bytesConsumed, out int charsWritten)); + Assert.Equal(0, bytesConsumed); + Assert.Equal(0, charsWritten); + } + + [Fact] + public static void AllAsciiInput() + { + using BoundedMemory asciiMem = BoundedMemory.Allocate(128); + using BoundedMemory utf16Mem = BoundedMemory.Allocate(128); + + // Fill source with 00 .. 7F, then trap future writes. + + Span asciiSpan = asciiMem.Span; + for (int i = 0; i < asciiSpan.Length; i++) + { + asciiSpan[i] = (byte)i; + } + asciiMem.MakeReadonly(); + + // We'll write to the UTF-16 span. + // We test with a variety of span lengths to test alignment and fallthrough code paths. + + Span utf16Span = utf16Mem.Span; + + for (int i = 0; i < asciiSpan.Length; i++) + { + utf16Span.Clear(); // remove any data from previous iteration + + // First, validate that the workhorse saw the incoming data as all-ASCII. + + Assert.Equal(OperationStatus.Done, Ascii.ToUtf16(asciiSpan.Slice(i), utf16Span.Slice(i), out int bytesConsumed, out int charsWritten)); + Assert.Equal(128 - i, bytesConsumed); + Assert.Equal(bytesConsumed, charsWritten); + + // Then, validate that the data was transcoded properly. 
+ + for (int j = i; j < 128; j++) + { + Assert.Equal((ushort)asciiSpan[j], (ushort)utf16Span[j]); + } + } + } + + [Fact] + public static void SomeNonAsciiInput() + { + using BoundedMemory asciiMem = BoundedMemory.Allocate(128); + using BoundedMemory utf16Mem = BoundedMemory.Allocate(128); + + // Fill source with 00 .. 7F. + + Span asciiSpan = asciiMem.Span; + for (int i = 0; i < asciiSpan.Length; i++) + { + asciiSpan[i] = (byte)i; + } + + // We'll write to the UTF-16 span. + + Span utf16Span = utf16Mem.Span; + + for (int i = asciiSpan.Length - 1; i >= 0; i--) + { + RandomNumberGenerator.Fill(MemoryMarshal.Cast(utf16Span)); // fill with garbage + + // First, keep track of the garbage we wrote to the destination. + // We want to ensure it wasn't overwritten. + + char[] expectedTrailingData = utf16Span.Slice(i).ToArray(); + + // Then, set the desired byte as non-ASCII, then check that the workhorse + // correctly saw the data as non-ASCII. + + asciiSpan[i] |= (byte)0x80; + + Assert.Equal(OperationStatus.InvalidData, Ascii.ToUtf16(asciiSpan, utf16Span, out int bytesConsumed, out int charsWritten)); + Assert.Equal(i, bytesConsumed); + Assert.Equal(bytesConsumed, charsWritten); + + // Next, validate that the ASCII data was transcoded properly. + + for (int j = 0; j < i; j++) + { + Assert.Equal((ushort)asciiSpan[j], (ushort)utf16Span[j]); + } + + // Finally, validate that the trailing data wasn't overwritten with non-ASCII data. 
+ + Assert.Equal(expectedTrailingData, utf16Span.Slice(i).ToArray()); + } + } + } +} diff --git a/src/libraries/System.Memory/tests/Ascii/TrimTests.cs b/src/libraries/System.Memory/tests/Ascii/TrimTests.cs index 0c1f91ad9a1a54..ec30d971d2a8cc 100644 --- a/src/libraries/System.Memory/tests/Ascii/TrimTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/TrimTests.cs @@ -6,10 +6,10 @@ namespace System.Buffers.Text.Tests { - public class TrimTests + public static class TrimTests { [Fact] - public void EmptyInput() + public static void EmptyInput() { Assert.Equal(default(Range), Ascii.Trim(ReadOnlySpan.Empty)); Assert.Equal(default(Range), Ascii.Trim(ReadOnlySpan.Empty)); @@ -23,7 +23,7 @@ public void EmptyInput() [InlineData("1")] [InlineData("abc")] [InlineData("a\tb c\rd\ne")] - public void NothingToTrimNonEmptyInput(string text) + public static void NothingToTrimNonEmptyInput(string text) { ReadOnlySpan bytes = Encoding.ASCII.GetBytes(text); @@ -44,7 +44,7 @@ public void NothingToTrimNonEmptyInput(string text) [InlineData("\r\n")] [InlineData(" \t\r\n ")] [InlineData("\n \t \r")] - public void OnlyWhitespaces(string text) + public static void OnlyWhitespaces(string text) { ReadOnlySpan bytes = Encoding.ASCII.GetBytes(text); @@ -65,7 +65,7 @@ public void OnlyWhitespaces(string text) [InlineData("\nd", 1)] [InlineData(" \t\r\ne", 4)] [InlineData(" \t\r\n\n\r\t f", 8)] - public void StartingWithWhitespace(string text, int leadingWhitespaceCount) + public static void StartingWithWhitespace(string text, int leadingWhitespaceCount) { ReadOnlySpan bytes = Encoding.ASCII.GetBytes(text); @@ -84,7 +84,7 @@ public void StartingWithWhitespace(string text, int leadingWhitespaceCount) [InlineData("d\n", 1)] [InlineData("e \t\r\n", 4)] [InlineData("f \t\r\n\n\r\t ", 8)] - public void EndingWithWhitespace(string text, int trailingWhitespaceCount) + public static void EndingWithWhitespace(string text, int trailingWhitespaceCount) { ReadOnlySpan bytes = 
Encoding.ASCII.GetBytes(text); @@ -103,7 +103,7 @@ public void EndingWithWhitespace(string text, int trailingWhitespaceCount) [InlineData("\nd\n", 1, 1)] [InlineData(" \t\r\ne \t\r\n", 4, 4)] [InlineData(" \t\r\n\n\r\t f \t\r\n\n\r\t ", 8, 8)] - public void StartingAndEndingWithWhitespace(string text, int leadingWhitespaceCount, int trailingWhitespaceCount) + public static void StartingAndEndingWithWhitespace(string text, int leadingWhitespaceCount, int trailingWhitespaceCount) { ReadOnlySpan bytes = Encoding.ASCII.GetBytes(text); diff --git a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj index 928f526de0073c..50c6c67aa82102 100644 --- a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj +++ b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj @@ -15,12 +15,14 @@ + + diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Utility.cs index 52702cd4356cf2..2df3c8c53ac99f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Utility.cs @@ -617,7 +617,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Intrinsified(byte* pBuff /// /// An ASCII char is defined as 0x0000 - 0x007F, inclusive. [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe nuint GetIndexOfFirstNonAsciiChar(char* pBuffer, nuint bufferLength /* in chars */) + private static unsafe nuint GetIndexOfFirstNonAsciiChar(char* pBuffer, nuint bufferLength /* in chars */) { // If SSE2/ASIMD is supported, use those specific intrinsics instead of the generic vectorized // code below. 
This has two benefits: (a) we can take advantage of specific instructions like diff --git a/src/libraries/System.Runtime/tests/System.Runtime.Tests.csproj b/src/libraries/System.Runtime/tests/System.Runtime.Tests.csproj index fc6859854dc2e6..f250eb5ae62701 100644 --- a/src/libraries/System.Runtime/tests/System.Runtime.Tests.csproj +++ b/src/libraries/System.Runtime/tests/System.Runtime.Tests.csproj @@ -292,7 +292,6 @@ - diff --git a/src/libraries/System.Runtime/tests/System/Text/ASCIIUtilityTests.cs b/src/libraries/System.Runtime/tests/System/Text/ASCIIUtilityTests.cs deleted file mode 100644 index 72ca1707055ab0..00000000000000 --- a/src/libraries/System.Runtime/tests/System/Text/ASCIIUtilityTests.cs +++ /dev/null @@ -1,461 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Buffers; -using System.Diagnostics.CodeAnalysis; -using System.Numerics; -using System.Reflection; -using System.Runtime.InteropServices; -using System.Security.Cryptography; -using Xunit; - -namespace System.Text.Tests -{ - // Since many of the methods we'll be testing are internal, we'll need to invoke - // them via reflection. - public static unsafe class AsciiUtilityTests - { - private const int SizeOfVector128 = 128 / 8; - - // The delegate definitions and members below provide us access to CoreLib's internals. - // We use UIntPtr instead of nuint everywhere here since we don't know what our target arch is. 
- - private delegate UIntPtr FnGetIndexOfFirstNonAsciiByte(byte* pBuffer, UIntPtr bufferLength); - private static readonly UnsafeLazyDelegate _fnGetIndexOfFirstNonAsciiByte = new UnsafeLazyDelegate("GetIndexOfFirstNonAsciiByte"); - - private delegate UIntPtr FnGetIndexOfFirstNonAsciiChar(char* pBuffer, UIntPtr bufferLength); - private static readonly UnsafeLazyDelegate _fnGetIndexOfFirstNonAsciiChar = new UnsafeLazyDelegate("GetIndexOfFirstNonAsciiChar"); - - private delegate UIntPtr FnNarrowUtf16ToAscii(char* pUtf16Buffer, byte* pAsciiBuffer, UIntPtr elementCount); - private static readonly UnsafeLazyDelegate _fnNarrowUtf16ToAscii = new UnsafeLazyDelegate("NarrowUtf16ToAscii"); - - private delegate UIntPtr FnWidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buffer, UIntPtr elementCount); - private static readonly UnsafeLazyDelegate _fnWidenAsciiToUtf16 = new UnsafeLazyDelegate("WidenAsciiToUtf16"); - - [Fact] - public static void GetIndexOfFirstNonAsciiByte_EmptyInput_NullReference() - { - Assert.Equal(UIntPtr.Zero, _fnGetIndexOfFirstNonAsciiByte.Delegate(null, UIntPtr.Zero)); - } - - [Fact] - public static void GetIndexOfFirstNonAsciiByte_EmptyInput_NonNullReference() - { - byte b = default; - Assert.Equal(UIntPtr.Zero, _fnGetIndexOfFirstNonAsciiByte.Delegate(&b, UIntPtr.Zero)); - } - - [Fact] - public static void GetIndexOfFirstNonAsciiByte_Vector128InnerLoop() - { - // The purpose of this test is to make sure we're identifying the correct - // vector (of the two that we're reading simultaneously) when performing - // the final ASCII drain at the end of the method once we've broken out - // of the inner loop. - - using (BoundedMemory mem = BoundedMemory.Allocate(1024)) - { - Span bytes = mem.Span; - - for (int i = 0; i < bytes.Length; i++) - { - bytes[i] &= 0x7F; // make sure each byte (of the pre-populated random data) is ASCII - } - - // Two vectors have offsets 0 .. 31. We'll go backward to avoid having to - // re-clear the vector every time. 
- - for (int i = 2 * SizeOfVector128 - 1; i >= 0; i--) - { - bytes[100 + i * 13] = 0x80; // 13 is relatively prime to 32, so it ensures all possible positions are hit - Assert.Equal(100 + i * 13, CallGetIndexOfFirstNonAsciiByte(bytes)); - } - } - } - - [Fact] - public static void GetIndexOfFirstNonAsciiByte_Boundaries() - { - // The purpose of this test is to make sure we're hitting all of the vectorized - // and draining logic correctly both in the SSE2 and in the non-SSE2 enlightened - // code paths. We shouldn't be reading beyond the boundaries we were given. - - // The 5 * Vector test should make sure that we're exercising all possible - // code paths across both implementations. - using (BoundedMemory mem = BoundedMemory.Allocate(5 * Vector.Count)) - { - Span bytes = mem.Span; - - // First, try it with all-ASCII buffers. - - for (int i = 0; i < bytes.Length; i++) - { - bytes[i] &= 0x7F; // make sure each byte (of the pre-populated random data) is ASCII - } - - for (int i = bytes.Length; i >= 0; i--) - { - Assert.Equal(i, CallGetIndexOfFirstNonAsciiByte(bytes.Slice(0, i))); - } - - // Then, try it with non-ASCII bytes. 
- - for (int i = bytes.Length; i >= 1; i--) - { - bytes[i - 1] = 0x80; // set non-ASCII - Assert.Equal(i - 1, CallGetIndexOfFirstNonAsciiByte(bytes.Slice(0, i))); - } - } - } - - [Fact] - public static void GetIndexOfFirstNonAsciiChar_EmptyInput_NullReference() - { - Assert.Equal(UIntPtr.Zero, _fnGetIndexOfFirstNonAsciiChar.Delegate(null, UIntPtr.Zero)); - } - - [Fact] - public static void GetIndexOfFirstNonAsciiChar_EmptyInput_NonNullReference() - { - char c = default; - Assert.Equal(UIntPtr.Zero, _fnGetIndexOfFirstNonAsciiChar.Delegate(&c, UIntPtr.Zero)); - } - - [Fact] - public static void GetIndexOfFirstNonAsciiChar_Vector128InnerLoop() - { - // The purpose of this test is to make sure we're identifying the correct - // vector (of the two that we're reading simultaneously) when performing - // the final ASCII drain at the end of the method once we've broken out - // of the inner loop. - // - // Use U+0123 instead of U+0080 for this test because if our implementation - // uses pminuw / pmovmskb incorrectly, U+0123 will incorrectly show up as ASCII, - // causing our test to produce a false negative. - - using (BoundedMemory mem = BoundedMemory.Allocate(1024)) - { - Span chars = mem.Span; - - for (int i = 0; i < chars.Length; i++) - { - chars[i] &= '\u007F'; // make sure each char (of the pre-populated random data) is ASCII - } - - // Two vectors have offsets 0 .. 31. We'll go backward to avoid having to - // re-clear the vector every time. - - for (int i = 2 * SizeOfVector128 - 1; i >= 0; i--) - { - chars[100 + i * 13] = '\u0123'; // 13 is relatively prime to 32, so it ensures all possible positions are hit - Assert.Equal(100 + i * 13, CallGetIndexOfFirstNonAsciiChar(chars)); - } - } - } - - [Fact] - public static void GetIndexOfFirstNonAsciiChar_Boundaries() - { - // The purpose of this test is to make sure we're hitting all of the vectorized - // and draining logic correctly both in the SSE2 and in the non-SSE2 enlightened - // code paths. 
We shouldn't be reading beyond the boundaries we were given. - // - // The 5 * Vector test should make sure that we're exercising all possible - // code paths across both implementations. The sizeof(char) is because we're - // specifying element count, but underlying implementation reinterpret casts to bytes. - // - // Use U+0123 instead of U+0080 for this test because if our implementation - // uses pminuw / pmovmskb incorrectly, U+0123 will incorrectly show up as ASCII, - // causing our test to produce a false negative. - - using (BoundedMemory mem = BoundedMemory.Allocate(5 * Vector.Count / sizeof(char))) - { - Span chars = mem.Span; - - for (int i = 0; i < chars.Length; i++) - { - chars[i] &= '\u007F'; // make sure each char (of the pre-populated random data) is ASCII - } - - for (int i = chars.Length; i >= 0; i--) - { - Assert.Equal(i, CallGetIndexOfFirstNonAsciiChar(chars.Slice(0, i))); - } - - // Then, try it with non-ASCII bytes. - - for (int i = chars.Length; i >= 1; i--) - { - chars[i - 1] = '\u0123'; // set non-ASCII - Assert.Equal(i - 1, CallGetIndexOfFirstNonAsciiChar(chars.Slice(0, i))); - } - } - } - - [Fact] - public static void WidenAsciiToUtf16_EmptyInput_NullReferences() - { - Assert.Equal(UIntPtr.Zero, _fnWidenAsciiToUtf16.Delegate(null, null, UIntPtr.Zero)); - } - - [Fact] - public static void WidenAsciiToUtf16_EmptyInput_NonNullReference() - { - byte b = default; - char c = default; - Assert.Equal(UIntPtr.Zero, _fnWidenAsciiToUtf16.Delegate(&b, &c, UIntPtr.Zero)); - } - - [Fact] - public static void WidenAsciiToUtf16_AllAsciiInput() - { - using BoundedMemory asciiMem = BoundedMemory.Allocate(128); - using BoundedMemory utf16Mem = BoundedMemory.Allocate(128); - - // Fill source with 00 .. 7F, then trap future writes. - - Span asciiSpan = asciiMem.Span; - for (int i = 0; i < asciiSpan.Length; i++) - { - asciiSpan[i] = (byte)i; - } - asciiMem.MakeReadonly(); - - // We'll write to the UTF-16 span. 
- // We test with a variety of span lengths to test alignment and fallthrough code paths. - - Span utf16Span = utf16Mem.Span; - - for (int i = 0; i < asciiSpan.Length; i++) - { - utf16Span.Clear(); // remove any data from previous iteration - - // First, validate that the workhorse saw the incoming data as all-ASCII. - - Assert.Equal(128 - i, CallWidenAsciiToUtf16(asciiSpan.Slice(i), utf16Span.Slice(i))); - - // Then, validate that the data was transcoded properly. - - for (int j = i; j < 128; j++) - { - Assert.Equal((ushort)asciiSpan[i], (ushort)utf16Span[i]); - } - } - } - - [Fact] - public static void WidenAsciiToUtf16_SomeNonAsciiInput() - { - using BoundedMemory asciiMem = BoundedMemory.Allocate(128); - using BoundedMemory utf16Mem = BoundedMemory.Allocate(128); - - // Fill source with 00 .. 7F, then trap future writes. - - Span asciiSpan = asciiMem.Span; - for (int i = 0; i < asciiSpan.Length; i++) - { - asciiSpan[i] = (byte)i; - } - - // We'll write to the UTF-16 span. - - Span utf16Span = utf16Mem.Span; - - for (int i = asciiSpan.Length - 1; i >= 0; i--) - { - RandomNumberGenerator.Fill(MemoryMarshal.Cast(utf16Span)); // fill with garbage - - // First, keep track of the garbage we wrote to the destination. - // We want to ensure it wasn't overwritten. - - char[] expectedTrailingData = utf16Span.Slice(i).ToArray(); - - // Then, set the desired byte as non-ASCII, then check that the workhorse - // correctly saw the data as non-ASCII. - - asciiSpan[i] |= (byte)0x80; - Assert.Equal(i, CallWidenAsciiToUtf16(asciiSpan, utf16Span)); - - // Next, validate that the ASCII data was transcoded properly. - - for (int j = 0; j < i; j++) - { - Assert.Equal((ushort)asciiSpan[j], (ushort)utf16Span[j]); - } - - // Finally, validate that the trailing data wasn't overwritten with non-ASCII data. 
- - Assert.Equal(expectedTrailingData, utf16Span.Slice(i).ToArray()); - } - } - - [Fact] - public static unsafe void NarrowUtf16ToAscii_EmptyInput_NullReferences() - { - Assert.Equal(UIntPtr.Zero, _fnNarrowUtf16ToAscii.Delegate(null, null, UIntPtr.Zero)); - } - - [Fact] - public static void NarrowUtf16ToAscii_EmptyInput_NonNullReference() - { - char c = default; - byte b = default; - Assert.Equal(UIntPtr.Zero, _fnNarrowUtf16ToAscii.Delegate(&c, &b, UIntPtr.Zero)); - } - - [Fact] - public static void NarrowUtf16ToAscii_AllAsciiInput() - { - using BoundedMemory utf16Mem = BoundedMemory.Allocate(128); - using BoundedMemory asciiMem = BoundedMemory.Allocate(128); - - // Fill source with 00 .. 7F. - - Span utf16Span = utf16Mem.Span; - for (int i = 0; i < utf16Span.Length; i++) - { - utf16Span[i] = (char)i; - } - utf16Mem.MakeReadonly(); - - // We'll write to the ASCII span. - // We test with a variety of span lengths to test alignment and fallthrough code paths. - - Span asciiSpan = asciiMem.Span; - - for (int i = 0; i < utf16Span.Length; i++) - { - asciiSpan.Clear(); // remove any data from previous iteration - - // First, validate that the workhorse saw the incoming data as all-ASCII. - - Assert.Equal(128 - i, CallNarrowUtf16ToAscii(utf16Span.Slice(i), asciiSpan.Slice(i))); - - // Then, validate that the data was transcoded properly. - - for (int j = i; j < 128; j++) - { - Assert.Equal((ushort)utf16Span[i], (ushort)asciiSpan[i]); - } - } - } - - [Fact] - public static void NarrowUtf16ToAscii_SomeNonAsciiInput() - { - using BoundedMemory utf16Mem = BoundedMemory.Allocate(128); - using BoundedMemory asciiMem = BoundedMemory.Allocate(128); - - // Fill source with 00 .. 7F. - - Span utf16Span = utf16Mem.Span; - for (int i = 0; i < utf16Span.Length; i++) - { - utf16Span[i] = (char)i; - } - - // We'll write to the ASCII span. 
- - Span asciiSpan = asciiMem.Span; - - for (int i = utf16Span.Length - 1; i >= 0; i--) - { - RandomNumberGenerator.Fill(asciiSpan); // fill with garbage - - // First, keep track of the garbage we wrote to the destination. - // We want to ensure it wasn't overwritten. - - byte[] expectedTrailingData = asciiSpan.Slice(i).ToArray(); - - // Then, set the desired byte as non-ASCII, then check that the workhorse - // correctly saw the data as non-ASCII. - - utf16Span[i] = '\u0123'; // use U+0123 instead of U+0080 since it catches inappropriate pmovmskb usage - Assert.Equal(i, CallNarrowUtf16ToAscii(utf16Span, asciiSpan)); - - // Next, validate that the ASCII data was transcoded properly. - - for (int j = 0; j < i; j++) - { - Assert.Equal((ushort)utf16Span[j], (ushort)asciiSpan[j]); - } - - // Finally, validate that the trailing data wasn't overwritten with non-ASCII data. - - Assert.Equal(expectedTrailingData, asciiSpan.Slice(i).ToArray()); - } - } - - private static int CallGetIndexOfFirstNonAsciiByte(ReadOnlySpan buffer) - { - fixed (byte* pBuffer = &MemoryMarshal.GetReference(buffer)) - { - // Conversions between UIntPtr <-> int are not checked by default. - return checked((int)_fnGetIndexOfFirstNonAsciiByte.Delegate(pBuffer, (UIntPtr)buffer.Length)); - } - } - - private static int CallGetIndexOfFirstNonAsciiChar(ReadOnlySpan buffer) - { - fixed (char* pBuffer = &MemoryMarshal.GetReference(buffer)) - { - // Conversions between UIntPtr <-> int are not checked by default. - return checked((int)_fnGetIndexOfFirstNonAsciiChar.Delegate(pBuffer, (UIntPtr)buffer.Length)); - } - } - - private static int CallNarrowUtf16ToAscii(ReadOnlySpan utf16, Span ascii) - { - Assert.Equal(utf16.Length, ascii.Length); - - fixed (char* pUtf16 = &MemoryMarshal.GetReference(utf16)) - fixed (byte* pAscii = &MemoryMarshal.GetReference(ascii)) - { - // Conversions between UIntPtr <-> int are not checked by default. 
- return checked((int)_fnNarrowUtf16ToAscii.Delegate(pUtf16, pAscii, (UIntPtr)utf16.Length)); - } - } - - private static int CallWidenAsciiToUtf16(ReadOnlySpan ascii, Span utf16) - { - Assert.Equal(ascii.Length, utf16.Length); - - fixed (byte* pAscii = &MemoryMarshal.GetReference(ascii)) - fixed (char* pUtf16 = &MemoryMarshal.GetReference(utf16)) - { - // Conversions between UIntPtr <-> int are not checked by default. - return checked((int)_fnWidenAsciiToUtf16.Delegate(pAscii, pUtf16, (UIntPtr)ascii.Length)); - } - } - - [return: DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicMethods | DynamicallyAccessedMemberTypes.NonPublicMethods)] - private static Type GetAsciiUtilityType() - { - return Type.GetType("System.Text.ASCIIUtility, System.Private.CoreLib"); - } - - private sealed class UnsafeLazyDelegate where TDelegate : Delegate - { - private readonly Lazy _lazyDelegate; - - public UnsafeLazyDelegate(string methodName) - { - _lazyDelegate = new Lazy(() => - { - Assert.True(typeof(TDelegate).IsSubclassOf(typeof(MulticastDelegate))); - - // Get the MethodInfo for the target method - - MethodInfo methodInfo = GetAsciiUtilityType().GetMethod(methodName, BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static); - Assert.NotNull(methodInfo); - - // Construct the TDelegate pointing to this method - - return methodInfo.CreateDelegate(); - }); - } - - public TDelegate Delegate => _lazyDelegate.Value; - } - } -} From 61066590fec7809c1168d88396839d69fd91a52c Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Mon, 12 Sep 2022 12:28:13 +0200 Subject: [PATCH 41/46] fix byte->char casting --- .../System.Memory/tests/Ascii/IndexOfTests.cs | 1 - .../src/System/Buffers/Text/Ascii.Searching.cs | 16 ++++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs b/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs index b68caff2c098f8..473b5d080d9566 100644 --- 
a/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs +++ b/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs @@ -8,7 +8,6 @@ namespace System.Buffers.Text.Tests { - [ActiveIssue("https://github.com/dotnet/runtime/issues/75125", TestRuntimes.Mono)] public static class IndexOfTests { [Fact] diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs index f4289b77ab5703..4edc21679a75cf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs @@ -134,13 +134,13 @@ private static int IndexOfIgnoreCase(ReadOnlySpan text, Re } TValue firstValue = value[0]; - if (!UnicodeUtility.IsAsciiCodePoint(uint.CreateTruncating(firstValue))) + char firstChar = (char)ushort.CreateTruncating(firstValue); + if (!IsAscii(firstChar)) { ThrowNonAsciiFound(); } - TText valueHead = Unsafe.As(ref firstValue); - char differentCase = GetDifferentCaseOrSame(Unsafe.As(ref firstValue)); - TText valueHeadDifferentCase = Unsafe.As(ref differentCase); + TText valueHead = TText.CreateTruncating(firstValue); + TText valueHeadDifferentCase = TText.CreateTruncating((ushort)GetDifferentCaseOrSame(firstChar)); int valueTailLength = value.Length - 1; if (valueTailLength == 0) @@ -190,13 +190,13 @@ private static int LastIndexOfIgnoreCase(ReadOnlySpan text } TValue firstValue = value[0]; - if (!UnicodeUtility.IsAsciiCodePoint(uint.CreateTruncating(firstValue))) + char firstChar = (char)ushort.CreateTruncating(firstValue); + if (!IsAscii(firstChar)) { ThrowNonAsciiFound(); } - TText valueHead = Unsafe.As(ref firstValue); - char differentCase = GetDifferentCaseOrSame(Unsafe.As(ref firstValue)); - TText valueHeadDifferentCase = Unsafe.As(ref differentCase); + TText valueHead = TText.CreateTruncating(firstValue); + TText valueHeadDifferentCase = 
TText.CreateTruncating((ushort)GetDifferentCaseOrSame(firstChar)); int valueTailLength = value.Length - 1; if (valueTailLength == 0) From 0d69abd9cfdc0c9988f81743301e064f237b97a5 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Wed, 7 Dec 2022 14:23:14 +0100 Subject: [PATCH 42/46] adjust code after recent API Review --- .../Runtime/InteropServices/PInvokeMarshal.cs | 8 +- .../Common/src/System/CharArrayHelpers.cs | 4 - .../src/System/Net/CaseInsensitiveAscii.cs | 29 +- .../System.Memory/tests/Ascii/EqualsTests.cs | 106 ----- .../tests/Ascii/GetHashCodeByteTests.cs | 74 ---- .../tests/Ascii/GetHashCodeCharTests.cs | 58 --- .../System.Memory/tests/Ascii/IndexOfTests.cs | 158 ------- .../tests/Ascii/StartsEndsWithTests.cs | 188 -------- .../tests/System.Memory.Tests.csproj | 11 - .../src/System.Net.Http.csproj | 1 + .../src/System/Net/Http/ByteArrayHelpers.cs | 62 +++ .../Headers/ContentDispositionHeaderValue.cs | 3 +- .../Net/Http/Headers/HeaderDescriptor.cs | 5 +- .../System/Net/Http/Headers/KnownHeaders.cs | 3 +- .../AuthenticationHelper.Digest.cs | 3 +- .../Http/SocketsHttpHandler/HttpConnection.cs | 6 +- .../SocketsHttpHandler/HttpConnectionBase.cs | 3 +- .../System.Net.Http.Unit.Tests.csproj | 2 + .../src/System/Net/HttpListener.cs | 3 +- .../System/Net/Mail/DomainLiteralReader.cs | 4 +- .../src/System/Net/Mail/DotAtomReader.cs | 4 +- .../src/System/Net/Mail/MailBnfHelper.cs | 9 +- .../src/System/Net/Mail/QuotedPairReader.cs | 4 +- .../Net/Mail/QuotedStringFormatReader.cs | 4 +- .../src/System/Net/Mail/SmtpClient.cs | 3 +- .../src/System/Net/Mail/WhitespaceReader.cs | 4 +- .../src/System/Net/Mime/MimeBasePart.cs | 3 +- .../src/System/Net/WebClient.cs | 2 +- .../System.Private.CoreLib.Shared.projitems | 14 +- .../System/Buffers/Text/Ascii.Comparison.cs | 416 ------------------ .../System/Buffers/Text/Ascii.Searching.cs | 278 ------------ .../src/System/Globalization/TextInfo.cs | 4 +- .../src/System/String.cs | 4 +- .../src/System/Text/ASCIIEncoding.cs | 18 +- 
.../Text/Ascii.CaseConversion.cs | 61 ++- .../{Buffers => }/Text/Ascii.Transcoding.cs | 16 +- .../{Buffers => }/Text/Ascii.Trimming.cs | 3 +- .../Text/Ascii.Utility.Helpers.cs | 2 +- .../{Buffers => }/Text/Ascii.Utility.cs | 2 +- .../src/System/{Buffers => }/Text/Ascii.cs | 14 +- .../Text/Unicode/Utf8Utility.Transcoding.cs | 4 +- .../src/System/DomainNameHelper.cs | 3 +- .../src/System/UriHelper.cs | 5 +- .../Internal/Utilities/MemoryBlock.cs | 4 - .../System.Runtime/ref/System.Runtime.cs | 92 ++-- .../ASCIIEncoding/ASCIIEncodingDecode.cs | 12 +- .../ASCIIEncoding/ASCIIEncodingEncode.cs | 12 +- .../tests/Ascii/CaseConversionTests.cs | 161 +++---- .../tests/Ascii/FromUtf16Tests.cs | 18 +- .../tests/Ascii/IsValidByteTests.cs} | 30 +- .../tests/Ascii/IsValidCharTests.cs} | 30 +- .../tests/Ascii/ToUtf16Tests.cs | 16 +- .../tests/Ascii/TrimTests.cs | 3 +- .../tests/System.Text.Encoding.Tests.csproj | 6 + .../Text/RegularExpressions/RegexCharClass.cs | 2 +- 55 files changed, 351 insertions(+), 1643 deletions(-) delete mode 100644 src/libraries/System.Memory/tests/Ascii/EqualsTests.cs delete mode 100644 src/libraries/System.Memory/tests/Ascii/GetHashCodeByteTests.cs delete mode 100644 src/libraries/System.Memory/tests/Ascii/GetHashCodeCharTests.cs delete mode 100644 src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs delete mode 100644 src/libraries/System.Memory/tests/Ascii/StartsEndsWithTests.cs create mode 100644 src/libraries/System.Net.Http/src/System/Net/Http/ByteArrayHelpers.cs delete mode 100644 src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs delete mode 100644 src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs rename src/libraries/System.Private.CoreLib/src/System/{Buffers => }/Text/Ascii.CaseConversion.cs (90%) rename src/libraries/System.Private.CoreLib/src/System/{Buffers => }/Text/Ascii.Transcoding.cs (84%) rename src/libraries/System.Private.CoreLib/src/System/{Buffers => 
}/Text/Ascii.Trimming.cs (97%) rename src/libraries/System.Private.CoreLib/src/System/{Buffers => }/Text/Ascii.Utility.Helpers.cs (99%) rename src/libraries/System.Private.CoreLib/src/System/{Buffers => }/Text/Ascii.Utility.cs (99%) rename src/libraries/System.Private.CoreLib/src/System/{Buffers => }/Text/Ascii.cs (87%) rename src/libraries/{System.Memory => System.Text.Encoding}/tests/Ascii/CaseConversionTests.cs (65%) rename src/libraries/{System.Memory => System.Text.Encoding}/tests/Ascii/FromUtf16Tests.cs (84%) rename src/libraries/{System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs => System.Text.Encoding/tests/Ascii/IsValidByteTests.cs} (81%) rename src/libraries/{System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs => System.Text.Encoding/tests/Ascii/IsValidCharTests.cs} (83%) rename src/libraries/{System.Memory => System.Text.Encoding}/tests/Ascii/ToUtf16Tests.cs (86%) rename src/libraries/{System.Memory => System.Text.Encoding}/tests/Ascii/TrimTests.cs (98%) diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs index 6692249ea5ade8..9aac0343e64650 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/PInvokeMarshal.cs @@ -10,7 +10,7 @@ using Internal.Runtime.Augments; using Internal.Runtime.CompilerHelpers; using Internal.Runtime.CompilerServices; -using System.Buffers.Text; +using System.Text; using System.Buffers; namespace System.Runtime.InteropServices @@ -498,7 +498,7 @@ public static unsafe char AnsiCharToWideChar(byte nativeValue) internal static unsafe byte* StringToAnsiString(char* pManaged, int lenUnicode, byte* pNative, bool terminateWithNull, bool bestFit, bool throwOnUnmappableChar) { - bool allAscii = Ascii.IsAscii(new 
ReadOnlySpan(pManaged, lenUnicode)); + bool allAscii = Ascii.IsValid(new ReadOnlySpan(pManaged, lenUnicode)); int length; if (allAscii) // If all ASCII, map one UNICODE character to one ANSI char @@ -516,7 +516,7 @@ public static unsafe char AnsiCharToWideChar(byte nativeValue) } if (allAscii) // ASCII conversion { - OperationStatus conversionStatus = Ascii.FromUtf16(new ReadOnlySpan(pManaged, length), new Span(pNative, length), out _, out _); + OperationStatus conversionStatus = Ascii.FromUtf16(new ReadOnlySpan(pManaged, length), new Span(pNative, length), out _); Debug.Assert(conversionStatus == OperationStatus.Done); } else // Let OS convert @@ -545,7 +545,7 @@ private static unsafe bool CalculateStringLength(byte* pchBuffer, out int ansiBu { ReadOnlySpan span = MemoryMarshal.CreateReadOnlySpanFromNullTerminated(pchBuffer); ansiBufferLen = span.Length; - bool allAscii = Ascii.IsAscii(span); + bool allAscii = Ascii.IsValid(span); if (allAscii) { diff --git a/src/libraries/Common/src/System/CharArrayHelpers.cs b/src/libraries/Common/src/System/CharArrayHelpers.cs index 73abc7f0984dc7..95dad91071a300 100644 --- a/src/libraries/Common/src/System/CharArrayHelpers.cs +++ b/src/libraries/Common/src/System/CharArrayHelpers.cs @@ -13,9 +13,6 @@ internal static bool EqualsOrdinalAsciiIgnoreCase(string left, char[] right, int Debug.Assert(left != null, "Expected non-null string"); DebugAssertArrayInputs(right, rightStartIndex, rightLength); -#if NET7_0_OR_GREATER - return Buffers.Text.Ascii.EndsWithIgnoreCase(left, right.AsSpan(rightStartIndex, rightLength)); -#else // used by System.Net.Http.WinHttpHandler which targets older TFMs if (left.Length != rightLength) { return false; @@ -40,7 +37,6 @@ internal static bool EqualsOrdinalAsciiIgnoreCase(string left, char[] right, int } return true; -#endif } internal static void Trim(char[] array, ref int startIndex, ref int length) diff --git a/src/libraries/Common/src/System/Net/CaseInsensitiveAscii.cs 
b/src/libraries/Common/src/System/Net/CaseInsensitiveAscii.cs index 29bdfadfbaea5f..4bfdf23ca7cd84 100644 --- a/src/libraries/Common/src/System/Net/CaseInsensitiveAscii.cs +++ b/src/libraries/Common/src/System/Net/CaseInsensitiveAscii.cs @@ -1,7 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using System.Buffers.Text; using System.Collections; namespace System.Net @@ -85,6 +84,17 @@ public int Compare(object? firstObject, object? secondObject) return result; } + // ASCII string case insensitive hash function + private static int FastGetHashCode(string myString) + { + int myHashCode = myString.Length; + if (myHashCode != 0) + { + myHashCode ^= AsciiToLower[(byte)myString[0]] << 24 ^ AsciiToLower[(byte)myString[myHashCode - 1]] << 16; + } + return myHashCode; + } + // ASCII string case insensitive comparer public new bool Equals(object? firstObject, object? secondObject) { @@ -96,7 +106,22 @@ public int Compare(object? firstObject, object? secondObject) } if (secondString != null) { - return Ascii.EqualsIgnoreCase(firstString, secondString); + int index = firstString.Length; + if (index == secondString.Length) + { + if (FastGetHashCode(firstString) == FastGetHashCode(secondString)) + { + while (index > 0) + { + index--; + if (AsciiToLower[firstString[index]] != AsciiToLower[secondString[index]]) + { + return false; + } + } + return true; + } + } } return false; } diff --git a/src/libraries/System.Memory/tests/Ascii/EqualsTests.cs b/src/libraries/System.Memory/tests/Ascii/EqualsTests.cs deleted file mode 100644 index 056d932f222ea2..00000000000000 --- a/src/libraries/System.Memory/tests/Ascii/EqualsTests.cs +++ /dev/null @@ -1,106 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -using System.Collections.Generic; -using System.Linq; -using System.Text; -using Xunit; - -namespace System.Buffers.Text.Tests -{ - public static class EqualsTests - { - [Fact] - public static void InvalidCharacters_DoesNotThrow() - { - Assert.False(Ascii.Equals(Enumerable.Repeat((byte)128, "valid".Length).ToArray(), "valid")); - Assert.False(Ascii.Equals("valid"u8, "aa\u00C0aa")); - - Assert.False(Ascii.EqualsIgnoreCase(new byte[] { 127 }, new byte[] { 128 })); - Assert.True(Ascii.EqualsIgnoreCase(new byte[] { 128 }, new byte[] { 128 })); - Assert.False(Ascii.EqualsIgnoreCase(new byte[] { 128 }, new byte[] { 127 })); - - Assert.False(Ascii.EqualsIgnoreCase(Enumerable.Repeat((byte)128, "valid".Length).ToArray(), "valid")); - Assert.False(Ascii.EqualsIgnoreCase("valid"u8, "aa\u00C0aa")); - } - - public static IEnumerable ExactMatch_TestData - { - get - { - yield return new object[] { "test", "test" }; - - for (char textLength = (char)0; textLength <= 127; textLength++) - { - yield return new object[] { new string(textLength, textLength), new string(textLength, textLength) }; - } - } - } - - [Theory] - [MemberData(nameof(ExactMatch_TestData))] - public static void ExactMatchFound(string left, string right) - { - Assert.True(Ascii.Equals(Encoding.ASCII.GetBytes(left), right)); - - Assert.True(Ascii.EqualsIgnoreCase(Encoding.ASCII.GetBytes(left), Encoding.ASCII.GetBytes(right))); - Assert.True(Ascii.EqualsIgnoreCase(left, right)); - Assert.True(Ascii.EqualsIgnoreCase(Encoding.ASCII.GetBytes(left), right)); - } - - public static IEnumerable ExactMatchNotFound_TestData - { - get - { - yield return new object[] { "tak", "nie" }; - - for (char i = (char)1; i <= 127; i++) - { - if (i != '?') // ASCIIEncoding maps invalid ASCII to ? 
- { - yield return new object[] { new string(i, i), string.Create(i, i, (destination, iteration) => - { - destination.Fill((char)iteration); - destination[iteration / 2] = (char)128; - })}; - } - } - } - } - - [Theory] - [MemberData(nameof(ExactMatchNotFound_TestData))] - public static void ExactMatchNotFound(string left, string right) - { - Assert.False(Ascii.Equals(Encoding.ASCII.GetBytes(left), right)); - - Assert.False(Ascii.EqualsIgnoreCase(Encoding.ASCII.GetBytes(left), Encoding.ASCII.GetBytes(right))); - Assert.False(Ascii.EqualsIgnoreCase(left, right)); - Assert.False(Ascii.EqualsIgnoreCase(Encoding.ASCII.GetBytes(left), right)); - } - - public static IEnumerable IgnoreCaseMatch_TestData - { - get - { - yield return new object[] { "aBc", "AbC" }; - - for (char i = (char)0; i <= 127; i++) - { - char left = i; - char right = char.IsAsciiLetterUpper(left) ? char.ToLower(left) : char.IsAsciiLetterLower(left) ? char.ToUpper(left) : left; - yield return new object[] { new string(left, i), new string(right, i) }; - } - } - } - - [Theory] - [MemberData(nameof(IgnoreCaseMatch_TestData))] - public static void IgnoreCaseMatchFound(string left, string right) - { - Assert.True(Ascii.EqualsIgnoreCase(Encoding.ASCII.GetBytes(left), Encoding.ASCII.GetBytes(right))); - Assert.True(Ascii.EqualsIgnoreCase(left, right)); - Assert.True(Ascii.EqualsIgnoreCase(Encoding.ASCII.GetBytes(left), right)); - } - } -} diff --git a/src/libraries/System.Memory/tests/Ascii/GetHashCodeByteTests.cs b/src/libraries/System.Memory/tests/Ascii/GetHashCodeByteTests.cs deleted file mode 100644 index 40101f539edf26..00000000000000 --- a/src/libraries/System.Memory/tests/Ascii/GetHashCodeByteTests.cs +++ /dev/null @@ -1,74 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -using System.Collections.Generic; -using System.Linq; -using System.Runtime.InteropServices; -using System.Text; -using Xunit; - -namespace System.Buffers.Text.Tests -{ - public static class GetHashCodeByteTests - { - [Theory] - [InlineData(new byte[] { 128 })] - [InlineData(new byte[] { 91, 91, 128, 91 })] // >= 4 chars can execute a different code path - public static void InvalidCharactersInValueThrowsOrReturnsFalse(byte[] value) - { - Assert.Throws(() => Ascii.GetHashCode(value)); - Assert.Throws(() => Ascii.GetHashCodeIgnoreCase(value)); - - Assert.False(Ascii.TryGetHashCode(value, out int hashCode)); - Assert.Equal(default(int), hashCode); - Assert.False(Ascii.TryGetHashCodeIgnoreCase(value, out hashCode)); - Assert.Equal(default(int), hashCode); - } - - public static IEnumerable ValidInputValidOutput_TestData - { - get - { - yield return new object[] { "test" }; - yield return new object[] { "tESt" }; - yield return new object[] { "!@#$%^&*()" }; - yield return new object[] { "0123456789" }; - yield return new object[] { " \t\r\n" }; - yield return new object[] { new string(Enumerable.Range(0, 127).Select(i => (char)i).ToArray()) }; - } - } - - [Theory] - [InlineData(nameof(ValidInputValidOutput_TestData))] - public static void ValidInputValidOutput(string input) - { - // The contract makes it clear that hash code is randomized and is not guaranteed to match string.GetHashCode. - // But.. re-using same types used internally by string.GetHashCode was the simplest way to get good hashing implementaiton. - // So this test verifies implementation detail. 
- - // string.GetHashcode treats string as buffer of bytes - // this is why this test casts ROS to ROS, rather than doing actual encoding conversion (this would narrow the bytes) - ReadOnlySpan bytes = MemoryMarshal.AsBytes(input.AsSpan()); - - int expectedHashCode = input.GetHashCode(); - Assert.Equal(expectedHashCode, Ascii.GetHashCode(bytes)); - Assert.True(Ascii.TryGetHashCode(input, out int actualHashCode)); - Assert.Equal(expectedHashCode, actualHashCode); - - // Ascii.*GetHashCodeIgnoreCase(bytes) processes four ASCII bytes at a time - // rather than two ascii chars as string.GetHashCode(StringComparison.OrdinalIgnoreCase) does. - // This is why they might produce different outputs and their results are not checked for equality. - - bytes = Encoding.ASCII.GetBytes(input); - expectedHashCode = Ascii.GetHashCodeIgnoreCase(bytes); - - // just verify that the output is the same for multiple invocations - for (int i = 0; i < 10; i++) - { - Assert.Equal(expectedHashCode, Ascii.GetHashCodeIgnoreCase(bytes)); - Assert.True(Ascii.TryGetHashCodeIgnoreCase(bytes, out actualHashCode)); - Assert.Equal(expectedHashCode, actualHashCode); - } - } - } -} diff --git a/src/libraries/System.Memory/tests/Ascii/GetHashCodeCharTests.cs b/src/libraries/System.Memory/tests/Ascii/GetHashCodeCharTests.cs deleted file mode 100644 index c2d7926a8380a0..00000000000000 --- a/src/libraries/System.Memory/tests/Ascii/GetHashCodeCharTests.cs +++ /dev/null @@ -1,58 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -using System.Collections.Generic; -using System.Linq; -using Xunit; - -namespace System.Buffers.Text.Tests -{ - public static class GetHashCodeCharTests - { - [Theory] - [InlineData("\u00C0")] - [InlineData("aaa\u00C0bbb")] - public static void InvalidCharactersInValueThrowsOrReturnsFalse(string value) - { - Assert.Throws(() => Ascii.GetHashCode(value)); - Assert.Throws(() => Ascii.GetHashCodeIgnoreCase(value)); - - Assert.False(Ascii.TryGetHashCode(value, out int hashCode)); - Assert.Equal(default(int), hashCode); - Assert.False(Ascii.TryGetHashCodeIgnoreCase(value, out hashCode)); - Assert.Equal(default(int), hashCode); - } - - public static IEnumerable ValidInputValidOutput_TestData - { - get - { - yield return new object[] { "test" }; - yield return new object[] { "tESt" }; - yield return new object[] { "!@#$%^&*()" }; - yield return new object[] { "0123456789" }; - yield return new object[] { " \t\r\n" }; - yield return new object[] { new string(Enumerable.Range(0, 127).Select(i => (char)i).ToArray()) }; - } - } - - [Theory] - [InlineData(nameof(ValidInputValidOutput_TestData))] - public static void ValidInputValidOutput(string input) - { - // The contract makes it clear that hash code is randomized and is not guaranteed to match string.GetHashCode. - // But.. re-using same types used internally by string.GetHashCode was the simplest way to get good hashing implementaiton. - // So this test verifies implementation detail. 
- - int expectedHashCode = input.GetHashCode(); - Assert.Equal(expectedHashCode, Ascii.GetHashCode(input)); - Assert.True(Ascii.TryGetHashCode(input, out int actualHashCode)); - Assert.Equal(expectedHashCode, actualHashCode); - - expectedHashCode = input.GetHashCode(StringComparison.OrdinalIgnoreCase); - Assert.Equal(expectedHashCode, Ascii.GetHashCodeIgnoreCase(input)); - Assert.True(Ascii.TryGetHashCodeIgnoreCase(input, out actualHashCode)); - Assert.Equal(expectedHashCode, actualHashCode); - } - } -} diff --git a/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs b/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs deleted file mode 100644 index 473b5d080d9566..00000000000000 --- a/src/libraries/System.Memory/tests/Ascii/IndexOfTests.cs +++ /dev/null @@ -1,158 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Collections.Generic; -using System.Linq; -using System.Text; -using Xunit; - -namespace System.Buffers.Text.Tests -{ - public static class IndexOfTests - { - [Fact] - public static void InvalidCharactersInValueThrows() - { - Assert.Throws(() => Ascii.IndexOf("aaaa"u8, "\u00C0")); - Assert.Throws(() => Ascii.IndexOf("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"u8, "aaaaaaaaaaaaa\u00C0")); - Assert.Throws(() => Ascii.IndexOf("aaaa", new byte[] { 128 })); - Assert.Throws(() => Ascii.IndexOf(new string('a', 50), Enumerable.Repeat((byte)'a', 20).Concat(new byte[] { 128 }).ToArray())); - - Assert.Throws(() => Ascii.IndexOfIgnoreCase("aaaa"u8, new byte[] { 128 })); - Assert.Throws(() => Ascii.IndexOfIgnoreCase("aaaa"u8, new byte[] { (byte)'a', 128 })); - Assert.Throws(() => Ascii.IndexOfIgnoreCase("aaaa", "\u00C0")); - Assert.Throws(() => Ascii.IndexOfIgnoreCase("aaaa", "a\u00C0")); - Assert.Throws(() => Ascii.IndexOfIgnoreCase("aaaa"u8, "\u00C0")); - Assert.Throws(() => Ascii.IndexOfIgnoreCase("aaaa"u8, "a\u00C0")); - Assert.Throws(() => 
Ascii.IndexOfIgnoreCase("aaaa", new byte[] { 128 })); - Assert.Throws(() => Ascii.IndexOfIgnoreCase("aaaa", new byte[] { (byte)'a', 128 })); - - Assert.Throws(() => Ascii.LastIndexOf("aaaa"u8, "\u00C0")); - Assert.Throws(() => Ascii.LastIndexOf("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"u8, "aaaaaaaaaaaaa\u00C0")); - Assert.Throws(() => Ascii.LastIndexOf("aaaa", new byte[] { 128 })); - Assert.Throws(() => Ascii.LastIndexOf(new string('a', 50), Enumerable.Repeat((byte)'a', 20).Concat(new byte[] { 128 }).ToArray())); - - Assert.Throws(() => Ascii.LastIndexOfIgnoreCase("aaaa"u8, new byte[] { 128 })); - Assert.Throws(() => Ascii.LastIndexOfIgnoreCase("aaaa"u8, new byte[] { (byte)'a', 128 })); - Assert.Throws(() => Ascii.LastIndexOfIgnoreCase("aaaa", "\u00C0")); - Assert.Throws(() => Ascii.LastIndexOfIgnoreCase("aaaa", "a\u00C0")); - Assert.Throws(() => Ascii.LastIndexOfIgnoreCase("aaaa"u8, "\u00C0")); - Assert.Throws(() => Ascii.LastIndexOfIgnoreCase("aaaa"u8, "a\u00C0")); - Assert.Throws(() => Ascii.LastIndexOfIgnoreCase("aaaa", new byte[] { 128 })); - Assert.Throws(() => Ascii.LastIndexOfIgnoreCase("aaaa", new byte[] { (byte)'a', 128 })); - } - - public static IEnumerable ExactMatchFound_TestData - { - get - { - yield return new object[] { "test", "", 0, 4 }; - yield return new object[] { "test", "test", 0, 0 }; - yield return new object[] { "abcdefghijk", "cde", 2, 2 }; - yield return new object[] { "abcdabcdabcd" , "abcd", 0, 8 }; - yield return new object[] { "test0test1test2test3test4test5test6", "test3test4test5test6", 15, 15 }; - yield return new object[] { "This is not a very complex test case", "complex test", 19, 19 }; - } - } - - [Theory] - [MemberData(nameof(ExactMatchFound_TestData))] - public static void ExactMatchFound(string text, string value, int expectedFirstIndex, int expectedLastIndex) - { - Assert.Equal(expectedFirstIndex, Ascii.IndexOf(text, Encoding.ASCII.GetBytes(value))); - Assert.Equal(expectedFirstIndex, Ascii.IndexOf(Encoding.ASCII.GetBytes(text), 
value)); - - Assert.Equal(expectedFirstIndex, Ascii.IndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); - Assert.Equal(expectedFirstIndex, Ascii.IndexOfIgnoreCase(text, value)); - Assert.Equal(expectedFirstIndex, Ascii.IndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), value)); - Assert.Equal(expectedFirstIndex, Ascii.IndexOfIgnoreCase(text, Encoding.ASCII.GetBytes(value))); - - Assert.Equal(expectedLastIndex, Ascii.LastIndexOf(text, Encoding.ASCII.GetBytes(value))); - Assert.Equal(expectedLastIndex, Ascii.LastIndexOf(Encoding.ASCII.GetBytes(text), value)); - - Assert.Equal(expectedLastIndex, Ascii.LastIndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); - Assert.Equal(expectedLastIndex, Ascii.LastIndexOfIgnoreCase(text, value)); - Assert.Equal(expectedLastIndex, Ascii.LastIndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), value)); - Assert.Equal(expectedLastIndex, Ascii.LastIndexOfIgnoreCase(text, Encoding.ASCII.GetBytes(value))); - } - - public static IEnumerable ExactMatchNotFound_TestData - { - get - { - yield return new object[] { "test", "TEST" }; - yield return new object[] { "abcdefghijk", "xyz" }; - yield return new object[] { "abcdabcdabcd", "abcD" }; - yield return new object[] { "test0test1test2test3test4test5test6", "test8" }; - yield return new object[] { "This is not a very complex test case", "benchmark" }; - } - } - - [Theory] - [MemberData(nameof(ExactMatchNotFound_TestData))] - public static void ExactMatchNotFound(string text, string value) - { - Assert.Equal(-1, Ascii.IndexOf(text, Encoding.ASCII.GetBytes(value))); - Assert.Equal(-1, Ascii.IndexOf(Encoding.ASCII.GetBytes(text), value)); - - Assert.Equal(-1, Ascii.LastIndexOf(text, Encoding.ASCII.GetBytes(value))); - Assert.Equal(-1, Ascii.LastIndexOf(Encoding.ASCII.GetBytes(text), value)); - } - - public static IEnumerable IgnoreCaseMatchFound_TestData - { - get - { - yield return new object[] { "test", "", 0, 4 }; - yield return new 
object[] { "tESt", "TesT", 0, 0 }; - yield return new object[] { "abcdefghijk", "CdE", 2, 2 }; - yield return new object[] { "abcdabcdabcd", "ABcD", 0, 8 }; - yield return new object[] { "test0test1test2test3test4test5test6", "TeSt3tEst4TeSt5tEsT6", 15, 15 }; - yield return new object[] { "This is not a VERY COMPLEX test case", "COMplex tEst", 19, 19 }; - } - } - - [Theory] - [MemberData(nameof(IgnoreCaseMatchFound_TestData))] - public static void IgnoreCaseMatchFound(string text, string value, int expectedFirstIndex, int expectedLastIndex) - { - Assert.Equal(expectedFirstIndex, Ascii.IndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); - Assert.Equal(expectedFirstIndex, Ascii.IndexOfIgnoreCase(text, value)); - Assert.Equal(expectedFirstIndex, Ascii.IndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), value)); - Assert.Equal(expectedFirstIndex, Ascii.IndexOfIgnoreCase(text, Encoding.ASCII.GetBytes(value))); - - Assert.Equal(expectedLastIndex, Ascii.LastIndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); - Assert.Equal(expectedLastIndex, Ascii.LastIndexOfIgnoreCase(text, value)); - Assert.Equal(expectedLastIndex, Ascii.LastIndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), value)); - Assert.Equal(expectedLastIndex, Ascii.LastIndexOfIgnoreCase(text, Encoding.ASCII.GetBytes(value))); - } - - public static IEnumerable IgnoreCaseMatchNotFound_TestData - { - get - { - yield return new object[] { "test", "!" 
}; - yield return new object[] { "tESt", "TosT" }; - yield return new object[] { "abcdefghijk", "xyz" }; - yield return new object[] { "abcdabcdabcd", "EfGh" }; - yield return new object[] { "test0test1test2test3test4test5test6", "tESt8" }; - yield return new object[] { "This is not a VERY COMPLEX test case", "SiMplE" }; - } - } - - [Theory] - [MemberData(nameof(IgnoreCaseMatchNotFound_TestData))] - public static void IgnoreCaseMatchNotFound(string text, string value) - { - Assert.Equal(-1, Ascii.IndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); - Assert.Equal(-1, Ascii.IndexOfIgnoreCase(text, value)); - Assert.Equal(-1, Ascii.IndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), value)); - Assert.Equal(-1, Ascii.IndexOfIgnoreCase(text, Encoding.ASCII.GetBytes(value))); - - Assert.Equal(-1, Ascii.LastIndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); - Assert.Equal(-1, Ascii.LastIndexOfIgnoreCase(text, value)); - Assert.Equal(-1, Ascii.LastIndexOfIgnoreCase(Encoding.ASCII.GetBytes(text), value)); - Assert.Equal(-1, Ascii.LastIndexOfIgnoreCase(text, Encoding.ASCII.GetBytes(value))); - } - } -} diff --git a/src/libraries/System.Memory/tests/Ascii/StartsEndsWithTests.cs b/src/libraries/System.Memory/tests/Ascii/StartsEndsWithTests.cs deleted file mode 100644 index 9ee12d8c13080c..00000000000000 --- a/src/libraries/System.Memory/tests/Ascii/StartsEndsWithTests.cs +++ /dev/null @@ -1,188 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -using System.Collections.Generic; -using System.Linq; -using System.Runtime.Intrinsics; -using System.Text; -using Xunit; - -namespace System.Buffers.Text.Tests -{ - public static class StartsEndsWithTests - { - [Fact] - public static void InvalidCharactersInValueThrows() - { - Assert.Throws(() => Ascii.StartsWith("aaaa"u8, "\u00C0")); // non-vectorized code path - Assert.Throws(() => Ascii.StartsWith("aaaaaaaaaaaaaaaaaaaaaaaaa"u8, "aaaaaaaaaaaaaaaaaaaaaaaa\u00C0")); // vectorized code path - Assert.Throws(() => Ascii.StartsWith("aaaa", new byte[] { 128 })); - Assert.Throws(() => Ascii.StartsWith(new string('a', 50), Enumerable.Repeat((byte)'a', 49).Concat(new byte[] { 128 }).ToArray())); - Assert.Throws(() => Ascii.StartsWithIgnoreCase("aaaa"u8, "\u00C0")); - Assert.Throws(() => Ascii.StartsWithIgnoreCase("aaaa", "\u00C0")); - Assert.Throws(() => Ascii.StartsWithIgnoreCase("aaaa"u8, new byte[] { 128 })); - Assert.Throws(() => Ascii.StartsWithIgnoreCase("aaaa", new byte[] { 128 })); - - Assert.Throws(() => Ascii.EndsWith("aaaa"u8, "\u00C0")); // non-vectorized code path - Assert.Throws(() => Ascii.EndsWith("aaaaaaaaaaaaaaaaaaaaaaaaa"u8, "aaaaaaaaaaaaaaaaaaaaaaaa\u00C0")); // vectorized code path - Assert.Throws(() => Ascii.EndsWith("aaaa", new byte[] { 128 })); - Assert.Throws(() => Ascii.EndsWith(new string('a', 50), Enumerable.Repeat((byte)'a', 49).Concat(new byte[] { 128 }).ToArray())); - Assert.Throws(() => Ascii.EndsWithIgnoreCase("aaaa"u8, "\u00C0")); - Assert.Throws(() => Ascii.EndsWithIgnoreCase("aaaa", "\u00C0")); - Assert.Throws(() => Ascii.EndsWithIgnoreCase("aaaa"u8, new byte[] { 128 })); - Assert.Throws(() => Ascii.EndsWithIgnoreCase("aaaa", new byte[] { 128 })); - } - - public static IEnumerable ExactMatchFound_TestData - { - get - { - yield return new object[] { "test", "test" }; - yield return new object[] { "test", "t" }; - yield return new object[] { "test", "" }; - - for (int textLength = 1; textLength <= Vector128.Count * 4 + 1; textLength++) 
- { - for (int valueLength = 0; valueLength <= textLength; valueLength++) - { - char ascii = (char)(textLength % 128); - yield return new object[] { new string(ascii, textLength), new string(ascii, valueLength) }; - } - } - } - } - - [Theory] - [MemberData(nameof(ExactMatchFound_TestData))] - public static void MatchFound(string text, string value) - { - Assert.True(Ascii.StartsWith(text, Encoding.ASCII.GetBytes(value))); - Assert.True(Ascii.StartsWith(Encoding.ASCII.GetBytes(text), value)); - Assert.True(Ascii.StartsWithIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); - Assert.True(Ascii.StartsWithIgnoreCase(text, value)); - Assert.True(Ascii.StartsWithIgnoreCase(Encoding.ASCII.GetBytes(text), value)); - Assert.True(Ascii.StartsWithIgnoreCase(text, Encoding.ASCII.GetBytes(value))); - - Assert.True(Ascii.EndsWith(text, Encoding.ASCII.GetBytes(value))); - Assert.True(Ascii.EndsWith(Encoding.ASCII.GetBytes(text), value)); - Assert.True(Ascii.EndsWithIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); - Assert.True(Ascii.EndsWithIgnoreCase(text, value)); - Assert.True(Ascii.EndsWithIgnoreCase(Encoding.ASCII.GetBytes(text), value)); - Assert.True(Ascii.EndsWithIgnoreCase(text, Encoding.ASCII.GetBytes(value))); - } - - public static IEnumerable IgnoreCaseMatchFound_TestData - { - get - { - yield return new object[] { "test", "TEST" }; - yield return new object[] { "test", "T" }; - yield return new object[] { "test", "" }; - - for (int textLength = 1; textLength <= Vector128.Count * 4 + 1; textLength++) - { - for (int valueLength = 0; valueLength <= textLength; valueLength++) - { - char t = (char)(textLength % 128); - char v = char.IsAsciiLetterUpper(t) ? char.ToLower(t) : char.IsAsciiLetterLower(t) ? 
char.ToUpper(t) : t; - yield return new object[] { new string(t, textLength), new string(v, valueLength) }; - } - } - } - } - - [Theory] - [MemberData(nameof(IgnoreCaseMatchFound_TestData))] - public static void IgnoreCaseMatchFound(string text, string value) - { - Assert.True(Ascii.StartsWithIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); - Assert.True(Ascii.StartsWithIgnoreCase(text, value)); - Assert.True(Ascii.StartsWithIgnoreCase(Encoding.ASCII.GetBytes(text), value)); - Assert.True(Ascii.StartsWithIgnoreCase(text, Encoding.ASCII.GetBytes(value))); - - Assert.True(Ascii.EndsWithIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); - Assert.True(Ascii.EndsWithIgnoreCase(text, value)); - Assert.True(Ascii.EndsWithIgnoreCase(Encoding.ASCII.GetBytes(text), value)); - Assert.True(Ascii.EndsWithIgnoreCase(text, Encoding.ASCII.GetBytes(value))); - } - - public static IEnumerable ExactMatchNotFound_TestData - { - get - { - yield return new object[] { "test", "tesT" }; - yield return new object[] { "test", "Test" }; - yield return new object[] { "test", "T" }; - yield return new object[] { "test", "!" 
}; - - for (int textLength = 1; textLength <= Vector128.Count * 4 + 1; textLength++) - { - yield return new object[] { new string('a', textLength), new string('b', 1) }; - - for (int valueLength = 1; valueLength <= textLength; valueLength++) - { - yield return new object[] { new string('a', textLength), string.Create(valueLength, valueLength / 2, (destination, index) => - { - destination.Fill('a'); - destination[index] = 'b'; - })}; - } - } - } - } - - [Theory] - [MemberData(nameof(ExactMatchNotFound_TestData))] - public static void ExactMatchNotFound(string text, string value) - { - Assert.False(Ascii.StartsWith(text, Encoding.ASCII.GetBytes(value))); - Assert.False(Ascii.StartsWith(Encoding.ASCII.GetBytes(text), value)); - - Assert.False(Ascii.EndsWith(text, Encoding.ASCII.GetBytes(value))); - Assert.False(Ascii.EndsWith(Encoding.ASCII.GetBytes(text), value)); - } - - public static IEnumerable IgnoreCaseMatchNotFound_TestData - { - get - { - yield return new object[] { "test", "tes#" }; - yield return new object[] { "test", "T2st" }; - yield return new object[] { "test", "1" }; - yield return new object[] { "test", "#" }; - - for (int textLength = 1; textLength <= Vector128.Count * 4 + 1; textLength++) - { - yield return new object[] { new string('a', textLength), new string('b', 1) }; - - for (int valueLength = 1; valueLength <= textLength; valueLength++) - { - char t = (char)(textLength % 128); - char v = (char)(t != 127 ? 
t + 1 : 126); - - yield return new object[] { new string(t, textLength), string.Create(valueLength, (t, v), (destination, chars) => - { - destination.Fill(chars.t); - destination[destination.Length / 2] = chars.v; - })}; - } - } - } - } - - [Theory] - [MemberData(nameof(IgnoreCaseMatchNotFound_TestData))] - public static void IgnoreCaseMatchNotFound(string text, string value) - { - Assert.False(Ascii.StartsWithIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); - Assert.False(Ascii.StartsWithIgnoreCase(text, value)); - Assert.False(Ascii.StartsWithIgnoreCase(Encoding.ASCII.GetBytes(text), value)); - Assert.False(Ascii.StartsWithIgnoreCase(text, Encoding.ASCII.GetBytes(value))); - - Assert.False(Ascii.EndsWithIgnoreCase(Encoding.ASCII.GetBytes(text), Encoding.ASCII.GetBytes(value))); - Assert.False(Ascii.EndsWithIgnoreCase(text, value)); - Assert.False(Ascii.EndsWithIgnoreCase(Encoding.ASCII.GetBytes(text), value)); - Assert.False(Ascii.EndsWithIgnoreCase(text, Encoding.ASCII.GetBytes(value))); - } - } -} diff --git a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj index 50c6c67aa82102..5e0b857be6a71c 100644 --- a/src/libraries/System.Memory/tests/System.Memory.Tests.csproj +++ b/src/libraries/System.Memory/tests/System.Memory.Tests.csproj @@ -13,17 +13,6 @@ - - - - - - - - - - - diff --git a/src/libraries/System.Net.Http/src/System.Net.Http.csproj b/src/libraries/System.Net.Http/src/System.Net.Http.csproj index f61dabff43412a..fadb5f1bd51ec4 100644 --- a/src/libraries/System.Net.Http/src/System.Net.Http.csproj +++ b/src/libraries/System.Net.Http/src/System.Net.Http.csproj @@ -29,6 +29,7 @@ + diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/ByteArrayHelpers.cs b/src/libraries/System.Net.Http/src/System/Net/Http/ByteArrayHelpers.cs new file mode 100644 index 00000000000000..d6299477cf8d71 --- /dev/null +++ 
b/src/libraries/System.Net.Http/src/System/Net/Http/ByteArrayHelpers.cs @@ -0,0 +1,62 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics; + +namespace System +{ + internal static class ByteArrayHelpers + { + // TODO: https://github.com/dotnet/runtime/issues/28230 + // Use Ascii.Equals* when it's available. + + internal static bool EqualsOrdinalAsciiIgnoreCase(string left, ReadOnlySpan right) + { + Debug.Assert(left != null, "Expected non-null string"); + + if (left.Length != right.Length) + { + return false; + } + + for (int i = 0; i < left.Length; i++) + { + uint charA = left[i]; + uint charB = right[i]; + + // We're only interested in ASCII characters here. + if ((charA - 'a') <= ('z' - 'a')) + charA -= ('a' - 'A'); + if ((charB - 'a') <= ('z' - 'a')) + charB -= ('a' - 'A'); + + if (charA != charB) + { + return false; + } + } + + return true; + } + + internal static bool EqualsOrdinalAscii(string left, ReadOnlySpan right) + { + Debug.Assert(left != null, "Expected non-null string"); + + if (left.Length != right.Length) + { + return false; + } + + for (int i = 0; i < left.Length; i++) + { + if (left[i] != right[i]) + { + return false; + } + } + + return true; + } + } +} diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/ContentDispositionHeaderValue.cs b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/ContentDispositionHeaderValue.cs index 85860b8c693829..0a92e06df45335 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/ContentDispositionHeaderValue.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/ContentDispositionHeaderValue.cs @@ -1,7 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-using System.Buffers.Text; using System.Collections.Generic; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; @@ -423,7 +422,7 @@ private static string EncodeAndQuoteMime(string input) throw new ArgumentException(SR.Format(CultureInfo.InvariantCulture, SR.net_http_headers_invalid_value, input)); } - else if (!Ascii.IsAscii(result)) + else if (!Ascii.IsValid(result)) { needsQuotes = true; // Encoded data must always be quoted, the equals signs are invalid in tokens. result = EncodeMime(result); // =?utf-8?B?asdfasdfaesdf?= diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderDescriptor.cs b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderDescriptor.cs index 74a0e56b03511f..d04c9f4877e48e 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderDescriptor.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/HeaderDescriptor.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Buffers; -using System.Buffers.Text; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Text; @@ -144,7 +143,7 @@ public string GetHeaderValue(ReadOnlySpan headerValue, Encoding? valueEnco { for (int i = 0; i < knownValues.Length; i++) { - if (Ascii.Equals(headerValue, knownValues[i])) + if (ByteArrayHelpers.EqualsOrdinalAscii(knownValues[i], headerValue)) { return knownValues[i]; } @@ -252,7 +251,7 @@ public string GetHeaderValue(ReadOnlySpan headerValue, Encoding? valueEnco Debug.Assert(candidate is null || candidate.Length == contentTypeValue.Length); - return candidate != null && Ascii.Equals(contentTypeValue, candidate) ? + return candidate != null && ByteArrayHelpers.EqualsOrdinalAscii(candidate, contentTypeValue) ? 
candidate : null; } diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/KnownHeaders.cs b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/KnownHeaders.cs index f040d7d66310db..d1b624d06260ea 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/Headers/KnownHeaders.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/Headers/KnownHeaders.cs @@ -1,7 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using System.Buffers.Text; using System.Net.Http.HPack; using System.Net.Http.QPack; using System.Runtime.InteropServices; @@ -430,7 +429,7 @@ public BytePtrAccessor(byte* p, int length) fixed (byte* p = &MemoryMarshal.GetReference(name)) { KnownHeader? candidate = GetCandidate(new BytePtrAccessor(p, name.Length)); - if (candidate != null && Ascii.EqualsIgnoreCase(name, candidate.Name)) + if (candidate != null && ByteArrayHelpers.EqualsOrdinalAsciiIgnoreCase(candidate.Name, name)) { return candidate; } diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/AuthenticationHelper.Digest.cs b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/AuthenticationHelper.Digest.cs index 023df8bbcbb1cc..89bd8dc802770c 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/AuthenticationHelper.Digest.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/AuthenticationHelper.Digest.cs @@ -1,7 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-using System.Buffers.Text; using System.Collections.Generic; using System.Diagnostics; using System.IO; @@ -89,7 +88,7 @@ internal static partial class AuthenticationHelper } else { - if (!Ascii.IsAscii(credential.UserName)) + if (!Ascii.IsValid(credential.UserName)) { string usernameStar = HeaderUtilities.Encode5987(credential.UserName); sb.AppendKeyValue(UsernameStar, usernameStar, includeQuotes: false); diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnection.cs b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnection.cs index 18d332abea017f..42a099db40a8bc 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnection.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnection.cs @@ -1023,7 +1023,7 @@ private static void ParseStatusLine(ReadOnlySpan line, HttpResponseMessage { ReadOnlySpan reasonBytes = line.Slice(MinStatusLineLength + 1); string? knownReasonPhrase = HttpStatusDescription.Get(response.StatusCode); - if (knownReasonPhrase != null && Ascii.Equals(reasonBytes, knownReasonPhrase)) + if (knownReasonPhrase != null && ByteArrayHelpers.EqualsOrdinalAscii(knownReasonPhrase, reasonBytes)) { response.SetReasonPhraseWithoutValidation(knownReasonPhrase); } @@ -1448,7 +1448,7 @@ private Task WriteAsciiStringAsync(string s, bool async) int offset = _writeOffset; if (s.Length <= _writeBuffer.Length - offset) { - OperationStatus operationStatus = Ascii.FromUtf16(s, _writeBuffer.AsSpan(offset), out _, out int bytesWritten); + OperationStatus operationStatus = Ascii.FromUtf16(s, _writeBuffer.AsSpan(offset), out int bytesWritten); Debug.Assert(operationStatus == OperationStatus.Done); _writeOffset = offset + bytesWritten; @@ -1462,7 +1462,7 @@ private Task WriteAsciiStringAsync(string s, bool async) private async Task WriteStringAsyncSlow(string s, bool async) { - if (!Ascii.IsAscii(s)) + if (!Ascii.IsValid(s)) { throw new 
HttpRequestException(SR.net_http_request_invalid_char_encoding); } diff --git a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnectionBase.cs b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnectionBase.cs index b982d1774bf1e1..45eae0fa57649f 100644 --- a/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnectionBase.cs +++ b/src/libraries/System.Net.Http/src/System/Net/Http/SocketsHttpHandler/HttpConnectionBase.cs @@ -1,7 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using System.Buffers.Text; using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.IO; @@ -32,7 +31,7 @@ public string GetResponseHeaderValueWithCaching(HeaderDescriptor descriptor, Rea static string GetOrAddCachedValue([NotNull] ref string? cache, HeaderDescriptor descriptor, ReadOnlySpan value, Encoding? encoding) { string? 
lastValue = cache; - if (lastValue is null || !Ascii.Equals(value, lastValue)) + if (lastValue is null || !ByteArrayHelpers.EqualsOrdinalAscii(lastValue, value)) { cache = lastValue = descriptor.GetHeaderValue(value, encoding); } diff --git a/src/libraries/System.Net.Http/tests/UnitTests/System.Net.Http.Unit.Tests.csproj b/src/libraries/System.Net.Http/tests/UnitTests/System.Net.Http.Unit.Tests.csproj index f03f0d76d40085..85139c5391ff85 100644 --- a/src/libraries/System.Net.Http/tests/UnitTests/System.Net.Http.Unit.Tests.csproj +++ b/src/libraries/System.Net.Http/tests/UnitTests/System.Net.Http.Unit.Tests.csproj @@ -72,6 +72,8 @@ Link="ProductionCode\System\Net\Http\DelegatingHandler.cs" /> + 0) diff --git a/src/libraries/System.Net.Mail/src/System/Net/Mail/WhitespaceReader.cs b/src/libraries/System.Net.Mail/src/System/Net/Mail/WhitespaceReader.cs index 9bd4e0cb62e48c..b4f4382ccb8bdb 100644 --- a/src/libraries/System.Net.Mail/src/System/Net/Mail/WhitespaceReader.cs +++ b/src/libraries/System.Net.Mail/src/System/Net/Mail/WhitespaceReader.cs @@ -1,7 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using System.Buffers.Text; +using System.Text; using System.Diagnostics; using System.Net.Mime; @@ -167,7 +167,7 @@ internal static bool TryReadCfwsReverse(string data, int index, out int outIndex } // Check for valid characters within comments. Allow Unicode, as we won't transmit any comments. 
else if (commentDepth > 0 - && (!Ascii.IsAscii(data[index]) || MailBnfHelper.Ctext[data[index]])) + && (!Ascii.IsValid(data[index]) || MailBnfHelper.Ctext[data[index]])) { index--; } diff --git a/src/libraries/System.Net.Mail/src/System/Net/Mime/MimeBasePart.cs b/src/libraries/System.Net.Mail/src/System/Net/Mime/MimeBasePart.cs index 9ed1b48341155c..b67c6c3a186a98 100644 --- a/src/libraries/System.Net.Mail/src/System/Net/Mime/MimeBasePart.cs +++ b/src/libraries/System.Net.Mail/src/System/Net/Mime/MimeBasePart.cs @@ -4,7 +4,6 @@ using System.Collections.Specialized; using System.Text; using System.Net.Mail; -using System.Buffers.Text; namespace System.Net.Mime { @@ -112,7 +111,7 @@ internal static bool IsAscii(string value, bool permitCROrLF) { ArgumentNullException.ThrowIfNull(value); - return Ascii.IsAscii(value) && (permitCROrLF || value.AsSpan().IndexOfAny('\r', '\n') < 0); + return Ascii.IsValid(value) && (permitCROrLF || value.AsSpan().IndexOfAny('\r', '\n') < 0); } internal string? 
ContentID diff --git a/src/libraries/System.Net.WebClient/src/System/Net/WebClient.cs b/src/libraries/System.Net.WebClient/src/System/Net/WebClient.cs index 7921fbf870e40b..de49e248ae2637 100644 --- a/src/libraries/System.Net.WebClient/src/System/Net/WebClient.cs +++ b/src/libraries/System.Net.WebClient/src/System/Net/WebClient.cs @@ -512,7 +512,7 @@ private void OpenFileInternal( boundaryBytes = new byte["\r\n--".Length + boundary.Length + "--\r\n".Length]; "\r\n--"u8.CopyTo(boundaryBytes); "--\r\n"u8.CopyTo(boundaryBytes.AsSpan("\r\n--".Length + boundary.Length)); - OperationStatus conversionStatus = Ascii.FromUtf16(boundary, boundaryBytes.AsSpan("\r\n--".Length), out _, out _); + OperationStatus conversionStatus = Ascii.FromUtf16(boundary, boundaryBytes.AsSpan("\r\n--".Length), out _); Debug.Assert(conversionStatus == OperationStatus.Done); } else diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 0c0e9839b4607f..368ad23c0bb04b 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -115,14 +115,6 @@ - - - - - - - - @@ -1023,6 +1015,12 @@ + + + + + + diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs deleted file mode 100644 index f48194d1ed1381..00000000000000 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Comparison.cs +++ /dev/null @@ -1,416 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -using System.Diagnostics; -using System.Diagnostics.CodeAnalysis; -using System.Globalization; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.Arm; -using System.Text; -using System.Text.Unicode; -using System.Numerics; - -#pragma warning disable SA1121 // Use built-in type alias -// used to express: from the two provided char and byte buffers, check byte buffer for non-ASCII bytes -// as it's the value ("needle") that must not contain non-ASCII characters. Used by StartsWith and EndstWith. -using CheckBytes = System.Byte; -// same as above, but for chars -using CheckChars = System.Char; -// don't check for non-ASCII (used by Equals which does not throw for non-ASCII bytes) -using SkipChecks = System.Boolean; -// used to express: check value for non-ASCII bytes/chars -using CheckValue = System.SByte; - -namespace System.Buffers.Text -{ - public static partial class Ascii - { - public static bool TryGetHashCode(ReadOnlySpan value, out int hashCode) - { - if (!IsAscii(value)) - { - hashCode = 0; - return false; - } - - ulong seed = Marvin.DefaultSeed; - hashCode = Marvin.ComputeHash32(ref MemoryMarshal.GetReference(value), (uint)value.Length, (uint)seed, (uint)(seed >> 32)); - return true; - } - - public static bool TryGetHashCode(ReadOnlySpan value, out int hashCode) - { - if (!IsAscii(value)) - { - hashCode = 0; - return false; - } - - ulong seed = Marvin.DefaultSeed; - hashCode = Marvin.ComputeHash32(ref Unsafe.As(ref MemoryMarshal.GetReference(value)), (uint)value.Length * 2, (uint)seed, (uint)(seed >> 32)); - return true; - } - - public static bool TryGetHashCodeIgnoreCase(ReadOnlySpan value, out int hashCode) - { - ulong seed = Marvin.DefaultSeed; - return Marvin.TryComputeHash32ForAsciiIgnoreCase(ref MemoryMarshal.GetReference(value), value.Length, (uint)seed, (uint)(seed >> 32), out hashCode); - } - - public static bool TryGetHashCodeIgnoreCase(ReadOnlySpan 
value, out int hashCode) - { - ulong seed = Marvin.DefaultSeed; - hashCode = Marvin.ComputeHash32OrdinalIgnoreCase(ref MemoryMarshal.GetReference(value), value.Length, (uint)seed, (uint)(seed >> 32), out bool nonAsciiFound, stopOnNonAscii: true); - return !nonAsciiFound; - } - - public static int GetHashCode(ReadOnlySpan value) - { - if (!TryGetHashCode(value, out int hashCode)) - { - ThrowNonAsciiFound(); - } - return hashCode; - } - - public static int GetHashCode(ReadOnlySpan value) - { - if (!TryGetHashCode(value, out int hashCode)) - { - ThrowNonAsciiFound(); - } - return hashCode; - } - - public static int GetHashCodeIgnoreCase(ReadOnlySpan value) - { - if (!TryGetHashCodeIgnoreCase(value, out int hashCode)) - { - ThrowNonAsciiFound(); - } - return hashCode; - } - - public static int GetHashCodeIgnoreCase(ReadOnlySpan value) - { - if (!TryGetHashCodeIgnoreCase(value, out int hashCode)) - { - ThrowNonAsciiFound(); - } - return hashCode; - } - - public static bool Equals(ReadOnlySpan left, ReadOnlySpan right) - => left.Length == right.Length && Equals(right, left) == EqualsResult.Match; - - public static bool EqualsIgnoreCase(ReadOnlySpan left, ReadOnlySpan right) - => left.Length == right.Length && SequenceEqualIgnoreCase(left, right) == EqualsResult.Match; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool EqualsIgnoreCase(ReadOnlySpan left, ReadOnlySpan right) - => left.Length == right.Length && Ordinal.EqualsIgnoreCase(ref MemoryMarshal.GetReference(left), ref MemoryMarshal.GetReference(right), left.Length); - - public static bool EqualsIgnoreCase(ReadOnlySpan left, ReadOnlySpan right) - => left.Length == right.Length && SequenceEqualIgnoreCase(right, left) == EqualsResult.Match; - - public static unsafe bool StartsWith(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(Equals(value, text.Slice(0, value.Length)))); - - public static unsafe bool EndsWith(ReadOnlySpan text, ReadOnlySpan 
value) - => value.IsEmpty || (text.Length >= value.Length && Map(Equals(value, text.Slice(text.Length - value.Length)))); - - public static unsafe bool StartsWith(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(Equals(text.Slice(0, value.Length), value))); - - public static unsafe bool EndsWith(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(Equals(text.Slice(text.Length - value.Length), value))); - - public static bool StartsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(0, value.Length), value))); - - public static bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(text.Length - value.Length), value))); - - // TODO adsitnik: discuss whether this overload should exists, as the only difference with ROS.StartsWith(ROS, StringComparison.OrdinalIgnoreCase) - // is throwing an exception for non-ASCII characters found in value - public static bool StartsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(0, value.Length), value))); - - public static bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(text.Length - value.Length), value))); - - public static unsafe bool StartsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(0, value.Length), value))); - - public static unsafe bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(text.Length - value.Length), value))); - - public 
static unsafe bool StartsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(0, value.Length), value))); - - public static unsafe bool EndsWithIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => value.IsEmpty || (text.Length >= value.Length && Map(SequenceEqualIgnoreCase(text.Slice(text.Length - value.Length), value))); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool Map(EqualsResult equalsResult) - => equalsResult switch - { - EqualsResult.NonAsciiFound => ThrowNonAsciiFound(), - EqualsResult.Match => true, - _ => false - }; - - [DoesNotReturn] - private static bool ThrowNonAsciiFound() => throw new ArgumentException(SR.Arg_ContainsNonAscii, "value"); - - private static EqualsResult Equals(ReadOnlySpan chars, ReadOnlySpan bytes) where TCheck : struct - { - Debug.Assert(typeof(TCheck) == typeof(CheckBytes) || typeof(TCheck) == typeof(CheckChars) || typeof(TCheck) == typeof(SkipChecks)); - Debug.Assert(chars.Length == bytes.Length); - - if (!Vector128.IsHardwareAccelerated || chars.Length < Vector128.Count) - { - for (int i = 0; i < chars.Length; i++) - { - char c = chars[i]; - byte b = bytes[i]; - - if (typeof(TCheck) == typeof(CheckChars)) - { - if (!UnicodeUtility.IsAsciiCodePoint(c)) - { - return EqualsResult.NonAsciiFound; - } - } - else if (typeof(TCheck) == typeof(CheckBytes)) - { - if (!UnicodeUtility.IsAsciiCodePoint(b)) - { - return EqualsResult.NonAsciiFound; - } - } - - if (c != b) - { - return EqualsResult.NoMatch; - } - } - } - else if (Vector256.IsHardwareAccelerated && chars.Length >= Vector256.Count) - { - ref ushort currentCharsSearchSpace = ref Unsafe.As(ref MemoryMarshal.GetReference(chars)); - ref ushort oneVectorAwayFromCharsEnd = ref Unsafe.Add(ref currentCharsSearchSpace, chars.Length - Vector256.Count); - ref byte currentBytesSearchSpace = ref MemoryMarshal.GetReference(bytes); - ref byte oneVectorAwayFromBytesEnd = 
ref Unsafe.Add(ref currentBytesSearchSpace, bytes.Length - Vector128.Count); - - Vector128 byteValues; - Vector256 charValues; - - // Loop until either we've finished all elements or there's less than a vector's-worth remaining. - do - { - charValues = Vector256.LoadUnsafe(ref currentCharsSearchSpace); - byteValues = Vector128.LoadUnsafe(ref currentBytesSearchSpace); - - if (typeof(TCheck) == typeof(CheckChars)) - { - if (charValues.AsByte().ExtractMostSignificantBits() != 0) - { - return EqualsResult.NonAsciiFound; - } - } - else if (typeof(TCheck) == typeof(CheckBytes)) - { - if (byteValues.ExtractMostSignificantBits() != 0) - { - return EqualsResult.NonAsciiFound; - } - } - - // it's OK to widen the bytes, it's NOT OK to narrow the chars (we could loose some information) - if (Vector256.Equals(Widen(byteValues), charValues) != Vector256.AllBitsSet) - { - return EqualsResult.NoMatch; - } - - currentCharsSearchSpace = ref Unsafe.Add(ref currentCharsSearchSpace, Vector256.Count); - currentBytesSearchSpace = ref Unsafe.Add(ref currentBytesSearchSpace, Vector128.Count); - } - while (!Unsafe.IsAddressGreaterThan(ref currentCharsSearchSpace, ref oneVectorAwayFromCharsEnd)); - - // If any elements remain, process the last vector in the search space. 
- if ((uint)chars.Length % Vector256.Count != 0) - { - charValues = Vector256.LoadUnsafe(ref oneVectorAwayFromCharsEnd); - byteValues = Vector128.LoadUnsafe(ref oneVectorAwayFromBytesEnd); - - if (typeof(TCheck) == typeof(CheckChars)) - { - if (charValues.AsByte().ExtractMostSignificantBits() != 0) - { - return EqualsResult.NonAsciiFound; - } - } - else if (typeof(TCheck) == typeof(CheckBytes)) - { - if (byteValues.ExtractMostSignificantBits() != 0) - { - return EqualsResult.NonAsciiFound; - } - } - - // it's OK to widen the bytes, it's NOT OK to narrow the chars (we could loose some information) - if (Vector256.Equals(Widen(byteValues), charValues) != Vector256.AllBitsSet) - { - return EqualsResult.NoMatch; - } - } - } - else - { - ref ushort currentCharsSearchSpace = ref Unsafe.As(ref MemoryMarshal.GetReference(chars)); - ref ushort oneVectorAwayFromCharsEnd = ref Unsafe.Add(ref currentCharsSearchSpace, chars.Length - Vector128.Count); - ref byte currentBytesSearchSpace = ref MemoryMarshal.GetReference(bytes); - ref byte oneVectorAwayFromBytesEnd = ref Unsafe.Add(ref currentBytesSearchSpace, bytes.Length - Vector64.Count); - - Vector64 byteValues; - Vector128 charValues; - - // Loop until either we've finished all elements or there's less than a vector's-worth remaining. 
- do - { - charValues = Vector128.LoadUnsafe(ref currentCharsSearchSpace); - byteValues = Vector64.LoadUnsafe(ref currentBytesSearchSpace); - - if (typeof(TCheck) == typeof(CheckChars)) - { - if (VectorContainsNonAsciiChar(charValues)) - { - return EqualsResult.NonAsciiFound; - } - } - else if (typeof(TCheck) == typeof(CheckBytes)) - { - if (VectorContainsNonAsciiChar(byteValues)) - { - return EqualsResult.NonAsciiFound; - } - } - - // it's OK to widen the bytes, it's NOT OK to narrow the chars (we could loose some information) - if (Vector128.Equals(Widen(byteValues), charValues) != Vector128.AllBitsSet) - { - return EqualsResult.NoMatch; - } - - currentCharsSearchSpace = ref Unsafe.Add(ref currentCharsSearchSpace, Vector128.Count); - currentBytesSearchSpace = ref Unsafe.Add(ref currentBytesSearchSpace, Vector64.Count); - } - while (!Unsafe.IsAddressGreaterThan(ref currentCharsSearchSpace, ref oneVectorAwayFromCharsEnd)); - - // If any elements remain, process the last vector in the search space. 
- if ((uint)chars.Length % Vector128.Count != 0) - { - charValues = Vector128.LoadUnsafe(ref oneVectorAwayFromCharsEnd); - byteValues = Vector64.LoadUnsafe(ref oneVectorAwayFromBytesEnd); - - if (typeof(TCheck) == typeof(CheckChars)) - { - if (VectorContainsNonAsciiChar(charValues)) - { - return EqualsResult.NonAsciiFound; - } - } - else if (typeof(TCheck) == typeof(CheckBytes)) - { - if (VectorContainsNonAsciiChar(byteValues)) - { - return EqualsResult.NonAsciiFound; - } - } - - // it's OK to widen the bytes, it's NOT OK to narrow the chars (we could loose some information) - if (Vector128.Equals(Widen(byteValues), charValues) != Vector128.AllBitsSet) - { - return EqualsResult.NoMatch; - } - } - } - - return EqualsResult.Match; - } - - private static EqualsResult SequenceEqualIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - where TText : unmanaged, INumberBase - where TValue : unmanaged, INumberBase - where TCheck : struct - { - Debug.Assert(text.Length == value.Length); - - for (int i = 0; i < text.Length; i++) - { - uint valueA = uint.CreateTruncating(text[i]); - uint valueB = uint.CreateTruncating(value[i]); - - if (typeof(TCheck) == typeof(CheckValue)) - { - if (!UnicodeUtility.IsAsciiCodePoint(valueB)) - { - return EqualsResult.NonAsciiFound; - } - } - - if (valueA == valueB) - { - continue; // exact match - } - - valueA |= 0x20u; - if ((uint)(valueA - 'a') > (uint)('z' - 'a')) - { - return EqualsResult.NoMatch; // not exact match, and first input isn't in [A-Za-z] - } - - if (valueA != (valueB | 0x20u)) - { - return EqualsResult.NoMatch; - } - } - - return EqualsResult.Match; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128 Widen(Vector64 bytes) - { - if (AdvSimd.IsSupported) - { - return AdvSimd.ZeroExtendWideningLower(bytes); - } - else - { - (Vector64 lower, Vector64 upper) = Vector64.Widen(bytes); - return Vector128.Create(lower, upper); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private 
static Vector256 Widen(Vector128 bytes) - { - (Vector128 lower, Vector128 upper) = Vector128.Widen(bytes); - return Vector256.Create(lower, upper); - } - - private static bool VectorContainsNonAsciiChar(Vector64 bytes) - => !Utf8Utility.AllBytesInUInt64AreAscii(bytes.AsUInt64().ToScalar()); - - private enum EqualsResult - { - NoMatch, - Match, - NonAsciiFound - } - } -} diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs deleted file mode 100644 index 4edc21679a75cf..00000000000000 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Searching.cs +++ /dev/null @@ -1,278 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Text; - -#pragma warning disable SA1121 // Use built-in type alias -// used to express: check value for non-ASCII bytes/chars -using CheckValue = System.SByte; - -namespace System.Buffers.Text -{ - public static partial class Ascii - { - private const int StackallocBytesLimit = 512; - - public static int IndexOf(ReadOnlySpan text, ReadOnlySpan value) - => IndexOf(text, value); - - public static int LastIndexOf(ReadOnlySpan text, ReadOnlySpan value) - => LastIndexOf(text, value); - - public static int IndexOf(ReadOnlySpan text, ReadOnlySpan value) - => IndexOf(text, value); - - public static int LastIndexOf(ReadOnlySpan text, ReadOnlySpan value) - => LastIndexOf(text, value); - - public static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => IndexOfIgnoreCase(text, value); - - public static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => IndexOfIgnoreCase(text, value); - - public static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => 
IndexOfIgnoreCase(text, value); - - public static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => IndexOfIgnoreCase(text, value); - - public static int LastIndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => LastIndexOfIgnoreCase(text, value); - - public static int LastIndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => LastIndexOfIgnoreCase(text, value); - - public static int LastIndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => LastIndexOfIgnoreCase(text, value); - - public static int LastIndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - => LastIndexOfIgnoreCase(text, value); - - private static int IndexOf(ReadOnlySpan text, ReadOnlySpan value) - where TText : unmanaged, IEquatable? - where TValue : unmanaged, IEquatable? - where TConverter : struct, IConverter - { - if (value.IsEmpty) - { - return 0; - } - else if (value.Length > text.Length) - { - return -1; - } - - TText[]? rented = null; - Span converted = value.Length <= (StackallocBytesLimit / Unsafe.SizeOf()) - ? stackalloc TText[StackallocBytesLimit / Unsafe.SizeOf()] - : (rented = ArrayPool.Shared.Rent(value.Length)); - - try - { - TConverter.Convert(value, converted); - - return MemoryExtensions.IndexOf(text, converted.Slice(0, value.Length)); - } - finally - { - if (rented is not null) - { - ArrayPool.Shared.Return(rented); - } - } - } - - private static int LastIndexOf(ReadOnlySpan text, ReadOnlySpan value) - where TText : unmanaged, IEquatable? - where TValue : unmanaged, IEquatable? - where TConverter : struct, IConverter - { - if (value.IsEmpty) - { - return text.Length; - } - else if (value.Length > text.Length) - { - return -1; - } - - TText[]? rented = null; - Span converted = value.Length <= (StackallocBytesLimit / Unsafe.SizeOf()) - ? 
stackalloc TText[StackallocBytesLimit / Unsafe.SizeOf()] - : (rented = ArrayPool.Shared.Rent(value.Length)); - - try - { - TConverter.Convert(value, converted); - - return MemoryExtensions.LastIndexOf(text, converted.Slice(0, value.Length)); - } - finally - { - if (rented is not null) - { - ArrayPool.Shared.Return(rented); - } - } - } - - private static int IndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - where TText : unmanaged, IEquatable?, INumberBase - where TValue : unmanaged, IEquatable?, INumberBase - { - if (value.IsEmpty) - { - return 0; - } - else if (value.Length > text.Length) - { - return -1; - } - - TValue firstValue = value[0]; - char firstChar = (char)ushort.CreateTruncating(firstValue); - if (!IsAscii(firstChar)) - { - ThrowNonAsciiFound(); - } - TText valueHead = TText.CreateTruncating(firstValue); - TText valueHeadDifferentCase = TText.CreateTruncating((ushort)GetDifferentCaseOrSame(firstChar)); - - int valueTailLength = value.Length - 1; - if (valueTailLength == 0) - { - return MemoryExtensions.IndexOfAny(text, valueHead, valueHeadDifferentCase); // for single-byte values use plain IndexOf - } - - int searchSpaceMinusValueTailLength = text.Length - valueTailLength; - int offset = 0; - int remainingSearchSpaceLength = searchSpaceMinusValueTailLength; - - while (remainingSearchSpaceLength > 0) - { - // Do a quick search for the first element of "value". - int relativeIndex = MemoryExtensions.IndexOfAny(text.Slice(offset), valueHead, valueHeadDifferentCase); - if (relativeIndex < 0) - break; - - remainingSearchSpaceLength -= relativeIndex; - offset += relativeIndex; - - if (remainingSearchSpaceLength <= 0) - break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. - - // Found the first element of "value". See if the tail matches. 
- if (Map(SequenceEqualIgnoreCase(text.Slice(offset + 1, value.Length - 1), value.Slice(1)))) // Map throws if non-ASCII char is found in value - return offset; // The tail matched. Return a successful find. - - remainingSearchSpaceLength--; - offset++; - } - - return -1; - } - - private static int LastIndexOfIgnoreCase(ReadOnlySpan text, ReadOnlySpan value) - where TText : unmanaged, IEquatable?, INumberBase - where TValue : unmanaged, IEquatable?, INumberBase - { - if (value.IsEmpty) - { - return text.Length; - } - else if (value.Length > text.Length) - { - return -1; - } - - TValue firstValue = value[0]; - char firstChar = (char)ushort.CreateTruncating(firstValue); - if (!IsAscii(firstChar)) - { - ThrowNonAsciiFound(); - } - TText valueHead = TText.CreateTruncating(firstValue); - TText valueHeadDifferentCase = TText.CreateTruncating((ushort)GetDifferentCaseOrSame(firstChar)); - - int valueTailLength = value.Length - 1; - if (valueTailLength == 0) - { - return MemoryExtensions.LastIndexOfAny(text, valueHead, valueHeadDifferentCase); // for single-byte values use plain IndexOf - } - - int offset = 0; - - while (true) - { - int remainingSearchSpaceLength = text.Length - offset - valueTailLength; - if (remainingSearchSpaceLength <= 0) - break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. - - // Do a quick search for the first element of "value". - int relativeIndex = MemoryExtensions.LastIndexOfAny(text.Slice(0, remainingSearchSpaceLength), valueHead, valueHeadDifferentCase); - if (relativeIndex < 0) - break; - - // Found the first element of "value". See if the tail matches. - if (Map(SequenceEqualIgnoreCase(text.Slice(relativeIndex + 1, value.Length - 1), value.Slice(1)))) - return relativeIndex; // The tail matched. Return a successful find. - - offset += remainingSearchSpaceLength - relativeIndex; - } - - return -1; - } - - private static char GetDifferentCaseOrSame(char c) - => char.IsAsciiLetterLower(c) ? 
(char)(c + 'A' - 'a') : char.IsAsciiLetterUpper(c) ? (char)(c - 'A' + 'a') : c; - - private interface IConverter - where TFrom : unmanaged - where TTo : unmanaged - { - static abstract void Convert(ReadOnlySpan source, Span destination); - } - - private readonly struct NarrowConverter : IConverter - { - public static unsafe void Convert(ReadOnlySpan source, Span destination) - { - nuint asciiCharCount = 0; - - fixed (char* pValue = &MemoryMarshal.GetReference(source)) - fixed (byte* pNarrowed = &MemoryMarshal.GetReference(destination)) - { - asciiCharCount = NarrowUtf16ToAscii(pValue, pNarrowed, (nuint)source.Length); - } - - if (asciiCharCount != (nuint)source.Length) - { - ThrowNonAsciiFound(); - } - } - } - - private readonly struct WidenConverter : IConverter - { - public static unsafe void Convert(ReadOnlySpan source, Span destination) - { - nuint asciiCharCount = 0; - - fixed (byte* pValue = &MemoryMarshal.GetReference(source)) - fixed (char* pWidened = &MemoryMarshal.GetReference(destination)) - { - asciiCharCount = WidenAsciiToUtf16(pValue, pWidened, (nuint)source.Length); - } - - if (asciiCharCount != (nuint)source.Length) - { - ThrowNonAsciiFound(); - } - } - } - } -} diff --git a/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs b/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs index 4b4317327ae2b7..7bccca1776e4a0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Globalization/TextInfo.cs @@ -219,8 +219,8 @@ private unsafe void ChangeCaseCommon(ReadOnlySpan source, Spa if (IsAsciiCasingSameAsInvariant) { OperationStatus operationStatus = toUpper - ? Ascii.ToUpper(source, destination, out charsConsumed, out _) - : Ascii.ToLower(source, destination, out charsConsumed, out _); + ? 
Ascii.ToUpper(source, destination, out charsConsumed) + : Ascii.ToLower(source, destination, out charsConsumed); if (operationStatus != OperationStatus.InvalidData) { diff --git a/src/libraries/System.Private.CoreLib/src/System/String.cs b/src/libraries/System.Private.CoreLib/src/System/String.cs index 75319d3adcc61a..ca591f9019e8cb 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.cs @@ -692,7 +692,7 @@ public bool IsNormalized() public bool IsNormalized(NormalizationForm normalizationForm) { - if (Ascii.IsAscii(this)) + if (Ascii.IsValid(this)) { // If its ASCII && one of the 4 main forms, then its already normalized if (normalizationForm == NormalizationForm.FormC || @@ -711,7 +711,7 @@ public string Normalize() public string Normalize(NormalizationForm normalizationForm) { - if (Ascii.IsAscii(this)) + if (Ascii.IsValid(this)) { // If its ASCII && one of the 4 main forms, then its already normalized if (normalizationForm == NormalizationForm.FormC || diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIEncoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIEncoding.cs index ce0f0d5ee23dfc..7c07d363252d95 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIEncoding.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIEncoding.cs @@ -189,7 +189,7 @@ private protected sealed override unsafe int GetByteCountFast(char* pChars, int if (!(fallback is EncoderReplacementFallback replacementFallback && replacementFallback.MaxCharCount == 1 - && Ascii.IsAscii(replacementFallback.DefaultString[0]))) + && Ascii.IsValid(replacementFallback.DefaultString[0]))) { // Unrecognized fallback mechanism - count chars manually. 
@@ -355,8 +355,8 @@ private unsafe int GetBytesCommon(char* pChars, int charCount, byte* pBytes, int [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetBytesCommon private protected sealed override unsafe int GetBytesFast(char* pChars, int charsLength, byte* pBytes, int bytesLength, out int charsConsumed) { - Ascii.FromUtf16(new ReadOnlySpan(pChars, charsLength), new Span(pBytes, bytesLength), out charsConsumed, out int bytesWritten); - return bytesWritten; + Ascii.FromUtf16(new ReadOnlySpan(pChars, charsLength), new Span(pBytes, bytesLength), out charsConsumed); + return charsConsumed; } private protected sealed override unsafe int GetBytesWithFallback(ReadOnlySpan chars, int originalCharsLength, Span bytes, int originalBytesLength, EncoderNLS? encoder) @@ -367,7 +367,7 @@ private protected sealed override unsafe int GetBytesWithFallback(ReadOnlySpan(pBytes, bytesLength), new Span(pChars, charsLength), out bytesConsumed, out int charsWritten); - return charsWritten; + Ascii.ToUtf16(new ReadOnlySpan(pBytes, bytesLength), new Span(pChars, charsLength), out bytesConsumed); + return bytesConsumed; } private protected sealed override unsafe int GetCharsWithFallback(ReadOnlySpan bytes, int originalBytesLength, Span chars, int originalCharsLength, DecoderNLS? 
decoder) @@ -653,7 +653,7 @@ private protected sealed override unsafe int GetCharsWithFallback(ReadOnlySpan byte if (!bytes.IsEmpty) { byte b = bytes[0]; - if (Ascii.IsAscii(b)) + if (Ascii.IsValid(b)) { // ASCII byte diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.CaseConversion.cs similarity index 90% rename from src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs rename to src/libraries/System.Private.CoreLib/src/System/Text/Ascii.CaseConversion.cs index f1e548b5be8730..5a43117f586151 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.CaseConversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.CaseConversion.cs @@ -1,68 +1,68 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers; using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; -using System.Text; using System.Text.Unicode; -namespace System.Buffers.Text +namespace System.Text { public static partial class Ascii { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static OperationStatus ToUpper(ReadOnlySpan source, Span destination, out int bytesConsumed, out int bytesWritten) - => ChangeCase(source, destination, out bytesConsumed, out bytesWritten); + public static OperationStatus ToUpper(ReadOnlySpan source, Span destination, out int bytesWritten) + => ChangeCase(source, destination, out bytesWritten); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static OperationStatus ToUpper(ReadOnlySpan source, Span destination, out int charsConsumed, out int charsWritten) - => ChangeCase(MemoryMarshal.Cast(source), MemoryMarshal.Cast(destination), out 
charsConsumed, out charsWritten); + public static OperationStatus ToUpper(ReadOnlySpan source, Span destination, out int charsWritten) + => ChangeCase(MemoryMarshal.Cast(source), MemoryMarshal.Cast(destination), out charsWritten); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static OperationStatus ToUpper(ReadOnlySpan source, Span destination, out int bytesConsumed, out int charsWritten) - => ChangeCase(source, MemoryMarshal.Cast(destination), out bytesConsumed, out charsWritten); + public static OperationStatus ToUpper(ReadOnlySpan source, Span destination, out int charsWritten) + => ChangeCase(source, MemoryMarshal.Cast(destination), out charsWritten); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static OperationStatus ToUpper(ReadOnlySpan source, Span destination, out int charsConsumed, out int bytesWritten) - => ChangeCase(MemoryMarshal.Cast(source), destination, out charsConsumed, out bytesWritten); + public static OperationStatus ToUpper(ReadOnlySpan source, Span destination, out int bytesWritten) + => ChangeCase(MemoryMarshal.Cast(source), destination, out bytesWritten); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static OperationStatus ToLower(ReadOnlySpan source, Span destination, out int bytesConsumed, out int bytesWritten) - => ChangeCase(source, destination, out bytesConsumed, out bytesWritten); + public static OperationStatus ToLower(ReadOnlySpan source, Span destination, out int bytesWritten) + => ChangeCase(source, destination, out bytesWritten); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static OperationStatus ToLower(ReadOnlySpan source, Span destination, out int charsConsumed, out int charsWritten) - => ChangeCase(MemoryMarshal.Cast(source), MemoryMarshal.Cast(destination), out charsConsumed, out charsWritten); + public static OperationStatus ToLower(ReadOnlySpan source, Span destination, out int charsWritten) + => ChangeCase(MemoryMarshal.Cast(source), MemoryMarshal.Cast(destination), 
out charsWritten); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static OperationStatus ToLower(ReadOnlySpan source, Span destination, out int bytesConsumed, out int charsWritten) - => ChangeCase(source, MemoryMarshal.Cast(destination), out bytesConsumed, out charsWritten); + public static OperationStatus ToLower(ReadOnlySpan source, Span destination, out int charsWritten) + => ChangeCase(source, MemoryMarshal.Cast(destination), out charsWritten); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static OperationStatus ToLower(ReadOnlySpan source, Span destination, out int charsConsumed, out int bytesWritten) - => ChangeCase(MemoryMarshal.Cast(source), destination, out charsConsumed, out bytesWritten); + public static OperationStatus ToLower(ReadOnlySpan source, Span destination, out int bytesWritten) + => ChangeCase(MemoryMarshal.Cast(source), destination, out bytesWritten); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool TryToLowerInPlace(Span value, out int bytesProcessed) - => TryChangeCase(value, out bytesProcessed); + public static OperationStatus ToLowerInPlace(Span value, out int bytesWritten) + => ChangeCase(value, out bytesWritten); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool TryToLowerInPlace(Span value, out int charsProcessed) - => TryChangeCase(MemoryMarshal.Cast(value), out charsProcessed); + public static OperationStatus ToLowerInPlace(Span value, out int charsWritten) + => ChangeCase(MemoryMarshal.Cast(value), out charsWritten); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool TryToUpperInPlace(Span value, out int bytesProcessed) - => TryChangeCase(value, out bytesProcessed); + public static OperationStatus ToUpperInPlace(Span value, out int bytesWritten) + => ChangeCase(value, out bytesWritten); [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool TryToUpperInPlace(Span value, out int charsProcessed) - => 
TryChangeCase(MemoryMarshal.Cast(value), out charsProcessed); + public static OperationStatus ToUpperInPlace(Span value, out int charsWritten) + => ChangeCase(MemoryMarshal.Cast(value), out charsWritten); - private static unsafe OperationStatus ChangeCase(ReadOnlySpan source, Span destination, out int sourceElementsConsumed, out int destinationElementsWritten) + private static unsafe OperationStatus ChangeCase(ReadOnlySpan source, Span destination, out int destinationElementsWritten) where TFrom : unmanaged, IBinaryInteger where TTo : unmanaged, IBinaryInteger where TCasing : struct @@ -92,13 +92,12 @@ private static unsafe OperationStatus ChangeCase(ReadOnlySp nuint numElementsActuallyConverted = ChangeCase(pSource, pDestination, numElementsToConvert); Debug.Assert(numElementsActuallyConverted <= numElementsToConvert); - sourceElementsConsumed = (int)numElementsActuallyConverted; destinationElementsWritten = (int)numElementsActuallyConverted; return (numElementsToConvert == numElementsActuallyConverted) ? statusToReturnOnSuccess : OperationStatus.InvalidData; } } - private static unsafe bool TryChangeCase(Span buffer, out int elementsProcessed) + private static unsafe OperationStatus ChangeCase(Span buffer, out int elementsWritten) where T : unmanaged, IBinaryInteger where TCasing : struct { @@ -107,8 +106,8 @@ private static unsafe bool TryChangeCase(Span buffer, out int ele nuint numElementsActuallyConverted = ChangeCase(pBuffer, pBuffer, (nuint)buffer.Length); Debug.Assert(numElementsActuallyConverted <= (nuint)buffer.Length); - elementsProcessed = (int)numElementsActuallyConverted; - return elementsProcessed == buffer.Length; + elementsWritten = (int)numElementsActuallyConverted; + return elementsWritten == buffer.Length ? 
OperationStatus.Done : OperationStatus.InvalidData; } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Transcoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Transcoding.cs similarity index 84% rename from src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Transcoding.cs rename to src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Transcoding.cs index 42537ed372c070..0952598f5e0ec8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Ascii.Transcoding.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Transcoding.cs @@ -1,11 +1,11 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers; using System.Diagnostics; using System.Runtime.InteropServices; -using System.Text; -namespace System.Buffers.Text +namespace System.Text { public static partial class Ascii { @@ -15,10 +15,9 @@ public static partial class Ascii /// /// The source buffer from which ASCII text is read. /// The destination buffer to which UTF-16 text is written. - /// The number of bytes actually read from . - /// The number of chars actually written to . + /// The number of chars actually written to . It's the same as the number of bytes actually read from /// An describing the result of the operation. - public static unsafe OperationStatus ToUtf16(ReadOnlySpan source, Span destination, out int bytesConsumed, out int charsWritten) + public static unsafe OperationStatus ToUtf16(ReadOnlySpan source, Span destination, out int charsWritten) { nuint numElementsToConvert; OperationStatus statusToReturnOnSuccess; @@ -40,7 +39,6 @@ public static unsafe OperationStatus ToUtf16(ReadOnlySpan source, Span source, Span /// The source buffer from which UTF-16 text is read. /// The destination buffer to which ASCII text is written. - /// The number of chars actually read from . 
- /// The number of bytes actually written to . + /// The number of bytes actually written to . It's the same as the number of chars actually read from . /// An describing the result of the operation. - public static unsafe OperationStatus FromUtf16(ReadOnlySpan source, Span destination, out int charsConsumed, out int bytesWritten) + public static unsafe OperationStatus FromUtf16(ReadOnlySpan source, Span destination, out int bytesWritten) { nuint numElementsToConvert; OperationStatus statusToReturnOnSuccess; @@ -77,7 +74,6 @@ public static unsafe OperationStatus FromUtf16(ReadOnlySpan source, SpanThe buffer to scan. /// The index in where the first non-ASCII /// byte appears, or -1 if the buffer contains only ASCII bytes. - public static unsafe int GetIndexOfFirstNonAsciiByte(ReadOnlySpan buffer) + internal static unsafe int GetIndexOfFirstNonAsciiByte(ReadOnlySpan buffer) { if (buffer.IsEmpty) { @@ -36,7 +36,7 @@ public static unsafe int GetIndexOfFirstNonAsciiByte(ReadOnlySpan buffer) /// The buffer to scan. /// The index in where the first non-ASCII /// char appears, or -1 if the buffer contains only ASCII char. - public static unsafe int GetIndexOfFirstNonAsciiChar(ReadOnlySpan buffer) + internal static unsafe int GetIndexOfFirstNonAsciiChar(ReadOnlySpan buffer) { if (buffer.IsEmpty) { @@ -58,7 +58,7 @@ public static unsafe int GetIndexOfFirstNonAsciiChar(ReadOnlySpan buffer) /// The value to inspect. /// True if contains only ASCII bytes or is /// empty; False otherwise. - public static unsafe bool IsAscii(ReadOnlySpan value) => value.IsEmpty || GetIndexOfFirstNonAsciiByte(value) < 0; + public static unsafe bool IsValid(ReadOnlySpan value) => value.IsEmpty || GetIndexOfFirstNonAsciiByte(value) < 0; /// /// Determines whether the provided value contains only ASCII chars. @@ -66,20 +66,20 @@ public static unsafe int GetIndexOfFirstNonAsciiChar(ReadOnlySpan buffer) /// The value to inspect. /// True if contains only ASCII chars or is /// empty; False otherwise. 
- public static unsafe bool IsAscii(ReadOnlySpan value) => value.IsEmpty || GetIndexOfFirstNonAsciiChar(value) < 0; + public static unsafe bool IsValid(ReadOnlySpan value) => value.IsEmpty || GetIndexOfFirstNonAsciiChar(value) < 0; /// /// Determines whether the provided value is ASCII byte. /// /// The value to inspect. /// True if is ASCII, False otherwise. - public static unsafe bool IsAscii(byte value) => value <= 127; + public static unsafe bool IsValid(byte value) => value <= 127; /// /// Determines whether the provided value is ASCII char. /// /// The value to inspect. /// True if is ASCII, False otherwise. - public static unsafe bool IsAscii(char value) => value <= 127; + public static unsafe bool IsValid(char value) => value <= 127; } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs index 3d571437d91e8d..b1c908b15b73ea 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Transcoding.cs @@ -26,7 +26,7 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng Debug.Assert(pOutputBuffer != null || outputCharsRemaining == 0, "Destination length must be zero if destination buffer pointer is null."); // First, try vectorized conversion. - OperationStatus status = Ascii.ToUtf16(new ReadOnlySpan(pInputBuffer, inputLength), new Span(pOutputBuffer, outputCharsRemaining), out int bytesConsumed, out _); + OperationStatus status = Ascii.ToUtf16(new ReadOnlySpan(pInputBuffer, inputLength), new Span(pOutputBuffer, outputCharsRemaining), out int bytesConsumed); pInputBuffer += bytesConsumed; pOutputBuffer += bytesConsumed; @@ -846,7 +846,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt // First, try vectorized conversion. 
{ - OperationStatus status = Ascii.FromUtf16(new ReadOnlySpan(pInputBuffer, inputLength), new Span(pOutputBuffer, outputBytesRemaining), out int charsConsumed, out _); + OperationStatus status = Ascii.FromUtf16(new ReadOnlySpan(pInputBuffer, inputLength), new Span(pOutputBuffer, outputBytesRemaining), out int charsConsumed); pInputBuffer += charsConsumed; pOutputBuffer += charsConsumed; diff --git a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs index a1ec8bf6ac4834..e0a44a6d1ed420 100644 --- a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs @@ -1,7 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using System.Buffers.Text; using System.Diagnostics; using System.Globalization; using System.Runtime.CompilerServices; @@ -212,7 +211,7 @@ internal static string IdnEquivalent(string hostname) // check if only ascii chars // special case since idnmapping will not lowercase if only ascii present - bool allAscii = Ascii.IsAscii(hostname); + bool allAscii = Ascii.IsValid(hostname); if (allAscii) { diff --git a/src/libraries/System.Private.Uri/src/System/UriHelper.cs b/src/libraries/System.Private.Uri/src/System/UriHelper.cs index 4d57da7ce27a6c..9c50b5100e50d9 100644 --- a/src/libraries/System.Private.Uri/src/System/UriHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/UriHelper.cs @@ -1,7 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-using System.Buffers.Text; using System.Text; using System.Diagnostics; using System.Runtime.InteropServices; @@ -138,7 +137,7 @@ internal static string EscapeString( Debug.Assert(!noEscape['%'], "Need to treat % specially; it should be part of any escaped set"); int i = 0; char c; - for (; i < stringToEscape.Length && Ascii.IsAscii(c = stringToEscape[i]) && noEscape[c]; i++) ; + for (; i < stringToEscape.Length && Ascii.IsValid(c = stringToEscape[i]) && noEscape[c]; i++) ; if (i == stringToEscape.Length) { return stringToEscape; @@ -177,7 +176,7 @@ internal static unsafe void EscapeString(ReadOnlySpan stringToEscape, ref Debug.Assert(!noEscape['%'], "Need to treat % specially in case checkExistingEscaped is true"); int i = 0; char c; - for (; i < stringToEscape.Length && Ascii.IsAscii(c = stringToEscape[i]) && noEscape[c]; i++) ; + for (; i < stringToEscape.Length && Ascii.IsValid(c = stringToEscape[i]) && noEscape[c]; i++) ; if (i == stringToEscape.Length) { dest.Append(stringToEscape); diff --git a/src/libraries/System.Reflection.Metadata/src/System/Reflection/Internal/Utilities/MemoryBlock.cs b/src/libraries/System.Reflection.Metadata/src/System/Reflection/Internal/Utilities/MemoryBlock.cs index 21069b74c7370c..5931cb2043ab9e 100644 --- a/src/libraries/System.Reflection.Metadata/src/System/Reflection/Internal/Utilities/MemoryBlock.cs +++ b/src/libraries/System.Reflection.Metadata/src/System/Reflection/Internal/Utilities/MemoryBlock.cs @@ -452,9 +452,6 @@ internal bool Utf8NullTerminatedStringStartsWithAsciiPrefix(int offset, string a CheckBounds(offset, 0); -#if NET7_0_OR_GREATER - return Buffers.Text.Ascii.StartsWith(new ReadOnlySpan(Pointer + offset, Length - offset), asciiPrefix); -#else // Make sure that we won't read beyond the block even if the block doesn't end with 0 byte. 
if (asciiPrefix.Length > Length - offset) { @@ -476,7 +473,6 @@ internal bool Utf8NullTerminatedStringStartsWithAsciiPrefix(int offset, string a } return true; -#endif } internal int CompareUtf8NullTerminatedStringWithAsciiString(int offset, string asciiString) diff --git a/src/libraries/System.Runtime/ref/System.Runtime.cs b/src/libraries/System.Runtime/ref/System.Runtime.cs index 947e00d23abf48..acefb5d00cb9d9 100644 --- a/src/libraries/System.Runtime/ref/System.Runtime.cs +++ b/src/libraries/System.Runtime/ref/System.Runtime.cs @@ -7090,71 +7090,6 @@ public enum OperationStatus } namespace System.Buffers.Text { - public static class Ascii - { - public static System.Buffers.OperationStatus FromUtf16(System.ReadOnlySpan source, System.Span destination, out int charsConsumed, out int bytesWritten) { throw null; } - public static int GetIndexOfFirstNonAsciiByte(System.ReadOnlySpan buffer) { throw null; } - public static int GetIndexOfFirstNonAsciiChar(System.ReadOnlySpan buffer) { throw null; } - public static int IndexOf(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static int IndexOf(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static int IndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static int IndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static int IndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static int IndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static int LastIndexOf(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static int LastIndexOf(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static int LastIndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static int 
LastIndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static int LastIndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static int LastIndexOfIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static bool IsAscii(System.ReadOnlySpan value) { throw null; } - public static bool IsAscii(System.ReadOnlySpan value) { throw null; } - public static bool IsAscii(byte value) { throw null; } - public static bool IsAscii(char value) { throw null; } - public static bool TryGetHashCode(System.ReadOnlySpan value, out int hashCode) { throw null; } - public static bool TryGetHashCode(System.ReadOnlySpan value, out int hashCode) { throw null; } - public static bool TryGetHashCodeIgnoreCase(System.ReadOnlySpan value, out int hashCode) { throw null; } - public static bool TryGetHashCodeIgnoreCase(System.ReadOnlySpan value, out int hashCode) { throw null; } - public static int GetHashCode(System.ReadOnlySpan value) { throw null; } - public static int GetHashCode(System.ReadOnlySpan value) { throw null; } - public static int GetHashCodeIgnoreCase(System.ReadOnlySpan value) { throw null; } - public static int GetHashCodeIgnoreCase(System.ReadOnlySpan value) { throw null; } - public static bool Equals(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } - public static bool EqualsIgnoreCase(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } - public static bool EqualsIgnoreCase(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } - public static bool EqualsIgnoreCase(System.ReadOnlySpan left, System.ReadOnlySpan right) { throw null; } - public static bool EndsWith(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static bool EndsWith(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static bool EndsWithIgnoreCase(System.ReadOnlySpan text, 
System.ReadOnlySpan value) { throw null; } - public static bool EndsWithIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static bool EndsWithIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static bool EndsWithIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static bool StartsWith(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static bool StartsWith(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static bool StartsWithIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static bool StartsWithIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static bool StartsWithIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static bool StartsWithIgnoreCase(System.ReadOnlySpan text, System.ReadOnlySpan value) { throw null; } - public static System.Buffers.OperationStatus ToLower(System.ReadOnlySpan source, System.Span destination, out int bytesConsumed, out int bytesWritten) { throw null; } - public static System.Buffers.OperationStatus ToLower(System.ReadOnlySpan source, System.Span destination, out int charsConsumed, out int charsWritten) { throw null; } - public static System.Buffers.OperationStatus ToLower(System.ReadOnlySpan source, System.Span destination, out int bytesConsumed, out int charsWritten) { throw null; } - public static System.Buffers.OperationStatus ToLower(System.ReadOnlySpan source, System.Span destination, out int charsConsumed, out int bytesWritten) { throw null; } - public static System.Buffers.OperationStatus ToUpper(System.ReadOnlySpan source, System.Span destination, out int bytesConsumed, out int bytesWritten) { throw null; } - public static System.Buffers.OperationStatus ToUpper(System.ReadOnlySpan source, System.Span destination, out int charsConsumed, out int 
charsWritten) { throw null; } - public static System.Buffers.OperationStatus ToUpper(System.ReadOnlySpan source, System.Span destination, out int bytesConsumed, out int charsWritten) { throw null; } - public static System.Buffers.OperationStatus ToUpper(System.ReadOnlySpan source, System.Span destination, out int charsConsumed, out int bytesWritten) { throw null; } - public static System.Buffers.OperationStatus ToUtf16(System.ReadOnlySpan source, System.Span destination, out int bytesConsumed, out int charsWritten) { throw null; } - public static bool TryToLowerInPlace(System.Span value, out int bytesProcessed) { throw null; } - public static bool TryToLowerInPlace(System.Span value, out int charsProcessed) { throw null; } - public static bool TryToUpperInPlace(System.Span value, out int bytesProcessed) { throw null; } - public static bool TryToUpperInPlace(System.Span value, out int charsProcessed) { throw null; } - public static System.Range Trim(System.ReadOnlySpan value) { throw null; } - public static System.Range Trim(System.ReadOnlySpan value) { throw null; } - public static System.Range TrimEnd(System.ReadOnlySpan value) { throw null; } - public static System.Range TrimEnd(System.ReadOnlySpan value) { throw null; } - public static System.Range TrimStart(System.ReadOnlySpan value) { throw null; } - public static System.Range TrimStart(System.ReadOnlySpan value) { throw null; } - } public static partial class Base64 { public static System.Buffers.OperationStatus DecodeFromUtf8(System.ReadOnlySpan utf8, System.Span bytes, out int bytesConsumed, out int bytesWritten, bool isFinalBlock = true) { throw null; } @@ -13843,6 +13778,33 @@ public enum TokenImpersonationLevel } namespace System.Text { + public static class Ascii + { + public static bool IsValid(System.ReadOnlySpan value) { throw null; } + public static bool IsValid(System.ReadOnlySpan value) { throw null; } + public static bool IsValid(byte value) { throw null; } + public static bool IsValid(char 
value) { throw null; } + public static System.Buffers.OperationStatus ToLower(System.ReadOnlySpan source, System.Span destination, out int bytesWritten) { throw null; } + public static System.Buffers.OperationStatus ToLower(System.ReadOnlySpan source, System.Span destination, out int charsWritten) { throw null; } + public static System.Buffers.OperationStatus ToLower(System.ReadOnlySpan source, System.Span destination, out int charsWritten) { throw null; } + public static System.Buffers.OperationStatus ToLower(System.ReadOnlySpan source, System.Span destination, out int bytesWritten) { throw null; } + public static System.Buffers.OperationStatus ToUpper(System.ReadOnlySpan source, System.Span destination, out int bytesWritten) { throw null; } + public static System.Buffers.OperationStatus ToUpper(System.ReadOnlySpan source, System.Span destination, out int charsWritten) { throw null; } + public static System.Buffers.OperationStatus ToUpper(System.ReadOnlySpan source, System.Span destination, out int charsWritten) { throw null; } + public static System.Buffers.OperationStatus ToUpper(System.ReadOnlySpan source, System.Span destination, out int bytesWritten) { throw null; } + public static System.Buffers.OperationStatus ToLowerInPlace(System.Span value, out int bytesWritten) { throw null; } + public static System.Buffers.OperationStatus ToLowerInPlace(System.Span value, out int charsWritten) { throw null; } + public static System.Buffers.OperationStatus ToUpperInPlace(System.Span value, out int bytesWritten) { throw null; } + public static System.Buffers.OperationStatus ToUpperInPlace(System.Span value, out int charsWritten) { throw null; } + public static System.Buffers.OperationStatus FromUtf16(System.ReadOnlySpan source, System.Span destination, out int bytesWritten) { throw null; } + public static System.Buffers.OperationStatus ToUtf16(System.ReadOnlySpan source, System.Span destination, out int charsWritten) { throw null; } + public static System.Range 
Trim(System.ReadOnlySpan value) { throw null; } + public static System.Range Trim(System.ReadOnlySpan value) { throw null; } + public static System.Range TrimEnd(System.ReadOnlySpan value) { throw null; } + public static System.Range TrimEnd(System.ReadOnlySpan value) { throw null; } + public static System.Range TrimStart(System.ReadOnlySpan value) { throw null; } + public static System.Range TrimStart(System.ReadOnlySpan value) { throw null; } + } public abstract partial class Decoder { protected Decoder() { } diff --git a/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingDecode.cs b/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingDecode.cs index 4c99abe12a5bb5..1c9df742c6b6cf 100644 --- a/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingDecode.cs +++ b/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingDecode.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Buffers; -using System.Buffers.Text; using System.Collections.Generic; using System.Linq; using Xunit; @@ -41,16 +40,14 @@ public void Decode(byte[] bytes, int index, int count) EncodingHelpers.Decode(exceptionEncoding, bytes, index, count, expected); char[] actual = new char[expected.Length]; - Assert.Equal(OperationStatus.Done, Ascii.ToUtf16(bytes.AsSpan(index, count), actual, out int bytesConsumed, out int charsWritten)); - Assert.Equal(count, bytesConsumed); + Assert.Equal(OperationStatus.Done, Ascii.ToUtf16(bytes.AsSpan(index, count), actual, out int charsWritten)); Assert.Equal(expected.Length, charsWritten); Assert.Equal(expected, new string(actual.AsSpan(0, charsWritten))); if (expected.Length > 1) { actual = new char[expected.Length - 1]; - Assert.Equal(OperationStatus.DestinationTooSmall, Ascii.ToUtf16(bytes.AsSpan(index, count), actual, out bytesConsumed, out charsWritten)); - Assert.Equal(count - 1, bytesConsumed); + Assert.Equal(OperationStatus.DestinationTooSmall, 
Ascii.ToUtf16(bytes.AsSpan(index, count), actual, out charsWritten)); Assert.Equal(expected.Length - 1, charsWritten); Assert.Equal(expected.Substring(0, expected.Length - 1), new string(actual.AsSpan(0, charsWritten))); } @@ -82,9 +79,8 @@ public void Decode_InvalidBytes(byte[] bytes, int index, int count, int expected NegativeEncodingTests.Decode_Invalid(exceptionEncoding, bytes, index, count); char[] actual = new char[expected.Length]; - Assert.Equal(OperationStatus.InvalidData, Ascii.ToUtf16(bytes.AsSpan(index, count), actual, out int bytesConsumed, out int charsWritten)); - Assert.Equal(expectedBytesConsumed, bytesConsumed); - Assert.Equal(bytesConsumed, charsWritten); + Assert.Equal(OperationStatus.InvalidData, Ascii.ToUtf16(bytes.AsSpan(index, count), actual, out int charsWritten)); + Assert.Equal(expectedBytesConsumed, charsWritten); Assert.Equal(expected.Take(charsWritten).ToArray(), actual.Take(charsWritten).ToArray()); } diff --git a/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingEncode.cs b/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingEncode.cs index c8dc4239abfb10..c4bff337ca37a8 100644 --- a/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingEncode.cs +++ b/src/libraries/System.Text.Encoding/tests/ASCIIEncoding/ASCIIEncodingEncode.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. 
using System.Buffers; -using System.Buffers.Text; using System.Collections.Generic; using System.Linq; using Xunit; @@ -47,16 +46,14 @@ public void Encode(string source, int index, int count) EncodingHelpers.Encode(exceptionEncoding, source, index, count, expected); byte[] actual = new byte[expected.Length]; - Assert.Equal(OperationStatus.Done , Ascii.FromUtf16(source.AsSpan(index, count), actual, out int charsConsumed, out int bytesWritten)); - Assert.Equal(count, charsConsumed); + Assert.Equal(OperationStatus.Done , Ascii.FromUtf16(source.AsSpan(index, count), actual, out int bytesWritten)); Assert.Equal(expected.Length, bytesWritten); Assert.Equal(expected, actual.Take(bytesWritten).ToArray()); if (expected.Length > 1) { actual = new byte[expected.Length - 1]; - Assert.Equal(OperationStatus.DestinationTooSmall, Ascii.FromUtf16(source.AsSpan(index, count), actual, out charsConsumed, out bytesWritten)); - Assert.Equal(count - 1, charsConsumed); + Assert.Equal(OperationStatus.DestinationTooSmall, Ascii.FromUtf16(source.AsSpan(index, count), actual, out bytesWritten)); Assert.Equal(expected.Length - 1, bytesWritten); Assert.Equal(expected.Take(bytesWritten).ToArray(), actual.Take(bytesWritten).ToArray()); } @@ -110,9 +107,8 @@ public void Encode_InvalidChars(string source, int index, int count, int expecte NegativeEncodingTests.Encode_Invalid(exceptionEncoding, source, index, count); byte[] actual = new byte[expected.Length]; - Assert.Equal(OperationStatus.InvalidData, Ascii.FromUtf16(source.AsSpan(index, count), actual, out int charsConsumed, out int bytesWritten)); - Assert.Equal(expectedCharsConsumed, charsConsumed); - Assert.Equal(charsConsumed, bytesWritten); + Assert.Equal(OperationStatus.InvalidData, Ascii.FromUtf16(source.AsSpan(index, count), actual, out int bytesWritten)); + Assert.Equal(expectedCharsConsumed, bytesWritten); Assert.Equal(expected.Take(bytesWritten).ToArray(), actual.Take(bytesWritten).ToArray()); } diff --git 
a/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs b/src/libraries/System.Text.Encoding/tests/Ascii/CaseConversionTests.cs similarity index 65% rename from src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs rename to src/libraries/System.Text.Encoding/tests/Ascii/CaseConversionTests.cs index d902e4a7e774cb..ceb836ff268a41 100644 --- a/src/libraries/System.Memory/tests/Ascii/CaseConversionTests.cs +++ b/src/libraries/System.Text.Encoding/tests/Ascii/CaseConversionTests.cs @@ -1,12 +1,13 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers; using System.Collections.Generic; using System.Linq; using System.Runtime.InteropServices; using Xunit; -namespace System.Buffers.Text.Tests +namespace System.Text.Tests { public static class CaseConversionTests { @@ -19,28 +20,28 @@ public static void OverlappingBuffers_Throws() char[] charBuffer = new char[10]; // byte -> byte - Assert.Throws(() => Ascii.ToLower(byteBuffer, byteBuffer, out _, out _)); - Assert.Throws(() => Ascii.ToLower(byteBuffer.AsSpan(1, 3), byteBuffer.AsSpan(3, 5), out _, out _)); - Assert.Throws(() => Ascii.ToUpper(byteBuffer, byteBuffer, out _, out _)); - Assert.Throws(() => Ascii.ToUpper(byteBuffer.AsSpan(1, 3), byteBuffer.AsSpan(3, 5), out _, out _)); + Assert.Throws(() => Ascii.ToLower(byteBuffer, byteBuffer, out _)); + Assert.Throws(() => Ascii.ToLower(byteBuffer.AsSpan(1, 3), byteBuffer.AsSpan(3, 5), out _)); + Assert.Throws(() => Ascii.ToUpper(byteBuffer, byteBuffer, out _)); + Assert.Throws(() => Ascii.ToUpper(byteBuffer.AsSpan(1, 3), byteBuffer.AsSpan(3, 5), out _)); // byte -> char - Assert.Throws(() => Ascii.ToLower(byteBuffer, MemoryMarshal.Cast(byteBuffer), out _, out _)); - Assert.Throws(() => Ascii.ToLower(byteBuffer, MemoryMarshal.Cast(byteBuffer).Slice(1, 3), out _, out _)); - Assert.Throws(() => Ascii.ToUpper(byteBuffer, MemoryMarshal.Cast(byteBuffer), out 
_, out _)); - Assert.Throws(() => Ascii.ToUpper(byteBuffer, MemoryMarshal.Cast(byteBuffer).Slice(1, 3), out _, out _)); + Assert.Throws(() => Ascii.ToLower(byteBuffer, MemoryMarshal.Cast(byteBuffer), out _)); + Assert.Throws(() => Ascii.ToLower(byteBuffer, MemoryMarshal.Cast(byteBuffer).Slice(1, 3), out _)); + Assert.Throws(() => Ascii.ToUpper(byteBuffer, MemoryMarshal.Cast(byteBuffer), out _)); + Assert.Throws(() => Ascii.ToUpper(byteBuffer, MemoryMarshal.Cast(byteBuffer).Slice(1, 3), out _)); // char -> char - Assert.Throws(() => Ascii.ToLower(charBuffer, charBuffer, out _, out _)); - Assert.Throws(() => Ascii.ToLower(charBuffer.AsSpan(1, 3), charBuffer.AsSpan(3, 5), out _, out _)); - Assert.Throws(() => Ascii.ToUpper(charBuffer, charBuffer, out _, out _)); - Assert.Throws(() => Ascii.ToUpper(charBuffer.AsSpan(1, 3), charBuffer.AsSpan(3, 5), out _, out _)); + Assert.Throws(() => Ascii.ToLower(charBuffer, charBuffer, out _)); + Assert.Throws(() => Ascii.ToLower(charBuffer.AsSpan(1, 3), charBuffer.AsSpan(3, 5), out _)); + Assert.Throws(() => Ascii.ToUpper(charBuffer, charBuffer, out _)); + Assert.Throws(() => Ascii.ToUpper(charBuffer.AsSpan(1, 3), charBuffer.AsSpan(3, 5), out _)); // char -> byte - Assert.Throws(() => Ascii.ToLower(charBuffer, MemoryMarshal.Cast(charBuffer), out _, out _)); - Assert.Throws(() => Ascii.ToLower(charBuffer, MemoryMarshal.Cast(charBuffer).Slice(1, 3), out _, out _)); - Assert.Throws(() => Ascii.ToUpper(charBuffer, MemoryMarshal.Cast(charBuffer), out _, out _)); - Assert.Throws(() => Ascii.ToUpper(charBuffer, MemoryMarshal.Cast(charBuffer).Slice(1, 3), out _, out _)); + Assert.Throws(() => Ascii.ToLower(charBuffer, MemoryMarshal.Cast(charBuffer), out _)); + Assert.Throws(() => Ascii.ToLower(charBuffer, MemoryMarshal.Cast(charBuffer).Slice(1, 3), out _)); + Assert.Throws(() => Ascii.ToUpper(charBuffer, MemoryMarshal.Cast(charBuffer), out _)); + Assert.Throws(() => Ascii.ToUpper(charBuffer, MemoryMarshal.Cast(charBuffer).Slice(1, 3), out 
_)); } - private static void VerifySingleChar(OperationStatus status, int value, T expected, T actual, int consumed, int written) + private static void VerifySingleChar(OperationStatus status, int value, T expected, T actual, int written) { Assert.True(typeof(T) == typeof(char) || typeof(T) == typeof(byte)); @@ -48,14 +49,12 @@ private static void VerifySingleChar(OperationStatus status, int value, T exp { Assert.Equal(OperationStatus.Done, status); Assert.Equal(expected, actual); - Assert.Equal(1, consumed); Assert.Equal(1, written); } else { Assert.Equal(OperationStatus.InvalidData, status); Assert.Equal(default, actual); - Assert.Equal(0, consumed); Assert.Equal(0, written); } } @@ -75,14 +74,14 @@ public static void SingleByteConversion() // byte -> byte destinationByte[0] = default; - VerifySingleChar(Ascii.ToLower(sourceByte, destinationByte, out int consumed, out int written), i, expectedToLower, destinationByte[0], consumed, written); + VerifySingleChar(Ascii.ToLower(sourceByte, destinationByte, out int written), i, expectedToLower, destinationByte[0], written); destinationByte[0] = default; - VerifySingleChar(Ascii.ToUpper(sourceByte, destinationByte, out consumed, out written), i, expectedToUpper, destinationByte[0], consumed, written); + VerifySingleChar(Ascii.ToUpper(sourceByte, destinationByte, out written), i, expectedToUpper, destinationByte[0], written); // byte -> char destinationChar[0] = default; - VerifySingleChar(Ascii.ToLower(sourceByte, destinationChar, out consumed, out written), i, (char)expectedToLower, destinationChar[0], consumed, written); + VerifySingleChar(Ascii.ToLower(sourceByte, destinationChar, out written), i, (char)expectedToLower, destinationChar[0], written); destinationChar[0] = default; - VerifySingleChar(Ascii.ToUpper(sourceByte, destinationChar, out consumed, out written), i, (char)expectedToUpper, destinationChar[0], consumed, written); + VerifySingleChar(Ascii.ToUpper(sourceByte, destinationChar, out written), i, 
(char)expectedToUpper, destinationChar[0], written); } } @@ -101,14 +100,14 @@ public static void SingleCharConversion() // char -> char destinationChar[0] = default; - VerifySingleChar(Ascii.ToLower(sourceChar, destinationChar, out int consumed, out int written), i, expectedLower, destinationChar[0], consumed, written); + VerifySingleChar(Ascii.ToLower(sourceChar, destinationChar, out int written), i, expectedLower, destinationChar[0], written); destinationChar[0] = default; - VerifySingleChar(Ascii.ToUpper(sourceChar, destinationChar, out consumed, out written), i, expectedUpper, destinationChar[0], consumed, written); + VerifySingleChar(Ascii.ToUpper(sourceChar, destinationChar, out written), i, expectedUpper, destinationChar[0], written); // char -> byte destinationByte[0] = default; - VerifySingleChar(Ascii.ToLower(sourceChar, destinationByte, out consumed, out written), i, (byte)expectedLower, destinationByte[0], consumed, written); + VerifySingleChar(Ascii.ToLower(sourceChar, destinationByte, out written), i, (byte)expectedLower, destinationByte[0], written); destinationByte[0] = default; - VerifySingleChar(Ascii.ToUpper(sourceChar, destinationByte, out consumed, out written), i, (byte)expectedUpper, destinationByte[0], consumed, written); + VerifySingleChar(Ascii.ToUpper(sourceChar, destinationByte, out written), i, (byte)expectedUpper, destinationByte[0], written); } } @@ -127,37 +126,30 @@ public static void InvalidCharacters(string sourceChars) } // char => char - VerifyStatus(Ascii.ToLower(sourceChars, destinationChars, out int consumed, out int written), consumed, written); - VerifyStatus(Ascii.ToUpper(sourceChars, destinationChars, out consumed, out written), consumed, written); + VerifyStatus(Ascii.ToLower(sourceChars, destinationChars, out int written), written); + VerifyStatus(Ascii.ToUpper(sourceChars, destinationChars, out written), written); // char => byte - VerifyStatus(Ascii.ToLower(sourceChars, destinationBytes, out consumed, out written), 
consumed, written); - VerifyStatus(Ascii.ToUpper(sourceChars, destinationBytes, out consumed, out written), consumed, written); + VerifyStatus(Ascii.ToLower(sourceChars, destinationBytes, out written), written); + VerifyStatus(Ascii.ToUpper(sourceChars, destinationBytes, out written), written); // byte => byte - VerifyStatus(Ascii.ToLower(sourceBytes, destinationBytes, out consumed, out written), consumed, written); - VerifyStatus(Ascii.ToUpper(sourceBytes, destinationBytes, out consumed, out written), consumed, written); + VerifyStatus(Ascii.ToLower(sourceBytes, destinationBytes, out written), written); + VerifyStatus(Ascii.ToUpper(sourceBytes, destinationBytes, out written), written); // byte => char - VerifyStatus(Ascii.ToLower(sourceBytes, destinationChars, out consumed, out written), consumed, written); - VerifyStatus(Ascii.ToUpper(sourceBytes, destinationChars, out consumed, out written), consumed, written); + VerifyStatus(Ascii.ToLower(sourceBytes, destinationChars, out written), written); + VerifyStatus(Ascii.ToUpper(sourceBytes, destinationChars, out written), written); - // Try(byte) - VerifyBool(Ascii.TryToLowerInPlace(sourceBytes, out int processed), processed); - VerifyBool(Ascii.TryToUpperInPlace(sourceBytes, out processed), processed); - // Try(char) - VerifyBool(Ascii.TryToLowerInPlace(sourceChars.ToCharArray(), out processed), processed); - VerifyBool(Ascii.TryToUpperInPlace(sourceChars.ToCharArray(), out processed), processed); + // InPlace(byte) + VerifyStatus(Ascii.ToLowerInPlace(sourceBytes, out int processed), processed); + VerifyStatus(Ascii.ToUpperInPlace(sourceBytes, out processed), processed); + // InPlace(char) + VerifyStatus(Ascii.ToLowerInPlace(sourceChars.ToCharArray(), out processed), processed); + VerifyStatus(Ascii.ToUpperInPlace(sourceChars.ToCharArray(), out processed), processed); - static void VerifyStatus(OperationStatus status, int consumed, int written) + static void VerifyStatus(OperationStatus status, int written) { 
Assert.Equal(OperationStatus.InvalidData, status); - Assert.Equal(0, consumed); Assert.Equal(0, written); } - - static void VerifyBool(bool result, int processed) - { - Assert.False(result); - Assert.Equal(0, processed); - } } public static IEnumerable MultipleValidCharacterConversion_Arguments @@ -189,50 +181,42 @@ public static void MultipleValidCharacterConversion(string sourceChars, string e Assert.Equal(sourceChars.Length, expectedLowerChars.Length); Assert.Equal(expectedLowerChars.Length, expectedUpperChars.Length); - byte[] sourceBytes = System.Text.Encoding.ASCII.GetBytes(sourceChars); - byte[] expectedLowerBytes = System.Text.Encoding.ASCII.GetBytes(expectedLowerChars); - byte[] expectedUpperBytes = System.Text.Encoding.ASCII.GetBytes(expectedUpperChars); + byte[] sourceBytes = Encoding.ASCII.GetBytes(sourceChars); + byte[] expectedLowerBytes = Encoding.ASCII.GetBytes(expectedLowerChars); + byte[] expectedUpperBytes = Encoding.ASCII.GetBytes(expectedUpperChars); char[] destinationChars = new char[expectedLowerChars.Length]; byte[] destinationBytes = new byte[expectedLowerChars.Length]; // char -> char - VerifyStatus(Ascii.ToLower(sourceChars, destinationChars, out int consumed, out int written), expectedLowerChars, destinationChars, consumed, written); - VerifyStatus(Ascii.ToUpper(sourceChars, destinationChars, out consumed, out written), expectedUpperChars, destinationChars, consumed, written); + VerifyStatus(Ascii.ToLower(sourceChars, destinationChars, out int written), expectedLowerChars, destinationChars, written); + VerifyStatus(Ascii.ToUpper(sourceChars, destinationChars, out written), expectedUpperChars, destinationChars, written); // char -> byte - VerifyStatus(Ascii.ToLower(sourceChars, destinationBytes, out consumed, out written), expectedLowerBytes, destinationBytes, consumed, written); - VerifyStatus(Ascii.ToUpper(sourceChars, destinationBytes, out consumed, out written), expectedUpperBytes, destinationBytes, consumed, written); + 
VerifyStatus(Ascii.ToLower(sourceChars, destinationBytes, out written), expectedLowerBytes, destinationBytes, written); + VerifyStatus(Ascii.ToUpper(sourceChars, destinationBytes, out written), expectedUpperBytes, destinationBytes, written); // byte -> byte - VerifyStatus(Ascii.ToLower(sourceBytes, destinationBytes, out consumed, out written), expectedLowerBytes, destinationBytes, consumed, written); - VerifyStatus(Ascii.ToUpper(sourceBytes, destinationBytes, out consumed, out written), expectedUpperBytes, destinationBytes, consumed, written); + VerifyStatus(Ascii.ToLower(sourceBytes, destinationBytes, out written), expectedLowerBytes, destinationBytes, written); + VerifyStatus(Ascii.ToUpper(sourceBytes, destinationBytes, out written), expectedUpperBytes, destinationBytes, written); // byte -> char - VerifyStatus(Ascii.ToLower(sourceBytes, destinationChars, out consumed, out written), expectedLowerChars, destinationChars, consumed, written); - VerifyStatus(Ascii.ToUpper(sourceBytes, destinationChars, out consumed, out written), expectedUpperChars, destinationChars, consumed, written); + VerifyStatus(Ascii.ToLower(sourceBytes, destinationChars, out written), expectedLowerChars, destinationChars, written); + VerifyStatus(Ascii.ToUpper(sourceBytes, destinationChars, out written), expectedUpperChars, destinationChars, written); - // Try(byte) + // InPlace(byte) byte[] sourceBytesCopy = sourceBytes.ToArray(); - VerifyBool(Ascii.TryToLowerInPlace(sourceBytesCopy, out int processed), processed, expectedLowerBytes, sourceBytesCopy); + VerifyStatus(Ascii.ToLowerInPlace(sourceBytesCopy, out int processed), expectedLowerBytes, sourceBytesCopy, processed); sourceBytesCopy = sourceBytes.ToArray(); - VerifyBool(Ascii.TryToUpperInPlace(sourceBytesCopy, out processed), processed, expectedUpperBytes, sourceBytesCopy); - // Try(char) + VerifyStatus(Ascii.ToUpperInPlace(sourceBytesCopy, out processed), expectedUpperBytes, sourceBytesCopy, processed); + // InPlace(char) char[] 
sourceCharsCopy = sourceChars.ToCharArray(); - VerifyBool(Ascii.TryToLowerInPlace(sourceCharsCopy, out processed), processed, expectedLowerChars.ToCharArray(), sourceCharsCopy); + VerifyStatus(Ascii.ToLowerInPlace(sourceCharsCopy, out processed), expectedLowerChars.ToCharArray(), sourceCharsCopy, processed); sourceCharsCopy = sourceChars.ToCharArray(); - VerifyBool(Ascii.TryToUpperInPlace(sourceCharsCopy, out processed), processed, expectedUpperChars.ToCharArray(), sourceCharsCopy); + VerifyStatus(Ascii.ToUpperInPlace(sourceCharsCopy, out processed), expectedUpperChars.ToCharArray(), sourceCharsCopy, processed); - static void VerifyStatus(OperationStatus status, ReadOnlySpan expected, ReadOnlySpan actual, int consumed, int written) + static void VerifyStatus(OperationStatus status, ReadOnlySpan expected, ReadOnlySpan actual, int written) { Assert.Equal(OperationStatus.Done, status); - Assert.Equal(expected.Length, consumed); Assert.Equal(expected.Length, written); Assert.Equal(expected.ToArray(), actual.ToArray()); } - - static void VerifyBool(bool result, int processed, T[] expected, T[] actual) - { - Assert.True(result); - Assert.Equal(expected.Length, processed); - Assert.Equal(expected, actual); - } } [Theory] @@ -244,29 +228,28 @@ public static void DestinationTooSmall(string sourceChars, int destinationSize, Assert.Equal(destinationSize, expectedLowerChars.Length); Assert.Equal(expectedLowerChars.Length, expectedUpperChars.Length); - byte[] sourceBytes = System.Text.Encoding.ASCII.GetBytes(sourceChars); - byte[] expectedLowerBytes = System.Text.Encoding.ASCII.GetBytes(expectedLowerChars); - byte[] expectedUpperBytes = System.Text.Encoding.ASCII.GetBytes(expectedUpperChars); + byte[] sourceBytes = Encoding.ASCII.GetBytes(sourceChars); + byte[] expectedLowerBytes = Encoding.ASCII.GetBytes(expectedLowerChars); + byte[] expectedUpperBytes = Encoding.ASCII.GetBytes(expectedUpperChars); char[] destinationChars = new char[destinationSize]; byte[] destinationBytes = 
new byte[destinationSize]; // char -> char - Verify(Ascii.ToLower(sourceChars, destinationChars, out int consumed, out int written), expectedLowerChars, destinationChars, consumed, written); - Verify(Ascii.ToUpper(sourceChars, destinationChars, out consumed, out written), expectedUpperChars, destinationChars, consumed, written); + Verify(Ascii.ToLower(sourceChars, destinationChars, out int written), expectedLowerChars, destinationChars, written); + Verify(Ascii.ToUpper(sourceChars, destinationChars, out written), expectedUpperChars, destinationChars, written); // char -> byte - Verify(Ascii.ToLower(sourceChars, destinationBytes, out consumed, out written), expectedLowerBytes, destinationBytes, consumed, written); - Verify(Ascii.ToUpper(sourceChars, destinationBytes, out consumed, out written), expectedUpperBytes, destinationBytes, consumed, written); + Verify(Ascii.ToLower(sourceChars, destinationBytes, out written), expectedLowerBytes, destinationBytes, written); + Verify(Ascii.ToUpper(sourceChars, destinationBytes, out written), expectedUpperBytes, destinationBytes, written); // byte -> byte - Verify(Ascii.ToLower(sourceBytes, destinationBytes, out consumed, out written), expectedLowerBytes, destinationBytes, consumed, written); - Verify(Ascii.ToUpper(sourceBytes, destinationBytes, out consumed, out written), expectedUpperBytes, destinationBytes, consumed, written); + Verify(Ascii.ToLower(sourceBytes, destinationBytes, out written), expectedLowerBytes, destinationBytes, written); + Verify(Ascii.ToUpper(sourceBytes, destinationBytes, out written), expectedUpperBytes, destinationBytes, written); // byte -> char - Verify(Ascii.ToLower(sourceBytes, destinationChars, out consumed, out written), expectedLowerChars, destinationChars, consumed, written); - Verify(Ascii.ToUpper(sourceBytes, destinationChars, out consumed, out written), expectedUpperChars, destinationChars, consumed, written); + Verify(Ascii.ToLower(sourceBytes, destinationChars, out written), 
expectedLowerChars, destinationChars, written); + Verify(Ascii.ToUpper(sourceBytes, destinationChars, out written), expectedUpperChars, destinationChars, written); - static void Verify(OperationStatus status, ReadOnlySpan expected, ReadOnlySpan actual, int consumed, int written) + static void Verify(OperationStatus status, ReadOnlySpan expected, ReadOnlySpan actual, int written) { Assert.Equal(OperationStatus.DestinationTooSmall, status); - Assert.Equal(actual.Length, consumed); Assert.Equal(actual.Length, written); Assert.Equal(expected.ToArray(), actual.ToArray()); } diff --git a/src/libraries/System.Memory/tests/Ascii/FromUtf16Tests.cs b/src/libraries/System.Text.Encoding/tests/Ascii/FromUtf16Tests.cs similarity index 84% rename from src/libraries/System.Memory/tests/Ascii/FromUtf16Tests.cs rename to src/libraries/System.Text.Encoding/tests/Ascii/FromUtf16Tests.cs index b1e221f3195c1d..80a70042abb804 100644 --- a/src/libraries/System.Memory/tests/Ascii/FromUtf16Tests.cs +++ b/src/libraries/System.Text.Encoding/tests/Ascii/FromUtf16Tests.cs @@ -1,19 +1,19 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers; using System.Security.Cryptography; using Xunit; -namespace System.Buffers.Text.Tests +namespace System.Text.Tests { public static class FromUtf16Tests { [Fact] public static unsafe void EmptyInputs() { - Assert.Equal(OperationStatus.Done, Ascii.FromUtf16(ReadOnlySpan.Empty, Span.Empty, out int charsConsumed, out int bytesWritten)); - Assert.Equal(0, charsConsumed); - Assert.Equal(charsConsumed, bytesWritten); + Assert.Equal(OperationStatus.Done, Ascii.FromUtf16(ReadOnlySpan.Empty, Span.Empty, out int bytesWritten)); + Assert.Equal(0, bytesWritten); } [Fact] @@ -41,9 +41,8 @@ public static void AllAsciiInput() asciiSpan.Clear(); // remove any data from previous iteration // First, validate that the workhorse saw the incoming data as all-ASCII. 
- Assert.Equal(OperationStatus.Done, Ascii.FromUtf16(utf16Span.Slice(i), asciiSpan.Slice(i), out int charsConsumed, out int bytesWritten)); - Assert.Equal(128 - i, charsConsumed); - Assert.Equal(charsConsumed, bytesWritten); + Assert.Equal(OperationStatus.Done, Ascii.FromUtf16(utf16Span.Slice(i), asciiSpan.Slice(i), out int bytesWritten)); + Assert.Equal(128 - i, bytesWritten); // Then, validate that the data was transcoded properly. @@ -85,9 +84,8 @@ public static void SomeNonAsciiInput() // correctly saw the data as non-ASCII. utf16Span[i] = '\u0123'; // use U+0123 instead of U+0080 since it catches inappropriate pmovmskb usage - Assert.Equal(OperationStatus.InvalidData, Ascii.FromUtf16(utf16Span, asciiSpan, out int charsConsumed, out int bytesWritten)); - Assert.Equal(i, charsConsumed); - Assert.Equal(charsConsumed, bytesWritten); + Assert.Equal(OperationStatus.InvalidData, Ascii.FromUtf16(utf16Span, asciiSpan, out int bytesWritten)); + Assert.Equal(i, bytesWritten); // Next, validate that the ASCII data was transcoded properly. diff --git a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs b/src/libraries/System.Text.Encoding/tests/Ascii/IsValidByteTests.cs similarity index 81% rename from src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs rename to src/libraries/System.Text.Encoding/tests/Ascii/IsValidByteTests.cs index 00eb0bcbffa60b..07858beb11ca39 100644 --- a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiByteTests.cs +++ b/src/libraries/System.Text.Encoding/tests/Ascii/IsValidByteTests.cs @@ -1,24 +1,24 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Buffers; using System.Collections.Generic; using System.Linq; using System.Numerics; using System.Runtime.Intrinsics; using Xunit; -namespace System.Buffers.Text.Tests +namespace System.Text.Tests { - public static class GetIndexOfFirstNonAsciiByteTests + public static class IsValidByteTests { private static byte GetNextValidAsciiByte() => (byte)Random.Shared.Next(0, 127 + 1); private static byte GetNextInvalidAsciiByte() => (byte)Random.Shared.Next(128, 255 + 1); [Fact] - public static void EmptyInput_IndexNotFound() + public static void EmptyInput_ReturnsTrue() { - Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiByte(ReadOnlySpan.Empty)); - Assert.True(Ascii.IsAscii(ReadOnlySpan.Empty)); + Assert.True(Ascii.IsValid(ReadOnlySpan.Empty)); } private static int[] BufferLengths = new[] { @@ -45,11 +45,10 @@ public static IEnumerable AsciiOnlyBuffers [Theory] [MemberData(nameof(AsciiOnlyBuffers))] - public static void AllAscii_IndexNotFound(byte[] buffer) + public static void AllAscii_ReturnsTrue(byte[] buffer) { - Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiByte(buffer)); - Assert.True(Ascii.IsAscii(buffer)); - Assert.All(buffer, character => Assert.True(Ascii.IsAscii(character))); + Assert.True(Ascii.IsValid(buffer)); + Assert.All(buffer, character => Assert.True(Ascii.IsValid(character))); } public static IEnumerable ContainingNonAsciiCharactersBuffers @@ -75,14 +74,13 @@ static byte[] Create(int length, int index) [Theory] [MemberData(nameof(ContainingNonAsciiCharactersBuffers))] - public static void NonAscii_IndexFound(int expectedIndex, byte[] buffer) + public static void NonAsciiAtGivenIndex(int nonAsciiIndex, byte[] buffer) { - Assert.Equal(expectedIndex, Ascii.GetIndexOfFirstNonAsciiByte(buffer)); - Assert.False(Ascii.IsAscii(buffer)); + Assert.False(Ascii.IsValid(buffer)); for (int i = 0; i < buffer.Length; i++) { - Assert.Equal(i != expectedIndex, Ascii.IsAscii(buffer[i])); + Assert.Equal(i != nonAsciiIndex, Ascii.IsValid(buffer[i])); } } @@ 
-109,7 +107,7 @@ public static void Vector128InnerLoop() for (int i = 2 * Vector128.Count - 1; i >= 0; i--) { bytes[100 + i * 13] = 0x80; // 13 is relatively prime to 32, so it ensures all possible positions are hit - Assert.Equal(100 + i * 13, Ascii.GetIndexOfFirstNonAsciiByte(bytes)); + Assert.False(Ascii.IsValid(bytes)); } } } @@ -136,7 +134,7 @@ public static void Boundaries() for (int i = bytes.Length; i >= 0; i--) { - Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiByte(bytes.Slice(0, i))); + Assert.True(Ascii.IsValid(bytes.Slice(0, i))); } // Then, try it with non-ASCII bytes. @@ -144,7 +142,7 @@ public static void Boundaries() for (int i = bytes.Length; i >= 1; i--) { bytes[i - 1] = 0x80; // set non-ASCII - Assert.Equal(i - 1, Ascii.GetIndexOfFirstNonAsciiByte(bytes.Slice(0, i))); + Assert.False(Ascii.IsValid(bytes.Slice(0, i))); } } } diff --git a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs b/src/libraries/System.Text.Encoding/tests/Ascii/IsValidCharTests.cs similarity index 83% rename from src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs rename to src/libraries/System.Text.Encoding/tests/Ascii/IsValidCharTests.cs index 549294acf77ca9..fced8f2e873275 100644 --- a/src/libraries/System.Memory/tests/Ascii/GetIndexOfFirstNonAsciiCharTests.cs +++ b/src/libraries/System.Text.Encoding/tests/Ascii/IsValidCharTests.cs @@ -1,24 +1,24 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Buffers; using System.Collections.Generic; using System.Linq; using System.Numerics; using System.Runtime.Intrinsics; using Xunit; -namespace System.Buffers.Text.Tests +namespace System.Text.Tests { - public static class GetIndexOfFirstNonAsciiCharTests + public static class IsValidCharTests { private static char GetNextValidAsciiChar() => (char)Random.Shared.Next(0, 127 + 1); private static char GetNextInvalidAsciiChar() => (char)Random.Shared.Next(128, ushort.MaxValue + 1); [Fact] - public static void EmptyInput_IndexNotFound() + public static void EmptyInput_ReturnsTrue() { - Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiChar(ReadOnlySpan.Empty)); - Assert.True(Ascii.IsAscii(ReadOnlySpan.Empty)); + Assert.True(Ascii.IsValid(ReadOnlySpan.Empty)); } private static int[] BufferLengths = new[] { @@ -45,11 +45,10 @@ public static IEnumerable AsciiOnlyBuffers [Theory] [MemberData(nameof(AsciiOnlyBuffers))] - public static void AllAscii_IndexNotFound(char[] buffer) + public static void AllAscii_ReturnsTrue(char[] buffer) { - Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiChar(buffer)); - Assert.True(Ascii.IsAscii(buffer)); - Assert.All(buffer, character => Assert.True(Ascii.IsAscii(character))); + Assert.True(Ascii.IsValid(buffer)); + Assert.All(buffer, character => Assert.True(Ascii.IsValid(character))); } public static IEnumerable ContainingNonAsciiCharactersBuffers @@ -75,14 +74,13 @@ static char[] Create(int length, int index) [Theory] [MemberData(nameof(ContainingNonAsciiCharactersBuffers))] - public static void NonAscii_IndexFound(int expectedIndex, char[] buffer) + public static void NonAsciiAtGivenIndex(int nonAsciiIndex, char[] buffer) { - Assert.Equal(expectedIndex, Ascii.GetIndexOfFirstNonAsciiChar(buffer)); - Assert.False(Ascii.IsAscii(buffer)); + Assert.False(Ascii.IsValid(buffer)); for (int i = 0; i < buffer.Length; i++) { - Assert.Equal(i != expectedIndex, Ascii.IsAscii(buffer[i])); + Assert.Equal(i != nonAsciiIndex, 
Ascii.IsValid(buffer[i])); } } @@ -113,7 +111,7 @@ public static void Vector128InnerLoop() for (int i = 2 * Vector128.Count - 1; i >= 0; i--) { chars[100 + i * 13] = '\u0123'; // 13 is relatively prime to 32, so it ensures all possible positions are hit - Assert.Equal(100 + i * 13, Ascii.GetIndexOfFirstNonAsciiChar(chars)); + Assert.False(Ascii.IsValid(chars)); } } } @@ -144,7 +142,7 @@ public static void Boundaries() for (int i = chars.Length; i >= 0; i--) { - Assert.Equal(-1, Ascii.GetIndexOfFirstNonAsciiChar(chars.Slice(0, i))); + Assert.True(Ascii.IsValid(chars.Slice(0, i))); } // Then, try it with non-ASCII bytes. @@ -152,7 +150,7 @@ public static void Boundaries() for (int i = chars.Length; i >= 1; i--) { chars[i - 1] = '\u0123'; // set non-ASCII - Assert.Equal(i - 1, Ascii.GetIndexOfFirstNonAsciiChar(chars.Slice(0, i))); + Assert.False(Ascii.IsValid(chars.Slice(0, i))); } } } diff --git a/src/libraries/System.Memory/tests/Ascii/ToUtf16Tests.cs b/src/libraries/System.Text.Encoding/tests/Ascii/ToUtf16Tests.cs similarity index 86% rename from src/libraries/System.Memory/tests/Ascii/ToUtf16Tests.cs rename to src/libraries/System.Text.Encoding/tests/Ascii/ToUtf16Tests.cs index 81646e2145619f..be9c71e14fbb82 100644 --- a/src/libraries/System.Memory/tests/Ascii/ToUtf16Tests.cs +++ b/src/libraries/System.Text.Encoding/tests/Ascii/ToUtf16Tests.cs @@ -1,19 +1,19 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Buffers; using System.Runtime.InteropServices; using System.Security.Cryptography; using Xunit; -namespace System.Buffers.Text.Tests +namespace System.Text.Tests { public static class ToUtf16Tests { [Fact] public static void EmptyInputs() { - Assert.Equal(OperationStatus.Done, Ascii.ToUtf16(ReadOnlySpan.Empty, Span.Empty, out int bytesConsumed, out int charsWritten)); - Assert.Equal(0, bytesConsumed); + Assert.Equal(OperationStatus.Done, Ascii.ToUtf16(ReadOnlySpan.Empty, Span.Empty, out int charsWritten)); Assert.Equal(0, charsWritten); } @@ -43,9 +43,8 @@ public static void AllAsciiInput() // First, validate that the workhorse saw the incoming data as all-ASCII. - Assert.Equal(OperationStatus.Done, Ascii.ToUtf16(asciiSpan.Slice(i), utf16Span.Slice(i), out int bytesConsumed, out int charsWritten)); - Assert.Equal(128 - i, bytesConsumed); - Assert.Equal(bytesConsumed, charsWritten); + Assert.Equal(OperationStatus.Done, Ascii.ToUtf16(asciiSpan.Slice(i), utf16Span.Slice(i), out int charsWritten)); + Assert.Equal(128 - i, charsWritten); // Then, validate that the data was transcoded properly. @@ -88,9 +87,8 @@ public static void SomeNonAsciiInput() asciiSpan[i] |= (byte)0x80; - Assert.Equal(OperationStatus.InvalidData, Ascii.ToUtf16(asciiSpan, utf16Span, out int bytesConsumed, out int charsWritten)); - Assert.Equal(i, bytesConsumed); - Assert.Equal(bytesConsumed, charsWritten); + Assert.Equal(OperationStatus.InvalidData, Ascii.ToUtf16(asciiSpan, utf16Span, out int charsWritten)); + Assert.Equal(i, charsWritten); // Next, validate that the ASCII data was transcoded properly. 
diff --git a/src/libraries/System.Memory/tests/Ascii/TrimTests.cs b/src/libraries/System.Text.Encoding/tests/Ascii/TrimTests.cs similarity index 98% rename from src/libraries/System.Memory/tests/Ascii/TrimTests.cs rename to src/libraries/System.Text.Encoding/tests/Ascii/TrimTests.cs index ec30d971d2a8cc..5873942d87a5dd 100644 --- a/src/libraries/System.Memory/tests/Ascii/TrimTests.cs +++ b/src/libraries/System.Text.Encoding/tests/Ascii/TrimTests.cs @@ -1,10 +1,9 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -using System.Text; using Xunit; -namespace System.Buffers.Text.Tests +namespace System.Text.Tests { public static class TrimTests { diff --git a/src/libraries/System.Text.Encoding/tests/System.Text.Encoding.Tests.csproj b/src/libraries/System.Text.Encoding/tests/System.Text.Encoding.Tests.csproj index 2e59a6b32f4b06..305e53a58290dd 100644 --- a/src/libraries/System.Text.Encoding/tests/System.Text.Encoding.Tests.csproj +++ b/src/libraries/System.Text.Encoding/tests/System.Text.Encoding.Tests.csproj @@ -9,6 +9,12 @@ true + + + + + + diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index d31915af61f99c..4315efc0ae2f42 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -990,7 +990,7 @@ public static bool IsAscii(ReadOnlySpan s) return true; #else - return Buffers.Text.Ascii.IsAscii(s); + return Text.Ascii.IsValid(s); #endif } From bb0a27213637984f5b80a7f7d073b7ee3376e091 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Wed, 7 Dec 2022 16:53:55 +0100 Subject: [PATCH 43/46] add missing XML docs --- .../src/System/Text/Ascii.CaseConversion.cs | 92 
+++++++++++++++++++ .../src/System/Text/Ascii.Trimming.cs | 23 +++++ 2 files changed, 115 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.CaseConversion.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.CaseConversion.cs index 5a43117f586151..66b8c6e0fde783 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.CaseConversion.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.CaseConversion.cs @@ -14,50 +14,142 @@ namespace System.Text { public static partial class Ascii { + /// + /// Copies text from a source buffer to a destination buffer, converting + /// ASCII letters to uppercase during the copy. + /// + /// The source buffer from which ASCII text is read. + /// The destination buffer to which uppercase text is written. + /// The number of bytes actually written to . It's the same as the number of bytes actually read from . + /// An describing the result of the operation. + /// In-place conversion is prohibited, please use for that. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToUpper(ReadOnlySpan source, Span destination, out int bytesWritten) => ChangeCase(source, destination, out bytesWritten); + /// + /// Copies text from a source buffer to a destination buffer, converting + /// ASCII letters to uppercase during the copy. + /// + /// The source buffer from which ASCII text is read. + /// The destination buffer to which uppercase text is written. + /// The number of characters actually written to . It's the same as the number of characters actually read from . + /// An describing the result of the operation. + /// In-place conversion is prohibited, please use for that. 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToUpper(ReadOnlySpan source, Span destination, out int charsWritten) => ChangeCase(MemoryMarshal.Cast(source), MemoryMarshal.Cast(destination), out charsWritten); + /// + /// Copies text from a source buffer to a destination buffer, converting + /// ASCII letters to uppercase during the copy. + /// + /// The source buffer from which ASCII text is read. + /// The destination buffer to which uppercase text is written. + /// The number of characters actually written to . It's the same as the number of bytes actually read from . + /// An describing the result of the operation. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToUpper(ReadOnlySpan source, Span destination, out int charsWritten) => ChangeCase(source, MemoryMarshal.Cast(destination), out charsWritten); + /// + /// Copies text from a source buffer to a destination buffer, converting + /// ASCII letters to uppercase during the copy. + /// + /// The source buffer from which ASCII text is read. + /// The destination buffer to which uppercase text is written. + /// The number of bytes actually written to . It's the same as the number of characters actually read from . + /// An describing the result of the operation. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToUpper(ReadOnlySpan source, Span destination, out int bytesWritten) => ChangeCase(MemoryMarshal.Cast(source), destination, out bytesWritten); + /// + /// Copies text from a source buffer to a destination buffer, converting + /// ASCII letters to lowercase during the copy. + /// + /// The source buffer from which ASCII text is read. + /// The destination buffer to which lowercase text is written. + /// The number of bytes actually written to . It's the same as the number of bytes actually read from . + /// An describing the result of the operation. + /// In-place conversion is prohibited, please use for that. 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToLower(ReadOnlySpan source, Span destination, out int bytesWritten) => ChangeCase(source, destination, out bytesWritten); + /// + /// Copies text from a source buffer to a destination buffer, converting + /// ASCII letters to lowercase during the copy. + /// + /// The source buffer from which ASCII text is read. + /// The destination buffer to which lowercase text is written. + /// The number of characters actually written to . It's the same as the number of characters actually read from . + /// An describing the result of the operation. + /// In-place conversion is prohibited, please use for that. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToLower(ReadOnlySpan source, Span destination, out int charsWritten) => ChangeCase(MemoryMarshal.Cast(source), MemoryMarshal.Cast(destination), out charsWritten); + /// + /// Copies text from a source buffer to a destination buffer, converting + /// ASCII letters to lowercase during the copy. + /// + /// The source buffer from which ASCII text is read. + /// The destination buffer to which lowercase text is written. + /// The number of characters actually written to . It's the same as the number of bytes actually read from . + /// An describing the result of the operation. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToLower(ReadOnlySpan source, Span destination, out int charsWritten) => ChangeCase(source, MemoryMarshal.Cast(destination), out charsWritten); + /// + /// Copies text from a source buffer to a destination buffer, converting + /// ASCII letters to lowercase during the copy. + /// + /// The source buffer from which ASCII text is read. + /// The destination buffer to which lowercase text is written. + /// The number of bytes actually written to . It's the same as the number of characters actually read from . + /// An describing the result of the operation. 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToLower(ReadOnlySpan source, Span destination, out int bytesWritten) => ChangeCase(MemoryMarshal.Cast(source), destination, out bytesWritten); + /// + /// Performs in-place lowercase conversion. + /// + /// The ASCII text buffer. + /// The number of processed bytes. + /// An describing the result of the operation. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToLowerInPlace(Span value, out int bytesWritten) => ChangeCase(value, out bytesWritten); + /// + /// Performs in-place lowercase conversion. + /// + /// The ASCII text buffer. + /// The number of processed characters. + /// An describing the result of the operation. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToLowerInPlace(Span value, out int charsWritten) => ChangeCase(MemoryMarshal.Cast(value), out charsWritten); + /// + /// Performs in-place uppercase conversion. + /// + /// The ASCII text buffer. + /// The number of processed bytes. + /// An describing the result of the operation. [MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToUpperInPlace(Span value, out int bytesWritten) => ChangeCase(value, out bytesWritten); + /// + /// Performs in-place uppercase conversion. + /// + /// The ASCII text buffer. + /// The number of processed characters. + /// An describing the result of the operation.
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static OperationStatus ToUpperInPlace(Span value, out int charsWritten) => ChangeCase(MemoryMarshal.Cast(value), out charsWritten); diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Trimming.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Trimming.cs index 6176967023e529..f175db0b2d8262 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Trimming.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.Trimming.cs @@ -7,11 +7,34 @@ namespace System.Text { public static partial class Ascii { + /// + /// Trims all leading and trailing ASCII whitespaces from the buffer. + /// + /// The ASCII buffer. + /// The Range of the untrimmed data. public static Range Trim(ReadOnlySpan value) => TrimHelper(value, TrimType.Both); + + /// public static Range Trim(ReadOnlySpan value) => TrimHelper(value, TrimType.Both); + + /// + /// Trims all leading ASCII whitespaces from the buffer. + /// + /// The ASCII buffer. + /// The Range of the untrimmed data. public static Range TrimStart(ReadOnlySpan value) => TrimHelper(value, TrimType.Head); + + /// public static Range TrimStart(ReadOnlySpan value) => TrimHelper(value, TrimType.Head); + + /// + /// Trims all trailing ASCII whitespaces from the buffer. + /// + /// The ASCII buffer. + /// The Range of the untrimmed data. 
public static Range TrimEnd(ReadOnlySpan value) => TrimHelper(value, TrimType.Tail); + + /// public static Range TrimEnd(ReadOnlySpan value) => TrimHelper(value, TrimType.Tail); private static Range TrimHelper(ReadOnlySpan value, TrimType trimType) From c0f38d1689e4adedd661385ba8fe7c931fad3ba6 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Thu, 8 Dec 2022 00:40:40 +0100 Subject: [PATCH 44/46] cleanup --- .../Common/src/System/CharArrayHelpers.cs | 1 - .../src/System/Net/WebClient.cs | 1 - .../RandomizedStringEqualityComparer.cs | 2 +- .../src/System/Marvin.OrdinalIgnoreCase.cs | 82 +------------------ .../src/System/String.Comparison.cs | 4 +- 5 files changed, 4 insertions(+), 86 deletions(-) diff --git a/src/libraries/Common/src/System/CharArrayHelpers.cs b/src/libraries/Common/src/System/CharArrayHelpers.cs index 95dad91071a300..371754c4386a36 100644 --- a/src/libraries/Common/src/System/CharArrayHelpers.cs +++ b/src/libraries/Common/src/System/CharArrayHelpers.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; -using System.Runtime.CompilerServices; namespace System { diff --git a/src/libraries/System.Net.WebClient/src/System/Net/WebClient.cs b/src/libraries/System.Net.WebClient/src/System/Net/WebClient.cs index a3441908dec34d..37ea6293e8b50a 100644 --- a/src/libraries/System.Net.WebClient/src/System/Net/WebClient.cs +++ b/src/libraries/System.Net.WebClient/src/System/Net/WebClient.cs @@ -2,7 +2,6 @@ // The .NET Foundation licenses this file to you under the MIT license. 
using System.Buffers; -using System.Buffers.Text; using System.Collections.Specialized; using System.ComponentModel; using System.Diagnostics; diff --git a/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/RandomizedStringEqualityComparer.cs b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/RandomizedStringEqualityComparer.cs index e29cfca31390c7..45fd297e5af275 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/RandomizedStringEqualityComparer.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/RandomizedStringEqualityComparer.cs @@ -92,7 +92,7 @@ public override int GetHashCode(string? obj) return Marvin.ComputeHash32OrdinalIgnoreCase( ref obj.GetRawStringData(), obj.Length, - _seed.p0, _seed.p1, out _); + _seed.p0, _seed.p1); } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Marvin.OrdinalIgnoreCase.cs b/src/libraries/System.Private.CoreLib/src/System/Marvin.OrdinalIgnoreCase.cs index 96c7b00ebca3c1..1888a5f0c612a3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Marvin.OrdinalIgnoreCase.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Marvin.OrdinalIgnoreCase.cs @@ -15,9 +15,7 @@ internal static partial class Marvin /// Compute a Marvin OrdinalIgnoreCase hash and collapse it into a 32-bit hash. /// n.b. is specified as char count, not byte count. /// - /// Additional is needed as it's impossible to distinguish - /// whether method returned 0 because it found some non-ASCII char or whether it calculated such hashcode. 
- public static int ComputeHash32OrdinalIgnoreCase(ref char data, int count, uint p0, uint p1, out bool nonAsciiFound, bool stopOnNonAscii = false) + public static int ComputeHash32OrdinalIgnoreCase(ref char data, int count, uint p0, uint p1) { uint ucount = (uint)count; // in chars nuint byteOffset = 0; // in bytes @@ -73,18 +71,10 @@ public static int ComputeHash32OrdinalIgnoreCase(ref char data, int count, uint Block(ref p0, ref p1); Block(ref p0, ref p1); - nonAsciiFound = false; return (int)(p1 ^ p0); NotAscii: Debug.Assert(ucount <= int.MaxValue); // this should fit into a signed int - - nonAsciiFound = true; - if (stopOnNonAscii) - { - return 0; - } - return ComputeHash32OrdinalIgnoreCaseSlow(ref Unsafe.AddByteOffset(ref data, byteOffset), (int)ucount, p0, p1); } @@ -110,75 +100,5 @@ private static int ComputeHash32OrdinalIgnoreCaseSlow(ref char data, int count, return hash; } - - /// - /// Compute a Marvin OrdinalIgnoreCase hash and collapse it into a 32-bit hash. - /// n.b. is specified as byte count. - /// - /// True if all bytes were ASCII, false otherwise - internal static bool TryComputeHash32ForAsciiIgnoreCase(ref byte data, int count, uint p0, uint p1, out int hashCode) - { - uint ucount = (uint)count; // in bytes - nuint byteOffset = 0; // in bytes - uint tempValue; - - // We operate on 32-bit integers (four bytes) at a time. 
- - while (ucount >= 4) - { - tempValue = Unsafe.ReadUnaligned(ref Unsafe.AddByteOffset(ref data, byteOffset)); - if (!Utf8Utility.AllBytesInUInt32AreAscii(tempValue)) - { - goto NotAscii; - } - p0 += Utf8Utility.ConvertAllAsciiBytesInUInt32ToUppercase(tempValue); - Block(ref p0, ref p1); - - byteOffset += 4; - ucount -= 4; - } - - while (ucount > 0) - { - tempValue = Unsafe.AddByteOffset(ref data, byteOffset); - if (tempValue > 0x7Fu) - { - goto NotAscii; - } - - if (BitConverter.IsLittleEndian) - { - // addition is written with -0x80u to allow fall-through to next statement rather than jmp past it - p0 += Utf8Utility.ConvertAllAsciiBytesInUInt32ToUppercase(tempValue) + (0x800000u - 0x80u); - } - else - { - // as above, addition is modified to allow fall-through to next statement rather than jmp past it - p0 += (Utf8Utility.ConvertAllAsciiBytesInUInt32ToUppercase(tempValue) << 16) + 0x8000u - 0x80000000u; - } - - byteOffset += 1; - ucount -= 1; - } - if (BitConverter.IsLittleEndian) - { - p0 += 0x80u; - } - else - { - p0 += 0x80000000u; - } - - Block(ref p0, ref p1); - Block(ref p0, ref p1); - - hashCode = (int)(p1 ^ p0); - return true; - - NotAscii: - Debug.Assert(ucount <= int.MaxValue); // this should fit into a signed int - hashCode = 0; - return false; - } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs index 09353a2ec473bb..b3758d335c9006 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Comparison.cs @@ -764,7 +764,7 @@ public override int GetHashCode() internal int GetHashCodeOrdinalIgnoreCase() { ulong seed = Marvin.DefaultSeed; - return Marvin.ComputeHash32OrdinalIgnoreCase(ref _firstChar, _stringLength /* in chars, not bytes */, (uint)seed, (uint)(seed >> 32), out _); + return Marvin.ComputeHash32OrdinalIgnoreCase(ref _firstChar, _stringLength /* in 
chars, not bytes */, (uint)seed, (uint)(seed >> 32)); } // A span-based equivalent of String.GetHashCode(). Computes an ordinal hash code. @@ -807,7 +807,7 @@ public static int GetHashCode(ReadOnlySpan value, StringComparison compari internal static int GetHashCodeOrdinalIgnoreCase(ReadOnlySpan value) { ulong seed = Marvin.DefaultSeed; - return Marvin.ComputeHash32OrdinalIgnoreCase(ref MemoryMarshal.GetReference(value), value.Length /* in chars, not bytes */, (uint)seed, (uint)(seed >> 32), out _); + return Marvin.ComputeHash32OrdinalIgnoreCase(ref MemoryMarshal.GetReference(value), value.Length /* in chars, not bytes */, (uint)seed, (uint)(seed >> 32)); } // Use this if and only if 'Denial of Service' attacks are not a concern (i.e. never used for free-form user input), From da943531e4d3760d2bd28b1714183b0abb9a9233 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Fri, 9 Dec 2022 12:12:00 +0100 Subject: [PATCH 45/46] address code review feedback --- .../System.Net.HttpListener/src/System/Net/HttpListener.cs | 7 +++---- .../System.Private.CoreLib/src/System/Text/Ascii.cs | 4 ++-- .../src/System/Text/RegularExpressions/RegexCharClass.cs | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/libraries/System.Net.HttpListener/src/System/Net/HttpListener.cs b/src/libraries/System.Net.HttpListener/src/System/Net/HttpListener.cs index 8f8a24403b41c3..1094d50db0c29b 100644 --- a/src/libraries/System.Net.HttpListener/src/System/Net/HttpListener.cs +++ b/src/libraries/System.Net.HttpListener/src/System/Net/HttpListener.cs @@ -3,6 +3,7 @@ using System.Buffers; using System.Collections; +using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.Security.Authentication.ExtendedProtection; using System.Text; @@ -205,10 +206,8 @@ static string CreateRegisteredPrefix(string uriPrefix, int j, int i) toLowerLength = destination.Length; } - if (Ascii.ToLowerInPlace(destination.Slice(0, toLowerLength), out _) != OperationStatus.Done) - { - throw 
new IndexOutOfRangeException(); // backward compat for non-ASCII characters - } + OperationStatus operationStatus = Ascii.ToLowerInPlace(destination.Slice(0, toLowerLength), out _); + Debug.Assert(operationStatus == OperationStatus.Done); }); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs index e88dec2c9e556d..fb8b9281fcc411 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs @@ -58,7 +58,7 @@ internal static unsafe int GetIndexOfFirstNonAsciiChar(ReadOnlySpan buffer /// The value to inspect. /// True if contains only ASCII bytes or is /// empty; False otherwise. - public static unsafe bool IsValid(ReadOnlySpan value) => value.IsEmpty || GetIndexOfFirstNonAsciiByte(value) < 0; + public static unsafe bool IsValid(ReadOnlySpan value) => GetIndexOfFirstNonAsciiByte(value) < 0; /// /// Determines whether the provided value contains only ASCII chars. @@ -66,7 +66,7 @@ internal static unsafe int GetIndexOfFirstNonAsciiChar(ReadOnlySpan buffer /// The value to inspect. /// True if contains only ASCII chars or is /// empty; False otherwise. - public static unsafe bool IsValid(ReadOnlySpan value) => value.IsEmpty || GetIndexOfFirstNonAsciiChar(value) < 0; + public static unsafe bool IsValid(ReadOnlySpan value) => GetIndexOfFirstNonAsciiChar(value) < 0; /// /// Determines whether the provided value is ASCII byte. 
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 999b1c76cb210d..d390d644911625 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -992,7 +992,7 @@ public static bool IsAscii(ReadOnlySpan s) return true; #else - return Text.Ascii.IsValid(s); + return Ascii.IsValid(s); #endif } From 4483baf8a877d761ec1d838b51bb172091d5a583 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Fri, 9 Dec 2022 12:26:38 +0100 Subject: [PATCH 46/46] Update src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs Co-authored-by: Miha Zupan --- .../System.Private.Uri/src/System/DomainNameHelper.cs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs index c974fb59f192a7..2cfe8bb79b3ca1 100644 --- a/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs +++ b/src/libraries/System.Private.Uri/src/System/DomainNameHelper.cs @@ -198,11 +198,7 @@ public static string IdnEquivalent(string hostname) if (Ascii.IsValid(hostname)) { // just lowercase for ascii - return string.Create(hostname.Length, hostname, static (chars, asciiHostName) => - { - OperationStatus status = Ascii.ToLower(asciiHostName, chars, out _); - Debug.Assert(status == OperationStatus.Done); - }); + return hostname.ToLowerInvariant(); } string bidiStrippedHost = UriHelper.StripBidiControlCharacters(hostname, hostname);