From cdf4d7fd25ddced07285931c565c161b68f6505b Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Fri, 23 Aug 2024 19:28:36 +0200 Subject: [PATCH 1/5] Add SearchValues implementation for values with unique low nibbles --- .../System.Memory/tests/Span/SearchValues.cs | 7 + .../System.Private.CoreLib.Shared.projitems | 2 + .../SearchValues/IndexOfAnyAsciiSearcher.cs | 826 ++++++++++++++++-- .../src/System/SearchValues/SearchValues.cs | 49 +- .../UniqueLowNibbleByteSearchValues.cs | 74 ++ .../UniqueLowNibbleCharSearchValues.cs | 74 ++ 6 files changed, 948 insertions(+), 84 deletions(-) create mode 100644 src/libraries/System.Private.CoreLib/src/System/SearchValues/UniqueLowNibbleByteSearchValues.cs create mode 100644 src/libraries/System.Private.CoreLib/src/System/SearchValues/UniqueLowNibbleCharSearchValues.cs diff --git a/src/libraries/System.Memory/tests/Span/SearchValues.cs b/src/libraries/System.Memory/tests/Span/SearchValues.cs index 8f39a95bd3c0f4..00e9c71e3dfb75 100644 --- a/src/libraries/System.Memory/tests/Span/SearchValues.cs +++ b/src/libraries/System.Memory/tests/Span/SearchValues.cs @@ -35,6 +35,8 @@ public static IEnumerable Values_MemberData() "abcd", "aeio", "aeiou", + "Aabc", + "Aabcd", "abceiou", "123456789", "123456789123", @@ -82,6 +84,11 @@ public static IEnumerable Values_MemberData() { yield return Pair(value); yield return Pair('a' + value); + yield return Pair('\0' + value); + yield return Pair('\u0001' + value); + yield return Pair('\u00FE' + value); + yield return Pair('\u00FF' + value); + yield return Pair('\uFF00' + value); // Test some more duplicates if (value.Length > 0) diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 72f6a1579d80fd..b686c160d96929 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ 
b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -440,6 +440,8 @@ + + diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs index 36965c24da7e54..9e9034e0faf8b2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs @@ -16,21 +16,37 @@ internal static class IndexOfAnyAsciiSearcher public struct AsciiState(Vector128 bitmap, BitVector256 lookup) { public Vector256 Bitmap = Vector256.Create(bitmap); - public BitVector256 Lookup = lookup; + public readonly BitVector256 Lookup = lookup; public readonly AsciiState CreateInverse() => new AsciiState(~Bitmap._lower, Lookup.CreateInverse()); } - public struct AnyByteState(Vector128 bitmap0, Vector128 bitmap1, BitVector256 lookup) + public readonly struct AnyByteState(Vector128 bitmap0, Vector128 bitmap1, BitVector256 lookup) { - public Vector256 Bitmap0 = Vector256.Create(bitmap0); - public Vector256 Bitmap1 = Vector256.Create(bitmap1); - public BitVector256 Lookup = lookup; + public readonly Vector256 Bitmap0 = Vector256.Create(bitmap0); + public readonly Vector256 Bitmap1 = Vector256.Create(bitmap1); + public readonly BitVector256 Lookup = lookup; + } + + public readonly struct UniqueLowNibbleState(Vector128 valuesByLowNibble, BitVector256 lookup) + { + public readonly Vector256 ValuesByLowNibble = Vector256.Create(valuesByLowNibble); + public readonly BitVector256 Lookup = lookup; } internal static bool IsVectorizationSupported => Ssse3.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static unsafe void SetBitmapBit(byte* bitmap, int value) + { + Debug.Assert((uint)value <= 127); + + int highNibble = value >> 4; + int lowNibble = 
value & 0xF; + bitmap[(uint)lowNibble] |= (byte)(1 << highNibble); + } + internal static unsafe void ComputeAnyByteState(ReadOnlySpan values, out AnyByteState state) { // The exact format of these bitmaps differs from the other ComputeBitmap overloads as it's meant for the full [0, 255] range algorithm. @@ -46,16 +62,13 @@ internal static unsafe void ComputeAnyByteState(ReadOnlySpan values, out A { lookupLocal.Set(b); - int highNibble = b >> 4; - int lowNibble = b & 0xF; - - if (highNibble < 8) + if (b < 128) { - bitmapLocal0[(uint)lowNibble] |= (byte)(1 << highNibble); + SetBitmapBit(bitmapLocal0, b); } else { - bitmapLocal1[(uint)lowNibble] |= (byte)(1 << (highNibble - 8)); + SetBitmapBit(bitmapLocal1, b - 128); } } @@ -81,14 +94,87 @@ internal static unsafe void ComputeAsciiState(ReadOnlySpan values, out Asc } lookupLocal.Set(value); + SetBitmapBit(bitmapLocal, value); + } + + state = new AsciiState(bitmapSpace, lookupLocal); + } + + public static bool CanUseUniqueLowNibbleSearch(ReadOnlySpan values, int minInclusive, int maxInclusive) + where T : struct, IUnsignedNumber + { + Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(char)); - int highNibble = value >> 4; - int lowNibble = value & 0xF; + if (!IsVectorizationSupported || values.Length > 16) + { + return false; + } - bitmapLocal[(uint)lowNibble] |= (byte)(1 << highNibble); + if (Ssse3.IsSupported && maxInclusive > 127) + { + // We could support [1, 254] if we did the "& 0xF" before calling into Shuffle in IndexOfAnyLookupUniqueLowNibbleCore. + // We currently optimize for the common case of ASCII characters instead, saving an instruction there. + return false; } - state = new AsciiState(bitmapSpace, lookupLocal); + if (typeof(T) == typeof(char)) + { + if (maxInclusive >= byte.MaxValue) + { + // When packing UTF-16 characters into bytes, values may saturate to 255 (false positives), hence ">=" instead of ">". 
+ return false; + } + + if ((Ssse3.IsSupported || PackedSimd.IsSupported) && minInclusive == 0) + { + // When packing UTF-16 characters into bytes, values may saturate to 0 (false positives). + // We could also support 0 if we flowed Ssse3AndWasmHandleZeroInNeedle through. + return false; + } + } + + // We assume there are no duplicates to simplify the logic (if there are any, they just won't use this searching approach). + int seenNibbles = 0; + + foreach (T tValue in values) + { + int bit = 1 << (int.CreateChecked(tValue) & 0xF); + + if ((seenNibbles & bit) != 0) + { + // We already saw a value with the same low nibble. + return false; + } + + seenNibbles |= bit; + } + + return true; + } + + public static void ComputeUniqueLowNibbleState(ReadOnlySpan values, out UniqueLowNibbleState state) + where T : struct, IUnsignedNumber + { + Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(char)); + + Vector128 valuesByLowNibble = default; + BitVector256 lookup = default; + + foreach (T tValue in values) + { + byte value = byte.CreateTruncating(tValue); + lookup.Set(value); + valuesByLowNibble.SetElementUnsafe(value & 0xF, value); + } + + if (valuesByLowNibble.GetElement(0) == 0 && !lookup.Contains(0)) + { + // Avoid false positives for the zero character if no other character has a low nibble of zero. + // We can replace it with any other byte that has a non-zero low nibble. 
+ valuesByLowNibble.SetElementUnsafe(0, (byte)1); + } + + state = new UniqueLowNibbleState(valuesByLowNibble, lookup); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -104,10 +190,7 @@ private static unsafe bool TryComputeBitmap(ReadOnlySpan values, byte* bit return false; } - int highNibble = c >> 4; - int lowNibble = c & 0xF; - - bitmapLocal[(uint)lowNibble] |= (byte)(1 << highNibble); + SetBitmapBit(bitmapLocal, c); } needleContainsZero = (bitmap[0] & 1) != 0; @@ -1017,73 +1100,682 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpace } [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - private static Vector128 IndexOfAnyLookup(Vector128 source0, Vector128 source1, Vector128 bitmapLookup) - where TNegator : struct, INegator - where TOptimizations : struct, IOptimizations - { - Vector128 source = TOptimizations.PackSources(source0.AsUInt16(), source1.AsUInt16()); - - Vector128 result = IndexOfAnyLookupCore(source, bitmapLookup); - - return TNegator.NegateIfNeeded(result); - } + public static bool ContainsAny(ref short searchSpace, int searchSpaceLength, ref UniqueLowNibbleState state) + where TNegator : struct, INegator => + IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - private static Vector128 IndexOfAnyLookupCore(Vector128 source, Vector128 bitmapLookup) + public static int IndexOfAny(ref short searchSpace, int searchSpaceLength, ref UniqueLowNibbleState state) + where TNegator : struct, INegator => + IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); + + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + 
[CompExactlyDependsOn(typeof(PackedSimd))] + private static TResult IndexOfAnyCore(ref short searchSpace, int searchSpaceLength, ref UniqueLowNibbleState state) + where TResult : struct + where TNegator : struct, INegator + where TResultMapper : struct, IResultMapper { - // On X86, the Ssse3.Shuffle instruction will already perform an implicit 'AND 0xF' on the indices, so we can skip it. - // For values above 127, Ssse3.Shuffle will also set the result to 0. This is fine as we don't want non-ASCII values to match anyway. - Vector128 lowNibbles = Ssse3.IsSupported - ? source - : source & Vector128.Create((byte)0xF); + ref short currentSearchSpace = ref searchSpace; - // On ARM, we have an instruction for an arithmetic right shift of 1-byte signed values. - // The shift will map values above 127 to values above 16, which the shuffle will then map to 0. - // On X86 and WASM, use a logical right shift instead. - Vector128 highNibbles = AdvSimd.IsSupported - ? AdvSimd.ShiftRightArithmetic(source.AsSByte(), 4).AsByte() - : source >>> 4; + if (searchSpaceLength < Vector128.Count) + { + ref short searchSpaceEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength); - // The bitmapLookup represents a 8x16 table of bits, indicating whether a character is present in the needle. - // Lookup the rows via the lower nibble and the column via the higher nibble. - Vector128 bitMask = Vector128.ShuffleUnsafe(bitmapLookup, lowNibbles); + while (!Unsafe.AreSame(ref currentSearchSpace, ref searchSpaceEnd)) + { + char c = (char)currentSearchSpace; + if (TNegator.NegateIfNeeded(state.Lookup.Contains128(c))) + { + return TResultMapper.ScalarResult(ref searchSpace, ref currentSearchSpace); + } - // For values above 127, the high nibble will be above 7. We construct the positions vector for the shuffle such that those values map to 0. 
- Vector128 bitPositions = Vector128.ShuffleUnsafe(Vector128.Create(0x8040201008040201, 0).AsByte(), highNibbles); + currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 1); + } - Vector128 result = bitMask & bitPositions; - return result; + return TResultMapper.NotFound; + } + +#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx2.IsSupported is false + if (Avx2.IsSupported && searchSpaceLength > 2 * Vector128.Count) +#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough + { + Vector256 values256 = state.ValuesByLowNibble; + + if (searchSpaceLength > 2 * Vector256.Count) + { + // Process the input in chunks of 32 characters (2 * Vector256). + // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector256. + // As packing two Vector256s into a Vector256 is cheap compared to the lookup, we can effectively double the throughput. + // If the input length is a multiple of 32, don't consume the last 32 characters in this loop. + // Let the fallback below handle it instead. This is why the condition is + // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan". 
+ ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - (2 * Vector256.Count)); + + do + { + Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace); + Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); + + Vector256 result = IndexOfAnyLookupUniqueLowNibble(source0, source1, values256); + if (result != Vector256.Zero) + { + return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); + } + + currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count); + } + while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd)); + } + + // We have 1-32 characters remaining. Process the first and last vector in the search space. + // They may overlap, but we'll handle that in the index calculation if we do get a match. + Debug.Assert(searchSpaceLength >= Vector256.Count, "We expect that the input is long enough for us to load a whole vector."); + { + ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector256.Count); + + ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd) + ? 
ref oneVectorAwayFromEnd + : ref currentSearchSpace; + + Vector256 source0 = Vector256.LoadUnsafe(ref firstVector); + Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd); + + Vector256 result = IndexOfAnyLookupUniqueLowNibble(source0, source1, values256); + if (result != Vector256.Zero) + { + return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); + } + } + + return TResultMapper.NotFound; + } + + Vector128 values = state.ValuesByLowNibble._lower; + +#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx2.IsSupported is false + if (!Avx2.IsSupported && searchSpaceLength > 2 * Vector128.Count) +#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough + { + // Process the input in chunks of 16 characters (2 * Vector128). + // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector128. + // As packing two Vector128s into a Vector128 is cheap compared to the lookup, we can effectively double the throughput. + // If the input length is a multiple of 16, don't consume the last 16 characters in this loop. + // Let the fallback below handle it instead. This is why the condition is + // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan". 
+ ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - (2 * Vector128.Count)); + + do + { + Vector128 source0 = Vector128.LoadUnsafe(ref currentSearchSpace); + Vector128 source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128.Count); + + Vector128 result = IndexOfAnyLookupUniqueLowNibble(source0, source1, values); + if (result != Vector128.Zero) + { + return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); + } + + currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector128.Count); + } + while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd)); + } + + // We have 1-16 characters remaining. Process the first and last vector in the search space. + // They may overlap, but we'll handle that in the index calculation if we do get a match. + Debug.Assert(searchSpaceLength >= Vector128.Count, "We expect that the input is long enough for us to load a whole vector."); + { + ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector128.Count); + + ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd) + ? 
ref oneVectorAwayFromEnd + : ref currentSearchSpace; + + Vector128 source0 = Vector128.LoadUnsafe(ref firstVector); + Vector128 source1 = Vector128.LoadUnsafe(ref oneVectorAwayFromEnd); + + Vector128 result = IndexOfAnyLookupUniqueLowNibble(source0, source1, values); + if (result != Vector128.Zero) + { + return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); + } + } + + return TResultMapper.NotFound; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx2))] - private static Vector256 IndexOfAnyLookup(Vector256 source0, Vector256 source1, Vector256 bitmapLookup) + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + public static int LastIndexOfAny(ref short searchSpace, int searchSpaceLength, ref UniqueLowNibbleState state) where TNegator : struct, INegator - where TOptimizations : struct, IOptimizations { - Vector256 source = TOptimizations.PackSources(source0.AsUInt16(), source1.AsUInt16()); + if (searchSpaceLength < Vector128.Count) + { + for (int i = searchSpaceLength - 1; i >= 0; i--) + { + char c = (char)Unsafe.Add(ref searchSpace, i); + if (TNegator.NegateIfNeeded(state.Lookup.Contains128(c))) + { + return i; + } + } - Vector256 result = IndexOfAnyLookupCore(source, bitmapLookup); + return -1; + } - return TNegator.NegateIfNeeded(result); - } + ref short currentSearchSpace = ref Unsafe.Add(ref searchSpace, searchSpaceLength); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx2))] - private static Vector256 IndexOfAnyLookupCore(Vector256 source, Vector256 bitmapLookup) - { - // See comments in IndexOfAnyLookupCore(Vector128) above for more details. 
- Vector256 highNibbles = source >>> 4; - Vector256 bitMask = Avx2.Shuffle(bitmapLookup, source); - Vector256 bitPositions = Avx2.Shuffle(Vector256.Create(0x8040201008040201).AsByte(), highNibbles); - Vector256 result = bitMask & bitPositions; - return result; +#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The else clause is semantically equivalent + if (Avx2.IsSupported && searchSpaceLength > 2 * Vector128.Count) +#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough + { + Vector256 values256 = state.ValuesByLowNibble; + + if (searchSpaceLength > 2 * Vector256.Count) + { + // Process the input in chunks of 32 characters (2 * Vector256). + // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector256. + // As packing two Vector256s into a Vector256 is cheap compared to the lookup, we can effectively double the throughput. + // If the input length is a multiple of 32, don't consume the last 32 characters in this loop. + // Let the fallback below handle it instead. This is why the condition is + // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan". + ref short twoVectorsAfterStart = ref Unsafe.Add(ref searchSpace, 2 * Vector256.Count); + + do + { + currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, 2 * Vector256.Count); + + Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace); + Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); + + Vector256 result = IndexOfAnyLookupUniqueLowNibble(source0, source1, values256); + if (result != Vector256.Zero) + { + return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); + } + } + while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref twoVectorsAfterStart)); + } + + // We have 1-32 characters remaining. Process the first and last vector in the search space. 
+ // They may overlap, but we'll handle that in the index calculation if we do get a match. + Debug.Assert(searchSpaceLength >= Vector256.Count, "We expect that the input is long enough for us to load a whole vector."); + { + ref short oneVectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector256.Count); + + ref short secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAfterStart) + ? ref Unsafe.Subtract(ref currentSearchSpace, Vector256.Count) + : ref searchSpace; + + Vector256 source0 = Vector256.LoadUnsafe(ref searchSpace); + Vector256 source1 = Vector256.LoadUnsafe(ref secondVector); + + Vector256 result = IndexOfAnyLookupUniqueLowNibble(source0, source1, values256); + if (result != Vector256.Zero) + { + return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); + } + } + + return -1; + } + + Vector128 values = state.ValuesByLowNibble._lower; + + if (!Avx2.IsSupported && searchSpaceLength > 2 * Vector128.Count) + { + // Process the input in chunks of 16 characters (2 * Vector128). + // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector128. + // As packing two Vector128s into a Vector128 is cheap compared to the lookup, we can effectively double the throughput. + // If the input length is a multiple of 16, don't consume the last 16 characters in this loop. + // Let the fallback below handle it instead. This is why the condition is + // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan". 
+ ref short twoVectorsAfterStart = ref Unsafe.Add(ref searchSpace, 2 * Vector128.Count); + + do + { + currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, 2 * Vector128.Count); + + Vector128 source0 = Vector128.LoadUnsafe(ref currentSearchSpace); + Vector128 source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128.Count); + + Vector128 result = IndexOfAnyLookupUniqueLowNibble(source0, source1, values); + if (result != Vector128.Zero) + { + return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); + } + } + while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref twoVectorsAfterStart)); + } + + // We have 1-16 characters remaining. Process the first and last vector in the search space. + // They may overlap, but we'll handle that in the index calculation if we do get a match. + Debug.Assert(searchSpaceLength >= Vector128.Count, "We expect that the input is long enough for us to load a whole vector."); + { + ref short oneVectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector128.Count); + + ref short secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAfterStart) + ? 
ref Unsafe.Subtract(ref currentSearchSpace, Vector128.Count) + : ref searchSpace; + + Vector128 source0 = Vector128.LoadUnsafe(ref searchSpace); + Vector128 source1 = Vector128.LoadUnsafe(ref secondVector); + + Vector128 result = IndexOfAnyLookupUniqueLowNibble(source0, source1, values); + if (result != Vector128.Zero) + { + return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); + } + } + + return -1; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + public static bool ContainsAny(ref byte searchSpace, int searchSpaceLength, ref UniqueLowNibbleState state) + where TNegator : struct, INegator => + IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + public static int IndexOfAny(ref byte searchSpace, int searchSpaceLength, ref UniqueLowNibbleState state) + where TNegator : struct, INegator => + IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); + + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + private static TResult IndexOfAnyCore(ref byte searchSpace, int searchSpaceLength, ref UniqueLowNibbleState state) + where TResult : struct + where TNegator : struct, INegator + where TResultMapper : struct, IResultMapper + { + ref byte currentSearchSpace = ref searchSpace; + + if (searchSpaceLength < sizeof(ulong)) + { + ref byte searchSpaceEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength); + + while (!Unsafe.AreSame(ref currentSearchSpace, ref searchSpaceEnd)) + { + byte b = currentSearchSpace; + if (TNegator.NegateIfNeeded(state.Lookup.Contains(b))) + { + return TResultMapper.ScalarResult(ref searchSpace, ref 
currentSearchSpace); + } + + currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 1); + } + + return TResultMapper.NotFound; + } + +#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx2.IsSupported is false + if (Avx2.IsSupported && searchSpaceLength > Vector128.Count) +#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough + { + Vector256 values256 = state.ValuesByLowNibble; + + if (searchSpaceLength > Vector256.Count) + { + // Process the input in chunks of 32 bytes. + // If the input length is a multiple of 32, don't consume the last 32 characters in this loop. + // Let the fallback below handle it instead. This is why the condition is + // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan". + ref byte vectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector256.Count); + + do + { + Vector256 source = Vector256.LoadUnsafe(ref currentSearchSpace); + + Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupUniqueLowNibbleCore(source, values256)); + if (result != Vector256.Zero) + { + return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); + } + + currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector256.Count); + } + while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref vectorAwayFromEnd)); + } + + // We have 1-32 bytes remaining. Process the first and last half vectors in the search space. + // They may overlap, but we'll handle that in the index calculation if we do get a match. 
+ Debug.Assert(searchSpaceLength >= Vector128.Count, "We expect that the input is long enough for us to load a Vector128."); + { + ref byte halfVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector128.Count); + + ref byte firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAwayFromEnd) + ? ref halfVectorAwayFromEnd + : ref currentSearchSpace; + + Vector128 source0 = Vector128.LoadUnsafe(ref firstVector); + Vector128 source1 = Vector128.LoadUnsafe(ref halfVectorAwayFromEnd); + Vector256 source = Vector256.Create(source0, source1); + + Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupUniqueLowNibbleCore(source, values256)); + if (result != Vector256.Zero) + { + return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result); + } + } + + return TResultMapper.NotFound; + } + + Vector128 values = state.ValuesByLowNibble._lower; + + if (!Avx2.IsSupported && searchSpaceLength > Vector128.Count) + { + // Process the input in chunks of 16 bytes. + // If the input length is a multiple of 16, don't consume the last 16 characters in this loop. + // Let the fallback below handle it instead. This is why the condition is + // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan". + ref byte vectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector128.Count); + + do + { + Vector128 source = Vector128.LoadUnsafe(ref currentSearchSpace); + + Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupUniqueLowNibbleCore(source, values)); + if (result != Vector128.Zero) + { + return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); + } + + currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector128.Count); + } + while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref vectorAwayFromEnd)); + } + + // We have 1-16 bytes remaining. 
Process the first and last half vectors in the search space. + // They may overlap, but we'll handle that in the index calculation if we do get a match. + Debug.Assert(searchSpaceLength >= sizeof(ulong), "We expect that the input is long enough for us to load a ulong."); + { + ref byte halfVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - sizeof(ulong)); + + ref byte firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAwayFromEnd) + ? ref halfVectorAwayFromEnd + : ref currentSearchSpace; + + ulong source0 = Unsafe.ReadUnaligned(ref firstVector); + ulong source1 = Unsafe.ReadUnaligned(ref halfVectorAwayFromEnd); + Vector128 source = Vector128.Create(source0, source1).AsByte(); + + Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupUniqueLowNibbleCore(source, values)); + if (result != Vector128.Zero) + { + return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result); + } + } + + return TResultMapper.NotFound; + } + + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + public static int LastIndexOfAny(ref byte searchSpace, int searchSpaceLength, ref UniqueLowNibbleState state) + where TNegator : struct, INegator + { + if (searchSpaceLength < sizeof(ulong)) + { + for (int i = searchSpaceLength - 1; i >= 0; i--) + { + byte b = Unsafe.Add(ref searchSpace, i); + if (TNegator.NegateIfNeeded(state.Lookup.Contains(b))) + { + return i; + } + } + + return -1; + } + + ref byte currentSearchSpace = ref Unsafe.Add(ref searchSpace, searchSpaceLength); + +#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx2.IsSupported is false + if (Avx2.IsSupported && searchSpaceLength > Vector128.Count) +#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough + { + Vector256 values256 = 
state.ValuesByLowNibble; + + if (searchSpaceLength > Vector256.Count) + { + // Process the input in chunks of 32 bytes. + // If the input length is a multiple of 32, don't consume the last 32 characters in this loop. + // Let the fallback below handle it instead. This is why the condition is + // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan". + ref byte vectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector256.Count); + + do + { + currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, Vector256.Count); + + Vector256 source = Vector256.LoadUnsafe(ref currentSearchSpace); + + Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupUniqueLowNibbleCore(source, values256)); + if (result != Vector256.Zero) + { + return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); + } + } + while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart)); + } + + // We have 1-32 bytes remaining. Process the first and last half vectors in the search space. + // They may overlap, but we'll handle that in the index calculation if we do get a match. + Debug.Assert(searchSpaceLength >= Vector128.Count, "We expect that the input is long enough for us to load a Vector128."); + { + ref byte halfVectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector128.Count); + + ref byte secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAfterStart) + ? 
ref Unsafe.Subtract(ref currentSearchSpace, Vector128.Count) + : ref searchSpace; + + Vector128 source0 = Vector128.LoadUnsafe(ref searchSpace); + Vector128 source1 = Vector128.LoadUnsafe(ref secondVector); + Vector256 source = Vector256.Create(source0, source1); + + Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupUniqueLowNibbleCore(source, values256)); + if (result != Vector256.Zero) + { + return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); + } + } + + return -1; + } + + Vector128 values = state.ValuesByLowNibble._lower; + + if (!Avx2.IsSupported && searchSpaceLength > Vector128.Count) + { + // Process the input in chunks of 16 bytes. + // If the input length is a multiple of 16, don't consume the last 16 characters in this loop. + // Let the fallback below handle it instead. This is why the condition is + // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan". + ref byte vectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector128.Count); + + do + { + currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, Vector128.Count); + + Vector128 source = Vector128.LoadUnsafe(ref currentSearchSpace); + + Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupUniqueLowNibbleCore(source, values)); + if (result != Vector128.Zero) + { + return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); + } + } + while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart)); + } + + // We have 1-16 bytes remaining. Process the first and last half vectors in the search space. + // They may overlap, but we'll handle that in the index calculation if we do get a match. 
+ Debug.Assert(searchSpaceLength >= sizeof(ulong), "We expect that the input is long enough for us to load a ulong."); + { + ref byte halfVectorAfterStart = ref Unsafe.Add(ref searchSpace, sizeof(ulong)); + + ref byte secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAfterStart) + ? ref Unsafe.Subtract(ref currentSearchSpace, sizeof(ulong)) + : ref searchSpace; + + ulong source0 = Unsafe.ReadUnaligned(ref searchSpace); + ulong source1 = Unsafe.ReadUnaligned(ref secondVector); + Vector128 source = Vector128.Create(source0, source1).AsByte(); + + Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupUniqueLowNibbleCore(source, values)); + if (result != Vector128.Zero) + { + return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); + } + } + + return -1; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + private static Vector128 IndexOfAnyLookup(Vector128 source0, Vector128 source1, Vector128 bitmapLookup) + where TNegator : struct, INegator + where TOptimizations : struct, IOptimizations + { + Vector128 source = TOptimizations.PackSources(source0.AsUInt16(), source1.AsUInt16()); + + Vector128 result = IndexOfAnyLookupCore(source, bitmapLookup); + + return TNegator.NegateIfNeeded(result); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + private static Vector128 IndexOfAnyLookupCore(Vector128 source, Vector128 bitmapLookup) + { + // On X86, the Ssse3.Shuffle instruction will already perform an implicit 'AND 0xF' on the indices, so we can skip it. + // For values above 127, Ssse3.Shuffle will also set the result to 0. This is fine as we don't want non-ASCII values to match anyway. + Vector128 lowNibbles = Ssse3.IsSupported + ? 
source + : source & Vector128.Create((byte)0xF); + + // On ARM, we have an instruction for an arithmetic right shift of 1-byte signed values. + // The shift will map values above 127 to values above 16, which the shuffle will then map to 0. + // On X86 and WASM, use a logical right shift instead. + Vector128 highNibbles = AdvSimd.IsSupported + ? AdvSimd.ShiftRightArithmetic(source.AsSByte(), 4).AsByte() + : source >>> 4; + + // The bitmapLookup represents a 8x16 table of bits, indicating whether a character is present in the needle. + // Lookup the rows via the lower nibble and the column via the higher nibble. + Vector128 bitMask = Vector128.ShuffleUnsafe(bitmapLookup, lowNibbles); + + // For values above 127, the high nibble will be above 7. We construct the positions vector for the shuffle such that those values map to 0. + Vector128 bitPositions = Vector128.ShuffleUnsafe(Vector128.Create(0x8040201008040201, 0).AsByte(), highNibbles); + + Vector128 result = bitMask & bitPositions; + return result; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx2))] + private static Vector256 IndexOfAnyLookup(Vector256 source0, Vector256 source1, Vector256 bitmapLookup) + where TNegator : struct, INegator + where TOptimizations : struct, IOptimizations + { + Vector256 source = TOptimizations.PackSources(source0.AsUInt16(), source1.AsUInt16()); + + Vector256 result = IndexOfAnyLookupCore(source, bitmapLookup); + + return TNegator.NegateIfNeeded(result); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx2))] + private static Vector256 IndexOfAnyLookupCore(Vector256 source, Vector256 bitmapLookup) + { + // See comments in IndexOfAnyLookupCore(Vector128) above for more details. 
+ Vector256 highNibbles = source >>> 4; + Vector256 bitMask = Avx2.Shuffle(bitmapLookup, source); + Vector256 bitPositions = Avx2.Shuffle(Vector256.Create(0x8040201008040201).AsByte(), highNibbles); + Vector256 result = bitMask & bitPositions; + return result; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + private static Vector128 IndexOfAnyLookupUniqueLowNibble(Vector128 source0, Vector128 source1, Vector128 valuesByLowNibble) + where TNegator : struct, INegator + { + Vector128 source = Default.PackSources(source0.AsUInt16(), source1.AsUInt16()); + + Vector128 result = IndexOfAnyLookupUniqueLowNibbleCore(source, valuesByLowNibble); + + return TNegator.NegateIfNeeded(result); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + private static Vector128 IndexOfAnyLookupUniqueLowNibbleCore(Vector128 source, Vector128 valuesByLowNibble) + { + // Based on http://0x80.pl/articles/simd-byte-lookup.html#special-case-3-unique-lower-and-higher-nibbles + + // On X86, the Ssse3.Shuffle instruction will already perform an implicit 'AND 0xF' on the indices, so we can skip it. + // For values above 127, Ssse3.Shuffle will also set the result to 0. This is fine as we only use this approach if + // all values are <= 127 when Ssse3 is supported (see CanUseUniqueLowNibbleSearch). + // False positives from values mapped to 0 will be ruled out by the Vector128.Equals comparison below. + Vector128 lowNibbles = Ssse3.IsSupported + ? source + : source & Vector128.Create((byte)0xF); + + // We use a shuffle to look up potential matches for each byte based on its low nibble. + // Since all values have a unique low nibble, there's at most one potential match per nibble. 
+ Vector128 values = Vector128.ShuffleUnsafe(valuesByLowNibble, lowNibbles); + + // Compare potential matches with the source to rule out false positives that have a different high nibble. + return Vector128.Equals(source, values); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx2))] + private static Vector256 IndexOfAnyLookupUniqueLowNibble(Vector256 source0, Vector256 source1, Vector256 valuesByLowNibble) + where TNegator : struct, INegator + { + Vector256 source = Default.PackSources(source0.AsUInt16(), source1.AsUInt16()); + + Vector256 result = IndexOfAnyLookupUniqueLowNibbleCore(source, valuesByLowNibble); + + return TNegator.NegateIfNeeded(result); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx2))] + private static Vector256 IndexOfAnyLookupUniqueLowNibbleCore(Vector256 source, Vector256 valuesByLowNibble) + { + // See comments in IndexOfAnyLookupUniqueLowNibbleCore(Vector128) above for more details. + Vector256 values = Avx2.Shuffle(valuesByLowNibble, source); + return Vector256.Equals(source, values); } [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs index e02192a70ebbe4..fe7b2a159dad05 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs @@ -41,6 +41,13 @@ public static SearchValues Create(params ReadOnlySpan values) return new RangeByteSearchValues(minInclusive, maxInclusive); } + // Depending on the hardware, UniqueLowNibble can be faster than even range or 2 values. + // It's currently consistently faster than 4/5 values on all tested platforms (Arm, Avx2, Avx512). 
+ if (values.Length >= 4 && IndexOfAnyAsciiSearcher.CanUseUniqueLowNibbleSearch(values, minInclusive, maxInclusive)) + { + return new UniqueLowNibbleByteSearchValues(values); + } + if (values.Length <= 5) { Debug.Assert(values.Length is 2 or 3 or 4 or 5); @@ -122,26 +129,34 @@ public static SearchValues Create(params ReadOnlySpan values) : new Any3SearchValues(shortValues); } - // IndexOfAnyAsciiSearcher for chars is slower than Any3CharSearchValues, but faster than Any4SearchValues - if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && maxInclusive < 128) + // If the values are sets of 2 ASCII letters with both cases, we can use an approach that + // reduces the number of comparisons by masking off the bit that differs between lower and upper case (0x20). + // While this most commonly applies to ASCII letters, it also works for other values that differ by 0x20 (e.g. "[]{}" => "{}"). + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && PackedSpanHelpers.PackedIndexOfIsSupported && + maxInclusive < 128 && values.Length == 4 && minInclusive > 0) { - // If the values are sets of 2 ASCII letters with both cases, we can use an approach that - // reduces the number of comparisons by masking off the bit that differs between lower and upper case (0x20). - // While this most commonly applies to ASCII letters, it also works for other values that differ by 0x20 (e.g. "[]{}" => "{}"). - if (PackedSpanHelpers.PackedIndexOfIsSupported && values.Length == 4 && minInclusive > 0) + Span copy = stackalloc char[4]; + values.CopyTo(copy); + copy.Sort(); + + if ((copy[0] ^ copy[2]) == 0x20 && + (copy[1] ^ copy[3]) == 0x20) { - Span copy = stackalloc char[4]; - values.CopyTo(copy); - copy.Sort(); - - if ((copy[0] ^ copy[2]) == 0x20 && - (copy[1] ^ copy[3]) == 0x20) - { - // We pick the higher two values (with the 0x20 bit set). "AaBb" => 'a', 'b' - return new Any2CharPackedIgnoreCaseSearchValues(copy[2], copy[3]); - } + // We pick the higher two values (with the 0x20 bit set). 
"AaBb" => 'a', 'b' + return new Any2CharPackedIgnoreCaseSearchValues(copy[2], copy[3]); } + } + // Depending on the hardware, UniqueLowNibble can be faster than most implementations we currently prefer above. + // It's currently consistently faster than 4/5 values or Ascii on all tested platforms (Arm, Avx2, Avx512). + if (IndexOfAnyAsciiSearcher.CanUseUniqueLowNibbleSearch(values, minInclusive, maxInclusive)) + { + return new UniqueLowNibbleCharSearchValues(values); + } + + // IndexOfAnyAsciiSearcher for chars is slower than Any3CharSearchValues, but faster than Any4SearchValues + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && maxInclusive < 128) + { return (Ssse3.IsSupported || PackedSimd.IsSupported) && minInclusive == 0 ? new AsciiCharSearchValues(values) : new AsciiCharSearchValues(values); @@ -162,7 +177,7 @@ public static SearchValues Create(params ReadOnlySpan values) // If we have both ASCII and non-ASCII characters, use an implementation that // does an optimistic ASCII fast-path and then falls back to the ProbabilisticMap. - return (Ssse3.IsSupported || PackedSimd.IsSupported) && values.Contains('\0') + return (Ssse3.IsSupported || PackedSimd.IsSupported) && minInclusive == 0 ? new ProbabilisticWithAsciiCharSearchValues(values, maxInclusive) : new ProbabilisticWithAsciiCharSearchValues(values, maxInclusive); } diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/UniqueLowNibbleByteSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/UniqueLowNibbleByteSearchValues.cs new file mode 100644 index 00000000000000..af98b0393dbe2e --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/UniqueLowNibbleByteSearchValues.cs @@ -0,0 +1,74 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; +using System.Runtime.Intrinsics.X86; + +namespace System.Buffers +{ + internal sealed class UniqueLowNibbleByteSearchValues : SearchValues + { + private IndexOfAnyAsciiSearcher.UniqueLowNibbleState _state; + + public UniqueLowNibbleByteSearchValues(ReadOnlySpan values) => + IndexOfAnyAsciiSearcher.ComputeUniqueLowNibbleState(values, out _state); + + internal override byte[] GetValues() => + _state.Lookup.GetByteValues(); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal override bool ContainsCore(byte value) => + _state.Lookup.Contains(value); + + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal override int IndexOfAny(ReadOnlySpan span) => + IndexOfAnyAsciiSearcher.IndexOfAny( + ref MemoryMarshal.GetReference(span), span.Length, ref _state); + + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal override int IndexOfAnyExcept(ReadOnlySpan span) => + IndexOfAnyAsciiSearcher.IndexOfAny( + ref MemoryMarshal.GetReference(span), span.Length, ref _state); + + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal override int LastIndexOfAny(ReadOnlySpan span) => + IndexOfAnyAsciiSearcher.LastIndexOfAny( + ref MemoryMarshal.GetReference(span), span.Length, ref _state); + + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal override int 
LastIndexOfAnyExcept(ReadOnlySpan span) => + IndexOfAnyAsciiSearcher.LastIndexOfAny( + ref MemoryMarshal.GetReference(span), span.Length, ref _state); + + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal override bool ContainsAny(ReadOnlySpan span) => + IndexOfAnyAsciiSearcher.ContainsAny( + ref MemoryMarshal.GetReference(span), span.Length, ref _state); + + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal override bool ContainsAnyExcept(ReadOnlySpan span) => + IndexOfAnyAsciiSearcher.ContainsAny( + ref MemoryMarshal.GetReference(span), span.Length, ref _state); + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/UniqueLowNibbleCharSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/UniqueLowNibbleCharSearchValues.cs new file mode 100644 index 00000000000000..857a2cbce484bc --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/UniqueLowNibbleCharSearchValues.cs @@ -0,0 +1,74 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; +using System.Runtime.Intrinsics.X86; + +namespace System.Buffers +{ + internal sealed class UniqueLowNibbleCharSearchValues : SearchValues + { + private IndexOfAnyAsciiSearcher.UniqueLowNibbleState _state; + + public UniqueLowNibbleCharSearchValues(ReadOnlySpan values) => + IndexOfAnyAsciiSearcher.ComputeUniqueLowNibbleState(values, out _state); + + internal override char[] GetValues() => + _state.Lookup.GetCharValues(); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal override bool ContainsCore(char value) => + _state.Lookup.Contains256(value); + + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal override int IndexOfAny(ReadOnlySpan span) => + IndexOfAnyAsciiSearcher.IndexOfAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); + + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal override int IndexOfAnyExcept(ReadOnlySpan span) => + IndexOfAnyAsciiSearcher.IndexOfAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); + + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal override int LastIndexOfAny(ReadOnlySpan span) => + IndexOfAnyAsciiSearcher.LastIndexOfAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); + + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + 
internal override int LastIndexOfAnyExcept(ReadOnlySpan span) => + IndexOfAnyAsciiSearcher.LastIndexOfAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); + + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal override bool ContainsAny(ReadOnlySpan span) => + IndexOfAnyAsciiSearcher.ContainsAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); + + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(PackedSimd))] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal override bool ContainsAnyExcept(ReadOnlySpan span) => + IndexOfAnyAsciiSearcher.ContainsAny( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); + } +} From 8ea4ecc770f54eeb9d376933d947ef1920d4d87d Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Fri, 6 Sep 2024 19:16:00 +0200 Subject: [PATCH 2/5] More generics --- .../System.Private.CoreLib.Shared.projitems | 2 - .../Any2CharPackedIgnoreCaseSearchValues.cs | 4 +- .../SearchValues/AsciiByteSearchValues.cs | 34 +- .../SearchValues/AsciiCharSearchValues.cs | 37 +- .../src/System/SearchValues/BitVector256.cs | 4 - .../SearchValues/IndexOfAnyAsciiSearcher.cs | 831 +++--------------- .../ProbabilisticWithAsciiCharSearchValues.cs | 12 +- .../src/System/SearchValues/SearchValues.cs | 16 +- .../Strings/Helpers/AhoCorasick.cs | 6 +- .../UniqueLowNibbleByteSearchValues.cs | 74 -- .../UniqueLowNibbleCharSearchValues.cs | 74 -- 11 files changed, 196 insertions(+), 898 deletions(-) delete mode 100644 src/libraries/System.Private.CoreLib/src/System/SearchValues/UniqueLowNibbleByteSearchValues.cs delete mode 100644 src/libraries/System.Private.CoreLib/src/System/SearchValues/UniqueLowNibbleCharSearchValues.cs diff --git 
a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index b686c160d96929..72f6a1579d80fd 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -440,8 +440,6 @@ - - diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharPackedIgnoreCaseSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharPackedIgnoreCaseSearchValues.cs index 1073fcf3c81858..7b789febead068 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharPackedIgnoreCaseSearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Any2CharPackedIgnoreCaseSearchValues.cs @@ -53,7 +53,7 @@ internal override int IndexOfAnyExcept(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] internal override int LastIndexOfAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.LastIndexOfAny( + IndexOfAnyAsciiSearcher.LastIndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -61,7 +61,7 @@ internal override int LastIndexOfAny(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] internal override int LastIndexOfAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.LastIndexOfAny( + IndexOfAnyAsciiSearcher.LastIndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiByteSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiByteSearchValues.cs index 57b755b2ce8a42..a3562f8c98a7d6 100644 --- 
a/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiByteSearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiByteSearchValues.cs @@ -1,20 +1,36 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; +using System.Text; namespace System.Buffers { - internal sealed class AsciiByteSearchValues : SearchValues + internal sealed class AsciiByteSearchValues : SearchValues + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst { private IndexOfAnyAsciiSearcher.AsciiState _state; - public AsciiByteSearchValues(ReadOnlySpan values) => - IndexOfAnyAsciiSearcher.ComputeAsciiState(values, out _state); + public AsciiByteSearchValues(ReadOnlySpan values) + { + // Despite the name being Ascii, this type may be used with non-ASCII values on ARM. + // See IndexOfAnyAsciiSearcher.CanUseUniqueLowNibbleSearch. 
+ Debug.Assert(Ascii.IsValid(values) || (AdvSimd.IsSupported && TUniqueLowNibble.Value)); + + if (TUniqueLowNibble.Value) + { + IndexOfAnyAsciiSearcher.ComputeUniqueLowNibbleState(values, out _state); + } + else + { + IndexOfAnyAsciiSearcher.ComputeAsciiState(values, out _state); + } + } internal override byte[] GetValues() => _state.Lookup.GetByteValues(); @@ -28,7 +44,7 @@ internal override bool ContainsCore(byte value) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override int IndexOfAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.IndexOfAny( + IndexOfAnyAsciiSearcher.IndexOfAny( ref MemoryMarshal.GetReference(span), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -36,7 +52,7 @@ internal override int IndexOfAny(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override int IndexOfAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.IndexOfAny( + IndexOfAnyAsciiSearcher.IndexOfAny( ref MemoryMarshal.GetReference(span), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -44,7 +60,7 @@ internal override int IndexOfAnyExcept(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override int LastIndexOfAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.LastIndexOfAny( + IndexOfAnyAsciiSearcher.LastIndexOfAny( ref MemoryMarshal.GetReference(span), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -52,7 +68,7 @@ internal override int LastIndexOfAny(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override int LastIndexOfAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.LastIndexOfAny( + IndexOfAnyAsciiSearcher.LastIndexOfAny( ref MemoryMarshal.GetReference(span), span.Length, ref _state); 
[CompExactlyDependsOn(typeof(Ssse3))] @@ -60,7 +76,7 @@ internal override int LastIndexOfAnyExcept(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override bool ContainsAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.ContainsAny( + IndexOfAnyAsciiSearcher.ContainsAny( ref MemoryMarshal.GetReference(span), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -68,7 +84,7 @@ internal override bool ContainsAny(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override bool ContainsAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.ContainsAny( + IndexOfAnyAsciiSearcher.ContainsAny( ref MemoryMarshal.GetReference(span), span.Length, ref _state); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiCharSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiCharSearchValues.cs index 111a3ad313b9da..160920108cdb7f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiCharSearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiCharSearchValues.cs @@ -1,35 +1,52 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; +using System.Text; namespace System.Buffers { - internal sealed class AsciiCharSearchValues : SearchValues + internal sealed class AsciiCharSearchValues : SearchValues where TOptimizations : struct, IndexOfAnyAsciiSearcher.IOptimizations + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst { private IndexOfAnyAsciiSearcher.AsciiState _state; - public AsciiCharSearchValues(ReadOnlySpan values) => - IndexOfAnyAsciiSearcher.ComputeAsciiState(values, out _state); + public AsciiCharSearchValues(ReadOnlySpan values) + { + // Despite the name being Ascii, this type may be used with non-ASCII values on ARM. + // See IndexOfAnyAsciiSearcher.CanUseUniqueLowNibbleSearch. + Debug.Assert(Ascii.IsValid(values) || (AdvSimd.IsSupported && TUniqueLowNibble.Value)); + + if (TUniqueLowNibble.Value) + { + IndexOfAnyAsciiSearcher.ComputeUniqueLowNibbleState(values, out _state); + } + else + { + IndexOfAnyAsciiSearcher.ComputeAsciiState(values, out _state); + } + } internal override char[] GetValues() => _state.Lookup.GetCharValues(); + // Despite the name being Ascii, this type may be used for non-ASCII values. 
[MethodImpl(MethodImplOptions.AggressiveInlining)] internal override bool ContainsCore(char value) => - _state.Lookup.Contains128(value); + _state.Lookup.Contains256(value); [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override int IndexOfAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.IndexOfAny( + IndexOfAnyAsciiSearcher.IndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -37,7 +54,7 @@ internal override int IndexOfAny(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override int IndexOfAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.IndexOfAny( + IndexOfAnyAsciiSearcher.IndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -45,7 +62,7 @@ internal override int IndexOfAnyExcept(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override int LastIndexOfAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.LastIndexOfAny( + IndexOfAnyAsciiSearcher.LastIndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -53,7 +70,7 @@ internal override int LastIndexOfAny(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override int LastIndexOfAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.LastIndexOfAny( + IndexOfAnyAsciiSearcher.LastIndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -61,7 +78,7 @@ internal override int LastIndexOfAnyExcept(ReadOnlySpan span) => 
[CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override bool ContainsAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.ContainsAny( + IndexOfAnyAsciiSearcher.ContainsAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); [CompExactlyDependsOn(typeof(Ssse3))] @@ -69,7 +86,7 @@ internal override bool ContainsAny(ReadOnlySpan span) => [CompExactlyDependsOn(typeof(PackedSimd))] [MethodImpl(MethodImplOptions.AggressiveInlining)] internal override bool ContainsAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.ContainsAny( + IndexOfAnyAsciiSearcher.ContainsAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/BitVector256.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/BitVector256.cs index 56e68907c86d70..74d0836960bddc 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/BitVector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/BitVector256.cs @@ -31,10 +31,6 @@ public void Set(int c) _values[offset] |= significantBit; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public readonly bool Contains128(char c) => - c < 128 && ContainsUnchecked(c); - [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly bool Contains256(char c) => c < 256 && ContainsUnchecked(c); diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs index 9e9034e0faf8b2..c05913dbe291bb 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs @@ -13,6 +13,7 @@ namespace System.Buffers { internal static class IndexOfAnyAsciiSearcher { + // Reused for both 
ASCII and UniqueLowNibble searches since the state looks the same (a Vector128). public struct AsciiState(Vector128 bitmap, BitVector256 lookup) { public Vector256 Bitmap = Vector256.Create(bitmap); @@ -29,12 +30,6 @@ public readonly struct AnyByteState(Vector128 bitmap0, Vector128 bit public readonly BitVector256 Lookup = lookup; } - public readonly struct UniqueLowNibbleState(Vector128 valuesByLowNibble, BitVector256 lookup) - { - public readonly Vector256 ValuesByLowNibble = Vector256.Create(valuesByLowNibble); - public readonly BitVector256 Lookup = lookup; - } - internal static bool IsVectorizationSupported => Ssse3.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported; [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -100,7 +95,7 @@ internal static unsafe void ComputeAsciiState(ReadOnlySpan values, out Asc state = new AsciiState(bitmapSpace, lookupLocal); } - public static bool CanUseUniqueLowNibbleSearch(ReadOnlySpan values, int minInclusive, int maxInclusive) + public static bool CanUseUniqueLowNibbleSearch(ReadOnlySpan values, int maxInclusive) where T : struct, IUnsignedNumber { Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(char)); @@ -112,25 +107,15 @@ public static bool CanUseUniqueLowNibbleSearch(ReadOnlySpan values, int mi if (Ssse3.IsSupported && maxInclusive > 127) { - // We could support [1, 254] if we did the "& 0xF" before calling into Shuffle in IndexOfAnyLookupUniqueLowNibbleCore. + // We could support [1, 254] if we did the "& 0xF" before calling into Shuffle in IndexOfAnyLookupCore. // We currently optimize for the common case of ASCII characters instead, saving an instruction there. return false; } - if (typeof(T) == typeof(char)) + if (typeof(T) == typeof(char) && maxInclusive >= byte.MaxValue) { - if (maxInclusive >= byte.MaxValue) - { - // When packing UTF-16 characters into bytes, values may saturate to 255 (false positives), hence ">=" instead of ">". 
- return false; - } - - if ((Ssse3.IsSupported || PackedSimd.IsSupported) && minInclusive == 0) - { - // When packing UTF-16 characters into bytes, values may saturate to 0 (false positives). - // We could also support 0 if we flowed Ssse3AndWasmHandleZeroInNeedle through. - return false; - } + // When packing UTF-16 characters into bytes, values may saturate to 255 (false positives), hence ">=" instead of ">". + return false; } // We assume there are no duplicates to simplify the logic (if there are any, they just won't use this searching approach). @@ -152,7 +137,7 @@ public static bool CanUseUniqueLowNibbleSearch(ReadOnlySpan values, int mi return true; } - public static void ComputeUniqueLowNibbleState(ReadOnlySpan values, out UniqueLowNibbleState state) + public static void ComputeUniqueLowNibbleState(ReadOnlySpan values, out AsciiState state) where T : struct, IUnsignedNumber { Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(char)); @@ -174,7 +159,7 @@ public static void ComputeUniqueLowNibbleState(ReadOnlySpan values, out Un valuesByLowNibble.SetElementUnsafe(0, (byte)1); } - state = new UniqueLowNibbleState(valuesByLowNibble, lookup); + state = new AsciiState(valuesByLowNibble, lookup); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -230,8 +215,8 @@ private static unsafe bool TryIndexOfAny(ref short searchSpace, int se state.Bitmap = Vector256.Create(state.Bitmap.GetLower()); index = (Ssse3.IsSupported || PackedSimd.IsSupported) && needleContainsZero - ? IndexOfAny(ref searchSpace, searchSpaceLength, ref state) - : IndexOfAny(ref searchSpace, searchSpaceLength, ref state); + ? 
IndexOfAny(ref searchSpace, searchSpaceLength, ref state) + : IndexOfAny(ref searchSpace, searchSpaceLength, ref state); return true; } } @@ -257,8 +242,8 @@ private static unsafe bool TryLastIndexOfAny(ref short searchSpace, in state.Bitmap = Vector256.Create(state.Bitmap.GetLower()); index = (Ssse3.IsSupported || PackedSimd.IsSupported) && needleContainsZero - ? LastIndexOfAny(ref searchSpace, searchSpaceLength, ref state) - : LastIndexOfAny(ref searchSpace, searchSpaceLength, ref state); + ? LastIndexOfAny(ref searchSpace, searchSpaceLength, ref state) + : LastIndexOfAny(ref searchSpace, searchSpaceLength, ref state); return true; } } @@ -271,27 +256,30 @@ private static unsafe bool TryLastIndexOfAny(ref short searchSpace, in [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - public static bool ContainsAny(ref short searchSpace, int searchSpaceLength, ref AsciiState state) + public static bool ContainsAny(ref short searchSpace, int searchSpaceLength, ref AsciiState state) where TNegator : struct, INegator - where TOptimizations : struct, IOptimizations => - IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); + where TOptimizations : struct, IOptimizations + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst => + IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - public static int IndexOfAny(ref short searchSpace, int searchSpaceLength, ref AsciiState state) + public static int IndexOfAny(ref short searchSpace, int searchSpaceLength, ref AsciiState state) where TNegator : struct, INegator - where TOptimizations : struct, IOptimizations => - IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); + where TOptimizations : struct, IOptimizations + where TUniqueLowNibble : struct, 
SearchValues.IRuntimeConst => + IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - private static TResult IndexOfAnyCore(ref short searchSpace, int searchSpaceLength, ref AsciiState state) + private static TResult IndexOfAnyCore(ref short searchSpace, int searchSpaceLength, ref AsciiState state) where TResult : struct where TNegator : struct, INegator where TOptimizations : struct, IOptimizations + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst where TResultMapper : struct, IResultMapper { ref short currentSearchSpace = ref searchSpace; @@ -303,7 +291,7 @@ private static TResult IndexOfAnyCore source0 = Vector256.LoadUnsafe(ref currentSearchSpace); Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); - Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); + Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); if (result != Vector256.Zero) { return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); @@ -359,7 +347,7 @@ private static TResult IndexOfAnyCore source0 = Vector256.LoadUnsafe(ref firstVector); Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd); - Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); + Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); if (result != Vector256.Zero) { return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); @@ -388,7 +376,7 @@ private static TResult IndexOfAnyCore source0 = Vector128.LoadUnsafe(ref currentSearchSpace); Vector128 source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128.Count); - Vector128 result = IndexOfAnyLookup(source0, source1, bitmap); + Vector128 result = IndexOfAnyLookup(source0, source1, bitmap); if (result != Vector128.Zero) { return 
TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); @@ -412,7 +400,7 @@ private static TResult IndexOfAnyCore source0 = Vector128.LoadUnsafe(ref firstVector); Vector128 source1 = Vector128.LoadUnsafe(ref oneVectorAwayFromEnd); - Vector128 result = IndexOfAnyLookup(source0, source1, bitmap); + Vector128 result = IndexOfAnyLookup(source0, source1, bitmap); if (result != Vector128.Zero) { return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); @@ -425,16 +413,17 @@ private static TResult IndexOfAnyCore(ref short searchSpace, int searchSpaceLength, ref AsciiState state) + public static int LastIndexOfAny(ref short searchSpace, int searchSpaceLength, ref AsciiState state) where TNegator : struct, INegator where TOptimizations : struct, IOptimizations + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst { if (searchSpaceLength < Vector128.Count) { for (int i = searchSpaceLength - 1; i >= 0; i--) { char c = (char)Unsafe.Add(ref searchSpace, i); - if (TNegator.NegateIfNeeded(state.Lookup.Contains128(c))) + if (TNegator.NegateIfNeeded(state.Lookup.Contains256(c))) { return i; } @@ -468,7 +457,7 @@ public static int LastIndexOfAny(ref short searchSpace Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace); Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); - Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); + Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); if (result != Vector256.Zero) { return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); @@ -490,7 +479,7 @@ public static int LastIndexOfAny(ref short searchSpace Vector256 source0 = Vector256.LoadUnsafe(ref searchSpace); Vector256 source1 = Vector256.LoadUnsafe(ref secondVector); - Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); + Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); if (result != 
Vector256.Zero) { return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); @@ -519,7 +508,7 @@ public static int LastIndexOfAny(ref short searchSpace Vector128 source0 = Vector128.LoadUnsafe(ref currentSearchSpace); Vector128 source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128.Count); - Vector128 result = IndexOfAnyLookup(source0, source1, bitmap); + Vector128 result = IndexOfAnyLookup(source0, source1, bitmap); if (result != Vector128.Zero) { return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); @@ -541,7 +530,7 @@ public static int LastIndexOfAny(ref short searchSpace Vector128 source0 = Vector128.LoadUnsafe(ref searchSpace); Vector128 source1 = Vector128.LoadUnsafe(ref secondVector); - Vector128 result = IndexOfAnyLookup(source0, source1, bitmap); + Vector128 result = IndexOfAnyLookup(source0, source1, bitmap); if (result != Vector128.Zero) { return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); @@ -555,24 +544,28 @@ public static int LastIndexOfAny(ref short searchSpace [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - public static bool ContainsAny(ref byte searchSpace, int searchSpaceLength, ref AsciiState state) - where TNegator : struct, INegator => - IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); + public static bool ContainsAny(ref byte searchSpace, int searchSpaceLength, ref AsciiState state) + where TNegator : struct, INegator + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst => + IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - public static int IndexOfAny(ref byte searchSpace, int searchSpaceLength, ref AsciiState state) - where TNegator : struct, INegator => - IndexOfAnyCore>(ref 
searchSpace, searchSpaceLength, ref state); + public static int IndexOfAny(ref byte searchSpace, int searchSpaceLength, ref AsciiState state) + where TNegator : struct, INegator + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst => + IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - private static TResult IndexOfAnyCore(ref byte searchSpace, int searchSpaceLength, ref AsciiState state) + private static TResult IndexOfAnyCore(ref byte searchSpace, int searchSpaceLength, ref AsciiState state) where TResult : struct where TNegator : struct, INegator + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst + where TResultMapper : struct, IResultMapper { ref byte currentSearchSpace = ref searchSpace; @@ -613,7 +606,7 @@ private static TResult IndexOfAnyCore(ref byte { Vector256 source = Vector256.LoadUnsafe(ref currentSearchSpace); - Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); + Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); if (result != Vector256.Zero) { return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); @@ -638,7 +631,7 @@ private static TResult IndexOfAnyCore(ref byte Vector128 source1 = Vector128.LoadUnsafe(ref halfVectorAwayFromEnd); Vector256 source = Vector256.Create(source0, source1); - Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); + Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); if (result != Vector256.Zero) { return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result); @@ -662,7 +655,7 @@ private static TResult IndexOfAnyCore(ref byte { Vector128 source = Vector128.LoadUnsafe(ref currentSearchSpace); - Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, 
bitmap)); + Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap)); if (result != Vector128.Zero) { return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); @@ -687,7 +680,7 @@ private static TResult IndexOfAnyCore(ref byte ulong source1 = Unsafe.ReadUnaligned(ref halfVectorAwayFromEnd); Vector128 source = Vector128.Create(source0, source1).AsByte(); - Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap)); + Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap)); if (result != Vector128.Zero) { return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result); @@ -700,8 +693,9 @@ private static TResult IndexOfAnyCore(ref byte [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - public static int LastIndexOfAny(ref byte searchSpace, int searchSpaceLength, ref AsciiState state) + public static int LastIndexOfAny(ref byte searchSpace, int searchSpaceLength, ref AsciiState state) where TNegator : struct, INegator + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst { if (searchSpaceLength < sizeof(ulong)) { @@ -739,7 +733,7 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpace Vector256 source = Vector256.LoadUnsafe(ref currentSearchSpace); - Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); + Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); if (result != Vector256.Zero) { return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); @@ -762,7 +756,7 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpace Vector128 source1 = Vector128.LoadUnsafe(ref secondVector); Vector256 source = Vector256.Create(source0, source1); - Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); + Vector256 result = 
TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); if (result != Vector256.Zero) { return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); @@ -788,7 +782,7 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpace Vector128 source = Vector128.LoadUnsafe(ref currentSearchSpace); - Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap)); + Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap)); if (result != Vector128.Zero) { return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); @@ -811,7 +805,7 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpace ulong source1 = Unsafe.ReadUnaligned(ref secondVector); Vector128 source = Vector128.Create(source0, source1).AsByte(); - Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap)); + Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap)); if (result != Vector128.Zero) { return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); @@ -1100,682 +1094,105 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpace } [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(Sse2))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - public static bool ContainsAny(ref short searchSpace, int searchSpaceLength, ref UniqueLowNibbleState state) - where TNegator : struct, INegator => - IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); + private static Vector128 IndexOfAnyLookup(Vector128 source0, Vector128 source1, Vector128 bitmapLookup) + where TNegator : struct, INegator + where TOptimizations : struct, IOptimizations + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst + { + Vector128 source = TOptimizations.PackSources(source0.AsUInt16(), source1.AsUInt16()); - 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - public static int IndexOfAny(ref short searchSpace, int searchSpaceLength, ref UniqueLowNibbleState state) - where TNegator : struct, INegator => - IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); + Vector128 result = IndexOfAnyLookupCore(source, bitmapLookup); + + return TNegator.NegateIfNeeded(result); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(PackedSimd))] - private static TResult IndexOfAnyCore(ref short searchSpace, int searchSpaceLength, ref UniqueLowNibbleState state) - where TResult : struct - where TNegator : struct, INegator - where TResultMapper : struct, IResultMapper + private static Vector128 IndexOfAnyLookupCore(Vector128 source, Vector128 bitmapLookup) + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst { - ref short currentSearchSpace = ref searchSpace; - - if (searchSpaceLength < Vector128.Count) - { - ref short searchSpaceEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength); - - while (!Unsafe.AreSame(ref currentSearchSpace, ref searchSpaceEnd)) - { - char c = (char)currentSearchSpace; - if (TNegator.NegateIfNeeded(state.Lookup.Contains128(c))) - { - return TResultMapper.ScalarResult(ref searchSpace, ref currentSearchSpace); - } - - currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 1); - } - - return TResultMapper.NotFound; - } - -#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx2.IsSupported is false - if (Avx2.IsSupported && searchSpaceLength > 2 * Vector128.Count) -#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough + if (TUniqueLowNibble.Value) { - Vector256 values256 = 
state.ValuesByLowNibble; - - if (searchSpaceLength > 2 * Vector256.Count) - { - // Process the input in chunks of 32 characters (2 * Vector256). - // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector256. - // As packing two Vector256s into a Vector256 is cheap compared to the lookup, we can effectively double the throughput. - // If the input length is a multiple of 32, don't consume the last 32 characters in this loop. - // Let the fallback below handle it instead. This is why the condition is - // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan". - ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - (2 * Vector256.Count)); - - do - { - Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace); - Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); - - Vector256 result = IndexOfAnyLookupUniqueLowNibble(source0, source1, values256); - if (result != Vector256.Zero) - { - return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); - } - - currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count); - } - while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd)); - } - - // We have 1-32 characters remaining. Process the first and last vector in the search space. - // They may overlap, but we'll handle that in the index calculation if we do get a match. - Debug.Assert(searchSpaceLength >= Vector256.Count, "We expect that the input is long enough for us to load a whole vector."); - { - ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector256.Count); - - ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd) - ? 
ref oneVectorAwayFromEnd - : ref currentSearchSpace; + // Based on http://0x80.pl/articles/simd-byte-lookup.html#special-case-3-unique-lower-and-higher-nibbles - Vector256 source0 = Vector256.LoadUnsafe(ref firstVector); - Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd); + // On X86, the Ssse3.Shuffle instruction will already perform an implicit 'AND 0xF' on the indices, so we can skip it. + // For values above 127, Ssse3.Shuffle will also set the result to 0. This is fine as we only use this approach if + // all values are <= 127 when Ssse3 is supported (see CanUseUniqueLowNibbleSearch). + // False positives from values mapped to 0 will be ruled out by the Vector128.Equals comparison below. + Vector128 lowNibbles = Ssse3.IsSupported + ? source + : source & Vector128.Create((byte)0xF); - Vector256 result = IndexOfAnyLookupUniqueLowNibble(source0, source1, values256); - if (result != Vector256.Zero) - { - return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); - } - } + // We use a shuffle to look up potential matches for each byte based on its low nibble. + // Since all values have a unique low nibble, there's at most one potential match per nibble. + Vector128 values = Vector128.ShuffleUnsafe(bitmapLookup, lowNibbles); - return TResultMapper.NotFound; + // Compare potential matches with the source to rule out false positives that have a different high nibble. + return Vector128.Equals(source, values); } - - Vector128 values = state.ValuesByLowNibble._lower; - -#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx2.IsSupported is false - if (!Avx2.IsSupported && searchSpaceLength > 2 * Vector128.Count) -#pragma warning restore IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough + else { - // Process the input in chunks of 16 characters (2 * Vector128). 
- // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector128. - // As packing two Vector128s into a Vector128 is cheap compared to the lookup, we can effectively double the throughput. - // If the input length is a multiple of 16, don't consume the last 16 characters in this loop. - // Let the fallback below handle it instead. This is why the condition is - // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan". - ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - (2 * Vector128.Count)); - - do - { - Vector128 source0 = Vector128.LoadUnsafe(ref currentSearchSpace); - Vector128 source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128.Count); - - Vector128 result = IndexOfAnyLookupUniqueLowNibble(source0, source1, values); - if (result != Vector128.Zero) - { - return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); - } - - currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector128.Count); - } - while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd)); - } + // On X86, the Ssse3.Shuffle instruction will already perform an implicit 'AND 0xF' on the indices, so we can skip it. + // For values above 127, Ssse3.Shuffle will also set the result to 0. This is fine as we don't want non-ASCII values to match anyway. + Vector128 lowNibbles = Ssse3.IsSupported + ? source + : source & Vector128.Create((byte)0xF); - // We have 1-16 characters remaining. Process the first and last vector in the search space. - // They may overlap, but we'll handle that in the index calculation if we do get a match. 
- Debug.Assert(searchSpaceLength >= Vector128.Count, "We expect that the input is long enough for us to load a whole vector."); - { - ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector128.Count); + // On ARM, we have an instruction for an arithmetic right shift of 1-byte signed values. + // The shift will map values above 127 to values above 16, which the shuffle will then map to 0. + // On X86 and WASM, use a logical right shift instead. + Vector128 highNibbles = AdvSimd.IsSupported + ? AdvSimd.ShiftRightArithmetic(source.AsSByte(), 4).AsByte() + : source >>> 4; - ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd) - ? ref oneVectorAwayFromEnd - : ref currentSearchSpace; + // The bitmapLookup represents an 8x16 table of bits, indicating whether a character is present in the needle. + // Look up the rows via the lower nibble and the column via the higher nibble. + Vector128 bitMask = Vector128.ShuffleUnsafe(bitmapLookup, lowNibbles); - Vector128 source0 = Vector128.LoadUnsafe(ref firstVector); - Vector128 source1 = Vector128.LoadUnsafe(ref oneVectorAwayFromEnd); + // For values above 127, the high nibble will be above 7. We construct the positions vector for the shuffle such that those values map to 0.
+ Vector128 bitPositions = Vector128.ShuffleUnsafe(Vector128.Create(0x8040201008040201, 0).AsByte(), highNibbles); - Vector128 result = IndexOfAnyLookupUniqueLowNibble(source0, source1, values); - if (result != Vector128.Zero) - { - return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); - } + return bitMask & bitPositions; } - - return TResultMapper.NotFound; } - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - public static int LastIndexOfAny(ref short searchSpace, int searchSpaceLength, ref UniqueLowNibbleState state) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx2))] + private static Vector256 IndexOfAnyLookup(Vector256 source0, Vector256 source1, Vector256 bitmapLookup) where TNegator : struct, INegator + where TOptimizations : struct, IOptimizations + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst { - if (searchSpaceLength < Vector128.Count) - { - for (int i = searchSpaceLength - 1; i >= 0; i--) - { - char c = (char)Unsafe.Add(ref searchSpace, i); - if (TNegator.NegateIfNeeded(state.Lookup.Contains128(c))) - { - return i; - } - } + Vector256 source = TOptimizations.PackSources(source0.AsUInt16(), source1.AsUInt16()); - return -1; - } + Vector256 result = IndexOfAnyLookupCore(source, bitmapLookup); - ref short currentSearchSpace = ref Unsafe.Add(ref searchSpace, searchSpaceLength); + return TNegator.NegateIfNeeded(result); + } -#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The else clause is semantically equivalent - if (Avx2.IsSupported && searchSpaceLength > 2 * Vector128.Count) -#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(Avx2))] + private static Vector256 IndexOfAnyLookupCore(Vector256 source, Vector256 
bitmapLookup) + where TUniqueLowNibble : struct, SearchValues.IRuntimeConst + { + // See comments in IndexOfAnyLookupCore(Vector128) above for more details. + if (TUniqueLowNibble.Value) { - Vector256 values256 = state.ValuesByLowNibble; - - if (searchSpaceLength > 2 * Vector256.Count) - { - // Process the input in chunks of 32 characters (2 * Vector256). - // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector256. - // As packing two Vector256s into a Vector256 is cheap compared to the lookup, we can effectively double the throughput. - // If the input length is a multiple of 32, don't consume the last 32 characters in this loop. - // Let the fallback below handle it instead. This is why the condition is - // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan". - ref short twoVectorsAfterStart = ref Unsafe.Add(ref searchSpace, 2 * Vector256.Count); - - do - { - currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, 2 * Vector256.Count); - - Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace); - Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); - - Vector256 result = IndexOfAnyLookupUniqueLowNibble(source0, source1, values256); - if (result != Vector256.Zero) - { - return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); - } - } - while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref twoVectorsAfterStart)); - } - - // We have 1-32 characters remaining. Process the first and last vector in the search space. - // They may overlap, but we'll handle that in the index calculation if we do get a match. 
- Debug.Assert(searchSpaceLength >= Vector256.Count, "We expect that the input is long enough for us to load a whole vector."); - { - ref short oneVectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector256.Count); - - ref short secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAfterStart) - ? ref Unsafe.Subtract(ref currentSearchSpace, Vector256.Count) - : ref searchSpace; - - Vector256 source0 = Vector256.LoadUnsafe(ref searchSpace); - Vector256 source1 = Vector256.LoadUnsafe(ref secondVector); - - Vector256 result = IndexOfAnyLookupUniqueLowNibble(source0, source1, values256); - if (result != Vector256.Zero) - { - return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); - } - } - - return -1; + Vector256 values = Avx2.Shuffle(bitmapLookup, source); + return Vector256.Equals(source, values); } - - Vector128 values = state.ValuesByLowNibble._lower; - - if (!Avx2.IsSupported && searchSpaceLength > 2 * Vector128.Count) + else { - // Process the input in chunks of 16 characters (2 * Vector128). - // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector128. - // As packing two Vector128s into a Vector128 is cheap compared to the lookup, we can effectively double the throughput. - // If the input length is a multiple of 16, don't consume the last 16 characters in this loop. - // Let the fallback below handle it instead. This is why the condition is - // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan". 
- ref short twoVectorsAfterStart = ref Unsafe.Add(ref searchSpace, 2 * Vector128.Count); - - do - { - currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, 2 * Vector128.Count); - - Vector128 source0 = Vector128.LoadUnsafe(ref currentSearchSpace); - Vector128 source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128.Count); - - Vector128 result = IndexOfAnyLookupUniqueLowNibble(source0, source1, values); - if (result != Vector128.Zero) - { - return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); - } - } - while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref twoVectorsAfterStart)); + Vector256 highNibbles = source >>> 4; + Vector256 bitMask = Avx2.Shuffle(bitmapLookup, source); + Vector256 bitPositions = Avx2.Shuffle(Vector256.Create(0x8040201008040201).AsByte(), highNibbles); + return bitMask & bitPositions; } - - // We have 1-16 characters remaining. Process the first and last vector in the search space. - // They may overlap, but we'll handle that in the index calculation if we do get a match. - Debug.Assert(searchSpaceLength >= Vector128.Count, "We expect that the input is long enough for us to load a whole vector."); - { - ref short oneVectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector128.Count); - - ref short secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAfterStart) - ? 
ref Unsafe.Subtract(ref currentSearchSpace, Vector128.Count) - : ref searchSpace; - - Vector128 source0 = Vector128.LoadUnsafe(ref searchSpace); - Vector128 source1 = Vector128.LoadUnsafe(ref secondVector); - - Vector128 result = IndexOfAnyLookupUniqueLowNibble(source0, source1, values); - if (result != Vector128.Zero) - { - return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); - } - } - - return -1; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - public static bool ContainsAny(ref byte searchSpace, int searchSpaceLength, ref UniqueLowNibbleState state) - where TNegator : struct, INegator => - IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - public static int IndexOfAny(ref byte searchSpace, int searchSpaceLength, ref UniqueLowNibbleState state) - where TNegator : struct, INegator => - IndexOfAnyCore>(ref searchSpace, searchSpaceLength, ref state); - - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - private static TResult IndexOfAnyCore(ref byte searchSpace, int searchSpaceLength, ref UniqueLowNibbleState state) - where TResult : struct - where TNegator : struct, INegator - where TResultMapper : struct, IResultMapper - { - ref byte currentSearchSpace = ref searchSpace; - - if (searchSpaceLength < sizeof(ulong)) - { - ref byte searchSpaceEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength); - - while (!Unsafe.AreSame(ref currentSearchSpace, ref searchSpaceEnd)) - { - byte b = currentSearchSpace; - if (TNegator.NegateIfNeeded(state.Lookup.Contains(b))) - { - return TResultMapper.ScalarResult(ref searchSpace, ref 
currentSearchSpace); - } - - currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 1); - } - - return TResultMapper.NotFound; - } - -#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx2.IsSupported is false - if (Avx2.IsSupported && searchSpaceLength > Vector128.Count) -#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough - { - Vector256 values256 = state.ValuesByLowNibble; - - if (searchSpaceLength > Vector256.Count) - { - // Process the input in chunks of 32 bytes. - // If the input length is a multiple of 32, don't consume the last 32 characters in this loop. - // Let the fallback below handle it instead. This is why the condition is - // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan". - ref byte vectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector256.Count); - - do - { - Vector256 source = Vector256.LoadUnsafe(ref currentSearchSpace); - - Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupUniqueLowNibbleCore(source, values256)); - if (result != Vector256.Zero) - { - return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); - } - - currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector256.Count); - } - while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref vectorAwayFromEnd)); - } - - // We have 1-32 bytes remaining. Process the first and last half vectors in the search space. - // They may overlap, but we'll handle that in the index calculation if we do get a match. 
- Debug.Assert(searchSpaceLength >= Vector128.Count, "We expect that the input is long enough for us to load a Vector128."); - { - ref byte halfVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector128.Count); - - ref byte firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAwayFromEnd) - ? ref halfVectorAwayFromEnd - : ref currentSearchSpace; - - Vector128 source0 = Vector128.LoadUnsafe(ref firstVector); - Vector128 source1 = Vector128.LoadUnsafe(ref halfVectorAwayFromEnd); - Vector256 source = Vector256.Create(source0, source1); - - Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupUniqueLowNibbleCore(source, values256)); - if (result != Vector256.Zero) - { - return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result); - } - } - - return TResultMapper.NotFound; - } - - Vector128 values = state.ValuesByLowNibble._lower; - - if (!Avx2.IsSupported && searchSpaceLength > Vector128.Count) - { - // Process the input in chunks of 16 bytes. - // If the input length is a multiple of 16, don't consume the last 16 characters in this loop. - // Let the fallback below handle it instead. This is why the condition is - // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan". - ref byte vectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector128.Count); - - do - { - Vector128 source = Vector128.LoadUnsafe(ref currentSearchSpace); - - Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupUniqueLowNibbleCore(source, values)); - if (result != Vector128.Zero) - { - return TResultMapper.FirstIndex(ref searchSpace, ref currentSearchSpace, result); - } - - currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector128.Count); - } - while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref vectorAwayFromEnd)); - } - - // We have 1-16 bytes remaining. 
Process the first and last half vectors in the search space. - // They may overlap, but we'll handle that in the index calculation if we do get a match. - Debug.Assert(searchSpaceLength >= sizeof(ulong), "We expect that the input is long enough for us to load a ulong."); - { - ref byte halfVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - sizeof(ulong)); - - ref byte firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAwayFromEnd) - ? ref halfVectorAwayFromEnd - : ref currentSearchSpace; - - ulong source0 = Unsafe.ReadUnaligned(ref firstVector); - ulong source1 = Unsafe.ReadUnaligned(ref halfVectorAwayFromEnd); - Vector128 source = Vector128.Create(source0, source1).AsByte(); - - Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupUniqueLowNibbleCore(source, values)); - if (result != Vector128.Zero) - { - return TResultMapper.FirstIndexOverlapped(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result); - } - } - - return TResultMapper.NotFound; - } - - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - public static int LastIndexOfAny(ref byte searchSpace, int searchSpaceLength, ref UniqueLowNibbleState state) - where TNegator : struct, INegator - { - if (searchSpaceLength < sizeof(ulong)) - { - for (int i = searchSpaceLength - 1; i >= 0; i--) - { - byte b = Unsafe.Add(ref searchSpace, i); - if (TNegator.NegateIfNeeded(state.Lookup.Contains(b))) - { - return i; - } - } - - return -1; - } - - ref byte currentSearchSpace = ref Unsafe.Add(ref searchSpace, searchSpaceLength); - -#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough // The behavior of the rest of the function remains the same if Avx2.IsSupported is false - if (Avx2.IsSupported && searchSpaceLength > Vector128.Count) -#pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough - { - Vector256 values256 = 
state.ValuesByLowNibble; - - if (searchSpaceLength > Vector256.Count) - { - // Process the input in chunks of 32 bytes. - // If the input length is a multiple of 32, don't consume the last 32 characters in this loop. - // Let the fallback below handle it instead. This is why the condition is - // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan". - ref byte vectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector256.Count); - - do - { - currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, Vector256.Count); - - Vector256 source = Vector256.LoadUnsafe(ref currentSearchSpace); - - Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupUniqueLowNibbleCore(source, values256)); - if (result != Vector256.Zero) - { - return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); - } - } - while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart)); - } - - // We have 1-32 bytes remaining. Process the first and last half vectors in the search space. - // They may overlap, but we'll handle that in the index calculation if we do get a match. - Debug.Assert(searchSpaceLength >= Vector128.Count, "We expect that the input is long enough for us to load a Vector128."); - { - ref byte halfVectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector128.Count); - - ref byte secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAfterStart) - ? 
ref Unsafe.Subtract(ref currentSearchSpace, Vector128.Count) - : ref searchSpace; - - Vector128 source0 = Vector128.LoadUnsafe(ref searchSpace); - Vector128 source1 = Vector128.LoadUnsafe(ref secondVector); - Vector256 source = Vector256.Create(source0, source1); - - Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupUniqueLowNibbleCore(source, values256)); - if (result != Vector256.Zero) - { - return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); - } - } - - return -1; - } - - Vector128 values = state.ValuesByLowNibble._lower; - - if (!Avx2.IsSupported && searchSpaceLength > Vector128.Count) - { - // Process the input in chunks of 16 bytes. - // If the input length is a multiple of 16, don't consume the last 16 characters in this loop. - // Let the fallback below handle it instead. This is why the condition is - // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan". - ref byte vectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector128.Count); - - do - { - currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, Vector128.Count); - - Vector128 source = Vector128.LoadUnsafe(ref currentSearchSpace); - - Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupUniqueLowNibbleCore(source, values)); - if (result != Vector128.Zero) - { - return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); - } - } - while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart)); - } - - // We have 1-16 bytes remaining. Process the first and last half vectors in the search space. - // They may overlap, but we'll handle that in the index calculation if we do get a match. 
- Debug.Assert(searchSpaceLength >= sizeof(ulong), "We expect that the input is long enough for us to load a ulong."); - { - ref byte halfVectorAfterStart = ref Unsafe.Add(ref searchSpace, sizeof(ulong)); - - ref byte secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAfterStart) - ? ref Unsafe.Subtract(ref currentSearchSpace, sizeof(ulong)) - : ref searchSpace; - - ulong source0 = Unsafe.ReadUnaligned(ref searchSpace); - ulong source1 = Unsafe.ReadUnaligned(ref secondVector); - Vector128 source = Vector128.Create(source0, source1).AsByte(); - - Vector128 result = TNegator.NegateIfNeeded(IndexOfAnyLookupUniqueLowNibbleCore(source, values)); - if (result != Vector128.Zero) - { - return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); - } - } - - return -1; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Sse2))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - private static Vector128 IndexOfAnyLookup(Vector128 source0, Vector128 source1, Vector128 bitmapLookup) - where TNegator : struct, INegator - where TOptimizations : struct, IOptimizations - { - Vector128 source = TOptimizations.PackSources(source0.AsUInt16(), source1.AsUInt16()); - - Vector128 result = IndexOfAnyLookupCore(source, bitmapLookup); - - return TNegator.NegateIfNeeded(result); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - private static Vector128 IndexOfAnyLookupCore(Vector128 source, Vector128 bitmapLookup) - { - // On X86, the Ssse3.Shuffle instruction will already perform an implicit 'AND 0xF' on the indices, so we can skip it. - // For values above 127, Ssse3.Shuffle will also set the result to 0. This is fine as we don't want non-ASCII values to match anyway. - Vector128 lowNibbles = Ssse3.IsSupported - ? 
source - : source & Vector128.Create((byte)0xF); - - // On ARM, we have an instruction for an arithmetic right shift of 1-byte signed values. - // The shift will map values above 127 to values above 16, which the shuffle will then map to 0. - // On X86 and WASM, use a logical right shift instead. - Vector128 highNibbles = AdvSimd.IsSupported - ? AdvSimd.ShiftRightArithmetic(source.AsSByte(), 4).AsByte() - : source >>> 4; - - // The bitmapLookup represents a 8x16 table of bits, indicating whether a character is present in the needle. - // Lookup the rows via the lower nibble and the column via the higher nibble. - Vector128 bitMask = Vector128.ShuffleUnsafe(bitmapLookup, lowNibbles); - - // For values above 127, the high nibble will be above 7. We construct the positions vector for the shuffle such that those values map to 0. - Vector128 bitPositions = Vector128.ShuffleUnsafe(Vector128.Create(0x8040201008040201, 0).AsByte(), highNibbles); - - Vector128 result = bitMask & bitPositions; - return result; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx2))] - private static Vector256 IndexOfAnyLookup(Vector256 source0, Vector256 source1, Vector256 bitmapLookup) - where TNegator : struct, INegator - where TOptimizations : struct, IOptimizations - { - Vector256 source = TOptimizations.PackSources(source0.AsUInt16(), source1.AsUInt16()); - - Vector256 result = IndexOfAnyLookupCore(source, bitmapLookup); - - return TNegator.NegateIfNeeded(result); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx2))] - private static Vector256 IndexOfAnyLookupCore(Vector256 source, Vector256 bitmapLookup) - { - // See comments in IndexOfAnyLookupCore(Vector128) above for more details. 
- Vector256 highNibbles = source >>> 4; - Vector256 bitMask = Avx2.Shuffle(bitmapLookup, source); - Vector256 bitPositions = Avx2.Shuffle(Vector256.Create(0x8040201008040201).AsByte(), highNibbles); - Vector256 result = bitMask & bitPositions; - return result; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Sse2))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - private static Vector128 IndexOfAnyLookupUniqueLowNibble(Vector128 source0, Vector128 source1, Vector128 valuesByLowNibble) - where TNegator : struct, INegator - { - Vector128 source = Default.PackSources(source0.AsUInt16(), source1.AsUInt16()); - - Vector128 result = IndexOfAnyLookupUniqueLowNibbleCore(source, valuesByLowNibble); - - return TNegator.NegateIfNeeded(result); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - private static Vector128 IndexOfAnyLookupUniqueLowNibbleCore(Vector128 source, Vector128 valuesByLowNibble) - { - // Based on http://0x80.pl/articles/simd-byte-lookup.html#special-case-3-unique-lower-and-higher-nibbles - - // On X86, the Ssse3.Shuffle instruction will already perform an implicit 'AND 0xF' on the indices, so we can skip it. - // For values above 127, Ssse3.Shuffle will also set the result to 0. This is fine as we only use this approach if - // all values are <= 127 when Ssse3 is supported (see CanUseUniqueLowNibbleSearch). - // False positives from values mapped to 0 will be ruled out by the Vector128.Equals comparison below. - Vector128 lowNibbles = Ssse3.IsSupported - ? source - : source & Vector128.Create((byte)0xF); - - // We use a shuffle to look up potential matches for each byte based on its low nibble. - // Since all values have a unique low nibble, there's at most one potential match per nibble. 
- Vector128 values = Vector128.ShuffleUnsafe(valuesByLowNibble, lowNibbles); - - // Compare potential matches with the source to rule out false positives that have a different high nibble. - return Vector128.Equals(source, values); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx2))] - private static Vector256 IndexOfAnyLookupUniqueLowNibble(Vector256 source0, Vector256 source1, Vector256 valuesByLowNibble) - where TNegator : struct, INegator - { - Vector256 source = Default.PackSources(source0.AsUInt16(), source1.AsUInt16()); - - Vector256 result = IndexOfAnyLookupUniqueLowNibbleCore(source, valuesByLowNibble); - - return TNegator.NegateIfNeeded(result); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx2))] - private static Vector256 IndexOfAnyLookupUniqueLowNibbleCore(Vector256 source, Vector256 valuesByLowNibble) - { - // See comments in IndexOfAnyLookupUniqueLowNibbleCore(Vector128) above for more details. 
- Vector256 values = Avx2.Shuffle(valuesByLowNibble, source); - return Vector256.Equals(source, values); } [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs index 7b05c3f8a3b77c..eb07969130a831 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs @@ -56,7 +56,7 @@ internal override int IndexOfAny(ReadOnlySpan span) { Debug.Assert(_inverseAsciiState.Lookup.Contains(0), "The inverse bitmap did not contain a 0."); - offset = IndexOfAnyAsciiSearcher.IndexOfAny( + offset = IndexOfAnyAsciiSearcher.IndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _inverseAsciiState); @@ -66,7 +66,7 @@ ref Unsafe.As(ref MemoryMarshal.GetReference(span)), Debug.Assert(!(Ssse3.IsSupported || PackedSimd.IsSupported) || !_inverseAsciiState.Lookup.Contains(0), "The inverse bitmap contained a 0, but we're not using Ssse3AndWasmHandleZeroInNeedle."); - offset = IndexOfAnyAsciiSearcher.IndexOfAny( + offset = IndexOfAnyAsciiSearcher.IndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _inverseAsciiState); @@ -105,7 +105,7 @@ internal override int IndexOfAnyExcept(ReadOnlySpan span) if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count && char.IsAscii(span[0])) { // Do a regular IndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char. 
- offset = IndexOfAnyAsciiSearcher.IndexOfAny( + offset = IndexOfAnyAsciiSearcher.IndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _asciiState); @@ -155,7 +155,7 @@ internal override int LastIndexOfAny(ReadOnlySpan span) { Debug.Assert(_inverseAsciiState.Lookup.Contains(0), "The inverse bitmap did not contain a 0."); - offset = IndexOfAnyAsciiSearcher.LastIndexOfAny( + offset = IndexOfAnyAsciiSearcher.LastIndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _inverseAsciiState); @@ -165,7 +165,7 @@ ref Unsafe.As(ref MemoryMarshal.GetReference(span)), Debug.Assert(!(Ssse3.IsSupported || PackedSimd.IsSupported) || !_inverseAsciiState.Lookup.Contains(0), "The inverse bitmap contained a 0, but we're not using Ssse3AndWasmHandleZeroInNeedle."); - offset = IndexOfAnyAsciiSearcher.LastIndexOfAny( + offset = IndexOfAnyAsciiSearcher.LastIndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _inverseAsciiState); @@ -194,7 +194,7 @@ internal override int LastIndexOfAnyExcept(ReadOnlySpan span) if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count && char.IsAscii(span[^1])) { // Do a regular LastIndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char. 
- int offset = IndexOfAnyAsciiSearcher.LastIndexOfAny( + int offset = IndexOfAnyAsciiSearcher.LastIndexOfAny( ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _asciiState); diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs index fe7b2a159dad05..1bd81b6b4ff587 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs @@ -43,9 +43,9 @@ public static SearchValues Create(params ReadOnlySpan values) // Depending on the hardware, UniqueLowNibble can be faster than even range or 2 values. // It's currently consistently faster than 4/5 values on all tested platforms (Arm, Avx2, Avx512). - if (values.Length >= 4 && IndexOfAnyAsciiSearcher.CanUseUniqueLowNibbleSearch(values, minInclusive, maxInclusive)) + if (values.Length >= 4 && IndexOfAnyAsciiSearcher.CanUseUniqueLowNibbleSearch(values, maxInclusive)) { - return new UniqueLowNibbleByteSearchValues(values); + return new AsciiByteSearchValues(values); } if (values.Length <= 5) @@ -62,7 +62,7 @@ public static SearchValues Create(params ReadOnlySpan values) if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && maxInclusive < 128) { - return new AsciiByteSearchValues(values); + return new AsciiByteSearchValues(values); } return new AnyByteSearchValues(values); @@ -149,17 +149,19 @@ public static SearchValues Create(params ReadOnlySpan values) // Depending on the hardware, UniqueLowNibble can be faster than most implementations we currently prefer above. // It's currently consistently faster than 4/5 values or Ascii on all tested platforms (Arm, Avx2, Avx512). 
- if (IndexOfAnyAsciiSearcher.CanUseUniqueLowNibbleSearch(values, minInclusive, maxInclusive)) + if (IndexOfAnyAsciiSearcher.CanUseUniqueLowNibbleSearch(values, maxInclusive)) { - return new UniqueLowNibbleCharSearchValues(values); + return (Ssse3.IsSupported || PackedSimd.IsSupported) && minInclusive == 0 + ? new AsciiCharSearchValues(values) + : new AsciiCharSearchValues(values); } // IndexOfAnyAsciiSearcher for chars is slower than Any3CharSearchValues, but faster than Any4SearchValues if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && maxInclusive < 128) { return (Ssse3.IsSupported || PackedSimd.IsSupported) && minInclusive == 0 - ? new AsciiCharSearchValues(values) - : new AsciiCharSearchValues(values); + ? new AsciiCharSearchValues(values) + : new AsciiCharSearchValues(values); } if (values.Length == 4) diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/AhoCorasick.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/AhoCorasick.cs index ebc94616ae642c..ad3cdfe2e2837a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/AhoCorasick.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/AhoCorasick.cs @@ -49,7 +49,7 @@ public readonly bool ShouldUseAsciiFastScan for (int i = 0; i < 128; i++) { - if (_startingAsciiChars.Lookup.Contains128((char)i)) + if (_startingAsciiChars.Lookup.Contains256((char)i)) { frequency += CharacterFrequencyHelper.AsciiFrequency[i]; } @@ -96,7 +96,7 @@ private readonly int IndexOfAnyCore(ReadOnly // If '\0' is one of the starting chars and we're running on Ssse3 hardware, this may return false-positives. // False-positives here are okay, we'll just rule them out below. While we could flow the Ssse3AndWasmHandleZeroInNeedle // generic through, we expect such values to be rare enough that introducing more code is not worth it. 
- int offset = IndexOfAnyAsciiSearcher.IndexOfAny( + int offset = IndexOfAnyAsciiSearcher.IndexOfAny( ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetReference(span), i)), remainingLength, ref Unsafe.AsRef(in _startingAsciiChars)); @@ -205,7 +205,7 @@ private readonly int IndexOfAnyCaseInsensitiveUnicode(ReadOnly if (remainingLength >= Vector128.Count) { - int offset = IndexOfAnyAsciiSearcher.IndexOfAny( + int offset = IndexOfAnyAsciiSearcher.IndexOfAny( ref Unsafe.As(ref Unsafe.Add(ref MemoryMarshal.GetReference(span), i)), remainingLength, ref Unsafe.AsRef(in _startingAsciiChars)); diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/UniqueLowNibbleByteSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/UniqueLowNibbleByteSearchValues.cs deleted file mode 100644 index af98b0393dbe2e..00000000000000 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/UniqueLowNibbleByteSearchValues.cs +++ /dev/null @@ -1,74 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics.Arm; -using System.Runtime.Intrinsics.Wasm; -using System.Runtime.Intrinsics.X86; - -namespace System.Buffers -{ - internal sealed class UniqueLowNibbleByteSearchValues : SearchValues - { - private IndexOfAnyAsciiSearcher.UniqueLowNibbleState _state; - - public UniqueLowNibbleByteSearchValues(ReadOnlySpan values) => - IndexOfAnyAsciiSearcher.ComputeUniqueLowNibbleState(values, out _state); - - internal override byte[] GetValues() => - _state.Lookup.GetByteValues(); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal override bool ContainsCore(byte value) => - _state.Lookup.Contains(value); - - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal override int IndexOfAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.IndexOfAny( - ref MemoryMarshal.GetReference(span), span.Length, ref _state); - - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal override int IndexOfAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.IndexOfAny( - ref MemoryMarshal.GetReference(span), span.Length, ref _state); - - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal override int LastIndexOfAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.LastIndexOfAny( - ref MemoryMarshal.GetReference(span), span.Length, ref _state); - - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal override int 
LastIndexOfAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.LastIndexOfAny( - ref MemoryMarshal.GetReference(span), span.Length, ref _state); - - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal override bool ContainsAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.ContainsAny( - ref MemoryMarshal.GetReference(span), span.Length, ref _state); - - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal override bool ContainsAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.ContainsAny( - ref MemoryMarshal.GetReference(span), span.Length, ref _state); - } -} diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/UniqueLowNibbleCharSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/UniqueLowNibbleCharSearchValues.cs deleted file mode 100644 index 857a2cbce484bc..00000000000000 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/UniqueLowNibbleCharSearchValues.cs +++ /dev/null @@ -1,74 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics.Arm; -using System.Runtime.Intrinsics.Wasm; -using System.Runtime.Intrinsics.X86; - -namespace System.Buffers -{ - internal sealed class UniqueLowNibbleCharSearchValues : SearchValues - { - private IndexOfAnyAsciiSearcher.UniqueLowNibbleState _state; - - public UniqueLowNibbleCharSearchValues(ReadOnlySpan values) => - IndexOfAnyAsciiSearcher.ComputeUniqueLowNibbleState(values, out _state); - - internal override char[] GetValues() => - _state.Lookup.GetCharValues(); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal override bool ContainsCore(char value) => - _state.Lookup.Contains256(value); - - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal override int IndexOfAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.IndexOfAny( - ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); - - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal override int IndexOfAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.IndexOfAny( - ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); - - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal override int LastIndexOfAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.LastIndexOfAny( - ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); - - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - 
internal override int LastIndexOfAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.LastIndexOfAny( - ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); - - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal override bool ContainsAny(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.ContainsAny( - ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); - - [CompExactlyDependsOn(typeof(Ssse3))] - [CompExactlyDependsOn(typeof(AdvSimd))] - [CompExactlyDependsOn(typeof(PackedSimd))] - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal override bool ContainsAnyExcept(ReadOnlySpan span) => - IndexOfAnyAsciiSearcher.ContainsAny( - ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length, ref _state); - } -} From fe3ae6721ad67748fb0c3c8bd80adef915061e1f Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Fri, 6 Sep 2024 19:22:43 +0200 Subject: [PATCH 3/5] Tweak comment --- .../src/System/SearchValues/IndexOfAnyAsciiSearcher.cs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs index c05913dbe291bb..8e47b8b6d81540 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs @@ -152,10 +152,16 @@ public static void ComputeUniqueLowNibbleState(ReadOnlySpan values, out As valuesByLowNibble.SetElementUnsafe(value & 0xF, value); } + // Elements of 'valuesByLowNibble' where no value had that low nibble will be left uninitialized at 0. 
+ // For most, that is okay, as only the zero character in the input could ever match against them, + // but such input characters will always be mapped to the 0th element of 'valuesByLowNibble'. + // + // That does mean we could still see false positives if none of the values had a low nibble of zero. + // To avoid that, we can replace the 0th element with any other byte that has a non-zero low nibble. + // The zero character will no longer match, and the new value we pick won't match either as + // it will be mapped to a different element in 'valuesByLowNibble' given its non-zero low nibble. if (valuesByLowNibble.GetElement(0) == 0 && !lookup.Contains(0)) { - // Avoid false positives for the zero character if no other character has a low nibble of zero. - // We can replace it with any other byte that has a non-zero low nibble. valuesByLowNibble.SetElementUnsafe(0, (byte)1); } From 74e1d6e8423bfe8ef8fce550c94e6d494a5584fe Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Fri, 6 Sep 2024 19:30:21 +0200 Subject: [PATCH 4/5] Remove extra empty line --- .../src/System/SearchValues/AsciiCharSearchValues.cs | 1 - .../src/System/SearchValues/IndexOfAnyAsciiSearcher.cs | 1 - 2 files changed, 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiCharSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiCharSearchValues.cs index 160920108cdb7f..175c2737b9a7ec 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiCharSearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/AsciiCharSearchValues.cs @@ -36,7 +36,6 @@ public AsciiCharSearchValues(ReadOnlySpan values) internal override char[] GetValues() => _state.Lookup.GetCharValues(); - // Despite the name being Ascii, this type may be used for non-ASCII values. 
[MethodImpl(MethodImplOptions.AggressiveInlining)] internal override bool ContainsCore(char value) => _state.Lookup.Contains256(value); diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs index 8e47b8b6d81540..7c0cac377b40fc 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs @@ -571,7 +571,6 @@ private static TResult IndexOfAnyCore { ref byte currentSearchSpace = ref searchSpace; From 2dfd56b8c5640f87aabbb500e85554c0b4724ddd Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Sun, 8 Sep 2024 23:25:10 -0700 Subject: [PATCH 5/5] Update comment --- .../src/System/SearchValues/IndexOfAnyAsciiSearcher.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs index 7c0cac377b40fc..c722387c4f4e14 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs @@ -107,7 +107,7 @@ public static bool CanUseUniqueLowNibbleSearch(ReadOnlySpan values, int ma if (Ssse3.IsSupported && maxInclusive > 127) { - // We could support [1, 254] if we did the "& 0xF" before calling into Shuffle in IndexOfAnyLookupCore. + // We could support values higher than 127 if we did the "& 0xF" before calling into Shuffle in IndexOfAnyLookupCore. // We currently optimize for the common case of ASCII characters instead, saving an instruction there. return false; }