diff --git a/eng/Version.Details.xml b/eng/Version.Details.xml index eafe55366b8c95..3ae6be98cda1ce 100644 --- a/eng/Version.Details.xml +++ b/eng/Version.Details.xml @@ -292,21 +292,21 @@ https://github.com/dotnet/arcade cb54ca21431ee8d96f91abfbc42237bcb001f9d1 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b + f9ae5c9fda841a26d8eaaa07151ac2618725da87 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b + f9ae5c9fda841a26d8eaaa07151ac2618725da87 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b + f9ae5c9fda841a26d8eaaa07151ac2618725da87 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b + f9ae5c9fda841a26d8eaaa07151ac2618725da87 https://github.com/dotnet/hotreload-utils @@ -340,13 +340,13 @@ https://github.com/dotnet/sdk 2fd62c3936f5336b836f6b12df170aa0e90da767 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b + f9ae5c9fda841a26d8eaaa07151ac2618725da87 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b + f9ae5c9fda841a26d8eaaa07151ac2618725da87 diff --git a/eng/Versions.props b/eng/Versions.props index f8940b01835a10..0d7dad51197dbf 100644 --- a/eng/Versions.props +++ b/eng/Versions.props @@ -158,12 +158,12 @@ 8.0.0-beta.23179.4 8.0.0-beta.23179.4 - 1.0.0-prerelease.23175.4 - 1.0.0-prerelease.23175.4 - 1.0.0-prerelease.23175.4 - 1.0.0-prerelease.23175.4 - 1.0.0-prerelease.23175.4 - 1.0.0-prerelease.23175.4 + 1.0.0-prerelease.23220.15 + 1.0.0-prerelease.23220.15 + 1.0.0-prerelease.23220.15 + 1.0.0-prerelease.23220.15 + 1.0.0-prerelease.23220.15 + 1.0.0-prerelease.23220.15 16.11.23-beta1.23063.1 2.0.0-beta4.22564.1 diff --git a/src/libraries/System.Private.CoreLib/src/System/IndexOfAnyValues/IndexOfAnyAsciiSearcher.cs b/src/libraries/System.Private.CoreLib/src/System/IndexOfAnyValues/IndexOfAnyAsciiSearcher.cs index c39639d24f8264..92b588d06f4942 100644 --- a/src/libraries/System.Private.CoreLib/src/System/IndexOfAnyValues/IndexOfAnyAsciiSearcher.cs +++ b/src/libraries/System.Private.CoreLib/src/System/IndexOfAnyValues/IndexOfAnyAsciiSearcher.cs @@ -195,7 +195,12 @@ internal static int IndexOfAnyVectorized(ref short sea Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); if (result != Vector256.Zero) { - return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result); + result = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte(); + + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = BitOperations.TrailingZeroCount(mask); + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(short)); } currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count); @@ -219,7 +224,18 @@ internal static int IndexOfAnyVectorized(ref short sea Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); if (result != Vector256.Zero) { - return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); + result = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte(); + + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = BitOperations.TrailingZeroCount(mask); + if (offsetInVector >= Vector256.Count) + { + // We matched within the second vector + firstVector = ref oneVectorAwayFromEnd; + offsetInVector -= Vector256.Count; + } + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / (nuint)sizeof(short)); } } @@ -307,7 +323,12 @@ internal static int LastIndexOfAnyVectorized(ref short Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); if (result != Vector256.Zero) { - return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); + result = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte(); + + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask); + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(short)); } } while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref twoVectorsAfterStart)); @@ -329,7 +350,18 @@ internal static int LastIndexOfAnyVectorized(ref short Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); if (result != Vector256.Zero) { - return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); + result = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte(); + + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask); + if (offsetInVector < Vector256.Count) + { + return offsetInVector; + } + + // We matched within the second vector + return offsetInVector - Vector256.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(short)); } } @@ -411,7 +443,10 @@ internal static int IndexOfAnyVectorized(ref byte searchSpace, int sea Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); if (result != Vector256.Zero) { - return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result); + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = BitOperations.TrailingZeroCount(mask); + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(byte)); } currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector256.Count); @@ -436,7 +471,16 @@ internal static int IndexOfAnyVectorized(ref byte searchSpace, int sea Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); if (result != Vector256.Zero) { - return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result); + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = BitOperations.TrailingZeroCount(mask); + if (offsetInVector >= Vector256.Count) + { + // We matched within the second vector + firstVector = ref halfVectorAwayFromEnd; + offsetInVector -= Vector256.Count; + } + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / (nuint)sizeof(byte)); } } @@ -518,7 +562,10 @@ internal static int LastIndexOfAnyVectorized(ref byte searchSpace, int Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); if (result != Vector256.Zero) { - return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask); + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(byte)); } } while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart)); @@ -541,7 +588,16 @@ internal static int LastIndexOfAnyVectorized(ref byte searchSpace, int Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); if (result != Vector256.Zero) { - return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask); + if (offsetInVector < Vector256.Count) + { + return offsetInVector; + } + + // We matched within the second vector + return offsetInVector - Vector256.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(byte)); } } @@ -622,7 +678,10 @@ internal static int IndexOfAnyVectorized(ref byte searchSpace, int sea Vector256 result = IndexOfAnyLookup(source, bitmap256_0, bitmap256_1); if (result != Vector256.Zero) { - return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result); + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = BitOperations.TrailingZeroCount(mask); + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(byte)); } currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector256.Count); @@ -647,7 +706,16 @@ internal static int IndexOfAnyVectorized(ref byte searchSpace, int sea Vector256 result = IndexOfAnyLookup(source, bitmap256_0, bitmap256_1); if (result != Vector256.Zero) { - return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result); + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = BitOperations.TrailingZeroCount(mask); + if (offsetInVector >= Vector256.Count) + { + // We matched within the second vector + firstVector = ref halfVectorAwayFromEnd; + offsetInVector -= Vector256.Count; + } + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / (nuint)sizeof(byte)); } } @@ -730,7 +798,10 @@ internal static int LastIndexOfAnyVectorized(ref byte searchSpace, int Vector256 result = IndexOfAnyLookup(source, bitmap256_0, bitmap256_1); if (result != Vector256.Zero) { - return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask); + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(byte)); } } while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart)); @@ -753,7 +824,16 @@ internal static int LastIndexOfAnyVectorized(ref byte searchSpace, int Vector256 result = IndexOfAnyLookup(source, bitmap256_0, bitmap256_1); if (result != Vector256.Zero) { - return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask); + if (offsetInVector < Vector256.Count) + { + return offsetInVector; + } + + // We matched within the second vector + return offsetInVector - Vector256.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(byte)); } } @@ -992,89 +1072,6 @@ private static unsafe int ComputeLastIndexOverlapped(ref T searchSp return offsetInVector - Vector128.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(T)); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe int ComputeFirstIndex(ref T searchSpace, ref T current, Vector256 result) - where TNegator : struct, INegator - { - if (typeof(T) == typeof(short)) - { - result = FixUpPackedVector256Result(result); - } - - uint mask = TNegator.ExtractMask(result); - - int offsetInVector = BitOperations.TrailingZeroCount(mask); - return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T)); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe int ComputeFirstIndexOverlapped(ref T searchSpace, ref T current0, ref T current1, Vector256 result) - where TNegator : struct, INegator - { - if (typeof(T) == typeof(short)) - { - result = FixUpPackedVector256Result(result); - } - - uint mask = TNegator.ExtractMask(result); - - int offsetInVector = BitOperations.TrailingZeroCount(mask); - if (offsetInVector >= Vector256.Count) - { - // We matched within the second vector - current0 = ref current1; - offsetInVector -= Vector256.Count; - } - return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current0) / (nuint)sizeof(T)); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe int ComputeLastIndex(ref T searchSpace, ref T current, Vector256 result) - where TNegator : struct, INegator - { - if (typeof(T) == typeof(short)) - { - result = FixUpPackedVector256Result(result); - } - - uint mask = TNegator.ExtractMask(result); - - int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask); - return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T)); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe int ComputeLastIndexOverlapped(ref T searchSpace, ref T secondVector, Vector256 result) - where TNegator : struct, INegator - { - if (typeof(T) == typeof(short)) - { - result = FixUpPackedVector256Result(result); - } - - uint mask = TNegator.ExtractMask(result); - - int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask); - if (offsetInVector < Vector256.Count) - { - return offsetInVector; - } - - // We matched within the second vector - return offsetInVector - Vector256.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(T)); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 FixUpPackedVector256Result(Vector256 result) - { - Debug.Assert(Avx2.IsSupported); - // Avx2.PackUnsignedSaturate(Vector256.Create((short)1), Vector256.Create((short)2)) will result in - // 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 - // We want to swap the X and Y bits - // 1, 1, 1, 1, 1, 1, 1, 1, X, X, X, X, X, X, X, X, Y, Y, Y, Y, Y, Y, Y, Y, 2, 2, 2, 2, 2, 2, 2, 2 - return Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte(); - } - internal interface INegator { static abstract bool NegateIfNeeded(bool result); diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs index 6da69bb7ebe630..fa1fec132b12e9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs @@ -120,7 +120,7 @@ public static bool Contains(ref short searchSpace, short value, int length) { Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace); Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.Equals(packedValue, packedSource); if (result != Vector256.Zero) @@ -144,7 +144,7 @@ public static bool Contains(ref short searchSpace, short value, int length) Vector256 source0 = Vector256.LoadUnsafe(ref firstVector); Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.Equals(packedValue, packedSource); if (result != Vector256.Zero) @@ -257,13 +257,16 @@ private static int IndexOf(ref short searchSpace, short value, int len { Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace); Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.Equals(packedValue, packedSource); result = NegateIfNeeded(result); if (result != Vector256.Zero) { - return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result); + uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits(); + + int index = BitOperations.TrailingZeroCount(notEqualsElements); + return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / sizeof(short)); } currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count); @@ -282,13 +285,22 @@ private static int IndexOf(ref short searchSpace, short value, int len Vector256 source0 = Vector256.LoadUnsafe(ref firstVector); Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.Equals(packedValue, packedSource); result = NegateIfNeeded(result); if (result != Vector256.Zero) { - return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); + uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits(); + + int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements); + if (offsetInVector >= Vector256.Count) + { + // We matched within the second vector + firstVector = ref oneVectorAwayFromEnd; + offsetInVector -= Vector256.Count; + } + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / sizeof(short)); } } } @@ -406,13 +418,16 @@ private static int IndexOfAny(ref short searchSpace, short value0, sho { Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace); Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource); result = NegateIfNeeded(result); if (result != Vector256.Zero) { - return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result); + uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits(); + + int index = BitOperations.TrailingZeroCount(notEqualsElements); + return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / sizeof(short)); } currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count); @@ -431,13 +446,22 @@ private static int IndexOfAny(ref short searchSpace, short value0, sho Vector256 source0 = Vector256.LoadUnsafe(ref firstVector); Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource); result = NegateIfNeeded(result); if (result != Vector256.Zero) { - return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); + uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits(); + + int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements); + if (offsetInVector >= Vector256.Count) + { + // We matched within the second vector + firstVector = ref oneVectorAwayFromEnd; + offsetInVector -= Vector256.Count; + } + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / sizeof(short)); } } } @@ -558,13 +582,16 @@ private static int IndexOfAny(ref short searchSpace, short value0, sho { Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace); Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource) | Vector256.Equals(packedValue2, packedSource); result = NegateIfNeeded(result); if (result != Vector256.Zero) { - return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result); + uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits(); + + int index = BitOperations.TrailingZeroCount(notEqualsElements); + return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / sizeof(short)); } currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count); @@ -583,13 +610,22 @@ private static int IndexOfAny(ref short searchSpace, short value0, sho Vector256 source0 = Vector256.LoadUnsafe(ref firstVector); Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource) | Vector256.Equals(packedValue2, packedSource); result = NegateIfNeeded(result); if (result != Vector256.Zero) { - return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); + uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits(); + + int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements); + if (offsetInVector >= Vector256.Count) + { + // We matched within the second vector + firstVector = ref oneVectorAwayFromEnd; + offsetInVector -= Vector256.Count; + } + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / sizeof(short)); } } } @@ -692,13 +728,16 @@ private static int IndexOfAnyInRange(ref short searchSpace, short lowI { Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace); Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.LessThanOrEqual(packedSource - lowVector, rangeVector); result = NegateIfNeeded(result); if (result != Vector256.Zero) { - return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result); + uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits(); + + int index = BitOperations.TrailingZeroCount(notEqualsElements); + return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / sizeof(short)); } currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count); @@ -717,13 +756,22 @@ private static int IndexOfAnyInRange(ref short searchSpace, short lowI Vector256 source0 = Vector256.LoadUnsafe(ref firstVector); Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.LessThanOrEqual(packedSource - lowVector, rangeVector); result = NegateIfNeeded(result); if (result != Vector256.Zero) { - return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); + uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits(); + + int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements); + if (offsetInVector >= Vector256.Count) + { + // We matched within the second vector + firstVector = ref oneVectorAwayFromEnd; + offsetInVector -= Vector256.Count; + } + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / sizeof(short)); } } } @@ -784,16 +832,11 @@ private static int IndexOfAnyInRange(ref short searchSpace, short lowI return -1; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 PackSources(Vector256 source0, Vector256 source1) - { - Debug.Assert(Avx2.IsSupported); - // Pack two vectors of characters into bytes. While the type is Vector256, these are really UInt16 characters. - // X86: Downcast every character using saturation. - // - Values <= 32767 result in min(value, 255). - // - Values > 32767 result in 0. Because of this we can't accept needles that contain 0. - return Avx2.PackUnsignedSaturate(source0, source1).AsByte(); - } + // Note: Avx2.PackUnsignedSaturate can't be extracted into a helper function that assumes + // Avx2 support, because this would violate rules for System.Private.CoreLib intrinsics use. + // With R2R and tiered compilation, it is possible for the helper to be prejitted without Avx2 support, + // but called from a rejitted caller that goes down the Avx2 path. + // Sse2 is always supported in crossgen, so can be extracted to a helper. [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector128 PackSources(Vector128 source0, Vector128 source1) @@ -824,14 +867,6 @@ private static int ComputeFirstIndex(ref short searchSpace, ref short current, V return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / sizeof(short)); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int ComputeFirstIndex(ref short searchSpace, ref short current, Vector256 equals) - { - uint notEqualsElements = FixUpPackedVector256Result(equals).ExtractMostSignificantBits(); - int index = BitOperations.TrailingZeroCount(notEqualsElements); - return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / sizeof(short)); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int ComputeFirstIndexOverlapped(ref short searchSpace, ref short current0, ref short current1, Vector128 equals) { @@ -845,30 +880,5 @@ private static int ComputeFirstIndexOverlapped(ref short searchSpace, ref short } return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current0) / sizeof(short)); } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int ComputeFirstIndexOverlapped(ref short searchSpace, ref short current0, ref short current1, Vector256 equals) - { - uint notEqualsElements = FixUpPackedVector256Result(equals).ExtractMostSignificantBits(); - int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements); - if (offsetInVector >= Vector256.Count) - { - // We matched within the second vector - current0 = ref current1; - offsetInVector -= Vector256.Count; - } - return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current0) / sizeof(short)); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 FixUpPackedVector256Result(Vector256 result) - { - Debug.Assert(Avx2.IsSupported); - // Avx2.PackUnsignedSaturate(Vector256.Create((short)1), Vector256.Create((short)2)) will result in - // 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 - // We want to swap the X and Y bits - // 1, 1, 1, 1, 1, 1, 1, 1, X, X, X, X, X, X, X, X, Y, Y, Y, Y, Y, Y, Y, Y, 2, 2, 2, 2, 2, 2, 2, 2 - return Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte(); - } } }