diff --git a/eng/Version.Details.xml b/eng/Version.Details.xml
index eafe55366b8c95..3ae6be98cda1ce 100644
--- a/eng/Version.Details.xml
+++ b/eng/Version.Details.xml
@@ -292,21 +292,21 @@
https://github.com/dotnet/arcade
cb54ca21431ee8d96f91abfbc42237bcb001f9d1
-
+
https://dev.azure.com/dnceng/internal/_git/dotnet-optimization
- 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b
+ f9ae5c9fda841a26d8eaaa07151ac2618725da87
-
+
https://dev.azure.com/dnceng/internal/_git/dotnet-optimization
- 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b
+ f9ae5c9fda841a26d8eaaa07151ac2618725da87
-
+
https://dev.azure.com/dnceng/internal/_git/dotnet-optimization
- 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b
+ f9ae5c9fda841a26d8eaaa07151ac2618725da87
-
+
https://dev.azure.com/dnceng/internal/_git/dotnet-optimization
- 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b
+ f9ae5c9fda841a26d8eaaa07151ac2618725da87
https://github.com/dotnet/hotreload-utils
@@ -340,13 +340,13 @@
https://github.com/dotnet/sdk
2fd62c3936f5336b836f6b12df170aa0e90da767
-
+
https://dev.azure.com/dnceng/internal/_git/dotnet-optimization
- 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b
+ f9ae5c9fda841a26d8eaaa07151ac2618725da87
-
+
https://dev.azure.com/dnceng/internal/_git/dotnet-optimization
- 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b
+ f9ae5c9fda841a26d8eaaa07151ac2618725da87
diff --git a/eng/Versions.props b/eng/Versions.props
index f8940b01835a10..0d7dad51197dbf 100644
--- a/eng/Versions.props
+++ b/eng/Versions.props
@@ -158,12 +158,12 @@
8.0.0-beta.23179.4
8.0.0-beta.23179.4
- 1.0.0-prerelease.23175.4
- 1.0.0-prerelease.23175.4
- 1.0.0-prerelease.23175.4
- 1.0.0-prerelease.23175.4
- 1.0.0-prerelease.23175.4
- 1.0.0-prerelease.23175.4
+ 1.0.0-prerelease.23220.15
+ 1.0.0-prerelease.23220.15
+ 1.0.0-prerelease.23220.15
+ 1.0.0-prerelease.23220.15
+ 1.0.0-prerelease.23220.15
+ 1.0.0-prerelease.23220.15
16.11.23-beta1.23063.1
2.0.0-beta4.22564.1
diff --git a/src/libraries/System.Private.CoreLib/src/System/IndexOfAnyValues/IndexOfAnyAsciiSearcher.cs b/src/libraries/System.Private.CoreLib/src/System/IndexOfAnyValues/IndexOfAnyAsciiSearcher.cs
index c39639d24f8264..92b588d06f4942 100644
--- a/src/libraries/System.Private.CoreLib/src/System/IndexOfAnyValues/IndexOfAnyAsciiSearcher.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/IndexOfAnyValues/IndexOfAnyAsciiSearcher.cs
@@ -195,7 +195,12 @@ internal static int IndexOfAnyVectorized(ref short sea
Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256);
if (result != Vector256.Zero)
{
- return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
+ result = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte();
+
+ uint mask = TNegator.ExtractMask(result);
+
+ int offsetInVector = BitOperations.TrailingZeroCount(mask);
+ return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(short));
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count);
@@ -219,7 +224,18 @@ internal static int IndexOfAnyVectorized(ref short sea
Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256);
if (result != Vector256.Zero)
{
- return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
+ result = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte();
+
+ uint mask = TNegator.ExtractMask(result);
+
+ int offsetInVector = BitOperations.TrailingZeroCount(mask);
+ if (offsetInVector >= Vector256.Count)
+ {
+ // We matched within the second vector
+ firstVector = ref oneVectorAwayFromEnd;
+ offsetInVector -= Vector256.Count;
+ }
+ return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / (nuint)sizeof(short));
}
}
@@ -307,7 +323,12 @@ internal static int LastIndexOfAnyVectorized(ref short
Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256);
if (result != Vector256.Zero)
{
- return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result);
+ result = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte();
+
+ uint mask = TNegator.ExtractMask(result);
+
+ int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask);
+ return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(short));
}
}
while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref twoVectorsAfterStart));
@@ -329,7 +350,18 @@ internal static int LastIndexOfAnyVectorized(ref short
Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256);
if (result != Vector256.Zero)
{
- return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result);
+ result = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte();
+
+ uint mask = TNegator.ExtractMask(result);
+
+ int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask);
+ if (offsetInVector < Vector256.Count)
+ {
+ return offsetInVector;
+ }
+
+ // We matched within the second vector
+ return offsetInVector - Vector256.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(short));
}
}
@@ -411,7 +443,10 @@ internal static int IndexOfAnyVectorized(ref byte searchSpace, int sea
Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256));
if (result != Vector256.Zero)
{
- return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
+ uint mask = TNegator.ExtractMask(result);
+
+ int offsetInVector = BitOperations.TrailingZeroCount(mask);
+ return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(byte));
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector256.Count);
@@ -436,7 +471,16 @@ internal static int IndexOfAnyVectorized(ref byte searchSpace, int sea
Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256));
if (result != Vector256.Zero)
{
- return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result);
+ uint mask = TNegator.ExtractMask(result);
+
+ int offsetInVector = BitOperations.TrailingZeroCount(mask);
+ if (offsetInVector >= Vector256.Count)
+ {
+ // We matched within the second vector
+ firstVector = ref halfVectorAwayFromEnd;
+ offsetInVector -= Vector256.Count;
+ }
+ return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / (nuint)sizeof(byte));
}
}
@@ -518,7 +562,10 @@ internal static int LastIndexOfAnyVectorized(ref byte searchSpace, int
Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256));
if (result != Vector256.Zero)
{
- return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result);
+ uint mask = TNegator.ExtractMask(result);
+
+ int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask);
+ return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(byte));
}
}
while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart));
@@ -541,7 +588,16 @@ internal static int LastIndexOfAnyVectorized(ref byte searchSpace, int
Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256));
if (result != Vector256.Zero)
{
- return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result);
+ uint mask = TNegator.ExtractMask(result);
+
+ int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask);
+ if (offsetInVector < Vector256.Count)
+ {
+ return offsetInVector;
+ }
+
+ // We matched within the second vector
+ return offsetInVector - Vector256.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(byte));
}
}
@@ -622,7 +678,10 @@ internal static int IndexOfAnyVectorized(ref byte searchSpace, int sea
Vector256 result = IndexOfAnyLookup(source, bitmap256_0, bitmap256_1);
if (result != Vector256.Zero)
{
- return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
+ uint mask = TNegator.ExtractMask(result);
+
+ int offsetInVector = BitOperations.TrailingZeroCount(mask);
+ return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(byte));
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector256.Count);
@@ -647,7 +706,16 @@ internal static int IndexOfAnyVectorized(ref byte searchSpace, int sea
Vector256 result = IndexOfAnyLookup(source, bitmap256_0, bitmap256_1);
if (result != Vector256.Zero)
{
- return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result);
+ uint mask = TNegator.ExtractMask(result);
+
+ int offsetInVector = BitOperations.TrailingZeroCount(mask);
+ if (offsetInVector >= Vector256.Count)
+ {
+ // We matched within the second vector
+ firstVector = ref halfVectorAwayFromEnd;
+ offsetInVector -= Vector256.Count;
+ }
+ return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / (nuint)sizeof(byte));
}
}
@@ -730,7 +798,10 @@ internal static int LastIndexOfAnyVectorized(ref byte searchSpace, int
Vector256 result = IndexOfAnyLookup(source, bitmap256_0, bitmap256_1);
if (result != Vector256.Zero)
{
- return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result);
+ uint mask = TNegator.ExtractMask(result);
+
+ int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask);
+ return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(byte));
}
}
while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart));
@@ -753,7 +824,16 @@ internal static int LastIndexOfAnyVectorized(ref byte searchSpace, int
Vector256 result = IndexOfAnyLookup(source, bitmap256_0, bitmap256_1);
if (result != Vector256.Zero)
{
- return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result);
+ uint mask = TNegator.ExtractMask(result);
+
+ int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask);
+ if (offsetInVector < Vector256.Count)
+ {
+ return offsetInVector;
+ }
+
+ // We matched within the second vector
+ return offsetInVector - Vector256.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(byte));
}
}
@@ -992,89 +1072,6 @@ private static unsafe int ComputeLastIndexOverlapped(ref T searchSp
return offsetInVector - Vector128.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(T));
}
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static unsafe int ComputeFirstIndex(ref T searchSpace, ref T current, Vector256 result)
- where TNegator : struct, INegator
- {
- if (typeof(T) == typeof(short))
- {
- result = FixUpPackedVector256Result(result);
- }
-
- uint mask = TNegator.ExtractMask(result);
-
- int offsetInVector = BitOperations.TrailingZeroCount(mask);
- return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T));
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static unsafe int ComputeFirstIndexOverlapped(ref T searchSpace, ref T current0, ref T current1, Vector256 result)
- where TNegator : struct, INegator
- {
- if (typeof(T) == typeof(short))
- {
- result = FixUpPackedVector256Result(result);
- }
-
- uint mask = TNegator.ExtractMask(result);
-
- int offsetInVector = BitOperations.TrailingZeroCount(mask);
- if (offsetInVector >= Vector256.Count)
- {
- // We matched within the second vector
- current0 = ref current1;
- offsetInVector -= Vector256.Count;
- }
- return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current0) / (nuint)sizeof(T));
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static unsafe int ComputeLastIndex(ref T searchSpace, ref T current, Vector256 result)
- where TNegator : struct, INegator
- {
- if (typeof(T) == typeof(short))
- {
- result = FixUpPackedVector256Result(result);
- }
-
- uint mask = TNegator.ExtractMask(result);
-
- int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask);
- return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T));
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static unsafe int ComputeLastIndexOverlapped(ref T searchSpace, ref T secondVector, Vector256 result)
- where TNegator : struct, INegator
- {
- if (typeof(T) == typeof(short))
- {
- result = FixUpPackedVector256Result(result);
- }
-
- uint mask = TNegator.ExtractMask(result);
-
- int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask);
- if (offsetInVector < Vector256.Count)
- {
- return offsetInVector;
- }
-
- // We matched within the second vector
- return offsetInVector - Vector256.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(T));
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static Vector256 FixUpPackedVector256Result(Vector256 result)
- {
- Debug.Assert(Avx2.IsSupported);
- // Avx2.PackUnsignedSaturate(Vector256.Create((short)1), Vector256.Create((short)2)) will result in
- // 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2
- // We want to swap the X and Y bits
- // 1, 1, 1, 1, 1, 1, 1, 1, X, X, X, X, X, X, X, X, Y, Y, Y, Y, Y, Y, Y, Y, 2, 2, 2, 2, 2, 2, 2, 2
- return Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte();
- }
-
internal interface INegator
{
static abstract bool NegateIfNeeded(bool result);
diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs
index 6da69bb7ebe630..fa1fec132b12e9 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs
@@ -120,7 +120,7 @@ public static bool Contains(ref short searchSpace, short value, int length)
{
Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace);
Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count);
- Vector256 packedSource = PackSources(source0, source1);
+ Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte();
Vector256 result = Vector256.Equals(packedValue, packedSource);
if (result != Vector256.Zero)
@@ -144,7 +144,7 @@ public static bool Contains(ref short searchSpace, short value, int length)
Vector256 source0 = Vector256.LoadUnsafe(ref firstVector);
Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd);
- Vector256 packedSource = PackSources(source0, source1);
+ Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte();
Vector256 result = Vector256.Equals(packedValue, packedSource);
if (result != Vector256.Zero)
@@ -257,13 +257,16 @@ private static int IndexOf(ref short searchSpace, short value, int len
{
Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace);
Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count);
- Vector256 packedSource = PackSources(source0, source1);
+ Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte();
Vector256 result = Vector256.Equals(packedValue, packedSource);
result = NegateIfNeeded(result);
if (result != Vector256.Zero)
{
- return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
+ uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits();
+
+ int index = BitOperations.TrailingZeroCount(notEqualsElements);
+ return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / sizeof(short));
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count);
@@ -282,13 +285,22 @@ private static int IndexOf(ref short searchSpace, short value, int len
Vector256 source0 = Vector256.LoadUnsafe(ref firstVector);
Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd);
- Vector256 packedSource = PackSources(source0, source1);
+ Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte();
Vector256 result = Vector256.Equals(packedValue, packedSource);
result = NegateIfNeeded(result);
if (result != Vector256.Zero)
{
- return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
+ uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits();
+
+ int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements);
+ if (offsetInVector >= Vector256.Count)
+ {
+ // We matched within the second vector
+ firstVector = ref oneVectorAwayFromEnd;
+ offsetInVector -= Vector256.Count;
+ }
+ return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / sizeof(short));
}
}
}
@@ -406,13 +418,16 @@ private static int IndexOfAny(ref short searchSpace, short value0, sho
{
Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace);
Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count);
- Vector256 packedSource = PackSources(source0, source1);
+ Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte();
Vector256 result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource);
result = NegateIfNeeded(result);
if (result != Vector256.Zero)
{
- return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
+ uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits();
+
+ int index = BitOperations.TrailingZeroCount(notEqualsElements);
+ return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / sizeof(short));
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count);
@@ -431,13 +446,22 @@ private static int IndexOfAny(ref short searchSpace, short value0, sho
Vector256 source0 = Vector256.LoadUnsafe(ref firstVector);
Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd);
- Vector256 packedSource = PackSources(source0, source1);
+ Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte();
Vector256 result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource);
result = NegateIfNeeded(result);
if (result != Vector256.Zero)
{
- return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
+ uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits();
+
+ int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements);
+ if (offsetInVector >= Vector256.Count)
+ {
+ // We matched within the second vector
+ firstVector = ref oneVectorAwayFromEnd;
+ offsetInVector -= Vector256.Count;
+ }
+ return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / sizeof(short));
}
}
}
@@ -558,13 +582,16 @@ private static int IndexOfAny(ref short searchSpace, short value0, sho
{
Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace);
Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count);
- Vector256 packedSource = PackSources(source0, source1);
+ Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte();
Vector256 result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource) | Vector256.Equals(packedValue2, packedSource);
result = NegateIfNeeded(result);
if (result != Vector256.Zero)
{
- return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
+ uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits();
+
+ int index = BitOperations.TrailingZeroCount(notEqualsElements);
+ return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / sizeof(short));
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count);
@@ -583,13 +610,22 @@ private static int IndexOfAny(ref short searchSpace, short value0, sho
Vector256 source0 = Vector256.LoadUnsafe(ref firstVector);
Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd);
- Vector256 packedSource = PackSources(source0, source1);
+ Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte();
Vector256 result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource) | Vector256.Equals(packedValue2, packedSource);
result = NegateIfNeeded(result);
if (result != Vector256.Zero)
{
- return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
+ uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits();
+
+ int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements);
+ if (offsetInVector >= Vector256.Count)
+ {
+ // We matched within the second vector
+ firstVector = ref oneVectorAwayFromEnd;
+ offsetInVector -= Vector256.Count;
+ }
+ return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / sizeof(short));
}
}
}
@@ -692,13 +728,16 @@ private static int IndexOfAnyInRange(ref short searchSpace, short lowI
{
Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace);
Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count);
- Vector256 packedSource = PackSources(source0, source1);
+ Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte();
Vector256 result = Vector256.LessThanOrEqual(packedSource - lowVector, rangeVector);
result = NegateIfNeeded(result);
if (result != Vector256.Zero)
{
- return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result);
+ uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits();
+
+ int index = BitOperations.TrailingZeroCount(notEqualsElements);
+ return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / sizeof(short));
}
currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count);
@@ -717,13 +756,22 @@ private static int IndexOfAnyInRange(ref short searchSpace, short lowI
Vector256 source0 = Vector256.LoadUnsafe(ref firstVector);
Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd);
- Vector256 packedSource = PackSources(source0, source1);
+ Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte();
Vector256 result = Vector256.LessThanOrEqual(packedSource - lowVector, rangeVector);
result = NegateIfNeeded(result);
if (result != Vector256.Zero)
{
- return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
+ uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits();
+
+ int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements);
+ if (offsetInVector >= Vector256.Count)
+ {
+ // We matched within the second vector
+ firstVector = ref oneVectorAwayFromEnd;
+ offsetInVector -= Vector256.Count;
+ }
+ return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / sizeof(short));
}
}
}
@@ -784,16 +832,11 @@ private static int IndexOfAnyInRange(ref short searchSpace, short lowI
return -1;
}
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static Vector256 PackSources(Vector256 source0, Vector256 source1)
- {
- Debug.Assert(Avx2.IsSupported);
- // Pack two vectors of characters into bytes. While the type is Vector256, these are really UInt16 characters.
- // X86: Downcast every character using saturation.
- // - Values <= 32767 result in min(value, 255).
- // - Values > 32767 result in 0. Because of this we can't accept needles that contain 0.
- return Avx2.PackUnsignedSaturate(source0, source1).AsByte();
- }
+ // Note: Avx2.PackUnsignedSaturate can't be extracted into a helper function that assumes
+ // Avx2 support, because this would violate the rules for intrinsics use in System.Private.CoreLib.
+ // With R2R and tiered compilation, it is possible for the helper to be prejitted without Avx2 support,
+ // but then be called from a rejitted caller that goes down the Avx2 path.
+ // Sse2 is always supported in crossgen, so the Sse2 equivalent can be extracted to a helper.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128 PackSources(Vector128 source0, Vector128 source1)
@@ -824,14 +867,6 @@ private static int ComputeFirstIndex(ref short searchSpace, ref short current, V
return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / sizeof(short));
}
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static int ComputeFirstIndex(ref short searchSpace, ref short current, Vector256 equals)
- {
- uint notEqualsElements = FixUpPackedVector256Result(equals).ExtractMostSignificantBits();
- int index = BitOperations.TrailingZeroCount(notEqualsElements);
- return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / sizeof(short));
- }
-
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static int ComputeFirstIndexOverlapped(ref short searchSpace, ref short current0, ref short current1, Vector128 equals)
{
@@ -845,30 +880,5 @@ private static int ComputeFirstIndexOverlapped(ref short searchSpace, ref short
}
return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current0) / sizeof(short));
}
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static int ComputeFirstIndexOverlapped(ref short searchSpace, ref short current0, ref short current1, Vector256 equals)
- {
- uint notEqualsElements = FixUpPackedVector256Result(equals).ExtractMostSignificantBits();
- int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements);
- if (offsetInVector >= Vector256.Count)
- {
- // We matched within the second vector
- current0 = ref current1;
- offsetInVector -= Vector256.Count;
- }
- return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current0) / sizeof(short));
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static Vector256 FixUpPackedVector256Result(Vector256 result)
- {
- Debug.Assert(Avx2.IsSupported);
- // Avx2.PackUnsignedSaturate(Vector256.Create((short)1), Vector256.Create((short)2)) will result in
- // 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2
- // We want to swap the X and Y bits
- // 1, 1, 1, 1, 1, 1, 1, 1, X, X, X, X, X, X, X, X, Y, Y, Y, Y, Y, Y, Y, Y, 2, 2, 2, 2, 2, 2, 2, 2
- return Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte();
- }
}
}