From 200f93e856e85b029bb45f9a9596a56d83f681a1 Mon Sep 17 00:00:00 2001 From: "dotnet-maestro[bot]" Date: Thu, 20 Apr 2023 23:02:01 +0000 Subject: [PATCH 1/5] Update dependencies from https://dev.azure.com/dnceng/internal/_git/dotnet-optimization build 20230420.15 optimization.linux-arm64.MIBC.Runtime , optimization.linux-x64.MIBC.Runtime , optimization.windows_nt-arm64.MIBC.Runtime , optimization.windows_nt-x64.MIBC.Runtime , optimization.windows_nt-x86.MIBC.Runtime , optimization.PGO.CoreCLR From Version 1.0.0-prerelease.23175.4 -> To Version 1.0.0-prerelease.23220.15 --- eng/Version.Details.xml | 24 ++++++++++++------------ eng/Versions.props | 12 ++++++------ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/eng/Version.Details.xml b/eng/Version.Details.xml index eafe55366b8c95..3ae6be98cda1ce 100644 --- a/eng/Version.Details.xml +++ b/eng/Version.Details.xml @@ -292,21 +292,21 @@ https://github.com/dotnet/arcade cb54ca21431ee8d96f91abfbc42237bcb001f9d1 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b + f9ae5c9fda841a26d8eaaa07151ac2618725da87 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b + f9ae5c9fda841a26d8eaaa07151ac2618725da87 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b + f9ae5c9fda841a26d8eaaa07151ac2618725da87 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b + f9ae5c9fda841a26d8eaaa07151ac2618725da87 https://github.com/dotnet/hotreload-utils @@ -340,13 +340,13 @@ https://github.com/dotnet/sdk 2fd62c3936f5336b836f6b12df170aa0e90da767 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b + f9ae5c9fda841a26d8eaaa07151ac2618725da87 - + https://dev.azure.com/dnceng/internal/_git/dotnet-optimization - 103c1eaca9ad80cdd1746abfb97c7f3c9d0b0f3b + f9ae5c9fda841a26d8eaaa07151ac2618725da87 diff --git a/eng/Versions.props b/eng/Versions.props index f8940b01835a10..0d7dad51197dbf 100644 --- a/eng/Versions.props +++ b/eng/Versions.props @@ -158,12 +158,12 @@ 8.0.0-beta.23179.4 8.0.0-beta.23179.4 - 1.0.0-prerelease.23175.4 - 1.0.0-prerelease.23175.4 - 1.0.0-prerelease.23175.4 - 1.0.0-prerelease.23175.4 - 1.0.0-prerelease.23175.4 - 1.0.0-prerelease.23175.4 + 1.0.0-prerelease.23220.15 + 1.0.0-prerelease.23220.15 + 1.0.0-prerelease.23220.15 + 1.0.0-prerelease.23220.15 + 1.0.0-prerelease.23220.15 + 1.0.0-prerelease.23220.15 16.11.23-beta1.23063.1 2.0.0-beta4.22564.1 From ab43da48db7e4f0fc9ea36212de0866bc5dc61fe Mon Sep 17 00:00:00 2001 From: Sven Boemer Date: Fri, 21 Apr 2023 19:57:50 +0000 Subject: [PATCH 2/5] Re-enable PGO --- eng/nativepgo.targets | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eng/nativepgo.targets b/eng/nativepgo.targets index cfd41d5e975a8f..fd289cc83a7131 100644 --- a/eng/nativepgo.targets +++ b/eng/nativepgo.targets @@ -2,7 +2,7 @@ true - false + true false false From 71730303fd19d606f265dd0b27ad97c746a2cf71 Mon Sep 17 00:00:00 2001 From: Sven Boemer Date: Mon, 24 Apr 2023 17:35:05 +0000 Subject: [PATCH 3/5] Revert "Re-enable PGO" This reverts commit ab43da48db7e4f0fc9ea36212de0866bc5dc61fe. --- eng/nativepgo.targets | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eng/nativepgo.targets b/eng/nativepgo.targets index fd289cc83a7131..cfd41d5e975a8f 100644 --- a/eng/nativepgo.targets +++ b/eng/nativepgo.targets @@ -2,7 +2,7 @@ true - true + false false false From 0342acc202d56149ac1c5bbe5f6c67410937b183 Mon Sep 17 00:00:00 2001 From: Sven Boemer Date: Mon, 24 Apr 2023 17:53:04 +0000 Subject: [PATCH 4/5] Fix Avx2 intrinsics --- .../src/System/SpanHelpers.Packed.cs | 37 ++++++++----------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs index 6da69bb7ebe630..630170df36d2a1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. using System.Buffers.Binary; @@ -120,7 +120,7 @@ public static bool Contains(ref short searchSpace, short value, int length) { Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace); Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.Equals(packedValue, packedSource); if (result != Vector256.Zero) @@ -144,7 +144,7 @@ public static bool Contains(ref short searchSpace, short value, int length) Vector256 source0 = Vector256.LoadUnsafe(ref firstVector); Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.Equals(packedValue, packedSource); if (result != Vector256.Zero) @@ -257,7 +257,7 @@ private static int IndexOf(ref short searchSpace, short value, int len { Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace); Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.Equals(packedValue, packedSource); result = NegateIfNeeded(result); @@ -282,7 +282,7 @@ private static int IndexOf(ref short searchSpace, short value, int len Vector256 source0 = Vector256.LoadUnsafe(ref firstVector); Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.Equals(packedValue, packedSource); result = NegateIfNeeded(result); @@ -406,7 +406,7 @@ private static int IndexOfAny(ref short searchSpace, short value0, sho { Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace); Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource); result = NegateIfNeeded(result); @@ -431,7 +431,7 @@ private static int IndexOfAny(ref short searchSpace, short value0, sho Vector256 source0 = Vector256.LoadUnsafe(ref firstVector); Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource); result = NegateIfNeeded(result); @@ -558,7 +558,7 @@ private static int IndexOfAny(ref short searchSpace, short value0, sho { Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace); Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource) | Vector256.Equals(packedValue2, packedSource); result = NegateIfNeeded(result); @@ -583,7 +583,7 @@ private static int IndexOfAny(ref short searchSpace, short value0, sho Vector256 source0 = Vector256.LoadUnsafe(ref firstVector); Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.Equals(packedValue0, packedSource) | Vector256.Equals(packedValue1, packedSource) | Vector256.Equals(packedValue2, packedSource); result = NegateIfNeeded(result); @@ -692,7 +692,7 @@ private static int IndexOfAnyInRange(ref short searchSpace, short lowI { Vector256 source0 = Vector256.LoadUnsafe(ref currentSearchSpace); Vector256 source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256.Count); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.LessThanOrEqual(packedSource - lowVector, rangeVector); result = NegateIfNeeded(result); @@ -717,7 +717,7 @@ private static int IndexOfAnyInRange(ref short searchSpace, short lowI Vector256 source0 = Vector256.LoadUnsafe(ref firstVector); Vector256 source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd); - Vector256 packedSource = PackSources(source0, source1); + Vector256 packedSource = Avx2.PackUnsignedSaturate(source0, source1).AsByte(); Vector256 result = Vector256.LessThanOrEqual(packedSource - lowVector, rangeVector); result = NegateIfNeeded(result); @@ -784,16 +784,11 @@ private static int IndexOfAnyInRange(ref short searchSpace, short lowI return -1; } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 PackSources(Vector256 source0, Vector256 source1) - { - Debug.Assert(Avx2.IsSupported); - // Pack two vectors of characters into bytes. While the type is Vector256, these are really UInt16 characters. - // X86: Downcast every character using saturation. - // - Values <= 32767 result in min(value, 255). - // - Values > 32767 result in 0. Because of this we can't accept needles that contain 0. - return Avx2.PackUnsignedSaturate(source0, source1).AsByte(); - } + // Note: Avx2.PackUnsignedSaturate can't be extracted into a helper function that assumes + // Avx2 support, because this would violate rules for System.Private.CoreLib intrinsics use. + // With R2R and tiered compilation, it is possible for the helper to be prejitted without Avx2 support, + // but called from a rejitted caller that goes down the Avx2 path. + // Sse2 is always supported in crossgen, so can be extracted to a helper. [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector128 PackSources(Vector128 source0, Vector128 source1) From ef9e53b1399b0e363db5bcdd704ef38057812365 Mon Sep 17 00:00:00 2001 From: Sven Boemer Date: Mon, 24 Apr 2023 20:12:47 +0000 Subject: [PATCH 5/5] Inline more Avx2 helpers --- .../IndexOfAnyAsciiSearcher.cs | 187 +++++++++--------- .../src/System/SpanHelpers.Packed.cs | 99 ++++++---- 2 files changed, 149 insertions(+), 137 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/IndexOfAnyValues/IndexOfAnyAsciiSearcher.cs b/src/libraries/System.Private.CoreLib/src/System/IndexOfAnyValues/IndexOfAnyAsciiSearcher.cs index c39639d24f8264..92b588d06f4942 100644 --- a/src/libraries/System.Private.CoreLib/src/System/IndexOfAnyValues/IndexOfAnyAsciiSearcher.cs +++ b/src/libraries/System.Private.CoreLib/src/System/IndexOfAnyValues/IndexOfAnyAsciiSearcher.cs @@ -195,7 +195,12 @@ internal static int IndexOfAnyVectorized(ref short sea Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); if (result != Vector256.Zero) { - return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result); + result = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte(); + + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = BitOperations.TrailingZeroCount(mask); + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(short)); } currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count); @@ -219,7 +224,18 @@ internal static int IndexOfAnyVectorized(ref short sea Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); if (result != Vector256.Zero) { - return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); + result = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte(); + + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = BitOperations.TrailingZeroCount(mask); + if (offsetInVector >= Vector256.Count) + { + // We matched within the second vector + firstVector = ref oneVectorAwayFromEnd; + offsetInVector -= Vector256.Count; + } + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / (nuint)sizeof(short)); } } @@ -307,7 +323,12 @@ internal static int LastIndexOfAnyVectorized(ref short Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); if (result != Vector256.Zero) { - return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); + result = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte(); + + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask); + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(short)); } } while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref twoVectorsAfterStart)); @@ -329,7 +350,18 @@ internal static int LastIndexOfAnyVectorized(ref short Vector256 result = IndexOfAnyLookup(source0, source1, bitmap256); if (result != Vector256.Zero) { - return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); + result = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte(); + + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask); + if (offsetInVector < Vector256.Count) + { + return offsetInVector; + } + + // We matched within the second vector + return offsetInVector - Vector256.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(short)); } } @@ -411,7 +443,10 @@ internal static int IndexOfAnyVectorized(ref byte searchSpace, int sea Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); if (result != Vector256.Zero) { - return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result); + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = BitOperations.TrailingZeroCount(mask); + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(byte)); } currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector256.Count); @@ -436,7 +471,16 @@ internal static int IndexOfAnyVectorized(ref byte searchSpace, int sea Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); if (result != Vector256.Zero) { - return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result); + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = BitOperations.TrailingZeroCount(mask); + if (offsetInVector >= Vector256.Count) + { + // We matched within the second vector + firstVector = ref halfVectorAwayFromEnd; + offsetInVector -= Vector256.Count; + } + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / (nuint)sizeof(byte)); } } @@ -518,7 +562,10 @@ internal static int LastIndexOfAnyVectorized(ref byte searchSpace, int Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); if (result != Vector256.Zero) { - return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask); + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(byte)); } } while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart)); @@ -541,7 +588,16 @@ internal static int LastIndexOfAnyVectorized(ref byte searchSpace, int Vector256 result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256)); if (result != Vector256.Zero) { - return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask); + if (offsetInVector < Vector256.Count) + { + return offsetInVector; + } + + // We matched within the second vector + return offsetInVector - Vector256.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(byte)); } } @@ -622,7 +678,10 @@ internal static int IndexOfAnyVectorized(ref byte searchSpace, int sea Vector256 result = IndexOfAnyLookup(source, bitmap256_0, bitmap256_1); if (result != Vector256.Zero) { - return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result); + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = BitOperations.TrailingZeroCount(mask); + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(byte)); } currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector256.Count); @@ -647,7 +706,16 @@ internal static int IndexOfAnyVectorized(ref byte searchSpace, int sea Vector256 result = IndexOfAnyLookup(source, bitmap256_0, bitmap256_1); if (result != Vector256.Zero) { - return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result); + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = BitOperations.TrailingZeroCount(mask); + if (offsetInVector >= Vector256.Count) + { + // We matched within the second vector + firstVector = ref halfVectorAwayFromEnd; + offsetInVector -= Vector256.Count; + } + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / (nuint)sizeof(byte)); } } @@ -730,7 +798,10 @@ internal static int LastIndexOfAnyVectorized(ref byte searchSpace, int Vector256 result = IndexOfAnyLookup(source, bitmap256_0, bitmap256_1); if (result != Vector256.Zero) { - return ComputeLastIndex(ref searchSpace, ref currentSearchSpace, result); + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask); + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / (nuint)sizeof(byte)); } } while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart)); @@ -753,7 +824,16 @@ internal static int LastIndexOfAnyVectorized(ref byte searchSpace, int Vector256 result = IndexOfAnyLookup(source, bitmap256_0, bitmap256_1); if (result != Vector256.Zero) { - return ComputeLastIndexOverlapped(ref searchSpace, ref secondVector, result); + uint mask = TNegator.ExtractMask(result); + + int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask); + if (offsetInVector < Vector256.Count) + { + return offsetInVector; + } + + // We matched within the second vector + return offsetInVector - Vector256.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(byte)); } } @@ -992,89 +1072,6 @@ private static unsafe int ComputeLastIndexOverlapped(ref T searchSp return offsetInVector - Vector128.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(T)); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe int ComputeFirstIndex(ref T searchSpace, ref T current, Vector256 result) - where TNegator : struct, INegator - { - if (typeof(T) == typeof(short)) - { - result = FixUpPackedVector256Result(result); - } - - uint mask = TNegator.ExtractMask(result); - - int offsetInVector = BitOperations.TrailingZeroCount(mask); - return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T)); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe int ComputeFirstIndexOverlapped(ref T searchSpace, ref T current0, ref T current1, Vector256 result) - where TNegator : struct, INegator - { - if (typeof(T) == typeof(short)) - { - result = FixUpPackedVector256Result(result); - } - - uint mask = TNegator.ExtractMask(result); - - int offsetInVector = BitOperations.TrailingZeroCount(mask); - if (offsetInVector >= Vector256.Count) - { - // We matched within the second vector - current0 = ref current1; - offsetInVector -= Vector256.Count; - } - return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current0) / (nuint)sizeof(T)); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe int ComputeLastIndex(ref T searchSpace, ref T current, Vector256 result) - where TNegator : struct, INegator - { - if (typeof(T) == typeof(short)) - { - result = FixUpPackedVector256Result(result); - } - - uint mask = TNegator.ExtractMask(result); - - int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask); - return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T)); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe int ComputeLastIndexOverlapped(ref T searchSpace, ref T secondVector, Vector256 result) - where TNegator : struct, INegator - { - if (typeof(T) == typeof(short)) - { - result = FixUpPackedVector256Result(result); - } - - uint mask = TNegator.ExtractMask(result); - - int offsetInVector = 31 - BitOperations.LeadingZeroCount(mask); - if (offsetInVector < Vector256.Count) - { - return offsetInVector; - } - - // We matched within the second vector - return offsetInVector - Vector256.Count + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref secondVector) / (nuint)sizeof(T)); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 FixUpPackedVector256Result(Vector256 result) - { - Debug.Assert(Avx2.IsSupported); - // Avx2.PackUnsignedSaturate(Vector256.Create((short)1), Vector256.Create((short)2)) will result in - // 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 - // We want to swap the X and Y bits - // 1, 1, 1, 1, 1, 1, 1, 1, X, X, X, X, X, X, X, X, Y, Y, Y, Y, Y, Y, Y, Y, 2, 2, 2, 2, 2, 2, 2, 2 - return Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte(); - } - internal interface INegator { static abstract bool NegateIfNeeded(bool result); diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs index 630170df36d2a1..fa1fec132b12e9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Packed.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. using System.Buffers.Binary; @@ -263,7 +263,10 @@ private static int IndexOf(ref short searchSpace, short value, int len if (result != Vector256.Zero) { - return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result); + uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits(); + + int index = BitOperations.TrailingZeroCount(notEqualsElements); + return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / sizeof(short)); } currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count); @@ -288,7 +291,16 @@ private static int IndexOf(ref short searchSpace, short value, int len if (result != Vector256.Zero) { - return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); + uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits(); + + int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements); + if (offsetInVector >= Vector256.Count) + { + // We matched within the second vector + firstVector = ref oneVectorAwayFromEnd; + offsetInVector -= Vector256.Count; + } + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / sizeof(short)); } } } @@ -412,7 +424,10 @@ private static int IndexOfAny(ref short searchSpace, short value0, sho if (result != Vector256.Zero) { - return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result); + uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits(); + + int index = BitOperations.TrailingZeroCount(notEqualsElements); + return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / sizeof(short)); } currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count); @@ -437,7 +452,16 @@ private static int IndexOfAny(ref short searchSpace, short value0, sho if (result != Vector256.Zero) { - return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); + uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits(); + + int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements); + if (offsetInVector >= Vector256.Count) + { + // We matched within the second vector + firstVector = ref oneVectorAwayFromEnd; + offsetInVector -= Vector256.Count; + } + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / sizeof(short)); } } } @@ -564,7 +588,10 @@ private static int IndexOfAny(ref short searchSpace, short value0, sho if (result != Vector256.Zero) { - return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result); + uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits(); + + int index = BitOperations.TrailingZeroCount(notEqualsElements); + return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / sizeof(short)); } currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count); @@ -589,7 +616,16 @@ private static int IndexOfAny(ref short searchSpace, short value0, sho if (result != Vector256.Zero) { - return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); + uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits(); + + int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements); + if (offsetInVector >= Vector256.Count) + { + // We matched within the second vector + firstVector = ref oneVectorAwayFromEnd; + offsetInVector -= Vector256.Count; + } + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / sizeof(short)); } } } @@ -698,7 +734,10 @@ private static int IndexOfAnyInRange(ref short searchSpace, short lowI if (result != Vector256.Zero) { - return ComputeFirstIndex(ref searchSpace, ref currentSearchSpace, result); + uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits(); + + int index = BitOperations.TrailingZeroCount(notEqualsElements); + return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref currentSearchSpace) / sizeof(short)); } currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256.Count); @@ -723,7 +762,16 @@ private static int IndexOfAnyInRange(ref short searchSpace, short lowI if (result != Vector256.Zero) { - return ComputeFirstIndexOverlapped(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result); + uint notEqualsElements = Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte().ExtractMostSignificantBits(); + + int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements); + if (offsetInVector >= Vector256.Count) + { + // We matched within the second vector + firstVector = ref oneVectorAwayFromEnd; + offsetInVector -= Vector256.Count; + } + return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref firstVector) / sizeof(short)); } } } @@ -819,14 +867,6 @@ private static int ComputeFirstIndex(ref short searchSpace, ref short current, V return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / sizeof(short)); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int ComputeFirstIndex(ref short searchSpace, ref short current, Vector256 equals) - { - uint notEqualsElements = FixUpPackedVector256Result(equals).ExtractMostSignificantBits(); - int index = BitOperations.TrailingZeroCount(notEqualsElements); - return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / sizeof(short)); - } - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int ComputeFirstIndexOverlapped(ref short searchSpace, ref short current0, ref short current1, Vector128 equals) { @@ -840,30 +880,5 @@ private static int ComputeFirstIndexOverlapped(ref short searchSpace, ref short } return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current0) / sizeof(short)); } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int ComputeFirstIndexOverlapped(ref short searchSpace, ref short current0, ref short current1, Vector256 equals) - { - uint notEqualsElements = FixUpPackedVector256Result(equals).ExtractMostSignificantBits(); - int offsetInVector = BitOperations.TrailingZeroCount(notEqualsElements); - if (offsetInVector >= Vector256.Count) - { - // We matched within the second vector - current0 = ref current1; - offsetInVector -= Vector256.Count; - } - return offsetInVector + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current0) / sizeof(short)); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 FixUpPackedVector256Result(Vector256 result) - { - Debug.Assert(Avx2.IsSupported); - // Avx2.PackUnsignedSaturate(Vector256.Create((short)1), Vector256.Create((short)2)) will result in - // 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 - // We want to swap the X and Y bits - // 1, 1, 1, 1, 1, 1, 1, 1, X, X, X, X, X, X, X, X, Y, Y, Y, Y, Y, Y, Y, Y, 2, 2, 2, 2, 2, 2, 2, 2 - return Avx2.Permute4x64(result.AsInt64(), 0b_11_01_10_00).AsByte(); - } } }