From ff0f0070b5d7fc2246cf0c5fb90be4ec4c87e7d0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Apr 2026 16:31:07 +0000 Subject: [PATCH 1/6] Replace ExtractMostSignificantBits+BitOp patterns with Vector helpers Replace patterns of ExtractMostSignificantBits() followed by PopCount/TrailingZeroCount/LeadingZeroCount with the optimized Vector helpers: CountWhereAllBitsSet, IndexOfWhereAllBitsSet, and LastIndexOfWhereAllBitsSet. Remove AdvSimd special paths from Vector64/Vector128 internal helpers (CountMatches, IndexOfFirstMatch, IndexOfLastMatch) and the now-unused AdvSimdExtractBitMask/AdvSimdFixupBitCount methods. Agent-Logs-Url: https://github.com/dotnet/runtime/sessions/fcd7f0bb-7e64-41fa-8773-089705b9a737 Co-authored-by: EgorBo <523221+EgorBo@users.noreply.github.com> --- .../TensorPrimitives.HammingDistance.cs | 12 +-- .../netcore/TensorPrimitives.IndexOfMax.cs | 6 +- .../System/Runtime/Intrinsics/Vector128.cs | 83 +----------------- .../src/System/Runtime/Intrinsics/Vector64.cs | 84 +------------------ .../src/System/SpanHelpers.T.cs | 6 +- 5 files changed, 20 insertions(+), 171 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.HammingDistance.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.HammingDistance.cs index c38b60e5f0ac3b..c9dd58c9af5144 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.HammingDistance.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.HammingDistance.cs @@ -88,7 +88,7 @@ private static int CountUnequalElements(ReadOnlySpan x, ReadOnlySpan y) Vector512 xVec = Vector512.LoadUnsafe(ref xRef, (uint)i); Vector512 yVec = Vector512.LoadUnsafe(ref yRef, (uint)i); - count += BitOperations.PopCount((~Vector512.Equals(xVec, yVec)).ExtractMostSignificantBits()); + count += Vector512.CountWhereAllBitsSet(~Vector512.Equals(xVec, yVec)); i += Vector512.Count; } @@ -104,7 +104,7 @@ private static int CountUnequalElements(ReadOnlySpan x, ReadOnlySpan y) xVec &= remainderMask; yVec &= remainderMask; - count += BitOperations.PopCount((~Vector512.Equals(xVec, yVec)).ExtractMostSignificantBits()); + count += Vector512.CountWhereAllBitsSet(~Vector512.Equals(xVec, yVec)); } } else @@ -120,7 +120,7 @@ private static int CountUnequalElements(ReadOnlySpan x, ReadOnlySpan y) Vector256 xVec = Vector256.LoadUnsafe(ref xRef, (uint)i); Vector256 yVec = Vector256.LoadUnsafe(ref yRef, (uint)i); - count += BitOperations.PopCount((~Vector256.Equals(xVec, yVec)).ExtractMostSignificantBits()); + count += Vector256.CountWhereAllBitsSet(~Vector256.Equals(xVec, yVec)); i += Vector256.Count; } @@ -136,7 +136,7 @@ private static int CountUnequalElements(ReadOnlySpan x, ReadOnlySpan y) xVec &= remainderMask; yVec &= remainderMask; - count += BitOperations.PopCount((~Vector256.Equals(xVec, yVec)).ExtractMostSignificantBits()); + count += Vector256.CountWhereAllBitsSet(~Vector256.Equals(xVec, yVec)); } } } @@ -153,7 +153,7 @@ private static int CountUnequalElements(ReadOnlySpan x, ReadOnlySpan y) Vector128 xVec = Vector128.LoadUnsafe(ref xRef, (uint)i); Vector128 yVec = Vector128.LoadUnsafe(ref yRef, (uint)i); - count += BitOperations.PopCount((~Vector128.Equals(xVec, yVec)).ExtractMostSignificantBits()); + count += Vector128.CountWhereAllBitsSet(~Vector128.Equals(xVec, yVec)); i += Vector128.Count; } @@ -169,7 +169,7 @@ private static int CountUnequalElements(ReadOnlySpan x, ReadOnlySpan y) xVec &= remainderMask; yVec &= remainderMask; - count += BitOperations.PopCount((~Vector128.Equals(xVec, yVec)).ExtractMostSignificantBits()); + count += Vector128.CountWhereAllBitsSet(~Vector128.Equals(xVec, yVec)); } } } diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMax.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMax.cs index f40f7e1e2e2ba0..4a8778cea5b141 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMax.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMax.cs @@ -420,13 +420,13 @@ static Vector128 CreateVector128T(int i) => } private static int IndexOfFirstMatch(Vector128 mask) => - BitOperations.TrailingZeroCount(mask.ExtractMostSignificantBits()); + Vector128.IndexOfWhereAllBitsSet(mask); private static int IndexOfFirstMatch(Vector256 mask) => - BitOperations.TrailingZeroCount(mask.ExtractMostSignificantBits()); + Vector256.IndexOfWhereAllBitsSet(mask); private static int IndexOfFirstMatch(Vector512 mask) => - BitOperations.TrailingZeroCount(mask.ExtractMostSignificantBits()); + Vector512.IndexOfWhereAllBitsSet(mask); [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe Vector256 IndexLessThan(Vector256 indices1, Vector256 indices2) => diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index 1cebc6f55a6b81..626e5d8cebc216 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -4502,64 +4502,7 @@ public static Vector128 WithUpper(this Vector128 vector, Vector64 va public static Vector128 Xor(Vector128 left, Vector128 right) => left ^ right; [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(AdvSimd))] - internal static ulong AdvSimdExtractBitMask(Vector128 vector) - { - if (!AdvSimd.IsSupported) - { - ThrowHelper.ThrowNotSupportedException(); - } - - // This expects vector to have each element be one of Zero or AllBitsSet - // and will not produce correct results otherwise. - // - // Given this, we can treat it as ushort and do a logical-right-shift by 4 to - // compact the mask into half the space, giving us the following possibilities for - // each pair of bytes: - // * 0x00_00 - 0x00 - // * 0x00_FF - 0x0F - // * 0xFF_00 - 0xF0 - // * 0xFF_FF - 0xFF - // - // This allows us to extract the full metadata as a 64-bit scalar which can then - // be consumed by bit-counting APIs, such as PopCount, LeadingZeroCount, or TrailingZeroCount, - // and then adjusted by AdvSimdFixupBitCount to get the actual count of elements - // that were masked. - - return AdvSimd.ShiftRightLogicalNarrowingLower(vector.AsUInt16(), 4).AsUInt64().ToScalar(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(AdvSimd))] - internal static int AdvSimdFixupBitCount(int bitCount) - { - if (!AdvSimd.IsSupported) - { - ThrowHelper.ThrowNotSupportedException(); - } - - // This API is meant to be consumed alongside AdvSimdExtractBitMask and will - // not produce correct results for arbitrary inputs. It adjusts the bit count - // assuming that sequences of 1 or 0 were in groups of 4 bits per byte. - - unsafe - { - return bitCount >>> (2 + int.Log2(sizeof(T))); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int CountMatches(Vector128 vector) - { - if (AdvSimd.IsSupported) - { - return AdvSimdFixupBitCount(BitOperations.PopCount(AdvSimdExtractBitMask(vector))); - } - else - { - return BitOperations.PopCount(vector.ExtractMostSignificantBits()); - } - } + internal static int CountMatches(Vector128 vector) => BitOperations.PopCount(vector.ExtractMostSignificantBits()); [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static T GetElementUnsafe(in this Vector128 vector, int index) @@ -4572,30 +4515,12 @@ internal static T GetElementUnsafe(in this Vector128 vector, int index) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfFirstMatch(Vector128 vector) { - if (AdvSimd.IsSupported) - { - int result = AdvSimdFixupBitCount(BitOperations.TrailingZeroCount(AdvSimdExtractBitMask(vector))); - return (result != Vector128.Count) ? result : -1; - } - else - { - int result = BitOperations.TrailingZeroCount(vector.ExtractMostSignificantBits()); - return (result != 32) ? result : -1; - } + int result = BitOperations.TrailingZeroCount(vector.ExtractMostSignificantBits()); + return (result != 32) ? result : -1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int IndexOfLastMatch(Vector128 vector) - { - if (AdvSimd.IsSupported) - { - return (Vector128.Count - 1) - AdvSimdFixupBitCount(BitOperations.LeadingZeroCount(AdvSimdExtractBitMask(vector))); - } - else - { - return 31 - BitOperations.LeadingZeroCount(vector.ExtractMostSignificantBits()); - } - } + internal static int IndexOfLastMatch(Vector128 vector) => 31 - BitOperations.LeadingZeroCount(vector.ExtractMostSignificantBits()); [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static void SetElementUnsafe(in this Vector128 vector, int index, T value) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index 8655d9778f0529..1cccde196ab01b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -6,7 +6,6 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using System.Runtime.Intrinsics.Arm; namespace System.Runtime.Intrinsics { @@ -4398,64 +4397,7 @@ public static Vector64 WithElement(this Vector64 vector, int index, T v public static Vector64 Xor(Vector64 left, Vector64 right) => left ^ right; [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(AdvSimd))] - internal static uint AdvSimdExtractBitMask(Vector64 vector) - { - if (!AdvSimd.IsSupported) - { - ThrowHelper.ThrowNotSupportedException(); - } - - // This expects vector to have each element be one of Zero or AllBitsSet - // and will not produce correct results otherwise. - // - // Given this, we can treat it as ushort and do a logical-right-shift by 4 to - // compact the mask into half the space, giving us the following possibilities for - // each pair of bytes: - // * 0x00_00 - 0x00 - // * 0x00_FF - 0x0F - // * 0xFF_00 - 0xF0 - // * 0xFF_FF - 0xFF - // - // This allows us to extract the full metadata as a 32-bit scalar which can then - // be consumed by bit-counting APIs, such as PopCount, LeadingZeroCount, or TrailingZeroCount, - // and then adjusted by AdvSimdFixupBitCount to get the actual count of elements - // that were masked. - - return AdvSimd.ShiftRightLogicalNarrowingLower(vector.ToVector128().AsUInt16(), 4).AsUInt32().ToScalar(); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(AdvSimd))] - internal static int AdvSimdFixupBitCount(int bitCount) - { - if (!AdvSimd.IsSupported) - { - ThrowHelper.ThrowNotSupportedException(); - } - - // This API is meant to be consumed alongside AdvSimdExtractBitMask and will - // not produce correct results for arbitrary inputs. It adjusts the bit count - // assuming that sequences of 1 or 0 were in groups of 4 bits per byte. - - unsafe - { - return bitCount >>> (2 + int.Log2(sizeof(T))); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int CountMatches(Vector64 vector) - { - if (AdvSimd.IsSupported) - { - return AdvSimdFixupBitCount(BitOperations.PopCount(AdvSimdExtractBitMask(vector))); - } - else - { - return BitOperations.PopCount(vector.ExtractMostSignificantBits()); - } - } + internal static int CountMatches(Vector64 vector) => BitOperations.PopCount(vector.ExtractMostSignificantBits()); [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static T GetElementUnsafe(in this Vector64 vector, int index) @@ -4468,30 +4410,12 @@ internal static T GetElementUnsafe(in this Vector64 vector, int index) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfFirstMatch(Vector64 vector) { - if (AdvSimd.IsSupported) - { - int result = AdvSimdFixupBitCount(BitOperations.TrailingZeroCount(AdvSimdExtractBitMask(vector))); - return (result != Vector64.Count) ? result : -1; - } - else - { - int result = BitOperations.TrailingZeroCount(vector.ExtractMostSignificantBits()); - return (result != 32) ? result : -1; - } + int result = BitOperations.TrailingZeroCount(vector.ExtractMostSignificantBits()); + return (result != 32) ? result : -1; } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int IndexOfLastMatch(Vector64 vector) - { - if (AdvSimd.IsSupported) - { - return (Vector64.Count - 1) - AdvSimdFixupBitCount(BitOperations.LeadingZeroCount(AdvSimdExtractBitMask(vector))); - } - else - { - return 31 - BitOperations.LeadingZeroCount(vector.ExtractMostSignificantBits()); - } - } + internal static int IndexOfLastMatch(Vector64 vector) => 31 - BitOperations.LeadingZeroCount(vector.ExtractMostSignificantBits()); [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static void SetElementUnsafe(in this Vector64 vector, int index, T value) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index 8e53a21801ec29..12278446319180 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -4176,7 +4176,7 @@ public static unsafe int CountValueType(ref T current, T value, int length) w ref T oneVectorAwayFromEnd = ref Unsafe.Subtract(ref end, Vector512.Count); while (Unsafe.IsAddressLessThan(ref current, ref oneVectorAwayFromEnd)) { - count += BitOperations.PopCount(Vector512.Equals(Vector512.LoadUnsafe(ref current), targetVector).ExtractMostSignificantBits()); + count += Vector512.CountWhereAllBitsSet(Vector512.Equals(Vector512.LoadUnsafe(ref current), targetVector)); current = ref Unsafe.Add(ref current, Vector512.Count); } @@ -4191,7 +4191,7 @@ public static unsafe int CountValueType(ref T current, T value, int length) w ref T oneVectorAwayFromEnd = ref Unsafe.Subtract(ref end, Vector256.Count); while (Unsafe.IsAddressLessThan(ref current, ref oneVectorAwayFromEnd)) { - count += BitOperations.PopCount(Vector256.Equals(Vector256.LoadUnsafe(ref current), targetVector).ExtractMostSignificantBits()); + count += Vector256.CountWhereAllBitsSet(Vector256.Equals(Vector256.LoadUnsafe(ref current), targetVector)); current = ref Unsafe.Add(ref current, Vector256.Count); } @@ -4206,7 +4206,7 @@ public static unsafe int CountValueType(ref T current, T value, int length) w ref T oneVectorAwayFromEnd = ref Unsafe.Subtract(ref end, Vector128.Count); while (Unsafe.IsAddressLessThan(ref current, ref oneVectorAwayFromEnd)) { - count += BitOperations.PopCount(Vector128.Equals(Vector128.LoadUnsafe(ref current), targetVector).ExtractMostSignificantBits()); + count += Vector128.CountWhereAllBitsSet(Vector128.Equals(Vector128.LoadUnsafe(ref current), targetVector)); current = ref Unsafe.Add(ref current, Vector128.Count); } From 31f94e327960b9f7dc1f26b51820baa1f00083e5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Apr 2026 17:34:54 +0000 Subject: [PATCH 2/6] Remove IndexOfFirstMatch wrappers; inline IndexOfWhereAllBitsSet directly Agent-Logs-Url: https://github.com/dotnet/runtime/sessions/fd3915a5-bbf3-4530-8406-f3983be66ffb Co-authored-by: tannergooding <10487869+tannergooding@users.noreply.github.com> --- .../netcore/TensorPrimitives.IndexOfMax.cs | 25 +++++++------------ .../Tensors/netcore/TensorPrimitives.Max.cs | 18 ++++++------- 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMax.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMax.cs index 4a8778cea5b141..884109a8e2057c 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMax.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMax.cs @@ -176,7 +176,7 @@ static Vector512 CreateVector512T(int i) => nanMask = ~Vector512.Equals(result, result); if (nanMask != Vector512.Zero) { - return IndexOfFirstMatch(nanMask); + return Vector512.IndexOfWhereAllBitsSet(nanMask); } } @@ -195,7 +195,7 @@ static Vector512 CreateVector512T(int i) => nanMask = ~Vector512.Equals(current, current); if (nanMask != Vector512.Zero) { - return i + IndexOfFirstMatch(nanMask); + return i + Vector512.IndexOfWhereAllBitsSet(nanMask); } } @@ -215,7 +215,7 @@ static Vector512 CreateVector512T(int i) => nanMask = ~Vector512.Equals(current, current); if (nanMask != Vector512.Zero) { - int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask); + int indexInVectorOfFirstMatch = Vector512.IndexOfWhereAllBitsSet(nanMask); return typeof(T) == typeof(double) ? (int)(long)(object)currentIndex.As()[indexInVectorOfFirstMatch] : (int)(object)currentIndex.As()[indexInVectorOfFirstMatch]; @@ -260,7 +260,7 @@ static Vector256 CreateVector256T(int i) => nanMask = ~Vector256.Equals(result, result); if (nanMask != Vector256.Zero) { - return IndexOfFirstMatch(nanMask); + return Vector256.IndexOfWhereAllBitsSet(nanMask); } } @@ -279,7 +279,7 @@ static Vector256 CreateVector256T(int i) => nanMask = ~Vector256.Equals(current, current); if (nanMask != Vector256.Zero) { - return i + IndexOfFirstMatch(nanMask); + return i + Vector256.IndexOfWhereAllBitsSet(nanMask); } } @@ -299,7 +299,7 @@ static Vector256 CreateVector256T(int i) => nanMask = ~Vector256.Equals(current, current); if (nanMask != Vector256.Zero) { - int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask); + int indexInVectorOfFirstMatch = Vector256.IndexOfWhereAllBitsSet(nanMask); return typeof(T) == typeof(double) ? (int)(long)(object)currentIndex.As()[indexInVectorOfFirstMatch] : (int)(object)currentIndex.As()[indexInVectorOfFirstMatch]; @@ -344,7 +344,7 @@ static Vector128 CreateVector128T(int i) => nanMask = ~Vector128.Equals(result, result); if (nanMask != Vector128.Zero) { - return IndexOfFirstMatch(nanMask); + return Vector128.IndexOfWhereAllBitsSet(nanMask); } } @@ -363,7 +363,7 @@ static Vector128 CreateVector128T(int i) => nanMask = ~Vector128.Equals(current, current); if (nanMask != Vector128.Zero) { - return i + IndexOfFirstMatch(nanMask); + return i + Vector128.IndexOfWhereAllBitsSet(nanMask); } } @@ -383,7 +383,7 @@ static Vector128 CreateVector128T(int i) => nanMask = ~Vector128.Equals(current, current); if (nanMask != Vector128.Zero) { - int indexInVectorOfFirstMatch = IndexOfFirstMatch(nanMask); + int indexInVectorOfFirstMatch = Vector128.IndexOfWhereAllBitsSet(nanMask); return typeof(T) == typeof(double) ? (int)(long)(object)currentIndex.As()[indexInVectorOfFirstMatch] : (int)(object)currentIndex.As()[indexInVectorOfFirstMatch]; @@ -419,14 +419,7 @@ static Vector128 CreateVector128T(int i) => return curIn; } - private static int IndexOfFirstMatch(Vector128 mask) => - Vector128.IndexOfWhereAllBitsSet(mask); - private static int IndexOfFirstMatch(Vector256 mask) => - Vector256.IndexOfWhereAllBitsSet(mask); - - private static int IndexOfFirstMatch(Vector512 mask) => - Vector512.IndexOfWhereAllBitsSet(mask); [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe Vector256 IndexLessThan(Vector256 indices1, Vector256 indices2) => diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Max.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Max.cs index 8e42381c48e165..2f16e8b6b8d4f2 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Max.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Max.cs @@ -258,7 +258,7 @@ private static T MinMaxCore(ReadOnlySpan x) nanMask = IsNaN(result); if (nanMask != Vector512.Zero) { - return result.GetElement(IndexOfFirstMatch(nanMask)); + return result.GetElement(Vector512.IndexOfWhereAllBitsSet(nanMask)); } } @@ -277,7 +277,7 @@ private static T MinMaxCore(ReadOnlySpan x) nanMask = ~Vector512.Equals(current, current); if (nanMask != Vector512.Zero) { - return current.GetElement(IndexOfFirstMatch(nanMask)); + return current.GetElement(Vector512.IndexOfWhereAllBitsSet(nanMask)); } } @@ -296,7 +296,7 @@ private static T MinMaxCore(ReadOnlySpan x) nanMask = ~Vector512.Equals(current, current); if (nanMask != Vector512.Zero) { - return current.GetElement(IndexOfFirstMatch(nanMask)); + return current.GetElement(Vector512.IndexOfWhereAllBitsSet(nanMask)); } } @@ -323,7 +323,7 @@ private static T MinMaxCore(ReadOnlySpan x) nanMask = ~Vector256.Equals(result, result); if (nanMask != Vector256.Zero) { - return result.GetElement(IndexOfFirstMatch(nanMask)); + return result.GetElement(Vector256.IndexOfWhereAllBitsSet(nanMask)); } } @@ -342,7 +342,7 @@ private static T MinMaxCore(ReadOnlySpan x) nanMask = ~Vector256.Equals(current, current); if (nanMask != Vector256.Zero) { - return current.GetElement(IndexOfFirstMatch(nanMask)); + return current.GetElement(Vector256.IndexOfWhereAllBitsSet(nanMask)); } } @@ -362,7 +362,7 @@ private static T MinMaxCore(ReadOnlySpan x) nanMask = ~Vector256.Equals(current, current); if (nanMask != Vector256.Zero) { - return current.GetElement(IndexOfFirstMatch(nanMask)); + return current.GetElement(Vector256.IndexOfWhereAllBitsSet(nanMask)); } } @@ -389,7 +389,7 @@ private static T MinMaxCore(ReadOnlySpan x) nanMask = IsNaN(result); if (nanMask != Vector128.Zero) { - return result.GetElement(IndexOfFirstMatch(nanMask)); + return result.GetElement(Vector128.IndexOfWhereAllBitsSet(nanMask)); } } @@ -408,7 +408,7 @@ private static T MinMaxCore(ReadOnlySpan x) nanMask = IsNaN(current); if (nanMask != Vector128.Zero) { - return current.GetElement(IndexOfFirstMatch(nanMask)); + return current.GetElement(Vector128.IndexOfWhereAllBitsSet(nanMask)); } } @@ -427,7 +427,7 @@ private static T MinMaxCore(ReadOnlySpan x) nanMask = IsNaN(current); if (nanMask != Vector128.Zero) { - return current.GetElement(IndexOfFirstMatch(nanMask)); + return current.GetElement(Vector128.IndexOfWhereAllBitsSet(nanMask)); } } From 499c77ec2f03d6af75198cfe5c12777680caf8d9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Apr 2026 18:02:53 +0000 Subject: [PATCH 3/6] Revert Vector128.cs, Vector64.cs, and HammingDistance.cs changes Agent-Logs-Url: https://github.com/dotnet/runtime/sessions/527f6141-5f1f-41b3-bb40-336677a9c787 Co-authored-by: tannergooding <10487869+tannergooding@users.noreply.github.com> --- .../TensorPrimitives.HammingDistance.cs | 12 +-- .../System/Runtime/Intrinsics/Vector128.cs | 83 +++++++++++++++++- .../src/System/Runtime/Intrinsics/Vector64.cs | 84 ++++++++++++++++++- 3 files changed, 165 insertions(+), 14 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.HammingDistance.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.HammingDistance.cs index c9dd58c9af5144..c38b60e5f0ac3b 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.HammingDistance.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.HammingDistance.cs @@ -88,7 +88,7 @@ private static int CountUnequalElements(ReadOnlySpan x, ReadOnlySpan y) Vector512 xVec = Vector512.LoadUnsafe(ref xRef, (uint)i); Vector512 yVec = Vector512.LoadUnsafe(ref yRef, (uint)i); - count += Vector512.CountWhereAllBitsSet(~Vector512.Equals(xVec, yVec)); + count += BitOperations.PopCount((~Vector512.Equals(xVec, yVec)).ExtractMostSignificantBits()); i += Vector512.Count; } @@ -104,7 +104,7 @@ private static int CountUnequalElements(ReadOnlySpan x, ReadOnlySpan y) xVec &= remainderMask; yVec &= remainderMask; - count += Vector512.CountWhereAllBitsSet(~Vector512.Equals(xVec, yVec)); + count += BitOperations.PopCount((~Vector512.Equals(xVec, yVec)).ExtractMostSignificantBits()); } } else @@ -120,7 +120,7 @@ private static int CountUnequalElements(ReadOnlySpan x, ReadOnlySpan y) Vector256 xVec = Vector256.LoadUnsafe(ref xRef, (uint)i); Vector256 yVec = Vector256.LoadUnsafe(ref yRef, (uint)i); - count += Vector256.CountWhereAllBitsSet(~Vector256.Equals(xVec, yVec)); + count += BitOperations.PopCount((~Vector256.Equals(xVec, yVec)).ExtractMostSignificantBits()); i += Vector256.Count; } @@ -136,7 +136,7 @@ private static int CountUnequalElements(ReadOnlySpan x, ReadOnlySpan y) xVec &= remainderMask; yVec &= remainderMask; - count += Vector256.CountWhereAllBitsSet(~Vector256.Equals(xVec, yVec)); + count += BitOperations.PopCount((~Vector256.Equals(xVec, yVec)).ExtractMostSignificantBits()); } } } @@ -153,7 +153,7 @@ private static int CountUnequalElements(ReadOnlySpan x, ReadOnlySpan y) Vector128 xVec = Vector128.LoadUnsafe(ref xRef, (uint)i); Vector128 yVec = Vector128.LoadUnsafe(ref yRef, (uint)i); - count += Vector128.CountWhereAllBitsSet(~Vector128.Equals(xVec, yVec)); + count += BitOperations.PopCount((~Vector128.Equals(xVec, yVec)).ExtractMostSignificantBits()); i += Vector128.Count; } @@ -169,7 +169,7 @@ private static int CountUnequalElements(ReadOnlySpan x, ReadOnlySpan y) xVec &= remainderMask; yVec &= remainderMask; - count += Vector128.CountWhereAllBitsSet(~Vector128.Equals(xVec, yVec)); + count += BitOperations.PopCount((~Vector128.Equals(xVec, yVec)).ExtractMostSignificantBits()); } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index 626e5d8cebc216..1cebc6f55a6b81 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -4502,7 +4502,64 @@ public static Vector128 WithUpper(this Vector128 vector, Vector64 va public static Vector128 Xor(Vector128 left, Vector128 right) => left ^ right; [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int CountMatches(Vector128 vector) => BitOperations.PopCount(vector.ExtractMostSignificantBits()); + [CompExactlyDependsOn(typeof(AdvSimd))] + internal static ulong AdvSimdExtractBitMask(Vector128 vector) + { + if (!AdvSimd.IsSupported) + { + ThrowHelper.ThrowNotSupportedException(); + } + + // This expects vector to have each element be one of Zero or AllBitsSet + // and will not produce correct results otherwise. + // + // Given this, we can treat it as ushort and do a logical-right-shift by 4 to + // compact the mask into half the space, giving us the following possibilities for + // each pair of bytes: + // * 0x00_00 - 0x00 + // * 0x00_FF - 0x0F + // * 0xFF_00 - 0xF0 + // * 0xFF_FF - 0xFF + // + // This allows us to extract the full metadata as a 64-bit scalar which can then + // be consumed by bit-counting APIs, such as PopCount, LeadingZeroCount, or TrailingZeroCount, + // and then adjusted by AdvSimdFixupBitCount to get the actual count of elements + // that were masked. + + return AdvSimd.ShiftRightLogicalNarrowingLower(vector.AsUInt16(), 4).AsUInt64().ToScalar(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(AdvSimd))] + internal static int AdvSimdFixupBitCount(int bitCount) + { + if (!AdvSimd.IsSupported) + { + ThrowHelper.ThrowNotSupportedException(); + } + + // This API is meant to be consumed alongside AdvSimdExtractBitMask and will + // not produce correct results for arbitrary inputs. It adjusts the bit count + // assuming that sequences of 1 or 0 were in groups of 4 bits per byte. + + unsafe + { + return bitCount >>> (2 + int.Log2(sizeof(T))); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static int CountMatches(Vector128 vector) + { + if (AdvSimd.IsSupported) + { + return AdvSimdFixupBitCount(BitOperations.PopCount(AdvSimdExtractBitMask(vector))); + } + else + { + return BitOperations.PopCount(vector.ExtractMostSignificantBits()); + } + } [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static T GetElementUnsafe(in this Vector128 vector, int index) @@ -4515,12 +4572,30 @@ internal static T GetElementUnsafe(in this Vector128 vector, int index) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfFirstMatch(Vector128 vector) { - int result = BitOperations.TrailingZeroCount(vector.ExtractMostSignificantBits()); - return (result != 32) ? result : -1; + if (AdvSimd.IsSupported) + { + int result = AdvSimdFixupBitCount(BitOperations.TrailingZeroCount(AdvSimdExtractBitMask(vector))); + return (result != Vector128.Count) ? result : -1; + } + else + { + int result = BitOperations.TrailingZeroCount(vector.ExtractMostSignificantBits()); + return (result != 32) ? result : -1; + } } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int IndexOfLastMatch(Vector128 vector) => 31 - BitOperations.LeadingZeroCount(vector.ExtractMostSignificantBits()); + internal static int IndexOfLastMatch(Vector128 vector) + { + if (AdvSimd.IsSupported) + { + return (Vector128.Count - 1) - AdvSimdFixupBitCount(BitOperations.LeadingZeroCount(AdvSimdExtractBitMask(vector))); + } + else + { + return 31 - BitOperations.LeadingZeroCount(vector.ExtractMostSignificantBits()); + } + } [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static void SetElementUnsafe(in this Vector128 vector, int index, T value) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index 1cccde196ab01b..8655d9778f0529 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -6,6 +6,7 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.Arm; namespace System.Runtime.Intrinsics { @@ -4397,7 +4398,64 @@ public static Vector64 WithElement(this Vector64 vector, int index, T v public static Vector64 Xor(Vector64 left, Vector64 right) => left ^ right; [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int CountMatches(Vector64 vector) => BitOperations.PopCount(vector.ExtractMostSignificantBits()); + [CompExactlyDependsOn(typeof(AdvSimd))] + internal static uint AdvSimdExtractBitMask(Vector64 vector) + { + if (!AdvSimd.IsSupported) + { + ThrowHelper.ThrowNotSupportedException(); + } + + // This expects vector to have each element be one of Zero or AllBitsSet + // and will not produce correct results otherwise. + // + // Given this, we can treat it as ushort and do a logical-right-shift by 4 to + // compact the mask into half the space, giving us the following possibilities for + // each pair of bytes: + // * 0x00_00 - 0x00 + // * 0x00_FF - 0x0F + // * 0xFF_00 - 0xF0 + // * 0xFF_FF - 0xFF + // + // This allows us to extract the full metadata as a 32-bit scalar which can then + // be consumed by bit-counting APIs, such as PopCount, LeadingZeroCount, or TrailingZeroCount, + // and then adjusted by AdvSimdFixupBitCount to get the actual count of elements + // that were masked. + + return AdvSimd.ShiftRightLogicalNarrowingLower(vector.ToVector128().AsUInt16(), 4).AsUInt32().ToScalar(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + [CompExactlyDependsOn(typeof(AdvSimd))] + internal static int AdvSimdFixupBitCount(int bitCount) + { + if (!AdvSimd.IsSupported) + { + ThrowHelper.ThrowNotSupportedException(); + } + + // This API is meant to be consumed alongside AdvSimdExtractBitMask and will + // not produce correct results for arbitrary inputs. It adjusts the bit count + // assuming that sequences of 1 or 0 were in groups of 4 bits per byte. + + unsafe + { + return bitCount >>> (2 + int.Log2(sizeof(T))); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static int CountMatches(Vector64 vector) + { + if (AdvSimd.IsSupported) + { + return AdvSimdFixupBitCount(BitOperations.PopCount(AdvSimdExtractBitMask(vector))); + } + else + { + return BitOperations.PopCount(vector.ExtractMostSignificantBits()); + } + } [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static T GetElementUnsafe(in this Vector64 vector, int index) @@ -4410,12 +4468,30 @@ internal static T GetElementUnsafe(in this Vector64 vector, int index) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfFirstMatch(Vector64 vector) { - int result = BitOperations.TrailingZeroCount(vector.ExtractMostSignificantBits()); - return (result != 32) ? result : -1; + if (AdvSimd.IsSupported) + { + int result = AdvSimdFixupBitCount(BitOperations.TrailingZeroCount(AdvSimdExtractBitMask(vector))); + return (result != Vector64.Count) ? result : -1; + } + else + { + int result = BitOperations.TrailingZeroCount(vector.ExtractMostSignificantBits()); + return (result != 32) ? result : -1; + } } [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int IndexOfLastMatch(Vector64 vector) => 31 - BitOperations.LeadingZeroCount(vector.ExtractMostSignificantBits()); + internal static int IndexOfLastMatch(Vector64 vector) + { + if (AdvSimd.IsSupported) + { + return (Vector64.Count - 1) - AdvSimdFixupBitCount(BitOperations.LeadingZeroCount(AdvSimdExtractBitMask(vector))); + } + else + { + return 31 - BitOperations.LeadingZeroCount(vector.ExtractMostSignificantBits()); + } + } [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static void SetElementUnsafe(in this Vector64 vector, int index, T value) From 8dc533d1608b9d5bfcbba2ab22faf656b184e67d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Apr 2026 18:37:28 +0000 Subject: [PATCH 4/6] Use CountMatches internal helper in SpanHelpers.T.cs to avoid x64 regression Agent-Logs-Url: https://github.com/dotnet/runtime/sessions/83aad277-6910-4c67-8aac-88c2e15047b3 Co-authored-by: tannergooding <10487869+tannergooding@users.noreply.github.com> --- .../System.Private.CoreLib/src/System/SpanHelpers.T.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index 12278446319180..fdba77200e94a7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -4176,7 +4176,7 @@ public static unsafe int CountValueType(ref T current, T value, int length) w ref T oneVectorAwayFromEnd = ref Unsafe.Subtract(ref end, Vector512.Count); while (Unsafe.IsAddressLessThan(ref current, ref oneVectorAwayFromEnd)) { - count += Vector512.CountWhereAllBitsSet(Vector512.Equals(Vector512.LoadUnsafe(ref current), targetVector)); + count += Vector512.CountMatches(Vector512.Equals(Vector512.LoadUnsafe(ref current), targetVector)); current = ref Unsafe.Add(ref current, Vector512.Count); } @@ -4191,7 +4191,7 @@ public static unsafe int CountValueType(ref T current, T value, int length) w ref T oneVectorAwayFromEnd = ref Unsafe.Subtract(ref end, Vector256.Count); while (Unsafe.IsAddressLessThan(ref current, ref oneVectorAwayFromEnd)) { - count += Vector256.CountWhereAllBitsSet(Vector256.Equals(Vector256.LoadUnsafe(ref current), targetVector)); + count += Vector256.CountMatches(Vector256.Equals(Vector256.LoadUnsafe(ref current), targetVector)); current = ref Unsafe.Add(ref current, Vector256.Count); } @@ -4206,7 +4206,7 @@ public static unsafe int CountValueType(ref T current, T value, int length) w ref T oneVectorAwayFromEnd = ref Unsafe.Subtract(ref end, Vector128.Count); while (Unsafe.IsAddressLessThan(ref current, ref oneVectorAwayFromEnd)) { - count += Vector128.CountWhereAllBitsSet(Vector128.Equals(Vector128.LoadUnsafe(ref current), targetVector)); + count += Vector128.CountMatches(Vector128.Equals(Vector128.LoadUnsafe(ref current), targetVector)); current = ref Unsafe.Add(ref current, Vector128.Count); } From d3a08e41c5945037d24af13b238cc83645caa716 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Apr 2026 18:38:28 +0000 Subject: [PATCH 5/6] Clean up extra blank lines from removed IndexOfFirstMatch methods Agent-Logs-Url: https://github.com/dotnet/runtime/sessions/83aad277-6910-4c67-8aac-88c2e15047b3 Co-authored-by: tannergooding <10487869+tannergooding@users.noreply.github.com> --- .../Numerics/Tensors/netcore/TensorPrimitives.IndexOfMax.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMax.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMax.cs index 884109a8e2057c..d3114e7becd199 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMax.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.IndexOfMax.cs @@ -419,8 +419,6 @@ static Vector128 CreateVector128T(int i) => return curIn; } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe Vector256 IndexLessThan(Vector256 indices1, Vector256 indices2) => sizeof(T) == sizeof(long) ? Vector256.LessThan(indices1.AsInt64(), indices2.AsInt64()).As() : From 9b3f39fb0b83d08b635f36ba075f59bd2bcacc40 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:10:02 +0000 Subject: [PATCH 6/6] Replace EMSB+BitOp patterns with IndexOfFirstMatch/IndexOfLastMatch in SpanHelpers Agent-Logs-Url: https://github.com/dotnet/runtime/sessions/32b80803-7f44-4863-8845-f76d999fe220 Co-authored-by: EgorBo <523221+EgorBo@users.noreply.github.com> --- .../src/System/SpanHelpers.Byte.cs | 55 +++++++++---------- .../src/System/SpanHelpers.T.cs | 24 ++------ 2 files changed, 31 insertions(+), 48 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index e7194f4098ed50..67cd528d198236 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -531,8 +531,8 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) Vector128 search = Vector128.Load(searchSpace + offset); // Same method as below - uint matches = Vector128.Equals(Vector128.Zero, search).ExtractMostSignificantBits(); - if (matches == 0) + Vector128 cmp = Vector128.Equals(Vector128.Zero, search); + if (cmp == Vector128.Zero) { // Zero flags set so no matches offset += (nuint)Vector128.Count; @@ -540,7 +540,7 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) else { // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); + return (int)(offset + (uint)Vector128.IndexOfFirstMatch(cmp)); } } @@ -553,8 +553,8 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) Vector256 search = Vector256.Load(searchSpace + offset); // Same method as below - uint matches = Vector256.Equals(Vector256.Zero, search).ExtractMostSignificantBits(); - if (matches == 0) + Vector256 cmp = Vector256.Equals(Vector256.Zero, search); + if (cmp == Vector256.Zero) { // Zero flags set so no matches offset += (nuint)Vector256.Count; @@ -562,7 +562,7 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) else { // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); + return (int)(offset + (uint)Vector256.IndexOfFirstMatch(cmp)); } } lengthToExamine = GetByteVector512SpanLength(offset, Length); @@ -571,10 +571,8 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) do { Vector512 search = Vector512.Load(searchSpace + offset); - ulong matches = Vector512.Equals(Vector512.Zero, search).ExtractMostSignificantBits(); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) + Vector512 cmp = Vector512.Equals(Vector512.Zero, search); + if (cmp == Vector512.Zero) { // Zero flags set so no matches offset += (nuint)Vector512.Count; @@ -582,7 +580,7 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) } // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); + return (int)(offset + (uint)Vector512.IndexOfFirstMatch(cmp)); } while (lengthToExamine > offset); } @@ -592,8 +590,8 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) Vector256 search = Vector256.Load(searchSpace + offset); // Same method as above - uint matches = Vector256.Equals(Vector256.Zero, search).ExtractMostSignificantBits(); - if (matches == 0) + Vector256 cmp = Vector256.Equals(Vector256.Zero, search); + if (cmp == Vector256.Zero) { // Zero flags set so no matches offset += (nuint)Vector256.Count; @@ -601,7 +599,7 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) else { // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); + return (int)(offset + (uint)Vector256.IndexOfFirstMatch(cmp)); } } @@ -611,8 +609,8 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) Vector128 search = Vector128.Load(searchSpace + offset); // Same method as above - uint matches = Vector128.Equals(Vector128.Zero, search).ExtractMostSignificantBits(); - if (matches == 0) + Vector128 cmp = Vector128.Equals(Vector128.Zero, search); + if (cmp == Vector128.Zero) { // Zero flags set so no matches offset += (nuint)Vector128.Count; @@ -620,7 +618,7 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) else { // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); + return (int)(offset + (uint)Vector128.IndexOfFirstMatch(cmp)); } } @@ -644,8 +642,8 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) Vector128 search = Vector128.Load(searchSpace + offset); // Same method as below - uint matches = Vector128.Equals(Vector128.Zero, search).ExtractMostSignificantBits(); - if (matches == 0) + Vector128 cmp = Vector128.Equals(Vector128.Zero, search); + if (cmp == Vector128.Zero) { // Zero flags set so no matches offset += (nuint)Vector128.Count; @@ -653,7 +651,7 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) else { // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); + return (int)(offset + (uint)Vector128.IndexOfFirstMatch(cmp)); } } @@ -663,10 +661,8 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) do { Vector256 search = Vector256.Load(searchSpace + offset); - uint matches = Vector256.Equals(Vector256.Zero, search).ExtractMostSignificantBits(); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) + Vector256 cmp = Vector256.Equals(Vector256.Zero, search); + if (cmp == Vector256.Zero) { // Zero flags set so no matches offset += (nuint)Vector256.Count; @@ -674,7 +670,7 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) } // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); + return (int)(offset + (uint)Vector256.IndexOfFirstMatch(cmp)); } while (lengthToExamine > offset); } @@ -684,8 +680,8 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) Vector128 search = Vector128.Load(searchSpace + offset); // Same method as above - uint matches = Vector128.Equals(Vector128.Zero, search).ExtractMostSignificantBits(); - if (matches == 0) + Vector128 cmp = Vector128.Equals(Vector128.Zero, search); + if (cmp == Vector128.Zero) { // Zero flags set so no matches offset += (nuint)Vector128.Count; @@ -693,7 +689,7 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) else { // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); + return (int)(offset + (uint)Vector128.IndexOfFirstMatch(cmp)); } } @@ -724,8 +720,7 @@ internal static unsafe int IndexOfNullByte(byte* searchSpace) } // Find bitflag offset of first match and add to current offset - uint matches = compareResult.ExtractMostSignificantBits(); - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); + return (int)(offset + (uint)Vector128.IndexOfFirstMatch(compareResult)); } if (offset < (nuint)(uint)Length) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index fdba77200e94a7..7f562e59d626aa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -3713,49 +3713,37 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe int ComputeFirstIndex(ref T searchSpace, ref T current, Vector128 equals) where T : struct { - uint notEqualsElements = equals.ExtractMostSignificantBits(); - int index = BitOperations.TrailingZeroCount(notEqualsElements); - return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T)); + return Vector128.IndexOfFirstMatch(equals) + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T)); } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe int ComputeFirstIndex(ref T searchSpace, ref T current, Vector256 equals) where T : struct { - uint notEqualsElements = equals.ExtractMostSignificantBits(); - int index = BitOperations.TrailingZeroCount(notEqualsElements); - return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T)); + return Vector256.IndexOfFirstMatch(equals) + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T)); } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe int ComputeFirstIndex(ref T searchSpace, ref T current, Vector512 equals) where T : struct { - ulong notEqualsElements = equals.ExtractMostSignificantBits(); - int index = BitOperations.TrailingZeroCount(notEqualsElements); - return index + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T)); + return Vector512.IndexOfFirstMatch(equals) + (int)((nuint)Unsafe.ByteOffset(ref searchSpace, ref current) / (nuint)sizeof(T)); } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int ComputeLastIndex(nint offset, Vector128 equals) where T : struct { - uint notEqualsElements = equals.ExtractMostSignificantBits(); - int index = 31 - BitOperations.LeadingZeroCount(notEqualsElements); // 31 = 32 (bits in Int32) - 1 (indexing from zero) - return (int)offset + index; + return (int)offset + Vector128.IndexOfLastMatch(equals); } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int ComputeLastIndex(nint offset, Vector256 equals) where T : struct { - uint notEqualsElements = equals.ExtractMostSignificantBits(); - int index = 31 - BitOperations.LeadingZeroCount(notEqualsElements); // 31 = 32 (bits in Int32) - 1 (indexing from zero) - return (int)offset + index; + return (int)offset + Vector256.IndexOfLastMatch(equals); } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int ComputeLastIndex(nint offset, Vector512 equals) where T : struct { - ulong notEqualsElements = equals.ExtractMostSignificantBits(); - int index = 63 - BitOperations.LeadingZeroCount(notEqualsElements); // 31 = 32 (bits in Int32) - 1 (indexing from zero) - return (int)offset + index; + return (int)offset + Vector512.IndexOfLastMatch(equals); } internal interface INegator where T : struct