diff --git a/src/benchmarks/micro/sve/Partition.cs b/src/benchmarks/micro/sve/Partition.cs index 713a49ee410..71444c0e52e 100644 --- a/src/benchmarks/micro/sve/Partition.cs +++ b/src/benchmarks/micro/sve/Partition.cs @@ -49,23 +49,26 @@ public unsafe ulong Scalar() ulong indexLeft = 0; ulong indexRight = 0; - uint first = _input[0]; - - for (i = 0; i < Size; i++) + fixed (uint* input = _input, left = _left, right = _right) { - if (_input[i] < first) - { - _left[indexLeft] = _input[i]; - indexLeft++; - } - else + uint first = input[0]; + + for (i = 0; i < Size; i++) { - _right[indexRight] = _input[i]; - indexRight++; + if (input[i] < first) + { + left[indexLeft] = input[i]; + indexLeft++; + } + else + { + right[indexRight] = input[i]; + indexRight++; + } } - } - return indexRight; + return indexRight; + } } [Benchmark] @@ -108,7 +111,7 @@ public unsafe ulong SvePartition() Sve.StoreAndZip(pLoop, left + indexLeft, compacted); // Increment the position in the first output array by the number of elements found. - indexLeft += Sve.GetActiveElementCount(Sve.CreateTrueMaskUInt32(), pInner); + indexLeft = Sve.SaturatingIncrementByActiveElementCount(indexLeft, pInner); // Find all elements in input array NOT less than the first element. // (Flip the pCompare predicate by XORing with ones) @@ -117,7 +120,7 @@ public unsafe ulong SvePartition() // Repeat for the right array. 
compacted = Sve.Compact(pInner, data); Sve.StoreAndZip(pLoop, right + indexRight, compacted); - indexRight += Sve.GetActiveElementCount(Sve.CreateTrueMaskUInt32(), pInner); + indexRight = Sve.SaturatingIncrementByActiveElementCount(indexRight, pInner); i = Sve.SaturatingIncrementBy32BitElementCount(i, 1); pLoop = Sve.CreateWhileLessThanMask32Bit(i, Size); @@ -129,6 +132,72 @@ public unsafe ulong SvePartition() } return 0; } + + [Benchmark] + public unsafe ulong SveTail() + { + if (Sve.IsSupported) + { + fixed (uint* input = _input, left = _left, right = _right) + { + long i = 0; + + ulong indexLeft = 0; + ulong indexRight = 0; + + Vector firstElemVec = Sve.DuplicateSelectedScalarToVector( + Sve.LoadVector(Sve.CreateTrueMaskUInt32(), input), 0 + ); + + Vector pTrue = Sve.CreateTrueMaskUInt32(); + + while (i < (Size - (int)Sve.Count32BitElements())) + { + Vector data = Sve.LoadVector(pTrue, input + i); + + // Predicate for elements in input array less than the first element. + Vector pInner = Sve.CompareLessThan(data, firstElemVec); + + // Squash all found elements to the lower lanes of the vector. + Vector compacted = Sve.Compact(pInner, data); + + // Store the squashed elements to the first output array. + Sve.StoreAndZip(pTrue, left + indexLeft, compacted); + + // Increment the position in the first output array by the number of elements found. + indexLeft = Sve.SaturatingIncrementByActiveElementCount(indexLeft, pInner); + + // Find elements greater than or equal to the first element. + pInner = Sve.CompareGreaterThanOrEqual(data, firstElemVec); + + // Repeat for the right array. + compacted = Sve.Compact(pInner, data); + Sve.StoreAndZip(pTrue, right + indexRight, compacted); + indexRight = Sve.SaturatingIncrementByActiveElementCount(indexRight, pInner); + + i = Sve.SaturatingIncrementBy32BitElementCount(i, 1); + } + + // Handle remaining elements. 
+ for (; i < Size; i++) + { + if (input[i] < input[0]) + { + left[indexLeft] = input[i]; + indexLeft++; + } + else + { + right[indexRight] = input[i]; + indexRight++; + } + } + + return indexRight; + } + } + return 0; + } } } diff --git a/src/benchmarks/micro/sve/StrCmp.cs b/src/benchmarks/micro/sve/StrCmp.cs index 4e43c996e4a..3f884e5a178 100644 --- a/src/benchmarks/micro/sve/StrCmp.cs +++ b/src/benchmarks/micro/sve/StrCmp.cs @@ -60,17 +60,20 @@ public virtual void Setup() } [Benchmark] - public int Scalar() + public unsafe int Scalar() { if (_arr1.Length == _arr2.Length) { - for (int i = 0; i < Size; i++) + fixed (byte* arr1_ptr = _arr1, arr2_ptr = _arr2) { - if (_arr1[i] != _arr2[i]) - return _arr1[i] - _arr2[i]; - } + for (int i = 0; i < Size; i++) + { + if (arr1_ptr[i] != arr2_ptr[i]) + return arr1_ptr[i] - arr2_ptr[i]; + } - return 0; + return 0; + } } Debug.Assert(false, "Different array lengths are not expected"); @@ -192,9 +195,7 @@ public unsafe long SveTail() cmp = Sve.CompareNotEqualTo(arr1_data, arr2_data); - byte allEqual = (byte)Sve.AddAcross(cmp).ToScalar(); - - if (allEqual > 0) + if (Sve.TestAnyTrue(ptrue, cmp)) { break; } @@ -219,4 +220,4 @@ public unsafe long SveTail() } } -#pragma warning restore SYSLIB5003 \ No newline at end of file +#pragma warning restore SYSLIB5003 diff --git a/src/benchmarks/micro/sve/StrIndexOf.cs b/src/benchmarks/micro/sve/StrIndexOf.cs index 214d90e6b4f..f08efc79824 100644 --- a/src/benchmarks/micro/sve/StrIndexOf.cs +++ b/src/benchmarks/micro/sve/StrIndexOf.cs @@ -26,7 +26,7 @@ public Config() } } - [Params(15, 127, 527, 10015)] + [Params(19, 127, 527, 10015)] public int Size; private char[] _array; @@ -42,16 +42,19 @@ public virtual void Setup() } [Benchmark] - public int Scalar() + public unsafe int Scalar() { - for (int i = 0; i < _array.Length; i++) + fixed (char* arr = _array) { - if (_array[i] == _searchValue) + for (int i = 0; i < Size; i++) { - return i; + if (arr[i] == _searchValue) + { + return i; + } } + 
return -1; } - return -1; } [Benchmark] @@ -72,18 +75,20 @@ public unsafe int Vector128IndexOf() // Compare each vector value with the target Vector128 cmp = Vector128.Equals(vals, target); - ushort cmpSum = Vector128.Sum(cmp); + // Check if there is any match in vals by doing a pairwise maximum. + // The cmpMax UInt64 value will be non-zero if the character is found. + ulong cmpMax = AdvSimd.Arm64.MaxPairwise(cmp, cmp).AsUInt64().ToScalar(); - if (cmpSum > 0) + if (cmpMax != 0) { - // find index of matching item - for (int j = 0; j < incr; j++) - { - if (cmp.GetElement(j) == ushort.MaxValue) - { - return i + j; - } - } + // Convert to byte vector and extract the odd bytes into a 64-bit scalar. + Vector128 cmpByte = cmp.AsByte(); + ulong cmpUnzip = AdvSimd.Arm64.UnzipOdd(cmpByte, cmpByte).AsUInt64().ToScalar(); + + // Offset is the number of trailing zero bits (little endian) divided by 8. + int offset = BitOperations.TrailingZeroCount(cmpUnzip) >> 3; + + return i + offset; } } @@ -110,25 +115,21 @@ public unsafe int SveIndexOf() Vector target = new Vector((ushort)_searchValue); var pLoop = (Vector)Sve.CreateWhileLessThanMask16Bit(i, Size); - for (; Sve.TestFirstTrue(Sve.CreateTrueMaskUInt16(), pLoop); i += (int)Sve.Count16BitElements()) + while (Sve.TestFirstTrue(Sve.CreateTrueMaskUInt16(), pLoop)) { Vector vals = Sve.LoadVector(pLoop, ((ushort*)arr_ptr) + i); Vector cmpVec = Sve.CompareEqual(vals, target); - ushort cmpSum = (ushort)Sve.AddAcross(cmpVec).ToScalar(); - - if (cmpSum > 0) + // Test if the character is found in the current values. + if (Sve.TestAnyTrue(Sve.CreateTrueMaskUInt16(), cmpVec)) { - // find index of matching item - for (int j = 0; j < Vector.Count; j++) - { - if (cmpVec.GetElement(j) == 1) - { - return i + j; - } - } + // Set elements up to and including the first active element to 1 and the rest to 0. + Vector brkVec = Sve.CreateBreakAfterMask(Sve.CreateTrueMaskUInt16(), cmpVec); + // The offset is the number of active elements minus 1. 
+ return (int)Sve.SaturatingIncrementByActiveElementCount(i - 1, brkVec); } + i += (int)Sve.Count16BitElements(); pLoop = (Vector)Sve.CreateWhileLessThanMask16Bit(i, Size); } } @@ -149,30 +150,26 @@ public unsafe int SveTail() Vector target = new Vector((ushort)_searchValue); var pLoop = (Vector)Sve.CreateTrueMaskInt16(); - - for (; (Size - i) > (int)Sve.Count16BitElements(); i += (int)Sve.Count16BitElements()) + while (i < (Size - (int)Sve.Count16BitElements())) { Vector vals = Sve.LoadVector(pLoop, ((ushort*)arr_ptr) + i); Vector cmpVec = Sve.CompareEqual(vals, target); - ushort cmpSum = (ushort)Sve.AddAcross(cmpVec).ToScalar(); - - if (cmpSum > 0) + // Test if the character is found in the current values. + if (Sve.TestAnyTrue(Sve.CreateTrueMaskUInt16(), cmpVec)) { - // find index of matching item - for (int j = 0; j < Vector.Count; j++) - { - if (cmpVec.GetElement(j) == 1) - { - return i + j; - } - } + // Set elements up to and including the first active element to 1 and the rest to 0. + Vector brkVec = Sve.CreateBreakAfterMask(Sve.CreateTrueMaskUInt16(), cmpVec); + // The offset is the number of active elements minus 1. 
+ return (int)Sve.SaturatingIncrementByActiveElementCount(i - 1, brkVec); } + + i += (int)Sve.Count16BitElements(); } for (; i < Size; i++) { - if (_array[i] == _searchValue) + if (arr_ptr[i] == _searchValue) return i; } @@ -186,4 +183,4 @@ public unsafe int SveTail() } } -#pragma warning restore SYSLIB5003 \ No newline at end of file +#pragma warning restore SYSLIB5003 diff --git a/src/benchmarks/micro/sve/StrLen.cs b/src/benchmarks/micro/sve/StrLen.cs index 246bc64d834..c932a8b921c 100644 --- a/src/benchmarks/micro/sve/StrLen.cs +++ b/src/benchmarks/micro/sve/StrLen.cs @@ -29,13 +29,11 @@ public Config() public int Size; private byte[] _array; - private ulong _length; [GlobalSetup] public virtual void Setup() { _array = ValuesGenerator.Array(Size + 1); - _length = 0; var random = new Random(); for (int i = 0; i < _array.Length; i++) @@ -56,19 +54,13 @@ public unsafe ulong Scalar() { fixed (byte* arr_ptr = _array) { - if (arr_ptr == null) - return 0; - - byte* ptr = arr_ptr; - - while (*ptr != 0) + ulong i = 0; + while (arr_ptr[i] != 0) { - _length++; - ptr++; + i++; } + return i; } - - return _length; } [Benchmark] @@ -159,4 +151,4 @@ public unsafe ulong SveStrLen() } } -#pragma warning restore SYSLIB5003 \ No newline at end of file +#pragma warning restore SYSLIB5003