From fabc37e1dba57ec37cfafa9993adbe1a31c17377 Mon Sep 17 00:00:00 2001 From: Steven He Date: Sun, 3 May 2026 19:07:19 +0900 Subject: [PATCH 01/24] Add managed API surface --- .../ref/System.Numerics.Vectors.cs | 16 ++ .../src/System/Numerics/Vector.cs | 237 ++++++++++++++++++ .../src/System/Numerics/Vector_1.cs | 9 + .../System/Runtime/Intrinsics/Vector128.cs | 194 ++++++++++++++ .../System/Runtime/Intrinsics/Vector128_1.cs | 9 + .../System/Runtime/Intrinsics/Vector256.cs | 194 ++++++++++++++ .../System/Runtime/Intrinsics/Vector256_1.cs | 9 + .../System/Runtime/Intrinsics/Vector512.cs | 194 ++++++++++++++ .../System/Runtime/Intrinsics/Vector512_1.cs | 9 + .../src/System/Runtime/Intrinsics/Vector64.cs | 194 ++++++++++++++ .../System/Runtime/Intrinsics/Vector64_1.cs | 9 + .../ref/System.Runtime.Intrinsics.cs | 66 ++++- 12 files changed, 1138 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Numerics.Vectors/ref/System.Numerics.Vectors.cs b/src/libraries/System.Numerics.Vectors/ref/System.Numerics.Vectors.cs index 630ba00644a847..6a68e7de235454 100644 --- a/src/libraries/System.Numerics.Vectors/ref/System.Numerics.Vectors.cs +++ b/src/libraries/System.Numerics.Vectors/ref/System.Numerics.Vectors.cs @@ -312,6 +312,21 @@ public static partial class Vector public static System.Numerics.Vector CreateScalar(T value) { throw null; } public static System.Numerics.Vector CreateScalarUnsafe(T value) { throw null; } public static System.Numerics.Vector CreateSequence(T start, T step) { throw null; } + public static System.Numerics.Vector CreateGeometricSequence(T initial, [System.Diagnostics.CodeAnalysis.ConstantExpected] T multiplier) { throw null; } + public static System.Numerics.Vector CreateAlternatingSequence(T even, T odd) { throw null; } + public static System.Numerics.Vector CreateHarmonicSequence(T start, T step) { throw null; } + public static System.Numerics.Vector CreateCauchySequence(T start, T step) { throw null; } + public static 
System.Numerics.Vector ZipLower(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector ZipUpper(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static (System.Numerics.Vector Lower, System.Numerics.Vector Upper) Zip(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector UnzipEven(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector UnzipOdd(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static (System.Numerics.Vector Even, System.Numerics.Vector Odd) Unzip(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector ConcatLowerLower(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector ConcatUpperLower(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector ConcatUpperUpper(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector ConcatLowerUpper(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector Reverse(System.Numerics.Vector vector) { throw null; } public static System.Numerics.Vector DegreesToRadians(System.Numerics.Vector degrees) { throw null; } public static System.Numerics.Vector DegreesToRadians(System.Numerics.Vector degrees) { throw null; } public static System.Numerics.Vector Divide(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } @@ -1131,6 +1146,7 @@ public readonly void CopyTo(System.Span destination) { } public static System.Numerics.Vector AllBitsSet { get { throw null; } } public static int Count { get { throw null; } } public static System.Numerics.Vector Indices { get { throw null; } } + 
public static System.Numerics.Vector SignSequence { get { throw null; } } public static bool IsSupported { get { throw null; } } public T this[int index] { get { throw null; } } public static System.Numerics.Vector One { get { throw null; } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs index bb14e36a8be3d5..3672c452de311f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs @@ -879,6 +879,243 @@ public static Vector CreateScalarUnsafe(T value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector CreateSequence(T start, T step) => (Vector.Indices * step) + Create(start); + /// <summary>Creates a new <see cref="Vector{T}" /> instance where the elements begin at a specified value and are multiplied by another specified value.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="initial">The value that element 0 will be initialized to.</param> + /// <param name="multiplier">The value that indicates how each element should be scaled from the previous.</param> + /// <returns>A new <see cref="Vector{T}" /> instance with the first element initialized to <paramref name="initial" /> and each subsequent element initialized to the value of the previous element multiplied by <paramref name="multiplier" />.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector CreateGeometricSequence(T initial, [ConstantExpected] T multiplier) + { + int count = Vector.Count; + Unsafe.SkipInit(out Vector result); + + T value = initial; + + for (int index = 0; index < count; index++) + { + result.SetElementUnsafe(index, value); + value = Scalar.Multiply(value, multiplier); + } + + return result; + } + + /// <summary>Creates a new <see cref="Vector{T}" /> instance whose elements alternate between two specified values.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="even">The value assigned to even-indexed elements.</param> + /// <param name="odd">The value assigned to odd-indexed elements.</param> 
+ /// <returns>A new <see cref="Vector{T}" /> instance whose even-indexed elements are initialized to <paramref name="even" /> and odd-indexed elements are initialized to <paramref name="odd" />.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector CreateAlternatingSequence(T even, T odd) + { + int count = Vector.Count; + Unsafe.SkipInit(out Vector result); + + for (int index = 0; index < count; index++) + { + result.SetElementUnsafe(index, ((index & 1) == 0) ? even : odd); + } + + return result; + } + + /// <summary>Creates a new <see cref="Vector{T}" /> instance whose elements are the reciprocal of an arithmetic sequence.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="start">The value that element 0 of the arithmetic sequence will be initialized to.</param> + /// <param name="step">The value that indicates how far apart each element of the arithmetic sequence should be from the previous.</param> + /// <returns>A new <see cref="Vector{T}" /> instance whose elements are initialized to one divided by the corresponding element of the arithmetic sequence.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector CreateHarmonicSequence(T start, T step) => Vector.One / CreateSequence(start, step); + + /// <summary>Creates a new <see cref="Vector{T}" /> instance whose elements are the square root of an arithmetic sequence.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="start">The value that element 0 of the arithmetic sequence will be initialized to.</param> + /// <param name="step">The value that indicates how far apart each element of the arithmetic sequence should be from the previous.</param> + /// <returns>A new <see cref="Vector{T}" /> instance whose elements are initialized to the square root of the corresponding element of the arithmetic sequence.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector CreateCauchySequence(T start, T step) => SquareRoot(CreateSequence(start, step)); + + /// <summary>Creates a new vector by concatenating the lower halves of two vectors.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="left">The vector that provides the lower half of the result.</param> + /// <param name="right">The vector that provides the upper half of the result.</param> 
+ /// <returns>A new vector whose lower half comes from the lower half of <paramref name="left" /> and whose upper half comes from the lower half of <paramref name="right" />.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector ConcatLowerLower(Vector left, Vector right) => ConcatHalves(left, right, leftUpper: false, rightUpper: false); + + /// <summary>Creates a new vector by concatenating the upper half of one vector and the lower half of another vector.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="left">The vector that provides the lower half of the result.</param> + /// <param name="right">The vector that provides the upper half of the result.</param> + /// <returns>A new vector whose lower half comes from the upper half of <paramref name="left" /> and whose upper half comes from the lower half of <paramref name="right" />.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector ConcatUpperLower(Vector left, Vector right) => ConcatHalves(left, right, leftUpper: true, rightUpper: false); + + /// <summary>Creates a new vector by concatenating the upper halves of two vectors.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="left">The vector that provides the lower half of the result.</param> + /// <param name="right">The vector that provides the upper half of the result.</param> + /// <returns>A new vector whose lower half comes from the upper half of <paramref name="left" /> and whose upper half comes from the upper half of <paramref name="right" />.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector ConcatUpperUpper(Vector left, Vector right) => ConcatHalves(left, right, leftUpper: true, rightUpper: true); + + /// <summary>Creates a new vector by concatenating the lower half of one vector and the upper half of another vector.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="left">The vector that provides the lower half of the result.</param> + /// <param name="right">The vector that provides the upper half of the result.</param> + /// <returns>A new vector whose lower half comes from the lower half of <paramref name="left" /> and whose upper half comes from the upper half of <paramref name="right" />.</returns> 
+ [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector ConcatLowerUpper(Vector left, Vector right) => ConcatHalves(left, right, leftUpper: false, rightUpper: true); + + /// <summary>Interleaves the lower halves of two vectors.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="left">The vector that provides the even-indexed elements.</param> + /// <param name="right">The vector that provides the odd-indexed elements.</param> + /// <returns>A new vector containing interleaved elements from the lower halves of <paramref name="left" /> and <paramref name="right" />.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector ZipLower(Vector left, Vector right) => Zip(left, right, upper: false); + + /// <summary>Interleaves the upper halves of two vectors.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="left">The vector that provides the even-indexed elements.</param> + /// <param name="right">The vector that provides the odd-indexed elements.</param> + /// <returns>A new vector containing interleaved elements from the upper halves of <paramref name="left" /> and <paramref name="right" />.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector ZipUpper(Vector left, Vector right) => Zip(left, right, upper: true); + + /// <summary>Interleaves two vectors into their lower and upper halves.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="left">The vector that provides the even-indexed elements.</param> + /// <param name="right">The vector that provides the odd-indexed elements.</param> + /// <returns>A pair of vectors containing interleaved elements from the lower and upper halves of <paramref name="left" /> and <paramref name="right" />.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static (Vector Lower, Vector Upper) Zip(Vector left, Vector right) => (ZipLower(left, right), ZipUpper(left, right)); + + private static Vector Zip(Vector left, Vector right, bool upper) + { + int count = Vector.Count; + int lowerCount = (count + 1) / 2; + int start = upper ? count - lowerCount : 0; + + Unsafe.SkipInit(out Vector result); + + for (int index = 0; index < count; index++) + { + int elementIndex = start + (index / 2); + T value = ((index & 1) == 0) + ? 
left.GetElementUnsafe(elementIndex) + : right.GetElementUnsafe(elementIndex); + + result.SetElementUnsafe(index, value); + } + + return result; + } + + /// <summary>De-interleaves the even-indexed elements from two vectors.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="left">The vector that provides the lower half of the result.</param> + /// <param name="right">The vector that provides the upper half of the result.</param> + /// <returns>A new vector containing the even-indexed elements from <paramref name="left" /> followed by the even-indexed elements from <paramref name="right" />.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector UnzipEven(Vector left, Vector right) => Unzip(left, right, odd: false); + + /// <summary>De-interleaves the odd-indexed elements from two vectors.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="left">The vector that provides the lower half of the result.</param> + /// <param name="right">The vector that provides the upper half of the result.</param> + /// <returns>A new vector containing the odd-indexed elements from <paramref name="left" /> followed by the odd-indexed elements from <paramref name="right" />.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector UnzipOdd(Vector left, Vector right) => Unzip(left, right, odd: true); + + /// <summary>De-interleaves two vectors into their even-indexed and odd-indexed elements.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="left">The vector that provides the lower half of each result.</param> + /// <param name="right">The vector that provides the upper half of each result.</param> + /// <returns>A pair of vectors containing the even-indexed and odd-indexed elements from <paramref name="left" /> and <paramref name="right" />.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static (Vector Even, Vector Odd) Unzip(Vector left, Vector right) => (UnzipEven(left, right), UnzipOdd(left, right)); + + private static Vector Unzip(Vector left, Vector right, bool odd) + { + int count = Vector.Count; + int start = odd ? 
1 : 0; + int lowerCount = (count - start + 1) / 2; + + if (lowerCount == 0) + { + return Vector.Zero; + } + + Unsafe.SkipInit(out Vector result); + + for (int index = 0; index < count; index++) + { + T value = (index < lowerCount) + ? left.GetElementUnsafe(start + (index * 2)) + : right.GetElementUnsafe(start + ((index - lowerCount) * 2)); + + result.SetElementUnsafe(index, value); + } + + return result; + } + + /// <summary>Creates a new vector with the elements of a specified vector in reverse order.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="vector">The vector whose elements will be reversed.</param> + /// <returns>A new vector containing the elements of <paramref name="vector" /> in reverse order.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector Reverse(Vector vector) + { + int count = Vector.Count; + Unsafe.SkipInit(out Vector result); + + for (int index = 0; index < count; index++) + { + result.SetElementUnsafe(index, vector.GetElementUnsafe(count - 1 - index)); + } + + return result; + } + + private static Vector ConcatHalves(Vector left, Vector right, bool leftUpper, bool rightUpper) + { + int count = Vector.Count; + int lowerCount = (count + 1) / 2; + int leftStart = leftUpper ? count - lowerCount : 0; + int rightStart = rightUpper ? count - lowerCount : 0; + + Unsafe.SkipInit(out Vector result); + + for (int index = 0; index < count; index++) + { + T value = (index < lowerCount) + ? 
left.GetElementUnsafe(leftStart + index) + : right.GetElementUnsafe(rightStart + index - lowerCount); + + result.SetElementUnsafe(index, value); + } + + return result; + } + internal static Vector DegreesToRadians(Vector degrees) where T : ITrigonometricFunctions { diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs index 0dd762c866e312..ef2e9f6bd2f55b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs @@ -164,6 +164,15 @@ public static Vector Indices } } + /// <summary>Gets a new <see cref="Vector{T}" /> with elements that alternate between one and negative one, starting with one.</summary> <remarks>The odd-indexed value is computed by subtracting one from <c>default</c>; NOTE(review): confirm the intended result for unsigned element types.</remarks> + /// <exception cref="NotSupportedException">The type of the vector (<typeparamref name="T" />) is not supported.</exception> + public static Vector SignSequence + { + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => Vector.CreateAlternatingSequence(Scalar.One, Scalar.Subtract(default!, Scalar.One)); + } + /// Gets true if is supported; otherwise, false. /// true if is supported; otherwise, false. public static bool IsSupported diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index 1cebc6f55a6b81..c9bb5a48f9aa83 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -1578,6 +1578,200 @@ public static Vector128 CreateScalarUnsafe(T value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 CreateSequence(T start, T step) => (Vector128.Indices * step) + Create(start); + /// <summary>Creates a new <see cref="Vector128{T}" /> instance where the elements begin at a specified value and are multiplied by another specified value.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="initial">The value that element 0 will be initialized to.</param> 
+ /// <param name="multiplier">The value that indicates how each element should be scaled from the previous.</param> + /// <returns>A new <see cref="Vector128{T}" /> instance with the first element initialized to <paramref name="initial" /> and each subsequent element initialized to the value of the previous element multiplied by <paramref name="multiplier" />.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 CreateGeometricSequence(T initial, [ConstantExpected] T multiplier) + { + int count = Vector128.Count; + Unsafe.SkipInit(out Vector128 result); + + T value = initial; + + for (int index = 0; index < count; index++) + { + result.SetElementUnsafe(index, value); + value = Scalar.Multiply(value, multiplier); + } + + return result; + } + + /// <summary>Creates a new <see cref="Vector128{T}" /> instance whose elements alternate between two specified values.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="even">The value assigned to even-indexed elements.</param> + /// <param name="odd">The value assigned to odd-indexed elements.</param> + /// <returns>A new <see cref="Vector128{T}" /> instance whose even-indexed elements are initialized to <paramref name="even" /> and odd-indexed elements are initialized to <paramref name="odd" />.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 CreateAlternatingSequence(T even, T odd) + { + int count = Vector128.Count; + Unsafe.SkipInit(out Vector128 result); + + for (int index = 0; index < count; index++) + { + result.SetElementUnsafe(index, ((index & 1) == 0) ? even : odd); + } + + return result; + } + + /// <summary>Creates a new <see cref="Vector128{T}" /> instance whose elements are the reciprocal of an arithmetic sequence.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="start">The value that element 0 of the arithmetic sequence will be initialized to.</param> + /// <param name="step">The value that indicates how far apart each element of the arithmetic sequence should be from the previous.</param> + /// <returns>A new <see cref="Vector128{T}" /> instance whose elements are initialized to one divided by the corresponding element of the arithmetic sequence.</returns> 
+ [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 CreateHarmonicSequence(T start, T step) => Vector128.One / CreateSequence(start, step); + + /// <summary>Creates a new <see cref="Vector128{T}" /> instance whose elements are the square root of an arithmetic sequence.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="start">The value that element 0 of the arithmetic sequence will be initialized to.</param> + /// <param name="step">The value that indicates how far apart each element of the arithmetic sequence should be from the previous.</param> + /// <returns>A new <see cref="Vector128{T}" /> instance whose elements are initialized to the square root of the corresponding element of the arithmetic sequence.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 CreateCauchySequence(T start, T step) => Sqrt(CreateSequence(start, step)); + + /// <summary>Creates a new vector by concatenating the lower halves of two vectors.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 ConcatLowerLower(Vector128 left, Vector128 right) => ConcatHalves(left, right, leftUpper: false, rightUpper: false); + + /// <summary>Creates a new vector by concatenating the upper half of one vector and the lower half of another vector.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 ConcatUpperLower(Vector128 left, Vector128 right) => ConcatHalves(left, right, leftUpper: true, rightUpper: false); + + /// <summary>Creates a new vector by concatenating the upper halves of two vectors.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 ConcatUpperUpper(Vector128 left, Vector128 right) => ConcatHalves(left, right, leftUpper: true, rightUpper: true); + + /// <summary>Creates a new vector by concatenating the lower half of one vector and the upper half of another vector.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 ConcatLowerUpper(Vector128 left, Vector128 right) => ConcatHalves(left, right, leftUpper: false, rightUpper: true); + + /// <summary>Interleaves the lower halves of two vectors.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 ZipLower(Vector128 left, Vector128 right) => Zip(left, right, upper: false); + + /// <summary>Interleaves the upper halves of two vectors.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 ZipUpper(Vector128 left, Vector128 
right) => Zip(left, right, upper: true); + + /// <summary>Interleaves two vectors into their lower and upper halves.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static (Vector128 Lower, Vector128 Upper) Zip(Vector128 left, Vector128 right) => (ZipLower(left, right), ZipUpper(left, right)); + + private static Vector128 Zip(Vector128 left, Vector128 right, bool upper) + { + int count = Vector128.Count; + int lowerCount = (count + 1) / 2; + int start = upper ? count - lowerCount : 0; + + Unsafe.SkipInit(out Vector128 result); + + for (int index = 0; index < count; index++) + { + int elementIndex = start + (index / 2); + T value = ((index & 1) == 0) + ? left.GetElementUnsafe(elementIndex) + : right.GetElementUnsafe(elementIndex); + + result.SetElementUnsafe(index, value); + } + + return result; + } + + /// <summary>De-interleaves the even-indexed elements from two vectors.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 UnzipEven(Vector128 left, Vector128 right) => Unzip(left, right, odd: false); + + /// <summary>De-interleaves the odd-indexed elements from two vectors.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 UnzipOdd(Vector128 left, Vector128 right) => Unzip(left, right, odd: true); + + /// <summary>De-interleaves two vectors into their even-indexed and odd-indexed elements.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static (Vector128 Even, Vector128 Odd) Unzip(Vector128 left, Vector128 right) => (UnzipEven(left, right), UnzipOdd(left, right)); + + private static Vector128 Unzip(Vector128 left, Vector128 right, bool odd) + { + int count = Vector128.Count; + int start = odd ? 1 : 0; + int lowerCount = (count - start + 1) / 2; + + if (lowerCount == 0) + { + return Vector128.Zero; + } + + Unsafe.SkipInit(out Vector128 result); + + for (int index = 0; index < count; index++) + { + T value = (index < lowerCount) + ? 
left.GetElementUnsafe(start + (index * 2)) + : right.GetElementUnsafe(start + ((index - lowerCount) * 2)); + + result.SetElementUnsafe(index, value); + } + + return result; + } + + /// <summary>Creates a new vector with the elements of a specified vector in reverse order.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128 Reverse(Vector128 vector) + { + int count = Vector128.Count; + Unsafe.SkipInit(out Vector128 result); + + for (int index = 0; index < count; index++) + { + result.SetElementUnsafe(index, vector.GetElementUnsafe(count - 1 - index)); + } + + return result; + } + + private static Vector128 ConcatHalves(Vector128 left, Vector128 right, bool leftUpper, bool rightUpper) + { + int count = Vector128.Count; + int lowerCount = (count + 1) / 2; + int leftStart = leftUpper ? count - lowerCount : 0; + int rightStart = rightUpper ? count - lowerCount : 0; + + Unsafe.SkipInit(out Vector128 result); + + for (int index = 0; index < count; index++) + { + T value = (index < lowerCount) + ? left.GetElementUnsafe(leftStart + index) + : right.GetElementUnsafe(rightStart + index - lowerCount); + + result.SetElementUnsafe(index, value); + } + + return result; + } + /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs index e00dbbe7258076..39ca308a3cfc4e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs @@ -74,6 +74,15 @@ public static Vector128 Indices } } + /// <summary>Gets a new <see cref="Vector128{T}" /> with elements that alternate between one and negative one, starting with one.</summary> <remarks>The odd-indexed value is computed by subtracting one from <c>default</c>; NOTE(review): confirm the intended result for unsigned element types.</remarks> + /// <exception cref="NotSupportedException">The type of the vector (<typeparamref name="T" />) is not supported.</exception> 
+ public static Vector128 SignSequence + { + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => Vector128.CreateAlternatingSequence(Scalar.One, Scalar.Subtract(default!, Scalar.One)); + } + /// Gets true if is supported; otherwise, false. /// true if is supported; otherwise, false. public static bool IsSupported diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index cd446c7646ac03..7c5b618ffdfe17 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -1658,6 +1658,200 @@ public static Vector256 CreateScalarUnsafe(T value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateSequence(T start, T step) => (Vector256.Indices * step) + Create(start); + /// <summary>Creates a new <see cref="Vector256{T}" /> instance where the elements begin at a specified value and are multiplied by another specified value.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="initial">The value that element 0 will be initialized to.</param> + /// <param name="multiplier">The value that indicates how each element should be scaled from the previous.</param> + /// <returns>A new <see cref="Vector256{T}" /> instance with the first element initialized to <paramref name="initial" /> and each subsequent element initialized to the value of the previous element multiplied by <paramref name="multiplier" />.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 CreateGeometricSequence(T initial, [ConstantExpected] T multiplier) + { + int count = Vector256.Count; + Unsafe.SkipInit(out Vector256 result); + + T value = initial; + + for (int index = 0; index < count; index++) + { + result.SetElementUnsafe(index, value); + value = Scalar.Multiply(value, multiplier); + } + + return result; + } + + /// <summary>Creates a new <see cref="Vector256{T}" /> instance whose elements alternate between two specified values.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> 
+ /// <param name="even">The value assigned to even-indexed elements.</param> + /// <param name="odd">The value assigned to odd-indexed elements.</param> + /// <returns>A new <see cref="Vector256{T}" /> instance whose even-indexed elements are initialized to <paramref name="even" /> and odd-indexed elements are initialized to <paramref name="odd" />.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 CreateAlternatingSequence(T even, T odd) + { + int count = Vector256.Count; + Unsafe.SkipInit(out Vector256 result); + + for (int index = 0; index < count; index++) + { + result.SetElementUnsafe(index, ((index & 1) == 0) ? even : odd); + } + + return result; + } + + /// <summary>Creates a new <see cref="Vector256{T}" /> instance whose elements are the reciprocal of an arithmetic sequence.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="start">The value that element 0 of the arithmetic sequence will be initialized to.</param> + /// <param name="step">The value that indicates how far apart each element of the arithmetic sequence should be from the previous.</param> + /// <returns>A new <see cref="Vector256{T}" /> instance whose elements are initialized to one divided by the corresponding element of the arithmetic sequence.</returns> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 CreateHarmonicSequence(T start, T step) => Vector256.One / CreateSequence(start, step); + + /// <summary>Creates a new <see cref="Vector256{T}" /> instance whose elements are the square root of an arithmetic sequence.</summary> + /// <typeparam name="T">The type of the elements in the vector.</typeparam> + /// <param name="start">The value that element 0 of the arithmetic sequence will be initialized to.</param> + /// <param name="step">The value that indicates how far apart each element of the arithmetic sequence should be from the previous.</param> + /// <returns>A new <see cref="Vector256{T}" /> instance whose elements are initialized to the square root of the corresponding element of the arithmetic sequence.</returns> 
+ [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 CreateCauchySequence(T start, T step) => Sqrt(CreateSequence(start, step)); + + /// <summary>Creates a new vector by concatenating the lower halves of two vectors.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ConcatLowerLower(Vector256 left, Vector256 right) => ConcatHalves(left, right, leftUpper: false, rightUpper: false); + + /// <summary>Creates a new vector by concatenating the upper half of one vector and the lower half of another vector.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ConcatUpperLower(Vector256 left, Vector256 right) => ConcatHalves(left, right, leftUpper: true, rightUpper: false); + + /// <summary>Creates a new vector by concatenating the upper halves of two vectors.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ConcatUpperUpper(Vector256 left, Vector256 right) => ConcatHalves(left, right, leftUpper: true, rightUpper: true); + + /// <summary>Creates a new vector by concatenating the lower half of one vector and the upper half of another vector.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ConcatLowerUpper(Vector256 left, Vector256 right) => ConcatHalves(left, right, leftUpper: false, rightUpper: true); + + /// <summary>Interleaves the lower halves of two vectors.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ZipLower(Vector256 left, Vector256 right) => Zip(left, right, upper: false); + + /// <summary>Interleaves the upper halves of two vectors.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ZipUpper(Vector256 left, Vector256 right) => Zip(left, right, upper: true); + + /// <summary>Interleaves two vectors into their lower and upper halves.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static (Vector256 Lower, Vector256 Upper) Zip(Vector256 left, Vector256 right) => (ZipLower(left, right), ZipUpper(left, right)); + + private static Vector256 Zip(Vector256 left, Vector256 right, bool upper) + { + int count = Vector256.Count; + int lowerCount = (count + 1) / 2; + int start = upper ? count - lowerCount : 0; + + Unsafe.SkipInit(out Vector256 result); + + for (int index = 0; index < count; index++) + { + int elementIndex = start + (index / 2); + T value = ((index & 1) == 0) + ? 
left.GetElementUnsafe(elementIndex) + : right.GetElementUnsafe(elementIndex); + + result.SetElementUnsafe(index, value); + } + + return result; + } + + /// <summary>De-interleaves the even-indexed elements from two vectors.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 UnzipEven(Vector256 left, Vector256 right) => Unzip(left, right, odd: false); + + /// <summary>De-interleaves the odd-indexed elements from two vectors.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 UnzipOdd(Vector256 left, Vector256 right) => Unzip(left, right, odd: true); + + /// <summary>De-interleaves two vectors into their even-indexed and odd-indexed elements.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static (Vector256 Even, Vector256 Odd) Unzip(Vector256 left, Vector256 right) => (UnzipEven(left, right), UnzipOdd(left, right)); + + private static Vector256 Unzip(Vector256 left, Vector256 right, bool odd) + { + int count = Vector256.Count; + int start = odd ? 1 : 0; + int lowerCount = (count - start + 1) / 2; + + if (lowerCount == 0) + { + return Vector256.Zero; + } + + Unsafe.SkipInit(out Vector256 result); + + for (int index = 0; index < count; index++) + { + T value = (index < lowerCount) + ? left.GetElementUnsafe(start + (index * 2)) + : right.GetElementUnsafe(start + ((index - lowerCount) * 2)); + + result.SetElementUnsafe(index, value); + } + + return result; + } + + /// <summary>Creates a new vector with the elements of a specified vector in reverse order.</summary> + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 Reverse(Vector256 vector) + { + int count = Vector256.Count; + Unsafe.SkipInit(out Vector256 result); + + for (int index = 0; index < count; index++) + { + result.SetElementUnsafe(index, vector.GetElementUnsafe(count - 1 - index)); + } + + return result; + } + + private static Vector256 ConcatHalves(Vector256 left, Vector256 right, bool leftUpper, bool rightUpper) + { + int count = Vector256.Count; + int lowerCount = (count + 1) / 2; + int leftStart = leftUpper ? count - lowerCount : 0; + int rightStart = rightUpper ? 
count - lowerCount : 0; + + Unsafe.SkipInit(out Vector256 result); + + for (int index = 0; index < count; index++) + { + T value = (index < lowerCount) + ? left.GetElementUnsafe(leftStart + index) + : right.GetElementUnsafe(rightStart + index - lowerCount); + + result.SetElementUnsafe(index, value); + } + + return result; + } + /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs index 0418b0d01a79bc..1f64922bc77eb1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs @@ -73,6 +73,15 @@ public static Vector256 Indices } } + /// <summary>Gets a new <see cref="Vector256{T}" /> with elements that alternate between one and negative one, starting with one.</summary> <remarks>The odd-indexed value is computed by subtracting one from <c>default</c>; NOTE(review): confirm the intended result for unsigned element types.</remarks> + /// <exception cref="NotSupportedException">The type of the vector (<typeparamref name="T" />) is not supported.</exception> + public static Vector256 SignSequence + { + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => Vector256.CreateAlternatingSequence(Scalar.One, Scalar.Subtract(default!, Scalar.One)); + } + /// Gets true if is supported; otherwise, false. /// true if is supported; otherwise, false. 
public static bool IsSupported diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs index 6e71109c3ab9c5..6026e3e1348665 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs @@ -1690,6 +1690,200 @@ public static Vector512 CreateScalarUnsafe(T value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 CreateSequence(T start, T step) => (Vector512.Indices * step) + Create(start); + /// Creates a new instance where the elements begin at a specified value and are multiplied by another specified value. + /// The type of the elements in the vector. + /// The value that element 0 will be initialized to. + /// The value that indicates how each element should be scaled from the previous. + /// A new instance with the first element initialized to and each subsequent element initialized to the value of the previous element multiplied by . + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 CreateGeometricSequence(T initial, [ConstantExpected] T multiplier) + { + int count = Vector512.Count; + Unsafe.SkipInit(out Vector512 result); + + T value = initial; + + for (int index = 0; index < count; index++) + { + result.SetElementUnsafe(index, value); + value = Scalar.Multiply(value, multiplier); + } + + return result; + } + + /// Creates a new instance whose elements alternate between two specified values. + /// The type of the elements in the vector. + /// The value assigned to even-indexed elements. + /// The value assigned to odd-indexed elements. + /// A new instance whose even-indexed elements are initialized to and odd-indexed elements are initialized to . 
+ [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 CreateAlternatingSequence(T even, T odd) + { + int count = Vector512.Count; + Unsafe.SkipInit(out Vector512 result); + + for (int index = 0; index < count; index++) + { + result.SetElementUnsafe(index, ((index & 1) == 0) ? even : odd); + } + + return result; + } + + /// Creates a new instance whose elements are the reciprocal of an arithmetic sequence. + /// The type of the elements in the vector. + /// The value that element 0 of the arithmetic sequence will be initialized to. + /// The value that indicates how far apart each element of the arithmetic sequence should be from the previous. + /// A new instance whose elements are initialized to one divided by the corresponding element of the arithmetic sequence. + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 CreateHarmonicSequence(T start, T step) => Vector512.One / CreateSequence(start, step); + + /// Creates a new instance whose elements are the square root of an arithmetic sequence. + /// The type of the elements in the vector. + /// The value that element 0 of the arithmetic sequence will be initialized to. + /// The value that indicates how far apart each element of the arithmetic sequence should be from the previous. + /// A new instance whose elements are initialized to the square root of the corresponding element of the arithmetic sequence. 
+ [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 CreateCauchySequence(T start, T step) => Sqrt(CreateSequence(start, step)); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ConcatLowerLower(Vector512 left, Vector512 right) => ConcatHalves(left, right, leftUpper: false, rightUpper: false); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ConcatUpperLower(Vector512 left, Vector512 right) => ConcatHalves(left, right, leftUpper: true, rightUpper: false); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ConcatUpperUpper(Vector512 left, Vector512 right) => ConcatHalves(left, right, leftUpper: true, rightUpper: true); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ConcatLowerUpper(Vector512 left, Vector512 right) => ConcatHalves(left, right, leftUpper: false, rightUpper: true); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ZipLower(Vector512 left, Vector512 right) => Zip(left, right, upper: false); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ZipUpper(Vector512 left, Vector512 right) => Zip(left, right, upper: true); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static (Vector512 Lower, Vector512 Upper) Zip(Vector512 left, Vector512 right) => (ZipLower(left, right), ZipUpper(left, right)); + + private static Vector512 Zip(Vector512 left, Vector512 right, bool upper) + { + int count = Vector512.Count; + int lowerCount = (count + 1) / 2; + int start = upper ? count - lowerCount : 0; + + Unsafe.SkipInit(out Vector512 result); + + for (int index = 0; index < count; index++) + { + int elementIndex = start + (index / 2); + T value = ((index & 1) == 0) + ? 
left.GetElementUnsafe(elementIndex) + : right.GetElementUnsafe(elementIndex); + + result.SetElementUnsafe(index, value); + } + + return result; + } + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 UnzipEven(Vector512 left, Vector512 right) => Unzip(left, right, odd: false); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 UnzipOdd(Vector512 left, Vector512 right) => Unzip(left, right, odd: true); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static (Vector512 Even, Vector512 Odd) Unzip(Vector512 left, Vector512 right) => (UnzipEven(left, right), UnzipOdd(left, right)); + + private static Vector512 Unzip(Vector512 left, Vector512 right, bool odd) + { + int count = Vector512.Count; + int start = odd ? 1 : 0; + int lowerCount = (count - start + 1) / 2; + + if (lowerCount == 0) + { + return Vector512.Zero; + } + + Unsafe.SkipInit(out Vector512 result); + + for (int index = 0; index < count; index++) + { + T value = (index < lowerCount) + ? left.GetElementUnsafe(start + (index * 2)) + : right.GetElementUnsafe(start + ((index - lowerCount) * 2)); + + result.SetElementUnsafe(index, value); + } + + return result; + } + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Reverse(Vector512 vector) + { + int count = Vector512.Count; + Unsafe.SkipInit(out Vector512 result); + + for (int index = 0; index < count; index++) + { + result.SetElementUnsafe(index, vector.GetElementUnsafe(count - 1 - index)); + } + + return result; + } + + private static Vector512 ConcatHalves(Vector512 left, Vector512 right, bool leftUpper, bool rightUpper) + { + int count = Vector512.Count; + int lowerCount = (count + 1) / 2; + int leftStart = leftUpper ? count - lowerCount : 0; + int rightStart = rightUpper ? 
count - lowerCount : 0; + + Unsafe.SkipInit(out Vector512 result); + + for (int index = 0; index < count; index++) + { + T value = (index < lowerCount) + ? left.GetElementUnsafe(leftStart + index) + : right.GetElementUnsafe(rightStart + index - lowerCount); + + result.SetElementUnsafe(index, value); + } + + return result; + } + /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512_1.cs index 3841113a21ec11..abbd5a3f9561d3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512_1.cs @@ -73,6 +73,15 @@ public static Vector512 Indices } } + /// Gets a new with elements that alternate between one and negative one, starting with one. + /// The type of the vector () is not supported. + public static Vector512 SignSequence + { + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => Vector512.CreateAlternatingSequence(Scalar.One, Scalar.Subtract(default!, Scalar.One)); + } + /// Gets true if is supported; otherwise, false. /// true if is supported; otherwise, false. 
public static bool IsSupported diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index 8655d9778f0529..c449a7128f30db 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -1386,6 +1386,200 @@ public static Vector64 CreateScalarUnsafe(T value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 CreateSequence(T start, T step) => (Vector64.Indices * step) + Create(start); + /// Creates a new instance where the elements begin at a specified value and are multiplied by another specified value. + /// The type of the elements in the vector. + /// The value that element 0 will be initialized to. + /// The value that indicates how each element should be scaled from the previous. + /// A new instance with the first element initialized to and each subsequent element initialized to the value of the previous element multiplied by . + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector64 CreateGeometricSequence(T initial, [ConstantExpected] T multiplier) + { + int count = Vector64.Count; + Unsafe.SkipInit(out Vector64 result); + + T value = initial; + + for (int index = 0; index < count; index++) + { + result.SetElementUnsafe(index, value); + value = Scalar.Multiply(value, multiplier); + } + + return result; + } + + /// Creates a new instance whose elements alternate between two specified values. + /// The type of the elements in the vector. + /// The value assigned to even-indexed elements. + /// The value assigned to odd-indexed elements. + /// A new instance whose even-indexed elements are initialized to and odd-indexed elements are initialized to . 
+ [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector64 CreateAlternatingSequence(T even, T odd) + { + int count = Vector64.Count; + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < count; index++) + { + result.SetElementUnsafe(index, ((index & 1) == 0) ? even : odd); + } + + return result; + } + + /// Creates a new instance whose elements are the reciprocal of an arithmetic sequence. + /// The type of the elements in the vector. + /// The value that element 0 of the arithmetic sequence will be initialized to. + /// The value that indicates how far apart each element of the arithmetic sequence should be from the previous. + /// A new instance whose elements are initialized to one divided by the corresponding element of the arithmetic sequence. + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector64 CreateHarmonicSequence(T start, T step) => Vector64.One / CreateSequence(start, step); + + /// Creates a new instance whose elements are the square root of an arithmetic sequence. + /// The type of the elements in the vector. + /// The value that element 0 of the arithmetic sequence will be initialized to. + /// The value that indicates how far apart each element of the arithmetic sequence should be from the previous. + /// A new instance whose elements are initialized to the square root of the corresponding element of the arithmetic sequence. 
+ [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector64 CreateCauchySequence(T start, T step) => Sqrt(CreateSequence(start, step)); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector64 ConcatLowerLower(Vector64 left, Vector64 right) => ConcatHalves(left, right, leftUpper: false, rightUpper: false); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector64 ConcatUpperLower(Vector64 left, Vector64 right) => ConcatHalves(left, right, leftUpper: true, rightUpper: false); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector64 ConcatUpperUpper(Vector64 left, Vector64 right) => ConcatHalves(left, right, leftUpper: true, rightUpper: true); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector64 ConcatLowerUpper(Vector64 left, Vector64 right) => ConcatHalves(left, right, leftUpper: false, rightUpper: true); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector64 ZipLower(Vector64 left, Vector64 right) => Zip(left, right, upper: false); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector64 ZipUpper(Vector64 left, Vector64 right) => Zip(left, right, upper: true); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static (Vector64 Lower, Vector64 Upper) Zip(Vector64 left, Vector64 right) => (ZipLower(left, right), ZipUpper(left, right)); + + private static Vector64 Zip(Vector64 left, Vector64 right, bool upper) // Software fallback: returns the lower or upper Count elements of the interleaving of left and right. + { + int count = Vector64.Count; + // Position within the conceptual 2 * Count interleaving (left[0], right[0], left[1], right[1], ...); the upper result begins at Count, which also covers Count == 1 where the upper half is right[0]. + int position = upper ? count : 0; + + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < count; index++, position++) + { + int elementIndex = position / 2; + T value = ((position & 1) == 0) + ?
left.GetElementUnsafe(elementIndex) + : right.GetElementUnsafe(elementIndex); + + result.SetElementUnsafe(index, value); + } + + return result; + } + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector64 UnzipEven(Vector64 left, Vector64 right) => Unzip(left, right, odd: false); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector64 UnzipOdd(Vector64 left, Vector64 right) => Unzip(left, right, odd: true); + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static (Vector64 Even, Vector64 Odd) Unzip(Vector64 left, Vector64 right) => (UnzipEven(left, right), UnzipOdd(left, right)); + + private static Vector64 Unzip(Vector64 left, Vector64 right, bool odd) // Software fallback: takes every second element of the concatenation of left and right, beginning at element 0 (even) or element 1 (odd). + { + int count = Vector64.Count; + int start = odd ? 1 : 0; + int lowerCount = (count - start + 1) / 2; + + if (lowerCount == 0) + { + return right; // Only reachable when Count == 1 and odd is true: element 1 of the two-element concatenation is right[0], so the result is right itself rather than zero. + } + + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < count; index++) + { + T value = (index < lowerCount) + ? left.GetElementUnsafe(start + (index * 2)) + : right.GetElementUnsafe(start + ((index - lowerCount) * 2)); + + result.SetElementUnsafe(index, value); + } + + return result; + } + + /// + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector64 Reverse(Vector64 vector) + { + int count = Vector64.Count; + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < count; index++) + { + result.SetElementUnsafe(index, vector.GetElementUnsafe(count - 1 - index)); + } + + return result; + } + + private static Vector64 ConcatHalves(Vector64 left, Vector64 right, bool leftUpper, bool rightUpper) + { + int count = Vector64.Count; + int lowerCount = (count + 1) / 2; + int leftStart = leftUpper ? count - lowerCount : 0; + int rightStart = rightUpper ?
count - lowerCount : 0; + + Unsafe.SkipInit(out Vector64 result); + + for (int index = 0; index < count; index++) + { + T value = (index < lowerCount) + ? left.GetElementUnsafe(leftStart + index) + : right.GetElementUnsafe(rightStart + index - lowerCount); + + result.SetElementUnsafe(index, value); + } + + return result; + } + internal static Vector64 DegreesToRadians(Vector64 degrees) where T : ITrigonometricFunctions { diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs index df7a7dafddd0f2..5818dce386f591 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs @@ -73,6 +73,15 @@ public static Vector64 Indices } } + /// Gets a new with elements that alternate between one and negative one, starting with one. + /// The type of the vector () is not supported. + public static Vector64 SignSequence + { + [Intrinsic] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => Vector64.CreateAlternatingSequence(Scalar.One, Scalar.Subtract(default!, Scalar.One)); + } + /// Gets true if is supported; otherwise, false. /// true if is supported; otherwise, false. public static bool IsSupported diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index de7788351ca6da..38df5477bcd087 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4,8 +4,6 @@ // Changes to this file must follow the https://aka.ms/api-review process.
// ------------------------------------------------------------------------------ -using System.Diagnostics.CodeAnalysis; - namespace System.Runtime.Intrinsics { public static partial class Vector128 @@ -171,6 +169,21 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector128 vector, public static System.Runtime.Intrinsics.Vector128 CreateScalarUnsafe(ulong value) { throw null; } public static System.Runtime.Intrinsics.Vector128 CreateScalarUnsafe(T value) { throw null; } public static System.Runtime.Intrinsics.Vector128 CreateSequence(T start, T step) { throw null; } + public static System.Runtime.Intrinsics.Vector128 CreateGeometricSequence(T initial, [System.Diagnostics.CodeAnalysis.ConstantExpected] T multiplier) { throw null; } + public static System.Runtime.Intrinsics.Vector128 CreateAlternatingSequence(T even, T odd) { throw null; } + public static System.Runtime.Intrinsics.Vector128 CreateHarmonicSequence(T start, T step) { throw null; } + public static System.Runtime.Intrinsics.Vector128 CreateCauchySequence(T start, T step) { throw null; } + public static System.Runtime.Intrinsics.Vector128 ZipLower(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 ZipUpper(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static (System.Runtime.Intrinsics.Vector128 Lower, System.Runtime.Intrinsics.Vector128 Upper) Zip(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 UnzipEven(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 UnzipOdd(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static (System.Runtime.Intrinsics.Vector128 Even, 
System.Runtime.Intrinsics.Vector128 Odd) Unzip(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 ConcatLowerLower(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 ConcatUpperLower(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 ConcatUpperUpper(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 ConcatLowerUpper(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } + public static System.Runtime.Intrinsics.Vector128 Reverse(System.Runtime.Intrinsics.Vector128 vector) { throw null; } public static System.Runtime.Intrinsics.Vector128 Create(System.ReadOnlySpan values) { throw null; } public static System.Runtime.Intrinsics.Vector128 Create(System.Runtime.Intrinsics.Vector64 value) { throw null; } public static System.Runtime.Intrinsics.Vector128 Create(System.Runtime.Intrinsics.Vector64 lower, System.Runtime.Intrinsics.Vector64 upper) { throw null; } @@ -444,6 +457,7 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector128 vector, public static System.Runtime.Intrinsics.Vector128 AllBitsSet { get { throw null; } } public static int Count { get { throw null; } } public static System.Runtime.Intrinsics.Vector128 Indices { get { throw null; } } + public static System.Runtime.Intrinsics.Vector128 SignSequence { get { throw null; } } public static bool IsSupported { get { throw null; } } public static System.Runtime.Intrinsics.Vector128 One { get { throw null; } } public static System.Runtime.Intrinsics.Vector128 Zero { get { throw null; } } @@ -622,6 +636,21 @@ public static void CopyTo(this 
System.Runtime.Intrinsics.Vector256 vector, public static System.Runtime.Intrinsics.Vector256 CreateScalarUnsafe(ulong value) { throw null; } public static System.Runtime.Intrinsics.Vector256 CreateScalarUnsafe(T value) { throw null; } public static System.Runtime.Intrinsics.Vector256 CreateSequence(T start, T step) { throw null; } + public static System.Runtime.Intrinsics.Vector256 CreateGeometricSequence(T initial, [System.Diagnostics.CodeAnalysis.ConstantExpected] T multiplier) { throw null; } + public static System.Runtime.Intrinsics.Vector256 CreateAlternatingSequence(T even, T odd) { throw null; } + public static System.Runtime.Intrinsics.Vector256 CreateHarmonicSequence(T start, T step) { throw null; } + public static System.Runtime.Intrinsics.Vector256 CreateCauchySequence(T start, T step) { throw null; } + public static System.Runtime.Intrinsics.Vector256 ZipLower(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 ZipUpper(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static (System.Runtime.Intrinsics.Vector256 Lower, System.Runtime.Intrinsics.Vector256 Upper) Zip(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 UnzipEven(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 UnzipOdd(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static (System.Runtime.Intrinsics.Vector256 Even, System.Runtime.Intrinsics.Vector256 Odd) Unzip(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 ConcatLowerLower(System.Runtime.Intrinsics.Vector256 left, 
System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 ConcatUpperLower(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 ConcatUpperUpper(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 ConcatLowerUpper(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } + public static System.Runtime.Intrinsics.Vector256 Reverse(System.Runtime.Intrinsics.Vector256 vector) { throw null; } public static System.Runtime.Intrinsics.Vector256 Create(System.ReadOnlySpan values) { throw null; } public static System.Runtime.Intrinsics.Vector256 Create(System.Runtime.Intrinsics.Vector64 value) { throw null; } public static System.Runtime.Intrinsics.Vector256 Create(System.Runtime.Intrinsics.Vector128 value) { throw null; } @@ -896,6 +925,7 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector256 vector, public static System.Runtime.Intrinsics.Vector256 AllBitsSet { get { throw null; } } public static int Count { get { throw null; } } public static System.Runtime.Intrinsics.Vector256 Indices { get { throw null; } } + public static System.Runtime.Intrinsics.Vector256 SignSequence { get { throw null; } } public static bool IsSupported { get { throw null; } } public static System.Runtime.Intrinsics.Vector256 One { get { throw null; } } public static System.Runtime.Intrinsics.Vector256 Zero { get { throw null; } } @@ -1074,6 +1104,21 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector512 vector, public static System.Runtime.Intrinsics.Vector512 CreateScalarUnsafe(ulong value) { throw null; } public static System.Runtime.Intrinsics.Vector512 CreateScalarUnsafe(T value) { throw null; } public static System.Runtime.Intrinsics.Vector512 CreateSequence(T start, T 
step) { throw null; } + public static System.Runtime.Intrinsics.Vector512 CreateGeometricSequence(T initial, [System.Diagnostics.CodeAnalysis.ConstantExpected] T multiplier) { throw null; } + public static System.Runtime.Intrinsics.Vector512 CreateAlternatingSequence(T even, T odd) { throw null; } + public static System.Runtime.Intrinsics.Vector512 CreateHarmonicSequence(T start, T step) { throw null; } + public static System.Runtime.Intrinsics.Vector512 CreateCauchySequence(T start, T step) { throw null; } + public static System.Runtime.Intrinsics.Vector512 ZipLower(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 ZipUpper(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static (System.Runtime.Intrinsics.Vector512 Lower, System.Runtime.Intrinsics.Vector512 Upper) Zip(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 UnzipEven(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 UnzipOdd(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static (System.Runtime.Intrinsics.Vector512 Even, System.Runtime.Intrinsics.Vector512 Odd) Unzip(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 ConcatLowerLower(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 ConcatUpperLower(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 
ConcatUpperUpper(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 ConcatLowerUpper(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right) { throw null; } + public static System.Runtime.Intrinsics.Vector512 Reverse(System.Runtime.Intrinsics.Vector512 vector) { throw null; } public static System.Runtime.Intrinsics.Vector512 Create(System.ReadOnlySpan values) { throw null; } public static System.Runtime.Intrinsics.Vector512 Create(System.Runtime.Intrinsics.Vector64 value) { throw null; } public static System.Runtime.Intrinsics.Vector512 Create(System.Runtime.Intrinsics.Vector128 value) { throw null; } @@ -1347,6 +1392,7 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector512 vector, public static System.Runtime.Intrinsics.Vector512 AllBitsSet { get { throw null; } } public static int Count { get { throw null; } } public static System.Runtime.Intrinsics.Vector512 Indices { get { throw null; } } + public static System.Runtime.Intrinsics.Vector512 SignSequence { get { throw null; } } public static bool IsSupported { get { throw null; } } public static System.Runtime.Intrinsics.Vector512 One { get { throw null; } } public static System.Runtime.Intrinsics.Vector512 Zero { get { throw null; } } @@ -1502,6 +1548,21 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector64 vector, public static System.Runtime.Intrinsics.Vector64 CreateScalarUnsafe(ulong value) { throw null; } public static System.Runtime.Intrinsics.Vector64 CreateScalarUnsafe(T value) { throw null; } public static System.Runtime.Intrinsics.Vector64 CreateSequence(T start, T step) { throw null; } + public static System.Runtime.Intrinsics.Vector64 CreateGeometricSequence(T initial, [System.Diagnostics.CodeAnalysis.ConstantExpected] T multiplier) { throw null; } + public static System.Runtime.Intrinsics.Vector64 CreateAlternatingSequence(T even, T odd) { 
throw null; } + public static System.Runtime.Intrinsics.Vector64 CreateHarmonicSequence(T start, T step) { throw null; } + public static System.Runtime.Intrinsics.Vector64 CreateCauchySequence(T start, T step) { throw null; } + public static System.Runtime.Intrinsics.Vector64 ZipLower(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } + public static System.Runtime.Intrinsics.Vector64 ZipUpper(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } + public static (System.Runtime.Intrinsics.Vector64 Lower, System.Runtime.Intrinsics.Vector64 Upper) Zip(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } + public static System.Runtime.Intrinsics.Vector64 UnzipEven(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } + public static System.Runtime.Intrinsics.Vector64 UnzipOdd(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } + public static (System.Runtime.Intrinsics.Vector64 Even, System.Runtime.Intrinsics.Vector64 Odd) Unzip(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } + public static System.Runtime.Intrinsics.Vector64 ConcatLowerLower(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } + public static System.Runtime.Intrinsics.Vector64 ConcatUpperLower(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } + public static System.Runtime.Intrinsics.Vector64 ConcatUpperUpper(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } + public static System.Runtime.Intrinsics.Vector64 ConcatLowerUpper(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } + public static System.Runtime.Intrinsics.Vector64 
Reverse(System.Runtime.Intrinsics.Vector64 vector) { throw null; } public static System.Runtime.Intrinsics.Vector64 Create(System.ReadOnlySpan values) { throw null; } public static System.Runtime.Intrinsics.Vector64 Create(T value) { throw null; } public static System.Runtime.Intrinsics.Vector64 Create(T[] values) { throw null; } @@ -1761,6 +1822,7 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector64 vector, public static System.Runtime.Intrinsics.Vector64 AllBitsSet { get { throw null; } } public static int Count { get { throw null; } } public static System.Runtime.Intrinsics.Vector64 Indices { get { throw null; } } + public static System.Runtime.Intrinsics.Vector64 SignSequence { get { throw null; } } public static bool IsSupported { get { throw null; } } public static System.Runtime.Intrinsics.Vector64 One { get { throw null; } } public static System.Runtime.Intrinsics.Vector64 Zero { get { throw null; } } From b8f8dc8ec9342a5b210b95a0498a1c809cc4e611 Mon Sep 17 00:00:00 2001 From: Steven He Date: Sun, 3 May 2026 19:07:36 +0900 Subject: [PATCH 02/24] tests --- .../tests/GenericVectorTests.cs | 113 ++++++++++++++++++ .../tests/Vectors/Vector128Tests.cs | 113 ++++++++++++++++++ .../tests/Vectors/Vector256Tests.cs | 113 ++++++++++++++++++ .../tests/Vectors/Vector512Tests.cs | 113 ++++++++++++++++++ .../tests/Vectors/Vector64Tests.cs | 113 ++++++++++++++++++ 5 files changed, 565 insertions(+) diff --git a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs index f18f9ff9278419..acc586a3ccaadc 100644 --- a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs +++ b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs @@ -4531,6 +4531,119 @@ private static void TestCreateSequence(T start, T step) } } + [Fact] + public void CreateGeometricSequenceInt32Test() + { + Vector sequence = Vector.CreateGeometricSequence(1, 2); + int expected = 1; + + for 
(int index = 0; index < Vector.Count; index++) + { + Assert.Equal(expected, sequence.GetElement(index)); + expected *= 2; + } + } + + [Fact] + public void CreateAlternatingSequenceInt32Test() + { + Vector sequence = Vector.CreateAlternatingSequence(5, -5); + + for (int index = 0; index < Vector.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 5 : -5, sequence.GetElement(index)); + } + } + + [Fact] + public void CreateHarmonicSequenceDoubleTest() + { + Vector sequence = Vector.CreateHarmonicSequence(1.0, 1.0); + double expected = 1.0; + + for (int index = 0; index < Vector.Count; index++) + { + Assert.Equal(1.0 / expected, sequence.GetElement(index)); + expected += 1.0; + } + } + + [Fact] + public void CreateCauchySequenceDoubleTest() + { + Vector sequence = Vector.CreateCauchySequence(1.0, 1.0); + double expected = 1.0; + + for (int index = 0; index < Vector.Count; index++) + { + Assert.Equal(Math.Sqrt(expected), sequence.GetElement(index)); + expected += 1.0; + } + } + + [Fact] + public void SignSequenceInt32Test() + { + Vector sequence = Vector.SignSequence; + + for (int index = 0; index < Vector.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 1 : -1, sequence.GetElement(index)); + } + } + + [Fact] + public void LaneOperationsInt32Test() + { + Vector left = Vector.CreateSequence(0, 1); + Vector right = Vector.CreateSequence(100, 1); + int count = Vector.Count; + int lowerCount = (count + 1) / 2; + int upperStart = count - lowerCount; + + AssertVectorEqual(CreateVector(index => ((index & 1) == 0) ? left.GetElement(index / 2) : right.GetElement(index / 2)), Vector.ZipLower(left, right)); + AssertVectorEqual(CreateVector(index => ((index & 1) == 0) ? 
left.GetElement(upperStart + (index / 2)) : right.GetElement(upperStart + (index / 2))), Vector.ZipUpper(left, right)); + + (Vector lower, Vector upper) = Vector.Zip(left, right); + AssertVectorEqual(Vector.ZipLower(left, right), lower); + AssertVectorEqual(Vector.ZipUpper(left, right), upper); + + AssertVectorEqual(left, Vector.UnzipEven(lower, upper)); + AssertVectorEqual(right, Vector.UnzipOdd(lower, upper)); + + (Vector even, Vector odd) = Vector.Unzip(lower, upper); + AssertVectorEqual(left, even); + AssertVectorEqual(right, odd); + + AssertVectorEqual(CreateVector(index => (index < lowerCount) ? left.GetElement(index) : right.GetElement(index - lowerCount)), Vector.ConcatLowerLower(left, right)); + AssertVectorEqual(CreateVector(index => (index < lowerCount) ? left.GetElement(upperStart + index) : right.GetElement(index - lowerCount)), Vector.ConcatUpperLower(left, right)); + AssertVectorEqual(CreateVector(index => (index < lowerCount) ? left.GetElement(upperStart + index) : right.GetElement(upperStart + index - lowerCount)), Vector.ConcatUpperUpper(left, right)); + AssertVectorEqual(CreateVector(index => (index < lowerCount) ? 
left.GetElement(index) : right.GetElement(upperStart + index - lowerCount)), Vector.ConcatLowerUpper(left, right)); + + AssertVectorEqual(CreateVector(index => left.GetElement(count - 1 - index)), Vector.Reverse(left)); + } + + private static Vector CreateVector(Func elementSelector) + { + int[] values = new int[Vector.Count]; + + for (int index = 0; index < values.Length; index++) + { + values[index] = elementSelector(index); + } + + return new Vector(values); + } + + private static void AssertVectorEqual(Vector expected, Vector actual) + where T : struct + { + for (int index = 0; index < Vector.Count; index++) + { + Assert.Equal(expected.GetElement(index), actual.GetElement(index)); + } + } + [Theory] [MemberData(nameof(GenericMathTestMemberData.CosDouble), MemberType = typeof(GenericMathTestMemberData))] public void CosDoubleTest(double value, double expectedResult, double variance) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs index 6246c1231c32d2..599e74a3463f9e 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs @@ -5305,6 +5305,119 @@ private static void TestCreateSequence(T start, T step) } } + [Fact] + public void CreateGeometricSequenceInt32Test() + { + Vector128 sequence = Vector128.CreateGeometricSequence(1, 2); + int expected = 1; + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal(expected, sequence.GetElement(index)); + expected *= 2; + } + } + + [Fact] + public void CreateAlternatingSequenceInt32Test() + { + Vector128 sequence = Vector128.CreateAlternatingSequence(5, -5); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 
5 : -5, sequence.GetElement(index)); + } + } + + [Fact] + public void CreateHarmonicSequenceDoubleTest() + { + Vector128 sequence = Vector128.CreateHarmonicSequence(1.0, 1.0); + double expected = 1.0; + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal(1.0 / expected, sequence.GetElement(index)); + expected += 1.0; + } + } + + [Fact] + public void CreateCauchySequenceDoubleTest() + { + Vector128 sequence = Vector128.CreateCauchySequence(1.0, 1.0); + double expected = 1.0; + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal(Math.Sqrt(expected), sequence.GetElement(index)); + expected += 1.0; + } + } + + [Fact] + public void SignSequenceInt32Test() + { + Vector128 sequence = Vector128.SignSequence; + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 1 : -1, sequence.GetElement(index)); + } + } + + [Fact] + public void LaneOperationsInt32Test() + { + Vector128 left = Vector128.CreateSequence(0, 1); + Vector128 right = Vector128.CreateSequence(100, 1); + int count = Vector128.Count; + int lowerCount = (count + 1) / 2; + int upperStart = count - lowerCount; + + AssertVectorEqual(CreateVector128(index => ((index & 1) == 0) ? left.GetElement(index / 2) : right.GetElement(index / 2)), Vector128.ZipLower(left, right)); + AssertVectorEqual(CreateVector128(index => ((index & 1) == 0) ? 
left.GetElement(upperStart + (index / 2)) : right.GetElement(upperStart + (index / 2))), Vector128.ZipUpper(left, right)); + + (Vector128 lower, Vector128 upper) = Vector128.Zip(left, right); + AssertVectorEqual(Vector128.ZipLower(left, right), lower); + AssertVectorEqual(Vector128.ZipUpper(left, right), upper); + + AssertVectorEqual(left, Vector128.UnzipEven(lower, upper)); + AssertVectorEqual(right, Vector128.UnzipOdd(lower, upper)); + + (Vector128 even, Vector128 odd) = Vector128.Unzip(lower, upper); + AssertVectorEqual(left, even); + AssertVectorEqual(right, odd); + + AssertVectorEqual(CreateVector128(index => (index < lowerCount) ? left.GetElement(index) : right.GetElement(index - lowerCount)), Vector128.ConcatLowerLower(left, right)); + AssertVectorEqual(CreateVector128(index => (index < lowerCount) ? left.GetElement(upperStart + index) : right.GetElement(index - lowerCount)), Vector128.ConcatUpperLower(left, right)); + AssertVectorEqual(CreateVector128(index => (index < lowerCount) ? left.GetElement(upperStart + index) : right.GetElement(upperStart + index - lowerCount)), Vector128.ConcatUpperUpper(left, right)); + AssertVectorEqual(CreateVector128(index => (index < lowerCount) ? 
left.GetElement(index) : right.GetElement(upperStart + index - lowerCount)), Vector128.ConcatLowerUpper(left, right)); + + AssertVectorEqual(CreateVector128(index => left.GetElement(count - 1 - index)), Vector128.Reverse(left)); + } + + private static Vector128 CreateVector128(Func elementSelector) + { + int[] values = new int[Vector128.Count]; + + for (int index = 0; index < values.Length; index++) + { + values[index] = elementSelector(index); + } + + return Vector128.Create(values); + } + + private static void AssertVectorEqual(Vector128 expected, Vector128 actual) + where T : struct + { + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal(expected.GetElement(index), actual.GetElement(index)); + } + } + [Theory] [MemberData(nameof(GenericMathTestMemberData.AsinDouble), MemberType = typeof(GenericMathTestMemberData))] public void AsinDoubleTest(double value, double expectedResult, double variance) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs index 728a87900f314a..9656040071ae69 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs @@ -6481,6 +6481,119 @@ private static void TestCreateSequence(T start, T step) } } + [Fact] + public void CreateGeometricSequenceInt32Test() + { + Vector256 sequence = Vector256.CreateGeometricSequence(1, 2); + int expected = 1; + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal(expected, sequence.GetElement(index)); + expected *= 2; + } + } + + [Fact] + public void CreateAlternatingSequenceInt32Test() + { + Vector256 sequence = Vector256.CreateAlternatingSequence(5, -5); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 
5 : -5, sequence.GetElement(index)); + } + } + + [Fact] + public void CreateHarmonicSequenceDoubleTest() + { + Vector256 sequence = Vector256.CreateHarmonicSequence(1.0, 1.0); + double expected = 1.0; + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal(1.0 / expected, sequence.GetElement(index)); + expected += 1.0; + } + } + + [Fact] + public void CreateCauchySequenceDoubleTest() + { + Vector256 sequence = Vector256.CreateCauchySequence(1.0, 1.0); + double expected = 1.0; + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal(Math.Sqrt(expected), sequence.GetElement(index)); + expected += 1.0; + } + } + + [Fact] + public void SignSequenceInt32Test() + { + Vector256 sequence = Vector256.SignSequence; + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 1 : -1, sequence.GetElement(index)); + } + } + + [Fact] + public void LaneOperationsInt32Test() + { + Vector256 left = Vector256.CreateSequence(0, 1); + Vector256 right = Vector256.CreateSequence(100, 1); + int count = Vector256.Count; + int lowerCount = (count + 1) / 2; + int upperStart = count - lowerCount; + + AssertVectorEqual(CreateVector256(index => ((index & 1) == 0) ? left.GetElement(index / 2) : right.GetElement(index / 2)), Vector256.ZipLower(left, right)); + AssertVectorEqual(CreateVector256(index => ((index & 1) == 0) ? 
left.GetElement(upperStart + (index / 2)) : right.GetElement(upperStart + (index / 2))), Vector256.ZipUpper(left, right)); + + (Vector256 lower, Vector256 upper) = Vector256.Zip(left, right); + AssertVectorEqual(Vector256.ZipLower(left, right), lower); + AssertVectorEqual(Vector256.ZipUpper(left, right), upper); + + AssertVectorEqual(left, Vector256.UnzipEven(lower, upper)); + AssertVectorEqual(right, Vector256.UnzipOdd(lower, upper)); + + (Vector256 even, Vector256 odd) = Vector256.Unzip(lower, upper); + AssertVectorEqual(left, even); + AssertVectorEqual(right, odd); + + AssertVectorEqual(CreateVector256(index => (index < lowerCount) ? left.GetElement(index) : right.GetElement(index - lowerCount)), Vector256.ConcatLowerLower(left, right)); + AssertVectorEqual(CreateVector256(index => (index < lowerCount) ? left.GetElement(upperStart + index) : right.GetElement(index - lowerCount)), Vector256.ConcatUpperLower(left, right)); + AssertVectorEqual(CreateVector256(index => (index < lowerCount) ? left.GetElement(upperStart + index) : right.GetElement(upperStart + index - lowerCount)), Vector256.ConcatUpperUpper(left, right)); + AssertVectorEqual(CreateVector256(index => (index < lowerCount) ? 
left.GetElement(index) : right.GetElement(upperStart + index - lowerCount)), Vector256.ConcatLowerUpper(left, right)); + + AssertVectorEqual(CreateVector256(index => left.GetElement(count - 1 - index)), Vector256.Reverse(left)); + } + + private static Vector256 CreateVector256(Func elementSelector) + { + int[] values = new int[Vector256.Count]; + + for (int index = 0; index < values.Length; index++) + { + values[index] = elementSelector(index); + } + + return Vector256.Create(values); + } + + private static void AssertVectorEqual(Vector256 expected, Vector256 actual) + where T : struct + { + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal(expected.GetElement(index), actual.GetElement(index)); + } + } + [Theory] [MemberData(nameof(GenericMathTestMemberData.AsinDouble), MemberType = typeof(GenericMathTestMemberData))] public void AsinDoubleTest(double value, double expectedResult, double variance) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs index d3c430020b5225..73f5529810e721 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs @@ -6264,6 +6264,119 @@ private static void TestCreateSequence(T start, T step) } } + [Fact] + public void CreateGeometricSequenceInt32Test() + { + Vector512 sequence = Vector512.CreateGeometricSequence(1, 2); + int expected = 1; + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal(expected, sequence.GetElement(index)); + expected *= 2; + } + } + + [Fact] + public void CreateAlternatingSequenceInt32Test() + { + Vector512 sequence = Vector512.CreateAlternatingSequence(5, -5); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 
5 : -5, sequence.GetElement(index)); + } + } + + [Fact] + public void CreateHarmonicSequenceDoubleTest() + { + Vector512 sequence = Vector512.CreateHarmonicSequence(1.0, 1.0); + double expected = 1.0; + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal(1.0 / expected, sequence.GetElement(index)); + expected += 1.0; + } + } + + [Fact] + public void CreateCauchySequenceDoubleTest() + { + Vector512 sequence = Vector512.CreateCauchySequence(1.0, 1.0); + double expected = 1.0; + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal(Math.Sqrt(expected), sequence.GetElement(index)); + expected += 1.0; + } + } + + [Fact] + public void SignSequenceInt32Test() + { + Vector512 sequence = Vector512.SignSequence; + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 1 : -1, sequence.GetElement(index)); + } + } + + [Fact] + public void LaneOperationsInt32Test() + { + Vector512 left = Vector512.CreateSequence(0, 1); + Vector512 right = Vector512.CreateSequence(100, 1); + int count = Vector512.Count; + int lowerCount = (count + 1) / 2; + int upperStart = count - lowerCount; + + AssertVectorEqual(CreateVector512(index => ((index & 1) == 0) ? left.GetElement(index / 2) : right.GetElement(index / 2)), Vector512.ZipLower(left, right)); + AssertVectorEqual(CreateVector512(index => ((index & 1) == 0) ? 
left.GetElement(upperStart + (index / 2)) : right.GetElement(upperStart + (index / 2))), Vector512.ZipUpper(left, right)); + + (Vector512 lower, Vector512 upper) = Vector512.Zip(left, right); + AssertVectorEqual(Vector512.ZipLower(left, right), lower); + AssertVectorEqual(Vector512.ZipUpper(left, right), upper); + + AssertVectorEqual(left, Vector512.UnzipEven(lower, upper)); + AssertVectorEqual(right, Vector512.UnzipOdd(lower, upper)); + + (Vector512 even, Vector512 odd) = Vector512.Unzip(lower, upper); + AssertVectorEqual(left, even); + AssertVectorEqual(right, odd); + + AssertVectorEqual(CreateVector512(index => (index < lowerCount) ? left.GetElement(index) : right.GetElement(index - lowerCount)), Vector512.ConcatLowerLower(left, right)); + AssertVectorEqual(CreateVector512(index => (index < lowerCount) ? left.GetElement(upperStart + index) : right.GetElement(index - lowerCount)), Vector512.ConcatUpperLower(left, right)); + AssertVectorEqual(CreateVector512(index => (index < lowerCount) ? left.GetElement(upperStart + index) : right.GetElement(upperStart + index - lowerCount)), Vector512.ConcatUpperUpper(left, right)); + AssertVectorEqual(CreateVector512(index => (index < lowerCount) ? 
left.GetElement(index) : right.GetElement(upperStart + index - lowerCount)), Vector512.ConcatLowerUpper(left, right)); + + AssertVectorEqual(CreateVector512(index => left.GetElement(count - 1 - index)), Vector512.Reverse(left)); + } + + private static Vector512 CreateVector512(Func elementSelector) + { + int[] values = new int[Vector512.Count]; + + for (int index = 0; index < values.Length; index++) + { + values[index] = elementSelector(index); + } + + return Vector512.Create(values); + } + + private static void AssertVectorEqual(Vector512 expected, Vector512 actual) + where T : struct + { + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal(expected.GetElement(index), actual.GetElement(index)); + } + } + [Theory] [MemberData(nameof(GenericMathTestMemberData.AsinDouble), MemberType = typeof(GenericMathTestMemberData))] public void AsinDoubleTest(double value, double expectedResult, double variance) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs index be91c3325549fb..fa332d75809c4e 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs @@ -4579,6 +4579,119 @@ private static void TestCreateSequence(T start, T step) } } + [Fact] + public void CreateGeometricSequenceInt32Test() + { + Vector64 sequence = Vector64.CreateGeometricSequence(1, 2); + int expected = 1; + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal(expected, sequence.GetElement(index)); + expected *= 2; + } + } + + [Fact] + public void CreateAlternatingSequenceInt32Test() + { + Vector64 sequence = Vector64.CreateAlternatingSequence(5, -5); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 
5 : -5, sequence.GetElement(index)); + } + } + + [Fact] + public void CreateHarmonicSequenceDoubleTest() + { + Vector64 sequence = Vector64.CreateHarmonicSequence(1.0, 1.0); + double expected = 1.0; + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal(1.0 / expected, sequence.GetElement(index)); + expected += 1.0; + } + } + + [Fact] + public void CreateCauchySequenceDoubleTest() + { + Vector64 sequence = Vector64.CreateCauchySequence(1.0, 1.0); + double expected = 1.0; + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal(Math.Sqrt(expected), sequence.GetElement(index)); + expected += 1.0; + } + } + + [Fact] + public void SignSequenceInt32Test() + { + Vector64 sequence = Vector64.SignSequence; + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 1 : -1, sequence.GetElement(index)); + } + } + + [Fact] + public void LaneOperationsInt32Test() + { + Vector64 left = Vector64.CreateSequence(0, 1); + Vector64 right = Vector64.CreateSequence(100, 1); + int count = Vector64.Count; + int lowerCount = (count + 1) / 2; + int upperStart = count - lowerCount; + + AssertVectorEqual(CreateVector64(index => ((index & 1) == 0) ? left.GetElement(index / 2) : right.GetElement(index / 2)), Vector64.ZipLower(left, right)); + AssertVectorEqual(CreateVector64(index => ((index & 1) == 0) ? 
left.GetElement(upperStart + (index / 2)) : right.GetElement(upperStart + (index / 2))), Vector64.ZipUpper(left, right)); + + (Vector64 lower, Vector64 upper) = Vector64.Zip(left, right); + AssertVectorEqual(Vector64.ZipLower(left, right), lower); + AssertVectorEqual(Vector64.ZipUpper(left, right), upper); + + AssertVectorEqual(left, Vector64.UnzipEven(lower, upper)); + AssertVectorEqual(right, Vector64.UnzipOdd(lower, upper)); + + (Vector64 even, Vector64 odd) = Vector64.Unzip(lower, upper); + AssertVectorEqual(left, even); + AssertVectorEqual(right, odd); + + AssertVectorEqual(CreateVector64(index => (index < lowerCount) ? left.GetElement(index) : right.GetElement(index - lowerCount)), Vector64.ConcatLowerLower(left, right)); + AssertVectorEqual(CreateVector64(index => (index < lowerCount) ? left.GetElement(upperStart + index) : right.GetElement(index - lowerCount)), Vector64.ConcatUpperLower(left, right)); + AssertVectorEqual(CreateVector64(index => (index < lowerCount) ? left.GetElement(upperStart + index) : right.GetElement(upperStart + index - lowerCount)), Vector64.ConcatUpperUpper(left, right)); + AssertVectorEqual(CreateVector64(index => (index < lowerCount) ? 
left.GetElement(index) : right.GetElement(upperStart + index - lowerCount)), Vector64.ConcatLowerUpper(left, right)); + + AssertVectorEqual(CreateVector64(index => left.GetElement(count - 1 - index)), Vector64.Reverse(left)); + } + + private static Vector64 CreateVector64(Func elementSelector) + { + int[] values = new int[Vector64.Count]; + + for (int index = 0; index < values.Length; index++) + { + values[index] = elementSelector(index); + } + + return Vector64.Create(values); + } + + private static void AssertVectorEqual(Vector64 expected, Vector64 actual) + where T : struct + { + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal(expected.GetElement(index), actual.GetElement(index)); + } + } + [Theory] [MemberData(nameof(GenericMathTestMemberData.AsinDouble), MemberType = typeof(GenericMathTestMemberData))] public void AsinDoubleTest(double value, double expectedResult, double variance) From 71b11d4ff553be4478d1bb555fb63fcfb464ef0a Mon Sep 17 00:00:00 2001 From: Steven He Date: Sun, 3 May 2026 19:50:25 +0900 Subject: [PATCH 03/24] Nit --- .../System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 38df5477bcd087..3bdc8526df2a43 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4,6 +4,8 @@ // Changes to this file must follow the https://aka.ms/api-review process. 
// ------------------------------------------------------------------------------ +using System.Diagnostics.CodeAnalysis; + namespace System.Runtime.Intrinsics { public static partial class Vector128 From ce55e52dcdcd52c927fde3320a8e7d1b092bc1aa Mon Sep 17 00:00:00 2001 From: Steven He Date: Sun, 3 May 2026 20:25:22 +0900 Subject: [PATCH 04/24] Implement new SIMD nodes --- src/coreclr/jit/compiler.h | 32 ++ src/coreclr/jit/gentree.cpp | 702 ++++++++++++++++++++++++++++++++++++ 2 files changed, 734 insertions(+) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 74ebf2548df805..f7d7a36ba23c94 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3422,6 +3422,12 @@ class Compiler GenTree* gtNewSimdCreateSequenceNode( var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize); + GenTree* gtNewSimdCreateGeometricSequenceNode( + var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize); + + GenTree* gtNewSimdCreateAlternatingSequenceNode( + var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize); + GenTree* gtNewSimdDotProdNode(var_types type, GenTree* op1, GenTree* op2, @@ -3446,6 +3452,8 @@ class Compiler GenTree* gtNewSimdGetIndicesNode(var_types type, var_types simdBaseType, unsigned simdSize); + GenTree* gtNewSimdGetSignSequenceNode(var_types type, var_types simdBaseType, unsigned simdSize); + GenTree* gtNewSimdGetLowerNode(var_types type, GenTree* op1, var_types simdBaseType, @@ -3552,6 +3560,30 @@ class Compiler var_types simdBaseType, unsigned simdSize); + GenTree* gtNewSimdConcatNode(var_types type, + GenTree* op1, + GenTree* op2, + var_types simdBaseType, + unsigned simdSize, + bool leftUpper, + bool rightUpper); + + GenTree* gtNewSimdZipNode(var_types type, + GenTree* op1, + GenTree* op2, + var_types simdBaseType, + unsigned simdSize, + bool upper); + + GenTree* gtNewSimdUnzipNode(var_types type, + GenTree* op1, + GenTree* 
op2, + var_types simdBaseType, + unsigned simdSize, + bool odd); + + GenTree* gtNewSimdReverseNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize); + GenTree* gtNewSimdRoundNode( var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize); diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 1b846460e89c8b..37fadd62e1b563 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -24520,6 +24520,346 @@ GenTree* Compiler::gtNewSimdCreateSequenceNode( return result; } +//---------------------------------------------------------------------------------------------- +// Compiler::gtNewSimdCreateGeometricSequenceNode: Creates a new simd CreateGeometricSequence node +// +// Arguments: +// type - The return type of SIMD node being created +// op1 - The initial value +// op2 - The multiplier value +// simdBaseType - The base type of SIMD type of the intrinsic +// simdSize - The size of the SIMD type of the intrinsic +// +// Returns: +// The created CreateGeometricSequence node +// +GenTree* Compiler::gtNewSimdCreateGeometricSequenceNode( + var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize) +{ + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + assert(varTypeIsArithmetic(simdBaseType)); + assert(op2->OperIsConst()); + + GenTreeVecCon* vecCon = gtNewVconNode(type); + uint32_t simdCount = getSIMDVectorLength(simdSize, simdBaseType); + bool isPartial = !op1->OperIsConst(); + + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: + { + uint64_t initial = isPartial ? 1 : static_cast(op1->AsIntConCommon()->IntegralValue()); + uint64_t multiplier = static_cast(op2->AsIntConCommon()->IntegralValue()); + + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u8[index] = static_cast(initial); + initial *= multiplier; + } + break; + } + + case TYP_SHORT: + case TYP_USHORT: + { + uint64_t initial = isPartial ? 
1 : static_cast(op1->AsIntConCommon()->IntegralValue()); + uint64_t multiplier = static_cast(op2->AsIntConCommon()->IntegralValue()); + + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u16[index] = static_cast(initial); + initial *= multiplier; + } + break; + } + + case TYP_INT: + case TYP_UINT: + { + uint64_t initial = isPartial ? 1 : static_cast(op1->AsIntConCommon()->IntegralValue()); + uint64_t multiplier = static_cast(op2->AsIntConCommon()->IntegralValue()); + + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u32[index] = static_cast(initial); + initial *= multiplier; + } + break; + } + + case TYP_LONG: + case TYP_ULONG: + { + uint64_t initial = isPartial ? 1 : static_cast(op1->AsIntConCommon()->IntegralValue()); + uint64_t multiplier = static_cast(op2->AsIntConCommon()->IntegralValue()); + + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u64[index] = initial; + initial *= multiplier; + } + break; + } + + case TYP_FLOAT: + { + float initial = isPartial ? 1.0f : static_cast(op1->AsDblCon()->DconValue()); + float multiplier = static_cast(op2->AsDblCon()->DconValue()); + + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.f32[index] = initial; + initial *= multiplier; + } + break; + } + + case TYP_DOUBLE: + { + double initial = isPartial ? 
1.0 : op1->AsDblCon()->DconValue(); + double multiplier = op2->AsDblCon()->DconValue(); + + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.f64[index] = initial; + initial *= multiplier; + } + break; + } + + default: + { + unreached(); + } + } + + GenTree* result = vecCon; + + if (isPartial) + { + GenTree* initial = gtNewSimdCreateBroadcastNode(type, op1, simdBaseType, simdSize); + result = gtNewSimdBinOpNode(GT_MUL, type, result, initial, simdBaseType, simdSize); + } + + return result; +} + +//---------------------------------------------------------------------------------------------- +// Compiler::gtNewSimdCreateAlternatingSequenceNode: Creates a new simd CreateAlternatingSequence node +// +// Arguments: +// type - The return type of SIMD node being created +// op1 - The even-indexed value +// op2 - The odd-indexed value +// simdBaseType - The base type of SIMD type of the intrinsic +// simdSize - The size of the SIMD type of the intrinsic +// +// Returns: +// The created CreateAlternatingSequence node +// +GenTree* Compiler::gtNewSimdCreateAlternatingSequenceNode( + var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize) +{ + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + assert(varTypeIsArithmetic(simdBaseType)); + + uint32_t simdCount = getSIMDVectorLength(simdSize, simdBaseType); + + if (simdCount == 1) + { + GenTree* result = gtNewSimdCreateBroadcastNode(type, op1, simdBaseType, simdSize); + return gtWrapWithSideEffects(result, op2, GTF_ALL_EFFECT); + } + + if (op1->OperIsConst() && op2->OperIsConst()) + { + GenTreeVecCon* vecCon = gtNewVconNode(type); + + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: + { + uint64_t even = static_cast(op1->AsIntConCommon()->IntegralValue()); + uint64_t odd = static_cast(op2->AsIntConCommon()->IntegralValue()); + + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u8[index] = 
static_cast(((index & 1) == 0) ? even : odd); + } + break; + } + + case TYP_SHORT: + case TYP_USHORT: + { + uint64_t even = static_cast(op1->AsIntConCommon()->IntegralValue()); + uint64_t odd = static_cast(op2->AsIntConCommon()->IntegralValue()); + + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u16[index] = static_cast(((index & 1) == 0) ? even : odd); + } + break; + } + + case TYP_INT: + case TYP_UINT: + { + uint64_t even = static_cast(op1->AsIntConCommon()->IntegralValue()); + uint64_t odd = static_cast(op2->AsIntConCommon()->IntegralValue()); + + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u32[index] = static_cast(((index & 1) == 0) ? even : odd); + } + break; + } + + case TYP_LONG: + case TYP_ULONG: + { + uint64_t even = static_cast(op1->AsIntConCommon()->IntegralValue()); + uint64_t odd = static_cast(op2->AsIntConCommon()->IntegralValue()); + + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u64[index] = ((index & 1) == 0) ? even : odd; + } + break; + } + + case TYP_FLOAT: + { + double even = op1->AsDblCon()->DconValue(); + double odd = op2->AsDblCon()->DconValue(); + + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.f32[index] = static_cast(((index & 1) == 0) ? even : odd); + } + break; + } + + case TYP_DOUBLE: + { + double even = op1->AsDblCon()->DconValue(); + double odd = op2->AsDblCon()->DconValue(); + + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.f64[index] = ((index & 1) == 0) ? 
even : odd; + } + break; + } + + default: + { + unreached(); + } + } + + return vecCon; + } + + GenTree* even = gtNewSimdCreateBroadcastNode(type, op1, simdBaseType, simdSize); + GenTree* odd = gtNewSimdCreateBroadcastNode(type, op2, simdBaseType, simdSize); + + return gtNewSimdZipNode(type, even, odd, simdBaseType, simdSize, false); +} + +//---------------------------------------------------------------------------------------------- +// Compiler::gtNewSimdGetSignSequenceNode: Creates a new simd SignSequence node +// +// Arguments: +// type - The return type of SIMD node being created +// simdBaseType - The base type of SIMD type of the intrinsic +// simdSize - The size of the SIMD type of the intrinsic +// +// Returns: +// The created SignSequence node +// +GenTree* Compiler::gtNewSimdGetSignSequenceNode(var_types type, var_types simdBaseType, unsigned simdSize) +{ + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + assert(varTypeIsArithmetic(simdBaseType)); + + GenTreeVecCon* vecCon = gtNewVconNode(type); + uint32_t simdCount = getSIMDVectorLength(simdSize, simdBaseType); + + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: + { + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u8[index] = ((index & 1) == 0) ? 1 : UINT8_MAX; + } + break; + } + + case TYP_SHORT: + case TYP_USHORT: + { + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u16[index] = ((index & 1) == 0) ? 1 : UINT16_MAX; + } + break; + } + + case TYP_INT: + case TYP_UINT: + { + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u32[index] = ((index & 1) == 0) ? 1 : UINT32_MAX; + } + break; + } + + case TYP_LONG: + case TYP_ULONG: + { + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u64[index] = ((index & 1) == 0) ? 
1 : UINT64_MAX; + } + break; + } + + case TYP_FLOAT: + { + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.f32[index] = ((index & 1) == 0) ? 1.0f : -1.0f; + } + break; + } + + case TYP_DOUBLE: + { + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.f64[index] = ((index & 1) == 0) ? 1.0 : -1.0; + } + break; + } + + default: + { + unreached(); + } + } + + return vecCon; +} + GenTree* Compiler::gtNewSimdDotProdNode( var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize) { @@ -26761,6 +27101,368 @@ GenTree* Compiler::gtNewSimdNarrowNode( #endif // !TARGET_XARCH && !TARGET_ARM64 } +//---------------------------------------------------------------------------------------------- +// Compiler::gtNewSimdConcatNode: Creates a new simd ConcatLowerLower/... node +// +// Arguments: +// type - The return type of SIMD node being created +// op1 - The vector that supplies the lower half +// op2 - The vector that supplies the upper half +// simdBaseType - The base type of SIMD type of the intrinsic +// simdSize - The size of the SIMD type of the intrinsic +// leftUpper - Whether the lower result half comes from the upper half of op1 +// rightUpper - Whether the upper result half comes from the upper half of op2 +// +// Returns: +// The created concat node +// +GenTree* Compiler::gtNewSimdConcatNode(var_types type, + GenTree* op1, + GenTree* op2, + var_types simdBaseType, + unsigned simdSize, + bool leftUpper, + bool rightUpper) +{ + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + assert(varTypeIsArithmetic(simdBaseType)); + + uint32_t simdCount = getSIMDVectorLength(simdSize, simdBaseType); + uint32_t lowerCount = (simdCount + 1) / 2; + + if (simdCount == 1) + { + return gtWrapWithSideEffects(op1, op2, GTF_ALL_EFFECT); + } + +#if defined(TARGET_XARCH) + if (simdSize == 16) +#elif defined(TARGET_ARM64) + if (simdSize == 8) +#else +#error Unsupported platform +#endif 
// !TARGET_XARCH && !TARGET_ARM64 + { + unsigned wideSimdSize = simdSize * 2; + var_types wideType = getSIMDTypeForSize(wideSimdSize); + GenTreeVecCon* shuffle = gtNewVconNode(wideType); + uint32_t wideCount = getSIMDVectorLength(wideSimdSize, simdBaseType); + uint32_t leftStart = leftUpper ? simdCount - lowerCount : 0; + uint32_t rightStart = rightUpper ? simdCount - lowerCount : 0; + + for (uint32_t index = 0; index < wideCount; index++) + { + uint32_t shuffleIndex = 0; + + if (index < simdCount) + { + shuffleIndex = (index < lowerCount) ? leftStart + index : simdCount + rightStart + index - lowerCount; + } + + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: + shuffle->gtSimdVal.u8[index] = static_cast(shuffleIndex); + break; + + case TYP_SHORT: + case TYP_USHORT: + shuffle->gtSimdVal.u16[index] = static_cast(shuffleIndex); + break; + + case TYP_INT: + case TYP_UINT: + case TYP_FLOAT: + shuffle->gtSimdVal.u32[index] = shuffleIndex; + break; + + case TYP_LONG: + case TYP_ULONG: + case TYP_DOUBLE: + shuffle->gtSimdVal.u64[index] = shuffleIndex; + break; + + default: + unreached(); + } + } + + assert(IsValidForShuffle(shuffle, wideSimdSize, simdBaseType, nullptr, false)); + +#if defined(TARGET_XARCH) + GenTree* result = + gtNewSimdHWIntrinsicNode(wideType, op1, NI_Vector128_ToVector256Unsafe, simdBaseType, simdSize); +#elif defined(TARGET_ARM64) + GenTree* result = + gtNewSimdHWIntrinsicNode(wideType, op1, NI_Vector64_ToVector128Unsafe, simdBaseType, simdSize); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + + result = gtNewSimdWithUpperNode(wideType, result, op2, simdBaseType, wideSimdSize); + result = gtNewSimdShuffleNode(wideType, result, shuffle, simdBaseType, wideSimdSize, false); + + return gtNewSimdGetLowerNode(type, result, simdBaseType, wideSimdSize); + } + + var_types halfType = getSIMDTypeForSize(simdSize / 2); + GenTree* lower = leftUpper ? 
gtNewSimdGetUpperNode(halfType, op1, simdBaseType, simdSize) + : gtNewSimdGetLowerNode(halfType, op1, simdBaseType, simdSize); + GenTree* upper = rightUpper ? gtNewSimdGetUpperNode(halfType, op2, simdBaseType, simdSize) + : gtNewSimdGetLowerNode(halfType, op2, simdBaseType, simdSize); + +#if defined(TARGET_XARCH) + GenTree* result = + (simdSize == 32) + ? gtNewSimdHWIntrinsicNode(type, lower, NI_Vector128_ToVector256Unsafe, simdBaseType, simdSize / 2) + : gtNewSimdHWIntrinsicNode(type, lower, NI_Vector256_ToVector512Unsafe, simdBaseType, simdSize / 2); +#elif defined(TARGET_ARM64) + GenTree* result = gtNewSimdHWIntrinsicNode(type, lower, NI_Vector64_ToVector128Unsafe, simdBaseType, simdSize / 2); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + + return gtNewSimdWithUpperNode(type, result, upper, simdBaseType, simdSize); +} + +//---------------------------------------------------------------------------------------------- +// Compiler::gtNewSimdZipNode: Creates a new simd ZipLower/ZipUpper node +// +// Arguments: +// type - The return type of SIMD node being created +// op1 - The vector that supplies even-indexed elements +// op2 - The vector that supplies odd-indexed elements +// simdBaseType - The base type of SIMD type of the intrinsic +// simdSize - The size of the SIMD type of the intrinsic +// upper - Whether to zip the upper halves +// +// Returns: +// The created zip node +// +GenTree* Compiler::gtNewSimdZipNode( + var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize, bool upper) +{ + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + assert(varTypeIsArithmetic(simdBaseType)); + + if (getSIMDVectorLength(simdSize, simdBaseType) == 1) + { + return gtWrapWithSideEffects(op1, op2, GTF_ALL_EFFECT); + } + +#if defined(TARGET_XARCH) + if (simdSize == 16) + { + NamedIntrinsic intrinsic = upper ? 
NI_X86Base_UnpackHigh : NI_X86Base_UnpackLow; + return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseType, simdSize); + } +#elif defined(TARGET_ARM64) + NamedIntrinsic intrinsic = upper ? NI_AdvSimd_Arm64_ZipHigh : NI_AdvSimd_Arm64_ZipLow; + return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseType, simdSize); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + +#if defined(TARGET_XARCH) + var_types halfType = getSIMDTypeForSize(simdSize / 2); + GenTree* left = upper ? gtNewSimdGetUpperNode(halfType, op1, simdBaseType, simdSize) + : gtNewSimdGetLowerNode(halfType, op1, simdBaseType, simdSize); + GenTree* right = upper ? gtNewSimdGetUpperNode(halfType, op2, simdBaseType, simdSize) + : gtNewSimdGetLowerNode(halfType, op2, simdBaseType, simdSize); + + GenTree* leftDup = fgMakeMultiUse(&left); + GenTree* rightDup = fgMakeMultiUse(&right); + + GenTree* lower = gtNewSimdZipNode(halfType, left, right, simdBaseType, simdSize / 2, false); + GenTree* higher = gtNewSimdZipNode(halfType, leftDup, rightDup, simdBaseType, simdSize / 2, true); + + GenTree* result = + (simdSize == 32) + ? 
gtNewSimdHWIntrinsicNode(type, lower, NI_Vector128_ToVector256Unsafe, simdBaseType, simdSize / 2) + : gtNewSimdHWIntrinsicNode(type, lower, NI_Vector256_ToVector512Unsafe, simdBaseType, simdSize / 2); + return gtNewSimdWithUpperNode(type, result, higher, simdBaseType, simdSize); +#endif // TARGET_XARCH +} + +//---------------------------------------------------------------------------------------------- +// Compiler::gtNewSimdUnzipNode: Creates a new simd UnzipEven/UnzipOdd node +// +// Arguments: +// type - The return type of SIMD node being created +// op1 - The vector that supplies the lower half +// op2 - The vector that supplies the upper half +// simdBaseType - The base type of SIMD type of the intrinsic +// simdSize - The size of the SIMD type of the intrinsic +// odd - Whether to unzip odd-indexed elements +// +// Returns: +// The created unzip node +// +GenTree* Compiler::gtNewSimdUnzipNode( + var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize, bool odd) +{ + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + assert(varTypeIsArithmetic(simdBaseType)); + + uint32_t simdCount = getSIMDVectorLength(simdSize, simdBaseType); + + if (simdCount == 1) + { + GenTree* result = odd ? gtNewZeroConNode(type) : op1; + result = gtWrapWithSideEffects(result, odd ? op1 : op2, GTF_ALL_EFFECT); + return odd ? gtWrapWithSideEffects(result, op2, GTF_ALL_EFFECT) : result; + } + +#if defined(TARGET_ARM64) + NamedIntrinsic intrinsic = odd ? NI_AdvSimd_Arm64_UnzipOdd : NI_AdvSimd_Arm64_UnzipEven; + return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseType, simdSize); +#elif defined(TARGET_XARCH) + if (simdSize == 16) + { + unsigned wideSimdSize = simdSize * 2; + var_types wideType = getSIMDTypeForSize(wideSimdSize); + GenTreeVecCon* shuffle = gtNewVconNode(wideType); + uint32_t wideCount = getSIMDVectorLength(wideSimdSize, simdBaseType); + uint32_t start = odd ? 
1 : 0; + uint32_t lowerCount = (simdCount - start + 1) / 2; + + for (uint32_t index = 0; index < wideCount; index++) + { + uint32_t shuffleIndex = 0; + + if (index < simdCount) + { + shuffleIndex = + (index < lowerCount) ? start + (index * 2) : simdCount + start + ((index - lowerCount) * 2); + } + + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: + shuffle->gtSimdVal.u8[index] = static_cast(shuffleIndex); + break; + + case TYP_SHORT: + case TYP_USHORT: + shuffle->gtSimdVal.u16[index] = static_cast(shuffleIndex); + break; + + case TYP_INT: + case TYP_UINT: + case TYP_FLOAT: + shuffle->gtSimdVal.u32[index] = shuffleIndex; + break; + + case TYP_LONG: + case TYP_ULONG: + case TYP_DOUBLE: + shuffle->gtSimdVal.u64[index] = shuffleIndex; + break; + + default: + unreached(); + } + } + + assert(IsValidForShuffle(shuffle, wideSimdSize, simdBaseType, nullptr, false)); + + GenTree* result = + gtNewSimdHWIntrinsicNode(wideType, op1, NI_Vector128_ToVector256Unsafe, simdBaseType, simdSize); + result = gtNewSimdWithUpperNode(wideType, result, op2, simdBaseType, wideSimdSize); + result = gtNewSimdShuffleNode(wideType, result, shuffle, simdBaseType, wideSimdSize, false); + + return gtNewSimdGetLowerNode(type, result, simdBaseType, wideSimdSize); + } + + var_types halfType = getSIMDTypeForSize(simdSize / 2); + GenTree* op1Lower = gtNewSimdGetLowerNode(halfType, op1, simdBaseType, simdSize); + GenTree* op1Upper = gtNewSimdGetUpperNode(halfType, op1, simdBaseType, simdSize); + GenTree* op2Lower = gtNewSimdGetLowerNode(halfType, op2, simdBaseType, simdSize); + GenTree* op2Upper = gtNewSimdGetUpperNode(halfType, op2, simdBaseType, simdSize); + + GenTree* lower = gtNewSimdUnzipNode(halfType, op1Lower, op1Upper, simdBaseType, simdSize / 2, odd); + GenTree* higher = gtNewSimdUnzipNode(halfType, op2Lower, op2Upper, simdBaseType, simdSize / 2, odd); + + GenTree* result = + (simdSize == 32) + ? 
gtNewSimdHWIntrinsicNode(type, lower, NI_Vector128_ToVector256Unsafe, simdBaseType, simdSize / 2) + : gtNewSimdHWIntrinsicNode(type, lower, NI_Vector256_ToVector512Unsafe, simdBaseType, simdSize / 2); + return gtNewSimdWithUpperNode(type, result, higher, simdBaseType, simdSize); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 +} + +//---------------------------------------------------------------------------------------------- +// Compiler::gtNewSimdReverseNode: Creates a new simd Reverse node +// +// Arguments: +// type - The return type of SIMD node being created +// op1 - The vector to reverse +// simdBaseType - The base type of SIMD type of the intrinsic +// simdSize - The size of the SIMD type of the intrinsic +// +// Returns: +// The created reverse node +// +GenTree* Compiler::gtNewSimdReverseNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) +{ + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + assert(varTypeIsArithmetic(simdBaseType)); + + uint32_t simdCount = getSIMDVectorLength(simdSize, simdBaseType); + + if (simdCount == 1) + { + return op1; + } + + GenTreeVecCon* shuffle = gtNewVconNode(type); + + for (uint32_t index = 0; index < simdCount; index++) + { + uint32_t shuffleIndex = simdCount - 1 - index; + + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: + shuffle->gtSimdVal.u8[index] = static_cast(shuffleIndex); + break; + + case TYP_SHORT: + case TYP_USHORT: + shuffle->gtSimdVal.u16[index] = static_cast(shuffleIndex); + break; + + case TYP_INT: + case TYP_UINT: + case TYP_FLOAT: + shuffle->gtSimdVal.u32[index] = shuffleIndex; + break; + + case TYP_LONG: + case TYP_ULONG: + case TYP_DOUBLE: + shuffle->gtSimdVal.u64[index] = shuffleIndex; + break; + + default: + unreached(); + } + } + + assert(IsValidForShuffle(shuffle, simdSize, simdBaseType, nullptr, false)); + + return gtNewSimdShuffleNode(type, op1, shuffle, simdBaseType, simdSize, false); +} + 
//------------------------------------------------------------------------ // gtNewSimdRoundNode: Creates a new simd Round node // From ead50c659e3835659497fac02d54a6d9f0730366 Mon Sep 17 00:00:00 2001 From: Steven He Date: Sun, 3 May 2026 20:26:04 +0900 Subject: [PATCH 05/24] Intrinsics recognization --- src/coreclr/jit/hwintrinsicarm64.cpp | 119 +++++++++++++++++ src/coreclr/jit/hwintrinsiclistarm64.h | 24 ++++ src/coreclr/jit/hwintrinsiclistxarch.h | 36 ++++++ src/coreclr/jit/hwintrinsicxarch.cpp | 170 +++++++++++++++++++++++++ 4 files changed, 349 insertions(+) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 7275c2ffe4f305..a0085e9c807c98 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1334,6 +1334,52 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector64_CreateGeometricSequence: + case NI_Vector128_CreateGeometricSequence: + { + assert(sig->numArgs == 2); + + if (!impStackTop(0).val->OperIsConst()) + { + break; + } + + if (!impStackTop(1).val->OperIsConst() && varTypeIsFloating(simdBaseType)) + { + break; + } + + if (varTypeIsLong(simdBaseType) && !impStackTop(1).val->OperIsConst() && (simdSize != 8)) + { + // TODO-ARM64-CQ: We should support long/ulong multiplication. 
+ break; + } + + impSpillSideEffect(true, stackState.esStackDepth - + 2 DEBUGARG("Spilling op1 side effects for vector CreateGeometricSequence")); + + op2 = impPopStack().val; + op1 = impPopStack().val; + + retNode = gtNewSimdCreateGeometricSequenceNode(retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector64_CreateAlternatingSequence: + case NI_Vector128_CreateAlternatingSequence: + { + assert(sig->numArgs == 2); + + impSpillSideEffect(true, stackState.esStackDepth - + 2 DEBUGARG("Spilling op1 side effects for vector CreateAlternatingSequence")); + + op2 = impPopStack().val; + op1 = impPopStack().val; + + retNode = gtNewSimdCreateAlternatingSequenceNode(retType, op1, op2, simdBaseType, simdSize); + break; + } + case NI_Vector64_CreateScalarUnsafe: case NI_Vector128_CreateScalarUnsafe: { @@ -1519,6 +1565,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector64_get_SignSequence: + case NI_Vector128_get_SignSequence: + { + assert(sig->numArgs == 0); + retNode = gtNewSimdGetSignSequenceNode(retType, simdBaseType, simdSize); + break; + } + case NI_Vector64_get_NaN: case NI_Vector128_get_NaN: { @@ -2844,6 +2898,71 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector64_ConcatLowerLower: + case NI_Vector128_ConcatLowerLower: + case NI_Vector64_ConcatLowerUpper: + case NI_Vector128_ConcatLowerUpper: + case NI_Vector64_ConcatUpperLower: + case NI_Vector128_ConcatUpperLower: + case NI_Vector64_ConcatUpperUpper: + case NI_Vector128_ConcatUpperUpper: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + bool leftUpper = + (intrinsic == NI_Vector64_ConcatUpperLower) || (intrinsic == NI_Vector128_ConcatUpperLower) || + (intrinsic == NI_Vector64_ConcatUpperUpper) || (intrinsic == NI_Vector128_ConcatUpperUpper); + bool rightUpper = + (intrinsic == NI_Vector64_ConcatLowerUpper) || (intrinsic == NI_Vector128_ConcatLowerUpper) || + (intrinsic 
== NI_Vector64_ConcatUpperUpper) || (intrinsic == NI_Vector128_ConcatUpperUpper); + + retNode = gtNewSimdConcatNode(retType, op1, op2, simdBaseType, simdSize, leftUpper, rightUpper); + break; + } + + case NI_Vector64_ZipLower: + case NI_Vector128_ZipLower: + case NI_Vector64_ZipUpper: + case NI_Vector128_ZipUpper: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + bool upper = (intrinsic == NI_Vector64_ZipUpper) || (intrinsic == NI_Vector128_ZipUpper); + retNode = gtNewSimdZipNode(retType, op1, op2, simdBaseType, simdSize, upper); + break; + } + + case NI_Vector64_UnzipEven: + case NI_Vector128_UnzipEven: + case NI_Vector64_UnzipOdd: + case NI_Vector128_UnzipOdd: + { + assert(sig->numArgs == 2); + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + bool odd = (intrinsic == NI_Vector64_UnzipOdd) || (intrinsic == NI_Vector128_UnzipOdd); + retNode = gtNewSimdUnzipNode(retType, op1, op2, simdBaseType, simdSize, odd); + break; + } + + case NI_Vector64_Reverse: + case NI_Vector128_Reverse: + { + assert(sig->numArgs == 1); + + op1 = impSIMDPopStack(); + retNode = gtNewSimdReverseNode(retType, op1, simdBaseType, simdSize); + break; + } + case NI_Vector64_op_ExclusiveOr: case NI_Vector128_op_ExclusiveOr: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index b127b61920d4b7..7d261d855a00e8 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -33,6 +33,10 @@ HARDWARE_INTRINSIC(Vector64, AsUInt16, HARDWARE_INTRINSIC(Vector64, AsUInt32, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, AsUInt64, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, Ceiling, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, ConcatLowerLower, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, ConcatLowerUpper, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, ConcatUpperLower, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, ConcatUpperUpper, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, ConditionalSelect, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, ConvertToDouble, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, ConvertToInt32, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) @@ -45,6 +49,8 @@ HARDWARE_INTRINSIC(Vector64, ConvertToUInt32Native, 
HARDWARE_INTRINSIC(Vector64, ConvertToUInt64, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, ConvertToUInt64Native, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, Create, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector64, CreateAlternatingSequence, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, CreateGeometricSequence, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, CreateScalar, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, CreateScalarUnsafe, 8, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_invalid, INS_invalid, INS_fmov, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) HARDWARE_INTRINSIC(Vector64, CreateSequence, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -96,6 +102,7 @@ HARDWARE_INTRINSIC(Vector64, 
MinNumber, HARDWARE_INTRINSIC(Vector64, MultiplyAddEstimate, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, Narrow, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, NarrowWithSaturation, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, Reverse, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, Round, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, ShiftLeft, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) @@ -111,9 +118,13 @@ HARDWARE_INTRINSIC(Vector64, ToScalar, HARDWARE_INTRINSIC(Vector64, ToVector128, 8, 1, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, ToVector128Unsafe, 8, 1, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, 
INS_mov, INS_mov}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, Truncate, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, UnzipEven, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, UnzipOdd, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, WidenLower, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, WidenUpper, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, WithElement, 8, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Vector64, ZipLower, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, ZipUpper, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, get_AllBitsSet, 8, 0, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, get_E, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, get_Epsilon, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -125,6 +136,7 @@ HARDWARE_INTRINSIC(Vector64, get_NegativeZero, HARDWARE_INTRINSIC(Vector64, get_One, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, get_Pi, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, get_PositiveInfinity, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, get_SignSequence, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, get_Tau, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, get_Zero, 8, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) 
HARDWARE_INTRINSIC(Vector64, op_Addition, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -173,6 +185,10 @@ HARDWARE_INTRINSIC(Vector128, AsVector2, HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ConcatLowerLower, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ConcatLowerUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ConcatUpperLower, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ConcatUpperUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ConditionalSelect, 
16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ConvertToDouble, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) @@ -185,6 +201,8 @@ HARDWARE_INTRINSIC(Vector128, ConvertToUInt32Native, HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, ConvertToUInt64Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, CreateAlternatingSequence, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, CreateGeometricSequence, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_fmov, INS_fmov}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) HARDWARE_INTRINSIC(Vector128, CreateSequence, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -238,6 +256,7 @@ HARDWARE_INTRINSIC(Vector128, MinNumber, HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, NarrowWithSaturation, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Reverse, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Round, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) @@ -251,11 +270,15 @@ HARDWARE_INTRINSIC(Vector128, SubtractSaturate, HARDWARE_INTRINSIC(Vector128, Sum, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, Truncate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, UnzipEven, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, UnzipOdd, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, WidenLower, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, WidenUpper, 16, 1, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, WithElement, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport) HARDWARE_INTRINSIC(Vector128, WithLower, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, WithUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, ZipLower, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ZipUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, get_E, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, get_Epsilon, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -267,6 +290,7 @@ HARDWARE_INTRINSIC(Vector128, get_NegativeZero, HARDWARE_INTRINSIC(Vector128, get_One, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, get_Pi, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, get_PositiveInfinity, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_SignSequence, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, get_Tau, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_Addition, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index de2322872c60fc..bcbc4a0b0927f1 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -51,6 +51,10 @@ HARDWARE_INTRINSIC(Vector128, AsVector2, 
HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, -1, 1, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ConcatLowerLower, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ConcatLowerUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ConcatUpperLower, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ConcatUpperUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ConditionalSelect, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) 
HARDWARE_INTRINSIC(Vector128, ConvertToDouble, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) @@ -63,6 +67,8 @@ HARDWARE_INTRINSIC(Vector128, ConvertToUInt32Native, HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, ConvertToUInt64Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, CreateAlternatingSequence, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, CreateGeometricSequence, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, 
INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, CreateSequence, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -114,6 +120,7 @@ HARDWARE_INTRINSIC(Vector128, MinNumber, HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, NarrowWithSaturation, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, Reverse, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Round, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) @@ -130,9 +137,13 @@ HARDWARE_INTRINSIC(Vector128, ToVector256, HARDWARE_INTRINSIC(Vector128, ToVector256Unsafe, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movups, INS_movupd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) HARDWARE_INTRINSIC(Vector128, ToVector512, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) HARDWARE_INTRINSIC(Vector128, Truncate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, UnzipEven, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, UnzipOdd, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, WidenLower, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, WidenUpper, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, WithElement, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ZipLower, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ZipUpper, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, get_E, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, get_Epsilon, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -144,6 +155,7 @@ HARDWARE_INTRINSIC(Vector128, get_NegativeZero, HARDWARE_INTRINSIC(Vector128, get_One, 16, 0, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, get_Pi, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, get_PositiveInfinity, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_SignSequence, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, get_Tau, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_Addition, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -188,6 +200,10 @@ HARDWARE_INTRINSIC(Vector256, AsUInt64, HARDWARE_INTRINSIC(Vector256, AsVector, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, AsVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, Ceiling, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, ConcatLowerLower, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, ConcatLowerUpper, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, ConcatUpperLower, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, ConcatUpperUpper, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, ConditionalSelect, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, ConvertToDouble, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, 
HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, ConvertToInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) @@ -200,6 +216,8 @@ HARDWARE_INTRINSIC(Vector256, ConvertToUInt32Native, HARDWARE_INTRINSIC(Vector256, ConvertToUInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, ConvertToUInt64Native, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, Create, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, CreateAlternatingSequence, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, CreateGeometricSequence, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, CreateScalar, 32, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, -1, -1, 
HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, CreateSequence, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) @@ -253,6 +271,7 @@ HARDWARE_INTRINSIC(Vector256, MinNumber, HARDWARE_INTRINSIC(Vector256, MultiplyAddEstimate, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, Narrow, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, NarrowWithSaturation, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, Reverse, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, Round, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, ShiftLeft, 32, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) @@ -268,11 +287,15 @@ HARDWARE_INTRINSIC(Vector256, ToScalar, HARDWARE_INTRINSIC(Vector256, ToVector512, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NormalizeSmallTypeToInt) HARDWARE_INTRINSIC(Vector256, ToVector512Unsafe, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, -1, -1, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NormalizeSmallTypeToInt) HARDWARE_INTRINSIC(Vector256, Truncate, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, UnzipEven, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, UnzipOdd, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, WidenLower, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, 
HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, WidenUpper, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, WithElement, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, WithLower, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, WithUpper, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, ZipLower, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, ZipUpper, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, get_AllBitsSet, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, get_E, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, get_Epsilon, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -284,6 +307,7 @@ HARDWARE_INTRINSIC(Vector256, get_NegativeZero, HARDWARE_INTRINSIC(Vector256, get_One, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, get_Pi, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, get_PositiveInfinity, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, get_SignSequence, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, get_Tau, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, get_Zero, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, op_Addition, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -328,6 +352,10 @@ HARDWARE_INTRINSIC(Vector512, AsUInt64, HARDWARE_INTRINSIC(Vector512, AsVector, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, AsVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, Ceiling, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, ConcatLowerLower, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, ConcatLowerUpper, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, ConcatUpperLower, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, ConcatUpperUpper, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, ConditionalSelect, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, ConvertToDouble, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, ConvertToInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) @@ -340,6 +368,8 @@ HARDWARE_INTRINSIC(Vector512, ConvertToUInt32Native, HARDWARE_INTRINSIC(Vector512, ConvertToUInt64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, ConvertToUInt64Native, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, Create, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, CreateAlternatingSequence, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, CreateGeometricSequence, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) 
HARDWARE_INTRINSIC(Vector512, CreateScalar, 64, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector512, CreateScalarUnsafe, 64, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector512, CreateSequence, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -394,6 +424,7 @@ HARDWARE_INTRINSIC(Vector512, MinNumber, HARDWARE_INTRINSIC(Vector512, MultiplyAddEstimate, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, Narrow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, NarrowWithSaturation, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, Reverse, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, Round, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, 
HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, ShiftLeft, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, Shuffle, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) @@ -407,11 +438,15 @@ HARDWARE_INTRINSIC(Vector512, SubtractSaturate, HARDWARE_INTRINSIC(Vector512, Sum, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, ToScalar, 64, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, -1, -1, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, Truncate, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, UnzipEven, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, UnzipOdd, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, WidenLower, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, WidenUpper, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, WithElement, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, WithLower, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, WithUpper, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, ZipLower, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, ZipUpper, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, get_AllBitsSet, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, get_E, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, get_Epsilon, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) @@ -423,6 +458,7 @@ HARDWARE_INTRINSIC(Vector512, get_NegativeZero, HARDWARE_INTRINSIC(Vector512, get_One, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, get_Pi, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, get_PositiveInfinity, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_SignSequence, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, get_Tau, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, get_Zero, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, op_Addition, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, -1, -1, HW_Category_Helper, 
HW_Flag_InvalidNodeId) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index ef52964d430eac..7a1a220f5698ef 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -2258,6 +2258,62 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector128_CreateGeometricSequence: + case NI_Vector256_CreateGeometricSequence: + case NI_Vector512_CreateGeometricSequence: + { + assert(sig->numArgs == 2); + + if (!impStackTop(0).val->OperIsConst()) + { + break; + } + + if (!impStackTop(1).val->OperIsConst() && varTypeIsFloating(simdBaseType)) + { + break; + } + + if (!impStackTop(1).val->OperIsConst() && (simdSize == 32) && varTypeIsIntegral(simdBaseType) && + !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + // We can't deal with TYP_SIMD32 for integral types if the compiler doesn't support AVX2 + break; + } + + impSpillSideEffect(true, stackState.esStackDepth - + 2 DEBUGARG("Spilling op1 side effects for vector CreateGeometricSequence")); + + op2 = impPopStack().val; + op1 = impPopStack().val; + + retNode = gtNewSimdCreateGeometricSequenceNode(retType, op1, op2, simdBaseType, simdSize); + break; + } + + case NI_Vector128_CreateAlternatingSequence: + case NI_Vector256_CreateAlternatingSequence: + case NI_Vector512_CreateAlternatingSequence: + { + assert(sig->numArgs == 2); + + if ((!impStackTop(1).val->OperIsConst() || !impStackTop(0).val->OperIsConst()) && (simdSize == 32) && + varTypeIsIntegral(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + // We can't deal with TYP_SIMD32 for integral types if the compiler doesn't support AVX2 + break; + } + + impSpillSideEffect(true, stackState.esStackDepth - + 2 DEBUGARG("Spilling op1 side effects for vector CreateAlternatingSequence")); + + op2 = impPopStack().val; + op1 = impPopStack().val; + + retNode = gtNewSimdCreateAlternatingSequenceNode(retType, op1, op2, simdBaseType, simdSize); + 
break; + } + case NI_Vector128_op_Division: case NI_Vector256_op_Division: case NI_Vector512_op_Division: @@ -2545,6 +2601,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector128_get_SignSequence: + case NI_Vector256_get_SignSequence: + case NI_Vector512_get_SignSequence: + { + assert(sig->numArgs == 0); + retNode = gtNewSimdGetSignSequenceNode(retType, simdBaseType, simdSize); + break; + } + case NI_Vector128_get_NaN: case NI_Vector256_get_NaN: case NI_Vector512_get_NaN: @@ -4167,6 +4232,111 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector128_ConcatLowerLower: + case NI_Vector256_ConcatLowerLower: + case NI_Vector512_ConcatLowerLower: + case NI_Vector128_ConcatLowerUpper: + case NI_Vector256_ConcatLowerUpper: + case NI_Vector512_ConcatLowerUpper: + case NI_Vector128_ConcatUpperLower: + case NI_Vector256_ConcatUpperLower: + case NI_Vector512_ConcatUpperLower: + case NI_Vector128_ConcatUpperUpper: + case NI_Vector256_ConcatUpperUpper: + case NI_Vector512_ConcatUpperUpper: + { + assert(sig->numArgs == 2); + + if ((simdSize == 16) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + bool leftUpper = + (intrinsic == NI_Vector128_ConcatUpperLower) || (intrinsic == NI_Vector256_ConcatUpperLower) || + (intrinsic == NI_Vector512_ConcatUpperLower) || (intrinsic == NI_Vector128_ConcatUpperUpper) || + (intrinsic == NI_Vector256_ConcatUpperUpper) || (intrinsic == NI_Vector512_ConcatUpperUpper); + bool rightUpper = + (intrinsic == NI_Vector128_ConcatLowerUpper) || (intrinsic == NI_Vector256_ConcatLowerUpper) || + (intrinsic == NI_Vector512_ConcatLowerUpper) || (intrinsic == NI_Vector128_ConcatUpperUpper) || + (intrinsic == NI_Vector256_ConcatUpperUpper) || (intrinsic == NI_Vector512_ConcatUpperUpper); + + retNode = gtNewSimdConcatNode(retType, op1, op2, simdBaseType, simdSize, leftUpper, rightUpper); + break; + 
} + + case NI_Vector128_ZipLower: + case NI_Vector256_ZipLower: + case NI_Vector512_ZipLower: + case NI_Vector128_ZipUpper: + case NI_Vector256_ZipUpper: + case NI_Vector512_ZipUpper: + { + assert(sig->numArgs == 2); + + if ((simdSize == 32) && varTypeIsIntegral(simdBaseType) && + !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + bool upper = (intrinsic == NI_Vector128_ZipUpper) || (intrinsic == NI_Vector256_ZipUpper) || + (intrinsic == NI_Vector512_ZipUpper); + retNode = gtNewSimdZipNode(retType, op1, op2, simdBaseType, simdSize, upper); + break; + } + + case NI_Vector128_UnzipEven: + case NI_Vector256_UnzipEven: + case NI_Vector512_UnzipEven: + case NI_Vector128_UnzipOdd: + case NI_Vector256_UnzipOdd: + case NI_Vector512_UnzipOdd: + { + assert(sig->numArgs == 2); + + if (!compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } + + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + bool odd = (intrinsic == NI_Vector128_UnzipOdd) || (intrinsic == NI_Vector256_UnzipOdd) || + (intrinsic == NI_Vector512_UnzipOdd); + retNode = gtNewSimdUnzipNode(retType, op1, op2, simdBaseType, simdSize, odd); + break; + } + + case NI_Vector128_Reverse: + case NI_Vector256_Reverse: + case NI_Vector512_Reverse: + { + assert(sig->numArgs == 1); + + if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } + + if ((simdSize == 64) && varTypeIsByte(simdBaseType) && + !compOpportunisticallyDependsOn(InstructionSet_AVX512v2)) + { + break; + } + + op1 = impSIMDPopStack(); + retNode = gtNewSimdReverseNode(retType, op1, simdBaseType, simdSize); + break; + } + case NI_Vector128_op_ExclusiveOr: case NI_Vector256_op_ExclusiveOr: case NI_Vector512_op_ExclusiveOr: From 281993f528d161317a11c1ff500ceb361c218ebb Mon Sep 17 00:00:00 2001 From: Steven He Date: Sun, 3 May 2026 20:58:04 +0900 Subject: [PATCH 06/24] Oops --- src/coreclr/jit/gentree.cpp | 7 +++++-- 1 file 
changed, 5 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 37fadd62e1b563..4af18e4b4dadc5 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -27379,11 +27379,14 @@ GenTree* Compiler::gtNewSimdUnzipNode( return gtNewSimdGetLowerNode(type, result, simdBaseType, wideSimdSize); } + GenTree* op1Dup = fgMakeMultiUse(&op1); + GenTree* op2Dup = fgMakeMultiUse(&op2); + var_types halfType = getSIMDTypeForSize(simdSize / 2); GenTree* op1Lower = gtNewSimdGetLowerNode(halfType, op1, simdBaseType, simdSize); - GenTree* op1Upper = gtNewSimdGetUpperNode(halfType, op1, simdBaseType, simdSize); + GenTree* op1Upper = gtNewSimdGetUpperNode(halfType, op1Dup, simdBaseType, simdSize); GenTree* op2Lower = gtNewSimdGetLowerNode(halfType, op2, simdBaseType, simdSize); - GenTree* op2Upper = gtNewSimdGetUpperNode(halfType, op2, simdBaseType, simdSize); + GenTree* op2Upper = gtNewSimdGetUpperNode(halfType, op2Dup, simdBaseType, simdSize); GenTree* lower = gtNewSimdUnzipNode(halfType, op1Lower, op1Upper, simdBaseType, simdSize / 2, odd); GenTree* higher = gtNewSimdUnzipNode(halfType, op2Lower, op2Upper, simdBaseType, simdSize / 2, odd); From 81b0d3dc83f46941eb95b9ec8ebd7479b2e2f1b0 Mon Sep 17 00:00:00 2001 From: Steven He Date: Sun, 3 May 2026 21:20:38 +0900 Subject: [PATCH 07/24] decomposition --- .../System/Runtime/Intrinsics/Vector256.cs | 179 +++++++---------- .../System/Runtime/Intrinsics/Vector512.cs | 185 ++++++++---------- 2 files changed, 154 insertions(+), 210 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index 7c5b618ffdfe17..237f56e65aa83c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -1667,18 +1667,37 @@ public static 
Vector256 CreateScalarUnsafe(T value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateGeometricSequence(T initial, [ConstantExpected] T multiplier) { - int count = Vector256.Count; - Unsafe.SkipInit(out Vector256 result); - - T value = initial; + T upperMultiplier = multiplier; - for (int index = 0; index < count; index++) + if (Vector128.Count >= 2) { - result.SetElementUnsafe(index, value); - value = Scalar.Multiply(value, multiplier); + T multiplier2 = Scalar.Multiply(multiplier, multiplier); + upperMultiplier = multiplier2; + + if (Vector128.Count >= 4) + { + T multiplier4 = Scalar.Multiply(multiplier2, multiplier2); + upperMultiplier = multiplier4; + + if (Vector128.Count >= 8) + { + T multiplier8 = Scalar.Multiply(multiplier4, multiplier4); + upperMultiplier = multiplier8; + + if (Vector128.Count >= 16) + { + upperMultiplier = Scalar.Multiply(multiplier8, multiplier8); + } + } + } } - return result; + T upperInitial = Scalar.Multiply(initial, upperMultiplier); + + return Create( + Vector128.CreateGeometricSequence(initial, multiplier), + Vector128.CreateGeometricSequence(upperInitial, multiplier) + ); } /// Creates a new instance whose elements alternate between two specified values. @@ -1690,15 +1709,12 @@ public static Vector256 CreateGeometricSequence(T initial, [ConstantExpect [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateAlternatingSequence(T even, T odd) { - int count = Vector256.Count; - Unsafe.SkipInit(out Vector256 result); + Vector128 lower = Vector128.CreateAlternatingSequence(even, odd); + Vector128 upper = ((Vector128.Count & 1) == 0) + ? Vector128.CreateAlternatingSequence(even, odd) + : Vector128.CreateAlternatingSequence(odd, even); - for (int index = 0; index < count; index++) - { - result.SetElementUnsafe(index, ((index & 1) == 0) ? 
even : odd); - } - - return result; + return Create(lower, upper); } /// Creates a new instance whose elements are the reciprocal of an arithmetic sequence. @@ -1708,7 +1724,15 @@ public static Vector256 CreateAlternatingSequence(T even, T odd) /// A new instance whose elements are initialized to one divided by the corresponding element of the arithmetic sequence. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 CreateHarmonicSequence(T start, T step) => Vector256.One / CreateSequence(start, step); + public static Vector256 CreateHarmonicSequence(T start, T step) + { + T upperStart = Scalar.Add(start, Scalar.Multiply(Scalar.Convert(Vector128.Count), step)); + + return Create( + Vector128.CreateHarmonicSequence(start, step), + Vector128.CreateHarmonicSequence(upperStart, step) + ); + } /// Creates a new instance whose elements are the square root of an arithmetic sequence. /// The type of the elements in the vector. @@ -1717,140 +1741,85 @@ public static Vector256 CreateAlternatingSequence(T even, T odd) /// A new instance whose elements are initialized to the square root of the corresponding element of the arithmetic sequence. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 CreateCauchySequence(T start, T step) => Sqrt(CreateSequence(start, step)); + public static Vector256 CreateCauchySequence(T start, T step) + { + T upperStart = Scalar.Add(start, Scalar.Multiply(Scalar.Convert(Vector128.Count), step)); + + return Create( + Vector128.CreateCauchySequence(start, step), + Vector128.CreateCauchySequence(upperStart, step) + ); + } /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 ConcatLowerLower(Vector256 left, Vector256 right) => ConcatHalves(left, right, leftUpper: false, rightUpper: false); + public static Vector256 ConcatLowerLower(Vector256 left, Vector256 right) => Create(left._lower, right._lower); /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 ConcatUpperLower(Vector256 left, Vector256 right) => ConcatHalves(left, right, leftUpper: true, rightUpper: false); + public static Vector256 ConcatUpperLower(Vector256 left, Vector256 right) => Create(left._upper, right._lower); /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 ConcatUpperUpper(Vector256 left, Vector256 right) => ConcatHalves(left, right, leftUpper: true, rightUpper: true); + public static Vector256 ConcatUpperUpper(Vector256 left, Vector256 right) => Create(left._upper, right._upper); /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 ConcatLowerUpper(Vector256 left, Vector256 right) => ConcatHalves(left, right, leftUpper: false, rightUpper: true); + public static Vector256 ConcatLowerUpper(Vector256 left, Vector256 right) => Create(left._lower, right._upper); /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 ZipLower(Vector256 left, Vector256 right) => Zip(left, right, upper: false); + public static Vector256 ZipLower(Vector256 left, Vector256 right) => Create( + 
Vector128.ZipLower(left._lower, right._lower), + Vector128.ZipUpper(left._lower, right._lower) + ); /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 ZipUpper(Vector256 left, Vector256 right) => Zip(left, right, upper: true); + public static Vector256 ZipUpper(Vector256 left, Vector256 right) => Create( + Vector128.ZipLower(left._upper, right._upper), + Vector128.ZipUpper(left._upper, right._upper) + ); /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static (Vector256 Lower, Vector256 Upper) Zip(Vector256 left, Vector256 right) => (ZipLower(left, right), ZipUpper(left, right)); - private static Vector256 Zip(Vector256 left, Vector256 right, bool upper) - { - int count = Vector256.Count; - int lowerCount = (count + 1) / 2; - int start = upper ? count - lowerCount : 0; - - Unsafe.SkipInit(out Vector256 result); - - for (int index = 0; index < count; index++) - { - int elementIndex = start + (index / 2); - T value = ((index & 1) == 0) - ? 
left.GetElementUnsafe(elementIndex) - : right.GetElementUnsafe(elementIndex); - - result.SetElementUnsafe(index, value); - } - - return result; - } - /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 UnzipEven(Vector256 left, Vector256 right) => Unzip(left, right, odd: false); + public static Vector256 UnzipEven(Vector256 left, Vector256 right) => Create( + Vector128.UnzipEven(left._lower, left._upper), + Vector128.UnzipEven(right._lower, right._upper) + ); /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 UnzipOdd(Vector256 left, Vector256 right) => Unzip(left, right, odd: true); + public static Vector256 UnzipOdd(Vector256 left, Vector256 right) => Create( + Vector128.UnzipOdd(left._lower, left._upper), + Vector128.UnzipOdd(right._lower, right._upper) + ); /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static (Vector256 Even, Vector256 Odd) Unzip(Vector256 left, Vector256 right) => (UnzipEven(left, right), UnzipOdd(left, right)); - private static Vector256 Unzip(Vector256 left, Vector256 right, bool odd) - { - int count = Vector256.Count; - int start = odd ? 1 : 0; - int lowerCount = (count - start + 1) / 2; - - if (lowerCount == 0) - { - return Vector256.Zero; - } - - Unsafe.SkipInit(out Vector256 result); - - for (int index = 0; index < count; index++) - { - T value = (index < lowerCount) - ? 
left.GetElementUnsafe(start + (index * 2)) - : right.GetElementUnsafe(start + ((index - lowerCount) * 2)); - - result.SetElementUnsafe(index, value); - } - - return result; - } - /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 Reverse(Vector256 vector) - { - int count = Vector256.Count; - Unsafe.SkipInit(out Vector256 result); - - for (int index = 0; index < count; index++) - { - result.SetElementUnsafe(index, vector.GetElementUnsafe(count - 1 - index)); - } - - return result; - } - - private static Vector256 ConcatHalves(Vector256 left, Vector256 right, bool leftUpper, bool rightUpper) - { - int count = Vector256.Count; - int lowerCount = (count + 1) / 2; - int leftStart = leftUpper ? count - lowerCount : 0; - int rightStart = rightUpper ? count - lowerCount : 0; - - Unsafe.SkipInit(out Vector256 result); - - for (int index = 0; index < count; index++) - { - T value = (index < lowerCount) - ? left.GetElementUnsafe(leftStart + index) - : right.GetElementUnsafe(rightStart + index - lowerCount); - - result.SetElementUnsafe(index, value); - } - - return result; - } + public static Vector256 Reverse(Vector256 vector) => Create( + Vector128.Reverse(vector._upper), + Vector128.Reverse(vector._lower) + ); /// [Intrinsic] diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs index 6026e3e1348665..087ff3baa510b4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs @@ -1699,18 +1699,43 @@ public static Vector512 CreateScalarUnsafe(T value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 CreateGeometricSequence(T initial, [ConstantExpected] T multiplier) { - int count = Vector512.Count; - Unsafe.SkipInit(out Vector512 result); - - T value = initial; + T 
upperMultiplier = multiplier; - for (int index = 0; index < count; index++) + if (Vector256.Count >= 2) { - result.SetElementUnsafe(index, value); - value = Scalar.Multiply(value, multiplier); + T multiplier2 = Scalar.Multiply(multiplier, multiplier); + upperMultiplier = multiplier2; + + if (Vector256.Count >= 4) + { + T multiplier4 = Scalar.Multiply(multiplier2, multiplier2); + upperMultiplier = multiplier4; + + if (Vector256.Count >= 8) + { + T multiplier8 = Scalar.Multiply(multiplier4, multiplier4); + upperMultiplier = multiplier8; + + if (Vector256.Count >= 16) + { + T multiplier16 = Scalar.Multiply(multiplier8, multiplier8); + upperMultiplier = multiplier16; + + if (Vector256.Count >= 32) + { + upperMultiplier = Scalar.Multiply(multiplier16, multiplier16); + } + } + } + } } - return result; + T upperInitial = Scalar.Multiply(initial, upperMultiplier); + + return Create( + Vector256.CreateGeometricSequence(initial, multiplier), + Vector256.CreateGeometricSequence(upperInitial, multiplier) + ); } /// Creates a new instance whose elements alternate between two specified values. @@ -1722,15 +1747,12 @@ public static Vector512 CreateGeometricSequence(T initial, [ConstantExpect [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 CreateAlternatingSequence(T even, T odd) { - int count = Vector512.Count; - Unsafe.SkipInit(out Vector512 result); + Vector256 lower = Vector256.CreateAlternatingSequence(even, odd); + Vector256 upper = ((Vector256.Count & 1) == 0) + ? Vector256.CreateAlternatingSequence(even, odd) + : Vector256.CreateAlternatingSequence(odd, even); - for (int index = 0; index < count; index++) - { - result.SetElementUnsafe(index, ((index & 1) == 0) ? even : odd); - } - - return result; + return Create(lower, upper); } /// Creates a new instance whose elements are the reciprocal of an arithmetic sequence. 
@@ -1740,7 +1762,15 @@ public static Vector512 CreateAlternatingSequence(T even, T odd) /// A new instance whose elements are initialized to one divided by the corresponding element of the arithmetic sequence. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 CreateHarmonicSequence(T start, T step) => Vector512.One / CreateSequence(start, step); + public static Vector512 CreateHarmonicSequence(T start, T step) + { + T upperStart = Scalar.Add(start, Scalar.Multiply(Scalar.Convert(Vector256.Count), step)); + + return Create( + Vector256.CreateHarmonicSequence(start, step), + Vector256.CreateHarmonicSequence(upperStart, step) + ); + } /// Creates a new instance whose elements are the square root of an arithmetic sequence. /// The type of the elements in the vector. @@ -1749,140 +1779,85 @@ public static Vector512 CreateAlternatingSequence(T even, T odd) /// A new instance whose elements are initialized to the square root of the corresponding element of the arithmetic sequence. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 CreateCauchySequence(T start, T step) => Sqrt(CreateSequence(start, step)); + public static Vector512 CreateCauchySequence(T start, T step) + { + T upperStart = Scalar.Add(start, Scalar.Multiply(Scalar.Convert(Vector256.Count), step)); + + return Create( + Vector256.CreateCauchySequence(start, step), + Vector256.CreateCauchySequence(upperStart, step) + ); + } /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 ConcatLowerLower(Vector512 left, Vector512 right) => ConcatHalves(left, right, leftUpper: false, rightUpper: false); + public static Vector512 ConcatLowerLower(Vector512 left, Vector512 right) => Create(left._lower, right._lower); /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 ConcatUpperLower(Vector512 left, Vector512 right) => ConcatHalves(left, right, leftUpper: true, rightUpper: false); + public static Vector512 ConcatUpperLower(Vector512 left, Vector512 right) => Create(left._upper, right._lower); /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 ConcatUpperUpper(Vector512 left, Vector512 right) => ConcatHalves(left, right, leftUpper: true, rightUpper: true); + public static Vector512 ConcatUpperUpper(Vector512 left, Vector512 right) => Create(left._upper, right._upper); /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 ConcatLowerUpper(Vector512 left, Vector512 right) => ConcatHalves(left, right, leftUpper: false, rightUpper: true); + public static Vector512 ConcatLowerUpper(Vector512 left, Vector512 right) => Create(left._lower, right._upper); /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 ZipLower(Vector512 left, Vector512 right) => Zip(left, right, upper: false); + public static Vector512 ZipLower(Vector512 left, Vector512 right) => Create( + 
Vector256.ZipLower(left._lower, right._lower), + Vector256.ZipUpper(left._lower, right._lower) + ); /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 ZipUpper(Vector512 left, Vector512 right) => Zip(left, right, upper: true); + public static Vector512 ZipUpper(Vector512 left, Vector512 right) => Create( + Vector256.ZipLower(left._upper, right._upper), + Vector256.ZipUpper(left._upper, right._upper) + ); /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static (Vector512 Lower, Vector512 Upper) Zip(Vector512 left, Vector512 right) => (ZipLower(left, right), ZipUpper(left, right)); - private static Vector512 Zip(Vector512 left, Vector512 right, bool upper) - { - int count = Vector512.Count; - int lowerCount = (count + 1) / 2; - int start = upper ? count - lowerCount : 0; - - Unsafe.SkipInit(out Vector512 result); - - for (int index = 0; index < count; index++) - { - int elementIndex = start + (index / 2); - T value = ((index & 1) == 0) - ? 
left.GetElementUnsafe(elementIndex) - : right.GetElementUnsafe(elementIndex); - - result.SetElementUnsafe(index, value); - } - - return result; - } - /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 UnzipEven(Vector512 left, Vector512 right) => Unzip(left, right, odd: false); + public static Vector512 UnzipEven(Vector512 left, Vector512 right) => Create( + Vector256.UnzipEven(left._lower, left._upper), + Vector256.UnzipEven(right._lower, right._upper) + ); /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 UnzipOdd(Vector512 left, Vector512 right) => Unzip(left, right, odd: true); + public static Vector512 UnzipOdd(Vector512 left, Vector512 right) => Create( + Vector256.UnzipOdd(left._lower, left._upper), + Vector256.UnzipOdd(right._lower, right._upper) + ); /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static (Vector512 Even, Vector512 Odd) Unzip(Vector512 left, Vector512 right) => (UnzipEven(left, right), UnzipOdd(left, right)); - private static Vector512 Unzip(Vector512 left, Vector512 right, bool odd) - { - int count = Vector512.Count; - int start = odd ? 1 : 0; - int lowerCount = (count - start + 1) / 2; - - if (lowerCount == 0) - { - return Vector512.Zero; - } - - Unsafe.SkipInit(out Vector512 result); - - for (int index = 0; index < count; index++) - { - T value = (index < lowerCount) - ? 
left.GetElementUnsafe(start + (index * 2)) - : right.GetElementUnsafe(start + ((index - lowerCount) * 2)); - - result.SetElementUnsafe(index, value); - } - - return result; - } - /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector512 Reverse(Vector512 vector) - { - int count = Vector512.Count; - Unsafe.SkipInit(out Vector512 result); - - for (int index = 0; index < count; index++) - { - result.SetElementUnsafe(index, vector.GetElementUnsafe(count - 1 - index)); - } - - return result; - } - - private static Vector512 ConcatHalves(Vector512 left, Vector512 right, bool leftUpper, bool rightUpper) - { - int count = Vector512.Count; - int lowerCount = (count + 1) / 2; - int leftStart = leftUpper ? count - lowerCount : 0; - int rightStart = rightUpper ? count - lowerCount : 0; - - Unsafe.SkipInit(out Vector512 result); - - for (int index = 0; index < count; index++) - { - T value = (index < lowerCount) - ? left.GetElementUnsafe(leftStart + index) - : right.GetElementUnsafe(rightStart + index - lowerCount); - - result.SetElementUnsafe(index, value); - } - - return result; - } + public static Vector512 Reverse(Vector512 vector) => Create( + Vector256.Reverse(vector._upper), + Vector256.Reverse(vector._lower) + ); /// [Intrinsic] From 716a806d900d38d25c80d7068617f5a8a5ce2ad6 Mon Sep 17 00:00:00 2001 From: Steven He Date: Sun, 3 May 2026 22:55:19 +0900 Subject: [PATCH 08/24] Some CQ improvements --- .../System/Runtime/Intrinsics/Vector256.cs | 45 +++++++++++- .../System/Runtime/Intrinsics/Vector512.cs | 68 ++++++++++++++++++- 2 files changed, 109 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index 237f56e65aa83c..a758fcb983a313 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ 
b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -1726,6 +1726,11 @@ public static Vector256 CreateAlternatingSequence(T even, T odd) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateHarmonicSequence(T start, T step) { + if (IsHardwareAccelerated) + { + return Vector256.One / CreateSequence(start, step); + } + T upperStart = Scalar.Add(start, Scalar.Multiply(Scalar.Convert(Vector128.Count), step)); return Create( @@ -1743,6 +1748,11 @@ public static Vector256 CreateHarmonicSequence(T start, T step) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateCauchySequence(T start, T step) { + if (IsHardwareAccelerated) + { + return Sqrt(CreateSequence(start, step)); + } + T upperStart = Scalar.Add(start, Scalar.Multiply(Scalar.Convert(Vector128.Count), step)); return Create( @@ -1790,7 +1800,21 @@ public static Vector256 ZipUpper(Vector256 left, Vector256 right) => /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static (Vector256 Lower, Vector256 Upper) Zip(Vector256 left, Vector256 right) => (ZipLower(left, right), ZipUpper(left, right)); + public static (Vector256 Lower, Vector256 Upper) Zip(Vector256 left, Vector256 right) + { + if (Avx2.IsSupported && ((typeof(T) == typeof(int)) || (typeof(T) == typeof(uint)))) + { + Vector256 lower = Avx2.UnpackLow(left.AsInt32(), right.AsInt32()); + Vector256 upper = Avx2.UnpackHigh(left.AsInt32(), right.AsInt32()); + + return ( + Avx2.Permute2x128(lower, upper, 0x20).As(), + Avx2.Permute2x128(lower, upper, 0x31).As() + ); + } + + return (ZipLower(left, right), ZipUpper(left, right)); + } /// [Intrinsic] @@ -1811,7 +1835,24 @@ public static Vector256 UnzipOdd(Vector256 left, Vector256 right) => /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static (Vector256 Even, Vector256 Odd) Unzip(Vector256 left, Vector256 right) => (UnzipEven(left, right), UnzipOdd(left, right)); + public 
static (Vector256 Even, Vector256 Odd) Unzip(Vector256 left, Vector256 right) + { + if (Avx2.IsSupported && ((typeof(T) == typeof(int)) || (typeof(T) == typeof(uint)))) + { + Vector256 leftUnzip = Avx2.Shuffle(left.AsInt32(), 0xD8); + leftUnzip = Avx2.Permute4x64(leftUnzip.AsInt64(), 0xD8).AsInt32(); + + Vector256 rightUnzip = Avx2.Shuffle(right.AsInt32(), 0xD8); + rightUnzip = Avx2.Permute4x64(rightUnzip.AsInt64(), 0xD8).AsInt32(); + + return ( + Avx2.Permute2x128(leftUnzip, rightUnzip, 0x20).As(), + Avx2.Permute2x128(leftUnzip, rightUnzip, 0x31).As() + ); + } + + return (UnzipEven(left, right), UnzipOdd(left, right)); + } /// [Intrinsic] diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs index 087ff3baa510b4..77fd2af26f4c13 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs @@ -6,6 +6,7 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; namespace System.Runtime.Intrinsics { @@ -1764,6 +1765,11 @@ public static Vector512 CreateAlternatingSequence(T even, T odd) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 CreateHarmonicSequence(T start, T step) { + if (IsHardwareAccelerated) + { + return Vector512.One / CreateSequence(start, step); + } + T upperStart = Scalar.Add(start, Scalar.Multiply(Scalar.Convert(Vector256.Count), step)); return Create( @@ -1781,6 +1787,11 @@ public static Vector512 CreateHarmonicSequence(T start, T step) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 CreateCauchySequence(T start, T step) { + if (IsHardwareAccelerated) + { + return Sqrt(CreateSequence(start, step)); + } + T upperStart = Scalar.Add(start, 
Scalar.Multiply(Scalar.Convert(Vector256.Count), step)); return Create( @@ -1828,7 +1839,32 @@ public static Vector512 ZipUpper(Vector512 left, Vector512 right) => /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static (Vector512 Lower, Vector512 Upper) Zip(Vector512 left, Vector512 right) => (ZipLower(left, right), ZipUpper(left, right)); + public static (Vector512 Lower, Vector512 Upper) Zip(Vector512 left, Vector512 right) + { + if (Avx512F.IsSupported && ((typeof(T) == typeof(int)) || (typeof(T) == typeof(uint)))) + { + Vector512 lower = Avx512F.UnpackLow(left.AsInt32(), right.AsInt32()); + Vector512 upper = Avx512F.UnpackHigh(left.AsInt32(), right.AsInt32()); + + Vector512 lowerResult = Avx512F.Shuffle4x128(lower, upper, 0x44); + lowerResult = Avx512F.Shuffle4x128(lowerResult, lowerResult, 0xD8); + + Vector512 upperResult = Avx512F.Shuffle4x128(lower, upper, 0xEE); + upperResult = Avx512F.Shuffle4x128(upperResult, upperResult, 0xD8); + + return (lowerResult.As(), upperResult.As()); + } + + if (IsHardwareAccelerated) + { + return (ZipLower(left, right), ZipUpper(left, right)); + } + + (Vector256 lower0, Vector256 upper0) = Vector256.Zip(left._lower, right._lower); + (Vector256 lower1, Vector256 upper1) = Vector256.Zip(left._upper, right._upper); + + return (Create(lower0, upper0), Create(lower1, upper1)); + } /// [Intrinsic] @@ -1849,7 +1885,35 @@ public static Vector512 UnzipOdd(Vector512 left, Vector512 right) => /// [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static (Vector512 Even, Vector512 Odd) Unzip(Vector512 left, Vector512 right) => (UnzipEven(left, right), UnzipOdd(left, right)); + public static (Vector512 Even, Vector512 Odd) Unzip(Vector512 left, Vector512 right) + { + if (Avx512F.IsSupported && ((typeof(T) == typeof(int)) || (typeof(T) == typeof(uint)))) + { + Vector512 evenIndices = Vector512.Create( + 0, 2, 4, 6, 8, 10, 12, 14, + 16, 18, 20, 22, 24, 26, 28, 30 + ); + Vector512 oddIndices = 
Vector512.Create( + 1, 3, 5, 7, 9, 11, 13, 15, + 17, 19, 21, 23, 25, 27, 29, 31 + ); + + Vector512 even = Avx512F.PermuteVar16x32x2(left.AsInt32(), evenIndices, right.AsInt32()); + Vector512 odd = Avx512F.PermuteVar16x32x2(left.AsInt32(), oddIndices, right.AsInt32()); + + return (even.As(), odd.As()); + } + + if (IsHardwareAccelerated) + { + return (UnzipEven(left, right), UnzipOdd(left, right)); + } + + (Vector256 even0, Vector256 odd0) = Vector256.Unzip(left._lower, left._upper); + (Vector256 even1, Vector256 odd1) = Vector256.Unzip(right._lower, right._upper); + + return (Create(even0, even1), Create(odd0, odd1)); + } /// [Intrinsic] From 6a82e6a038c90cdc59edb2a8f7ede2ca904b62fe Mon Sep 17 00:00:00 2001 From: Steven He Date: Sun, 3 May 2026 23:25:07 +0900 Subject: [PATCH 09/24] Some intrinsic codegen improvements --- src/coreclr/jit/gentree.cpp | 220 ++++++++++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 4af18e4b4dadc5..e2e10b267903e5 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -24541,6 +24541,9 @@ GenTree* Compiler::gtNewSimdCreateGeometricSequenceNode( assert(varTypeIsArithmetic(simdBaseType)); assert(op2->OperIsConst()); + // op2 is expected to be constant. When op1 is also constant the whole sequence can be folded + // to a constant; otherwise build the constant multiplier vector and leave one broadcast+multiply. + GenTreeVecCon* vecCon = gtNewVconNode(type); uint32_t simdCount = getSIMDVectorLength(simdSize, simdBaseType); bool isPartial = !op1->OperIsConst(); @@ -24666,6 +24669,9 @@ GenTree* Compiler::gtNewSimdCreateAlternatingSequenceNode( assert(getSIMDTypeForSize(simdSize) == type); assert(varTypeIsArithmetic(simdBaseType)); + // Fold constant pairs directly. Otherwise build two broadcasts and zip them, except where + // the target has a better way to broadcast the two-lane pattern directly. 
+ uint32_t simdCount = getSIMDVectorLength(simdSize, simdBaseType); if (simdCount == 1) @@ -24765,6 +24771,30 @@ GenTree* Compiler::gtNewSimdCreateAlternatingSequenceNode( return vecCon; } +#if defined(TARGET_XARCH) + if (((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)) && + ((compOpportunisticallyDependsOn(InstructionSet_AVX2) && ((simdSize == 16) || (simdSize == 32))) || + (compOpportunisticallyDependsOn(InstructionSet_AVX512) && (simdSize == 64)))) + { + // var pattern = Vector128.CreateScalarUnsafe(op1).WithElement(1, op2); + // return Broadcast(pattern.AsInt64()).As(); + + GenTree* pattern = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, simdBaseType, 16); + pattern = gtNewSimdWithElementNode(TYP_SIMD16, pattern, gtNewIconNode(1), op2, simdBaseType, 16); + + if (simdSize == 64) + { + return gtNewSimdHWIntrinsicNode(type, pattern, NI_AVX512_BroadcastPairScalarToVector512, simdBaseType, + simdSize); + } + + var_types broadcastBaseType = (simdBaseType == TYP_INT) ? TYP_LONG : TYP_ULONG; + NamedIntrinsic broadcast = + (simdSize == 16) ? NI_AVX2_BroadcastScalarToVector128 : NI_AVX2_BroadcastScalarToVector256; + return gtNewSimdHWIntrinsicNode(type, pattern, broadcast, broadcastBaseType, simdSize); + } +#endif // TARGET_XARCH + GenTree* even = gtNewSimdCreateBroadcastNode(type, op1, simdBaseType, simdSize); GenTree* odd = gtNewSimdCreateBroadcastNode(type, op2, simdBaseType, simdSize); @@ -27136,6 +27166,53 @@ GenTree* Compiler::gtNewSimdConcatNode(var_types type, return gtWrapWithSideEffects(op1, op2, GTF_ALL_EFFECT); } +#if defined(TARGET_ARM64) + if ((simdSize == 8) && (simdCount == 2) && + ((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT) || (simdBaseType == TYP_FLOAT))) + { + // var result = op1; + // if (leftUpper) + // { + // result = result.WithElement(0, result.GetElement(1)); + // } + // return result.WithElement(1, op2.GetElement(rightUpper ? 
1 : 0)); + + GenTree* result = op1; + + if (leftUpper) + { + GenTree* resultDup = fgMakeMultiUse(&result); + result = gtNewSimdHWIntrinsicNode(type, result, gtNewIconNode(0), resultDup, gtNewIconNode(1), + NI_AdvSimd_Arm64_InsertSelectedScalar, simdBaseType, simdSize); + } + + return gtNewSimdHWIntrinsicNode(type, result, gtNewIconNode(1), op2, gtNewIconNode(rightUpper ? 1 : 0), + NI_AdvSimd_Arm64_InsertSelectedScalar, simdBaseType, simdSize); + } + + if (simdSize == 16) + { + // var result = op1.AsUInt64(); + // if (leftUpper) + // { + // result = result.WithElement(0, result.GetElement(1)); + // } + // return result.WithElement(1, op2.AsUInt64().GetElement(rightUpper ? 1 : 0)).As(); + + GenTree* result = op1; + + if (leftUpper) + { + GenTree* resultDup = fgMakeMultiUse(&result); + result = gtNewSimdHWIntrinsicNode(type, result, gtNewIconNode(0), resultDup, gtNewIconNode(1), + NI_AdvSimd_Arm64_InsertSelectedScalar, TYP_ULONG, simdSize); + } + + return gtNewSimdHWIntrinsicNode(type, result, gtNewIconNode(1), op2, gtNewIconNode(rightUpper ? 1 : 0), + NI_AdvSimd_Arm64_InsertSelectedScalar, TYP_ULONG, simdSize); + } +#endif // TARGET_ARM64 + #if defined(TARGET_XARCH) if (simdSize == 16) #elif defined(TARGET_ARM64) @@ -27144,6 +27221,29 @@ GenTree* Compiler::gtNewSimdConcatNode(var_types type, #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 { +#if defined(TARGET_XARCH) + if ((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)) + { + // return Sse.Shuffle(op1.AsSingle(), op2.AsSingle(), immediate).As(); + + uint32_t leftStart = leftUpper ? simdCount - lowerCount : 0; + uint32_t rightStart = rightUpper ? simdCount - lowerCount : 0; + unsigned immediate = 0; + + for (uint32_t index = 0; index < simdCount; index++) + { + uint32_t shuffleIndex = (index < lowerCount) ? 
leftStart + index : rightStart + index - lowerCount; + immediate |= shuffleIndex << (index * 2); + } + + return gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(immediate), NI_X86Base_Shuffle, TYP_FLOAT, + simdSize); + } +#endif // TARGET_XARCH + + // var tmp = op1.ToVectorUnsafe().WithUpper(op2); + // return Shuffle(tmp, indices).GetLower(); + unsigned wideSimdSize = simdSize * 2; var_types wideType = getSIMDTypeForSize(wideSimdSize); GenTreeVecCon* shuffle = gtNewVconNode(wideType); @@ -27207,6 +27307,10 @@ GenTree* Compiler::gtNewSimdConcatNode(var_types type, return gtNewSimdGetLowerNode(type, result, simdBaseType, wideSimdSize); } + // var lower = leftUpper ? op1.GetUpper() : op1.GetLower(); + // var upper = rightUpper ? op2.GetUpper() : op2.GetLower(); + // return lower.ToVectorUnsafe().WithUpper(upper); + var_types halfType = getSIMDTypeForSize(simdSize / 2); GenTree* lower = leftUpper ? gtNewSimdGetUpperNode(halfType, op1, simdBaseType, simdSize) : gtNewSimdGetLowerNode(halfType, op1, simdBaseType, simdSize); @@ -27259,6 +27363,28 @@ GenTree* Compiler::gtNewSimdZipNode( NamedIntrinsic intrinsic = upper ? NI_X86Base_UnpackHigh : NI_X86Base_UnpackLow; return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseType, simdSize); } + + if ((simdSize == 64) && ((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)) && + compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + // var lower = Avx512F.UnpackLow(op1, op2); + // var upper = Avx512F.UnpackHigh(op1, op2); + // return Shuffle4x128(Shuffle4x128(lower, upper, lanes), ..., SHUFFLE_WYZX); + + GenTree* op1Dup = fgMakeMultiUse(&op1); + GenTree* op2Dup = fgMakeMultiUse(&op2); + + GenTree* lower = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512_UnpackLow, simdBaseType, simdSize); + GenTree* higher = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_AVX512_UnpackHigh, simdBaseType, simdSize); + + unsigned lanes = upper ? 
0xEE : 0x44; + GenTree* result = gtNewSimdHWIntrinsicNode(type, lower, higher, gtNewIconNode(lanes), NI_AVX512_Shuffle4x128, + simdBaseType, simdSize); + GenTree* resultDup = fgMakeMultiUse(&result); + + return gtNewSimdHWIntrinsicNode(type, result, resultDup, gtNewIconNode(SHUFFLE_WYZX), NI_AVX512_Shuffle4x128, + simdBaseType, simdSize); + } #elif defined(TARGET_ARM64) NamedIntrinsic intrinsic = upper ? NI_AdvSimd_Arm64_ZipHigh : NI_AdvSimd_Arm64_ZipLow; return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseType, simdSize); @@ -27267,6 +27393,10 @@ GenTree* Compiler::gtNewSimdZipNode( #endif // !TARGET_XARCH && !TARGET_ARM64 #if defined(TARGET_XARCH) + // var lower = Zip(left, right, upper: false); + // var upper = Zip(left, right, upper: true); + // return lower.ToVectorUnsafe().WithUpper(upper); + var_types halfType = getSIMDTypeForSize(simdSize / 2); GenTree* left = upper ? gtNewSimdGetUpperNode(halfType, op1, simdBaseType, simdSize) : gtNewSimdGetLowerNode(halfType, op1, simdBaseType, simdSize); @@ -27318,11 +27448,24 @@ GenTree* Compiler::gtNewSimdUnzipNode( } #if defined(TARGET_ARM64) + // return odd ? AdvSimd.Arm64.UnzipOdd(op1, op2) : AdvSimd.Arm64.UnzipEven(op1, op2); + NamedIntrinsic intrinsic = odd ? NI_AdvSimd_Arm64_UnzipOdd : NI_AdvSimd_Arm64_UnzipEven; return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseType, simdSize); #elif defined(TARGET_XARCH) if (simdSize == 16) { + if ((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)) + { + // return Sse.Shuffle(op1.AsSingle(), op2.AsSingle(), odd ? 0xDD : 0x88).As(); + + return gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(odd ? 
0xDD : 0x88), NI_X86Base_Shuffle, + TYP_FLOAT, simdSize); + } + + // var tmp = op1.ToVectorUnsafe().WithUpper(op2); + // return Shuffle(tmp, indices).GetLower(); + unsigned wideSimdSize = simdSize * 2; var_types wideType = getSIMDTypeForSize(wideSimdSize); GenTreeVecCon* shuffle = gtNewVconNode(wideType); @@ -27379,6 +27522,58 @@ GenTree* Compiler::gtNewSimdUnzipNode( return gtNewSimdGetLowerNode(type, result, simdBaseType, wideSimdSize); } + if ((simdSize == 32) && ((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)) && + compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + // var left = Shuffle(op1, indices); + // var right = Shuffle(op2, indices); + // return left.GetLower().ToVector256Unsafe().WithUpper(right.GetLower()); + + GenTreeVecCon* shuffle = gtNewVconNode(type); + uint32_t start = odd ? 1 : 0; + + for (uint32_t index = 0; index < simdCount; index++) + { + shuffle->gtSimdVal.u32[index] = start + (2 * (index % (simdCount / 2))); + } + + assert(IsValidForShuffle(shuffle, simdSize, simdBaseType, nullptr, false)); + + GenTree* leftShuffle = shuffle; + GenTree* rightShuffle = gtCloneExpr(shuffle); + GenTree* left = gtNewSimdShuffleNode(type, op1, leftShuffle, simdBaseType, simdSize, false); + GenTree* right = gtNewSimdShuffleNode(type, op2, rightShuffle, simdBaseType, simdSize, false); + + var_types halfType = getSIMDTypeForSize(simdSize / 2); + GenTree* lower = gtNewSimdGetLowerNode(halfType, left, simdBaseType, simdSize); + GenTree* upper = gtNewSimdGetLowerNode(halfType, right, simdBaseType, simdSize); + + GenTree* result = + gtNewSimdHWIntrinsicNode(type, lower, NI_Vector128_ToVector256Unsafe, simdBaseType, simdSize / 2); + return gtNewSimdWithUpperNode(type, result, upper, simdBaseType, simdSize); + } + + if ((simdSize == 64) && ((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)) && + compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + // return Avx512F.PermuteVar16x32x2(op1, indices, op2); + + GenTreeVecCon* shuffle = 
gtNewVconNode(type); + uint32_t start = odd ? 1 : 0; + + for (uint32_t index = 0; index < simdCount; index++) + { + shuffle->gtSimdVal.u32[index] = + (index < (simdCount / 2)) ? start + (2 * index) : simdCount + start + (2 * (index - (simdCount / 2))); + } + + return gtNewSimdHWIntrinsicNode(type, op1, shuffle, op2, NI_AVX512_PermuteVar16x32x2, simdBaseType, simdSize); + } + + // var lower = Unzip(op1.GetLower(), op1.GetUpper()); + // var upper = Unzip(op2.GetLower(), op2.GetUpper()); + // return lower.ToVectorUnsafe().WithUpper(upper); + GenTree* op1Dup = fgMakeMultiUse(&op1); GenTree* op2Dup = fgMakeMultiUse(&op2); @@ -27426,6 +27621,31 @@ GenTree* Compiler::gtNewSimdReverseNode(var_types type, GenTree* op1, var_types return op1; } +#if defined(TARGET_XARCH) + if ((simdSize == 32) && ((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)) && + compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + // var tmp = Avx2.Shuffle(op1, SHUFFLE_XYZW); + // return Avx2.Permute2x128(tmp, tmp, 1); + + GenTree* reverseInLane = + gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode(SHUFFLE_XYZW), NI_AVX2_Shuffle, simdBaseType, simdSize); + GenTree* reverseInLaneDup = fgMakeMultiUse(&reverseInLane); + + return gtNewSimdHWIntrinsicNode(type, reverseInLane, reverseInLaneDup, gtNewIconNode(1), NI_AVX2_Permute2x128, + simdBaseType, simdSize); + } +#elif defined(TARGET_ARM64) + if ((simdSize == 8) && ((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT) || (simdBaseType == TYP_FLOAT))) + { + // return AdvSimd.ReverseElement32(op1.AsInt64()).As(); + + return gtNewSimdHWIntrinsicNode(type, op1, NI_AdvSimd_ReverseElement32, TYP_LONG, simdSize); + } +#endif // TARGET_XARCH || TARGET_ARM64 + + // return Shuffle(op1, indices); + GenTreeVecCon* shuffle = gtNewVconNode(type); for (uint32_t index = 0; index < simdCount; index++) From bbb7f8438ea2d1dea084782377f765b5f64dd215 Mon Sep 17 00:00:00 2001 From: Steven He Date: Sun, 3 May 2026 23:41:20 +0900 Subject: [PATCH 10/24] Make the git 
anchor happy --- src/coreclr/jit/gentree.cpp | 2928 +++++++++++++++++------------------ 1 file changed, 1464 insertions(+), 1464 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index e2e10b267903e5..53a2240a3b2b11 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -24520,46 +24520,221 @@ GenTree* Compiler::gtNewSimdCreateSequenceNode( return result; } +GenTree* Compiler::gtNewSimdDotProdNode( + var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize) +{ + var_types simdType = getSIMDTypeForSize(simdSize); + assert(varTypeIsSIMD(simdType)); + + assert(op1 != nullptr); + assert(op1->TypeIs(simdType)); + + assert(op2 != nullptr); + assert(op2->TypeIs(simdType)); + + assert(varTypeIsSIMD(type)); + + NamedIntrinsic intrinsic = NI_Illegal; + +#if defined(TARGET_XARCH) + assert(!varTypeIsByte(simdBaseType) && !varTypeIsLong(simdBaseType)); + assert(simdSize != 64); + + if (simdSize == 32) + { + assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2)); + intrinsic = NI_Vector256_Dot; + } + else + { + intrinsic = NI_Vector128_Dot; + } +#elif defined(TARGET_ARM64) + assert(!varTypeIsLong(simdBaseType)); + intrinsic = (simdSize == 8) ? 
NI_Vector64_Dot : NI_Vector128_Dot; +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + + assert(intrinsic != NI_Illegal); + return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseType, simdSize); +} + +GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) +{ + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + + assert(op1 != nullptr); + assert(op1->TypeIs(type)); + + assert(varTypeIsFloating(simdBaseType)); + + NamedIntrinsic intrinsic = NI_Illegal; + +#if defined(TARGET_XARCH) + if (simdSize == 32) + { + intrinsic = NI_AVX_Floor; + } + else if (simdSize == 64) + { + GenTree* op2 = gtNewIconNode(static_cast(FloatRoundingMode::ToNegativeInfinity)); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512_RoundScale, simdBaseType, simdSize); + } + else + { + intrinsic = NI_X86Base_Floor; + } +#elif defined(TARGET_ARM64) + if (simdBaseType == TYP_DOUBLE) + { + intrinsic = (simdSize == 8) ? 
NI_AdvSimd_FloorScalar : NI_AdvSimd_Arm64_Floor; + } + else + { + intrinsic = NI_AdvSimd_Floor; + } +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + + assert(intrinsic != NI_Illegal); + return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseType, simdSize); +} + +GenTree* Compiler::gtNewSimdFmaNode( + var_types type, GenTree* op1, GenTree* op2, GenTree* op3, var_types simdBaseType, unsigned simdSize) +{ + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + + assert(op1 != nullptr); + assert(op1->TypeIs(type)); + + assert(op2 != nullptr); + assert(op2->TypeIs(type)); + + assert(op3 != nullptr); + assert(op3->TypeIs(type)); + + assert(varTypeIsFloating(simdBaseType)); + + NamedIntrinsic intrinsic = NI_Illegal; + +#if defined(TARGET_XARCH) + if (simdSize == 64) + { + intrinsic = NI_AVX512_FusedMultiplyAdd; + } + else + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); + intrinsic = NI_AVX2_MultiplyAdd; + } +#elif defined(TARGET_ARM64) + if (simdBaseType == TYP_DOUBLE) + { + intrinsic = (simdSize == 8) ? 
NI_AdvSimd_FusedMultiplyAddScalar : NI_AdvSimd_Arm64_FusedMultiplyAdd; + } + else + { + intrinsic = NI_AdvSimd_FusedMultiplyAdd; + } + + // AdvSimd.FusedMultiplyAdd expects (addend, left, right), while the APIs take (left, right, addend) + // We expect op1 and op2 to have already been spilled + + std::swap(op1, op3); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + + assert(intrinsic != NI_Illegal); + return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseType, simdSize); +} + +GenTree* Compiler::gtNewSimdGetElementNode( + var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize) +{ + NamedIntrinsic intrinsicId = NI_Vector128_GetElement; + + assert(varTypeIsArithmetic(simdBaseType)); + +#if defined(TARGET_XARCH) + if (op2->IsIntegralConst(0)) + { + return gtNewSimdToScalarNode(type, op1, simdBaseType, simdSize); + } + + if (simdSize == 64) + { + intrinsicId = NI_Vector512_GetElement; + } + else if (simdSize == 32) + { + intrinsicId = NI_Vector256_GetElement; + } +#elif defined(TARGET_ARM64) + if (op2->IsIntegralConst(0)) + { + return gtNewSimdToScalarNode(type, op1, simdBaseType, simdSize); + } + + if (simdSize == 8) + { + intrinsicId = NI_Vector64_GetElement; + } +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + + int immUpperBound = getSIMDVectorLength(simdSize, simdBaseType) - 1; + bool rangeCheckNeeded = !op2->OperIsConst(); + + if (!rangeCheckNeeded) + { + ssize_t imm8 = op2->AsIntCon()->IconValue(); + rangeCheckNeeded = (imm8 < 0) || (imm8 > immUpperBound); + } + + if (rangeCheckNeeded) + { + op2 = addRangeCheckForHWIntrinsic(op2, 0, immUpperBound); + } + + return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsicId, simdBaseType, simdSize); +} + //---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdCreateGeometricSequenceNode: Creates a new simd CreateGeometricSequence node +// 
Compiler::gtNewSimdGetIndicesNode: Creates a new simd get_Indices node // // Arguments: // type - The return type of SIMD node being created -// op1 - The initial value -// op2 - The multiplier value // simdBaseType - The base type of SIMD type of the intrinsic // simdSize - The size of the SIMD type of the intrinsic // // Returns: -// The created CreateGeometricSequence node +// The created get_Indices node // -GenTree* Compiler::gtNewSimdCreateGeometricSequenceNode( - var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize) +GenTree* Compiler::gtNewSimdGetIndicesNode(var_types type, var_types simdBaseType, unsigned simdSize) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); - assert(varTypeIsArithmetic(simdBaseType)); - assert(op2->OperIsConst()); - // op2 is expected to be constant. When op1 is also constant the whole sequence can be folded - // to a constant; otherwise build the constant multiplier vector and leave one broadcast+multiply. + assert(varTypeIsArithmetic(simdBaseType)); - GenTreeVecCon* vecCon = gtNewVconNode(type); - uint32_t simdCount = getSIMDVectorLength(simdSize, simdBaseType); - bool isPartial = !op1->OperIsConst(); + GenTreeVecCon* indices = gtNewVconNode(type); + uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType); switch (simdBaseType) { case TYP_BYTE: case TYP_UBYTE: { - uint64_t initial = isPartial ? 1 : static_cast(op1->AsIntConCommon()->IntegralValue()); - uint64_t multiplier = static_cast(op2->AsIntConCommon()->IntegralValue()); - - for (uint32_t index = 0; index < simdCount; index++) + for (uint32_t index = 0; index < simdLength; index++) { - vecCon->gtSimdVal.u8[index] = static_cast(initial); - initial *= multiplier; + indices->gtSimdVal.u8[index] = static_cast(index); } break; } @@ -24567,13 +24742,9 @@ GenTree* Compiler::gtNewSimdCreateGeometricSequenceNode( case TYP_SHORT: case TYP_USHORT: { - uint64_t initial = isPartial ? 
1 : static_cast(op1->AsIntConCommon()->IntegralValue()); - uint64_t multiplier = static_cast(op2->AsIntConCommon()->IntegralValue()); - - for (uint32_t index = 0; index < simdCount; index++) + for (uint32_t index = 0; index < simdLength; index++) { - vecCon->gtSimdVal.u16[index] = static_cast(initial); - initial *= multiplier; + indices->gtSimdVal.u16[index] = static_cast(index); } break; } @@ -24581,13 +24752,9 @@ GenTree* Compiler::gtNewSimdCreateGeometricSequenceNode( case TYP_INT: case TYP_UINT: { - uint64_t initial = isPartial ? 1 : static_cast(op1->AsIntConCommon()->IntegralValue()); - uint64_t multiplier = static_cast(op2->AsIntConCommon()->IntegralValue()); - - for (uint32_t index = 0; index < simdCount; index++) + for (uint32_t index = 0; index < simdLength; index++) { - vecCon->gtSimdVal.u32[index] = static_cast(initial); - initial *= multiplier; + indices->gtSimdVal.u32[index] = static_cast(index); } break; } @@ -24595,39 +24762,27 @@ GenTree* Compiler::gtNewSimdCreateGeometricSequenceNode( case TYP_LONG: case TYP_ULONG: { - uint64_t initial = isPartial ? 1 : static_cast(op1->AsIntConCommon()->IntegralValue()); - uint64_t multiplier = static_cast(op2->AsIntConCommon()->IntegralValue()); - - for (uint32_t index = 0; index < simdCount; index++) + for (uint32_t index = 0; index < simdLength; index++) { - vecCon->gtSimdVal.u64[index] = initial; - initial *= multiplier; + indices->gtSimdVal.u64[index] = static_cast(index); } break; } case TYP_FLOAT: { - float initial = isPartial ? 1.0f : static_cast(op1->AsDblCon()->DconValue()); - float multiplier = static_cast(op2->AsDblCon()->DconValue()); - - for (uint32_t index = 0; index < simdCount; index++) + for (uint32_t index = 0; index < simdLength; index++) { - vecCon->gtSimdVal.f32[index] = initial; - initial *= multiplier; + indices->gtSimdVal.f32[index] = static_cast(index); } break; } case TYP_DOUBLE: { - double initial = isPartial ? 
1.0 : op1->AsDblCon()->DconValue(); - double multiplier = op2->AsDblCon()->DconValue(); - - for (uint32_t index = 0; index < simdCount; index++) + for (uint32_t index = 0; index < simdLength; index++) { - vecCon->gtSimdVal.f64[index] = initial; - initial *= multiplier; + indices->gtSimdVal.f64[index] = static_cast(index); } break; } @@ -24638,299 +24793,251 @@ GenTree* Compiler::gtNewSimdCreateGeometricSequenceNode( } } - GenTree* result = vecCon; + return indices; +} - if (isPartial) +GenTree* Compiler::gtNewSimdGetLowerNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) +{ + assert(varTypeIsArithmetic(simdBaseType)); + + NamedIntrinsic intrinsicId = NI_Illegal; + +#if defined(TARGET_XARCH) + if (simdSize == 32) { - GenTree* initial = gtNewSimdCreateBroadcastNode(type, op1, simdBaseType, simdSize); - result = gtNewSimdBinOpNode(GT_MUL, type, result, initial, simdBaseType, simdSize); + assert(type == TYP_SIMD16); + intrinsicId = NI_Vector256_GetLower; + } + else + { + assert((type == TYP_SIMD32) && (simdSize == 64)); + intrinsicId = NI_Vector512_GetLower; } +#elif defined(TARGET_ARM64) + assert((type == TYP_SIMD8) && (simdSize == 16)); + intrinsicId = NI_Vector128_GetLower; +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 - return result; + assert(intrinsicId != NI_Illegal); + return gtNewSimdHWIntrinsicNode(type, op1, intrinsicId, simdBaseType, simdSize); +} + +GenTree* Compiler::gtNewSimdGetUpperNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) +{ + assert(varTypeIsArithmetic(simdBaseType)); + + NamedIntrinsic intrinsicId = NI_Illegal; + +#if defined(TARGET_XARCH) + if (simdSize == 32) + { + assert(type == TYP_SIMD16); + intrinsicId = NI_Vector256_GetUpper; + } + else + { + assert((type == TYP_SIMD32) && (simdSize == 64)); + intrinsicId = NI_Vector512_GetUpper; + } +#elif defined(TARGET_ARM64) + assert((type == TYP_SIMD8) && (simdSize == 16)); + intrinsicId = NI_Vector128_GetUpper; 
+#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + + assert(intrinsicId != NI_Illegal); + return gtNewSimdHWIntrinsicNode(type, op1, intrinsicId, simdBaseType, simdSize); } //---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdCreateAlternatingSequenceNode: Creates a new simd CreateAlternatingSequence node +// Compiler::gtNewSimdIsEvenIntegerNode: Creates a new simd IsEvenInteger node // // Arguments: // type - The return type of SIMD node being created -// op1 - The even-indexed value -// op2 - The odd-indexed value +// op1 - The vector to check for even integers // simdBaseType - The base type of SIMD type of the intrinsic // simdSize - The size of the SIMD type of the intrinsic // // Returns: -// The created CreateAlternatingSequence node +// The created IsEvenInteger node // -GenTree* Compiler::gtNewSimdCreateAlternatingSequenceNode( - var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize) +GenTree* Compiler::gtNewSimdIsEvenIntegerNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); - assert(varTypeIsArithmetic(simdBaseType)); - - // Fold constant pairs directly. Otherwise build two broadcasts and zip them, except where - // the target has a better way to broadcast the two-lane pattern directly. 
- - uint32_t simdCount = getSIMDVectorLength(simdSize, simdBaseType); - - if (simdCount == 1) - { - GenTree* result = gtNewSimdCreateBroadcastNode(type, op1, simdBaseType, simdSize); - return gtWrapWithSideEffects(result, op2, GTF_ALL_EFFECT); - } - - if (op1->OperIsConst() && op2->OperIsConst()) - { - GenTreeVecCon* vecCon = gtNewVconNode(type); - - switch (simdBaseType) - { - case TYP_BYTE: - case TYP_UBYTE: - { - uint64_t even = static_cast(op1->AsIntConCommon()->IntegralValue()); - uint64_t odd = static_cast(op2->AsIntConCommon()->IntegralValue()); - - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.u8[index] = static_cast(((index & 1) == 0) ? even : odd); - } - break; - } - - case TYP_SHORT: - case TYP_USHORT: - { - uint64_t even = static_cast(op1->AsIntConCommon()->IntegralValue()); - uint64_t odd = static_cast(op2->AsIntConCommon()->IntegralValue()); - - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.u16[index] = static_cast(((index & 1) == 0) ? even : odd); - } - break; - } - - case TYP_INT: - case TYP_UINT: - { - uint64_t even = static_cast(op1->AsIntConCommon()->IntegralValue()); - uint64_t odd = static_cast(op2->AsIntConCommon()->IntegralValue()); - - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.u32[index] = static_cast(((index & 1) == 0) ? even : odd); - } - break; - } - - case TYP_LONG: - case TYP_ULONG: - { - uint64_t even = static_cast(op1->AsIntConCommon()->IntegralValue()); - uint64_t odd = static_cast(op2->AsIntConCommon()->IntegralValue()); - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.u64[index] = ((index & 1) == 0) ? 
even : odd; - } - break; - } - - case TYP_FLOAT: - { - double even = op1->AsDblCon()->DconValue(); - double odd = op2->AsDblCon()->DconValue(); + assert(op1 != nullptr); + assert(op1->TypeIs(type)); - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.f32[index] = static_cast(((index & 1) == 0) ? even : odd); - } - break; - } + assert(varTypeIsIntegral(simdBaseType)); - case TYP_DOUBLE: - { - double even = op1->AsDblCon()->DconValue(); - double odd = op2->AsDblCon()->DconValue(); + op1 = gtNewSimdBinOpNode(GT_AND, type, op1, gtNewOneConNode(type, simdBaseType), simdBaseType, simdSize); + return gtNewSimdIsZeroNode(type, op1, simdBaseType, simdSize); +} - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.f64[index] = ((index & 1) == 0) ? even : odd; - } - break; - } +//---------------------------------------------------------------------------------------------- +// Compiler::gtNewSimdIsFiniteNode: Creates a new simd IsFinite node +// +// Arguments: +// type - The return type of SIMD node being created +// op1 - The vector to check for finite values +// simdBaseType - The base type of SIMD type of the intrinsic +// simdSize - The size of the SIMD type of the intrinsic +// +// Returns: +// The created IsFinite node +// +GenTree* Compiler::gtNewSimdIsFiniteNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) +{ + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); - default: - { - unreached(); - } - } + assert(op1 != nullptr); + assert(op1->TypeIs(type)); - return vecCon; - } + assert(varTypeIsArithmetic(simdBaseType)); -#if defined(TARGET_XARCH) - if (((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)) && - ((compOpportunisticallyDependsOn(InstructionSet_AVX2) && ((simdSize == 16) || (simdSize == 32))) || - (compOpportunisticallyDependsOn(InstructionSet_AVX512) && (simdSize == 64)))) + if (varTypeIsFloating(simdBaseType)) { - // var pattern = 
Vector128.CreateScalarUnsafe(op1).WithElement(1, op2); - // return Broadcast(pattern.AsInt64()).As(); - - GenTree* pattern = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, simdBaseType, 16); - pattern = gtNewSimdWithElementNode(TYP_SIMD16, pattern, gtNewIconNode(1), op2, simdBaseType, 16); + GenTree* cnsNode; - if (simdSize == 64) + if (simdBaseType == TYP_FLOAT) { - return gtNewSimdHWIntrinsicNode(type, pattern, NI_AVX512_BroadcastPairScalarToVector512, simdBaseType, - simdSize); + simdBaseType = TYP_INT; + cnsNode = gtNewIconNode(0x7F800000); } + else + { + assert(simdBaseType == TYP_DOUBLE); - var_types broadcastBaseType = (simdBaseType == TYP_INT) ? TYP_LONG : TYP_ULONG; - NamedIntrinsic broadcast = - (simdSize == 16) ? NI_AVX2_BroadcastScalarToVector128 : NI_AVX2_BroadcastScalarToVector256; - return gtNewSimdHWIntrinsicNode(type, pattern, broadcast, broadcastBaseType, simdSize); - } -#endif // TARGET_XARCH + simdBaseType = TYP_LONG; + cnsNode = gtNewLconNode(0x7FF0000000000000); + } + cnsNode = gtNewSimdCreateBroadcastNode(type, cnsNode, simdBaseType, simdSize); - GenTree* even = gtNewSimdCreateBroadcastNode(type, op1, simdBaseType, simdSize); - GenTree* odd = gtNewSimdCreateBroadcastNode(type, op2, simdBaseType, simdSize); + op1 = gtNewSimdBinOpNode(GT_AND_NOT, type, cnsNode, op1, simdBaseType, simdSize); + return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseType, simdSize); + } - return gtNewSimdZipNode(type, even, odd, simdBaseType, simdSize, false); + assert(varTypeIsIntegral(simdBaseType)); + return gtNewAllBitsSetConNode(type); } //---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdGetSignSequenceNode: Creates a new simd SignSequence node +// Compiler::gtNewSimdIsInfinityNode: Creates a new simd IsInfinity node // // Arguments: // type - The return type of SIMD node being created +// op1 - The vector to check for infinities // simdBaseType - The base type of SIMD 
type of the intrinsic // simdSize - The size of the SIMD type of the intrinsic // // Returns: -// The created SignSequence node +// The created IsInfinity node // -GenTree* Compiler::gtNewSimdGetSignSequenceNode(var_types type, var_types simdBaseType, unsigned simdSize) +GenTree* Compiler::gtNewSimdIsInfinityNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); - assert(varTypeIsArithmetic(simdBaseType)); - GenTreeVecCon* vecCon = gtNewVconNode(type); - uint32_t simdCount = getSIMDVectorLength(simdSize, simdBaseType); + assert(op1 != nullptr); + assert(op1->TypeIs(type)); - switch (simdBaseType) + assert(varTypeIsArithmetic(simdBaseType)); + + if (varTypeIsFloating(simdBaseType)) { - case TYP_BYTE: - case TYP_UBYTE: - { - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.u8[index] = ((index & 1) == 0) ? 1 : UINT8_MAX; - } - break; - } + op1 = gtNewSimdAbsNode(type, op1, simdBaseType, simdSize); + return gtNewSimdIsPositiveInfinityNode(type, op1, simdBaseType, simdSize); + } + return gtNewZeroConNode(type); +} - case TYP_SHORT: - case TYP_USHORT: - { - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.u16[index] = ((index & 1) == 0) ? 
1 : UINT16_MAX; - } - break; - } +//---------------------------------------------------------------------------------------------- +// Compiler::gtNewSimdIsIntegerNode: Creates a new simd IsInteger node +// +// Arguments: +// type - The return type of SIMD node being created +// op1 - The vector to check for integers +// simdBaseType - The base type of SIMD type of the intrinsic +// simdSize - The size of the SIMD type of the intrinsic +// +// Returns: +// The created IsInteger node +// +GenTree* Compiler::gtNewSimdIsIntegerNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) +{ + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); - case TYP_INT: - case TYP_UINT: - { - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.u32[index] = ((index & 1) == 0) ? 1 : UINT32_MAX; - } - break; - } + assert(op1 != nullptr); + assert(op1->TypeIs(type)); - case TYP_LONG: - case TYP_ULONG: - { - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.u64[index] = ((index & 1) == 0) ? 1 : UINT64_MAX; - } - break; - } + assert(varTypeIsArithmetic(simdBaseType)); - case TYP_FLOAT: - { - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.f32[index] = ((index & 1) == 0) ? 1.0f : -1.0f; - } - break; - } + if (varTypeIsFloating(simdBaseType)) + { + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + GenTree* op1Dup2 = gtCloneExpr(op1Dup1); - case TYP_DOUBLE: - { - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.f64[index] = ((index & 1) == 0) ? 
1.0 : -1.0; - } - break; - } + op1 = gtNewSimdIsFiniteNode(type, op1, simdBaseType, simdSize); - default: - { - unreached(); - } + op1Dup1 = gtNewSimdTruncNode(type, op1Dup1, simdBaseType, simdSize); + GenTree* op2 = gtNewSimdCmpOpNode(GT_EQ, type, op1Dup1, op1Dup2, simdBaseType, simdSize); + + return gtNewSimdBinOpNode(GT_AND, type, op1, op2, simdBaseType, simdSize); } - return vecCon; + assert(varTypeIsIntegral(simdBaseType)); + return gtNewAllBitsSetConNode(type); } -GenTree* Compiler::gtNewSimdDotProdNode( - var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize) +//---------------------------------------------------------------------------------------------- +// Compiler::gtNewSimdIsNaNNode: Creates a new simd IsNaN node +// +// Arguments: +// type - The return type of SIMD node being created +// op1 - The vector to check for NaNs +// simdBaseType - The base type of SIMD type of the intrinsic +// simdSize - The size of the SIMD type of the intrinsic +// +// Returns: +// The created IsNaN node +// +GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { - var_types simdType = getSIMDTypeForSize(simdSize); - assert(varTypeIsSIMD(simdType)); - - assert(op1 != nullptr); - assert(op1->TypeIs(simdType)); - - assert(op2 != nullptr); - assert(op2->TypeIs(simdType)); - assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); - NamedIntrinsic intrinsic = NI_Illegal; + assert(op1 != nullptr); + assert(op1->TypeIs(type)); -#if defined(TARGET_XARCH) - assert(!varTypeIsByte(simdBaseType) && !varTypeIsLong(simdBaseType)); - assert(simdSize != 64); + assert(varTypeIsArithmetic(simdBaseType)); - if (simdSize == 32) - { - assert(varTypeIsFloating(simdBaseType) || compIsaSupportedDebugOnly(InstructionSet_AVX2)); - intrinsic = NI_Vector256_Dot; - } - else + if (varTypeIsFloating(simdBaseType)) { - intrinsic = NI_Vector128_Dot; + GenTree* op1Dup = fgMakeMultiUse(&op1); + 
return gtNewSimdCmpOpNode(GT_NE, type, op1, op1Dup, simdBaseType, simdSize); } -#elif defined(TARGET_ARM64) - assert(!varTypeIsLong(simdBaseType)); - intrinsic = (simdSize == 8) ? NI_Vector64_Dot : NI_Vector128_Dot; -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 - - assert(intrinsic != NI_Illegal); - return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseType, simdSize); + return gtNewZeroConNode(type); } -GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) +//---------------------------------------------------------------------------------------------- +// Compiler::gtNewSimdIsNegativeNode: Creates a new simd IsNegative node +// +// Arguments: +// type - The return type of SIMD node being created +// op1 - The vector to check for negatives +// simdBaseType - The base type of SIMD type of the intrinsic +// simdSize - The size of the SIMD type of the intrinsic +// +// Returns: +// The created IsNegative node +// +GenTree* Compiler::gtNewSimdIsNegativeNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24938,43 +25045,40 @@ GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, var_types si assert(op1 != nullptr); assert(op1->TypeIs(type)); - assert(varTypeIsFloating(simdBaseType)); - - NamedIntrinsic intrinsic = NI_Illegal; - -#if defined(TARGET_XARCH) - if (simdSize == 32) - { - intrinsic = NI_AVX_Floor; - } - else if (simdSize == 64) - { - GenTree* op2 = gtNewIconNode(static_cast(FloatRoundingMode::ToNegativeInfinity)); - return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512_RoundScale, simdBaseType, simdSize); - } - else + if (simdBaseType == TYP_FLOAT) { - intrinsic = NI_X86Base_Floor; + simdBaseType = TYP_INT; } -#elif defined(TARGET_ARM64) - if (simdBaseType == TYP_DOUBLE) + else if (simdBaseType == TYP_DOUBLE) { - intrinsic = (simdSize 
== 8) ? NI_AdvSimd_FloorScalar : NI_AdvSimd_Arm64_Floor; + simdBaseType = TYP_LONG; } - else + + assert(varTypeIsIntegral(simdBaseType)); + + if (varTypeIsUnsigned(simdBaseType)) { - intrinsic = NI_AdvSimd_Floor; + return gtNewZeroConNode(type); } -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 - - assert(intrinsic != NI_Illegal); - return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseType, simdSize); + return gtNewSimdCmpOpNode(GT_LT, type, op1, gtNewZeroConNode(type), simdBaseType, simdSize); } -GenTree* Compiler::gtNewSimdFmaNode( - var_types type, GenTree* op1, GenTree* op2, GenTree* op3, var_types simdBaseType, unsigned simdSize) +//---------------------------------------------------------------------------------------------- +// Compiler::gtNewSimdIsNegativeInfinityNode: Creates a new simd IsNegativeInfinity node +// +// Arguments: +// type - The return type of SIMD node being created +// op1 - The vector to check for negative infinities +// simdBaseType - The base type of SIMD type of the intrinsic +// simdSize - The size of the SIMD type of the intrinsic +// +// Returns: +// The created IsNegativeInfinity node +// +GenTree* Compiler::gtNewSimdIsNegativeInfinityNode(var_types type, + GenTree* op1, + var_types simdBaseType, + unsigned simdSize) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -24982,259 +25086,127 @@ GenTree* Compiler::gtNewSimdFmaNode( assert(op1 != nullptr); assert(op1->TypeIs(type)); - assert(op2 != nullptr); - assert(op2->TypeIs(type)); - - assert(op3 != nullptr); - assert(op3->TypeIs(type)); - - assert(varTypeIsFloating(simdBaseType)); - - NamedIntrinsic intrinsic = NI_Illegal; + assert(varTypeIsArithmetic(simdBaseType)); -#if defined(TARGET_XARCH) - if (simdSize == 64) - { - intrinsic = NI_AVX512_FusedMultiplyAdd; - } - else - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); - intrinsic = NI_AVX2_MultiplyAdd; - } -#elif defined(TARGET_ARM64) - if 
(simdBaseType == TYP_DOUBLE) - { - intrinsic = (simdSize == 8) ? NI_AdvSimd_FusedMultiplyAddScalar : NI_AdvSimd_Arm64_FusedMultiplyAdd; - } - else + if (varTypeIsFloating(simdBaseType)) { - intrinsic = NI_AdvSimd_FusedMultiplyAdd; - } + GenTree* cnsNode; - // AdvSimd.FusedMultiplyAdd expects (addend, left, right), while the APIs take (left, right, addend) - // We expect op1 and op2 to have already been spilled + if (simdBaseType == TYP_FLOAT) + { + simdBaseType = TYP_UINT; + cnsNode = gtNewIconNode(0xFF800000); + } + else + { + assert(simdBaseType == TYP_DOUBLE); - std::swap(op1, op3); -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 + simdBaseType = TYP_ULONG; + cnsNode = gtNewLconNode(0xFFF0000000000000); + } + cnsNode = gtNewSimdCreateBroadcastNode(type, cnsNode, simdBaseType, simdSize); - assert(intrinsic != NI_Illegal); - return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, intrinsic, simdBaseType, simdSize); -} - -GenTree* Compiler::gtNewSimdGetElementNode( - var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize) -{ - NamedIntrinsic intrinsicId = NI_Vector128_GetElement; - - assert(varTypeIsArithmetic(simdBaseType)); - -#if defined(TARGET_XARCH) - if (op2->IsIntegralConst(0)) - { - return gtNewSimdToScalarNode(type, op1, simdBaseType, simdSize); - } - - if (simdSize == 64) - { - intrinsicId = NI_Vector512_GetElement; - } - else if (simdSize == 32) - { - intrinsicId = NI_Vector256_GetElement; - } -#elif defined(TARGET_ARM64) - if (op2->IsIntegralConst(0)) - { - return gtNewSimdToScalarNode(type, op1, simdBaseType, simdSize); - } - - if (simdSize == 8) - { - intrinsicId = NI_Vector64_GetElement; - } -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 - - int immUpperBound = getSIMDVectorLength(simdSize, simdBaseType) - 1; - bool rangeCheckNeeded = !op2->OperIsConst(); - - if (!rangeCheckNeeded) - { - ssize_t imm8 = op2->AsIntCon()->IconValue(); - rangeCheckNeeded = (imm8 < 0) 
|| (imm8 > immUpperBound); - } - - if (rangeCheckNeeded) - { - op2 = addRangeCheckForHWIntrinsic(op2, 0, immUpperBound); - } - - return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsicId, simdBaseType, simdSize); + return gtNewSimdCmpOpNode(GT_EQ, type, op1, cnsNode, simdBaseType, simdSize); + } + return gtNewZeroConNode(type); } //---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdGetIndicesNode: Creates a new simd get_Indices node +// Compiler::gtNewSimdIsNormalNode: Creates a new simd IsNormal node // // Arguments: // type - The return type of SIMD node being created +// op1 - The vector to check for normal values // simdBaseType - The base type of SIMD type of the intrinsic // simdSize - The size of the SIMD type of the intrinsic // // Returns: -// The created get_Indices node +// The created IsNormal node // -GenTree* Compiler::gtNewSimdGetIndicesNode(var_types type, var_types simdBaseType, unsigned simdSize) +GenTree* Compiler::gtNewSimdIsNormalNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); - assert(varTypeIsArithmetic(simdBaseType)); + assert(op1 != nullptr); + assert(op1->TypeIs(type)); - GenTreeVecCon* indices = gtNewVconNode(type); - uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType); + assert(varTypeIsArithmetic(simdBaseType)); - switch (simdBaseType) + if (varTypeIsFloating(simdBaseType)) { - case TYP_BYTE: - case TYP_UBYTE: - { - for (uint32_t index = 0; index < simdLength; index++) - { - indices->gtSimdVal.u8[index] = static_cast(index); - } - break; - } + op1 = gtNewSimdAbsNode(type, op1, simdBaseType, simdSize); - case TYP_SHORT: - case TYP_USHORT: - { - for (uint32_t index = 0; index < simdLength; index++) - { - indices->gtSimdVal.u16[index] = static_cast(index); - } - break; - } + GenTree* cnsNode1; + GenTree* cnsNode2; - case TYP_INT: - case TYP_UINT: + if 
(simdBaseType == TYP_FLOAT) { - for (uint32_t index = 0; index < simdLength; index++) - { - indices->gtSimdVal.u32[index] = static_cast(index); - } - break; - } + simdBaseType = TYP_UINT; - case TYP_LONG: - case TYP_ULONG: - { - for (uint32_t index = 0; index < simdLength; index++) - { - indices->gtSimdVal.u64[index] = static_cast(index); - } - break; + cnsNode1 = gtNewIconNode(0x00800000); + cnsNode2 = gtNewIconNode(0x7F800000 - 0x00800000); } - - case TYP_FLOAT: + else { - for (uint32_t index = 0; index < simdLength; index++) - { - indices->gtSimdVal.f32[index] = static_cast(index); - } - break; - } + assert(simdBaseType == TYP_DOUBLE); - case TYP_DOUBLE: - { - for (uint32_t index = 0; index < simdLength; index++) - { - indices->gtSimdVal.f64[index] = static_cast(index); - } - break; - } + simdBaseType = TYP_ULONG; - default: - { - unreached(); + cnsNode1 = gtNewLconNode(0x0010000000000000); + cnsNode2 = gtNewLconNode(0x7FF0000000000000 - 0x0010000000000000); } - } - return indices; -} - -GenTree* Compiler::gtNewSimdGetLowerNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) -{ - assert(varTypeIsArithmetic(simdBaseType)); - - NamedIntrinsic intrinsicId = NI_Illegal; + cnsNode1 = gtNewSimdCreateBroadcastNode(type, cnsNode1, simdBaseType, simdSize); + cnsNode2 = gtNewSimdCreateBroadcastNode(type, cnsNode2, simdBaseType, simdSize); -#if defined(TARGET_XARCH) - if (simdSize == 32) - { - assert(type == TYP_SIMD16); - intrinsicId = NI_Vector256_GetLower; - } - else - { - assert((type == TYP_SIMD32) && (simdSize == 64)); - intrinsicId = NI_Vector512_GetLower; + op1 = gtNewSimdBinOpNode(GT_SUB, type, op1, cnsNode1, simdBaseType, simdSize); + return gtNewSimdCmpOpNode(GT_LT, type, op1, cnsNode2, simdBaseType, simdSize); } -#elif defined(TARGET_ARM64) - assert((type == TYP_SIMD8) && (simdSize == 16)); - intrinsicId = NI_Vector128_GetLower; -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 - assert(intrinsicId != 
NI_Illegal); - return gtNewSimdHWIntrinsicNode(type, op1, intrinsicId, simdBaseType, simdSize); + assert(varTypeIsIntegral(simdBaseType)); + return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseType, simdSize); } -GenTree* Compiler::gtNewSimdGetUpperNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) +//---------------------------------------------------------------------------------------------- +// Compiler::gtNewSimdIsOddIntegerNode: Creates a new simd IsOddInteger node +// +// Arguments: +// type - The return type of SIMD node being created +// op1 - The vector to check for odd integers +// simdBaseType - The base type of SIMD type of the intrinsic +// simdSize - The size of the SIMD type of the intrinsic +// +// Returns: +// The created IsOddInteger node +// +GenTree* Compiler::gtNewSimdIsOddIntegerNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { - assert(varTypeIsArithmetic(simdBaseType)); + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); - NamedIntrinsic intrinsicId = NI_Illegal; + assert(op1 != nullptr); + assert(op1->TypeIs(type)); -#if defined(TARGET_XARCH) - if (simdSize == 32) - { - assert(type == TYP_SIMD16); - intrinsicId = NI_Vector256_GetUpper; - } - else - { - assert((type == TYP_SIMD32) && (simdSize == 64)); - intrinsicId = NI_Vector512_GetUpper; - } -#elif defined(TARGET_ARM64) - assert((type == TYP_SIMD8) && (simdSize == 16)); - intrinsicId = NI_Vector128_GetUpper; -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 + assert(varTypeIsIntegral(simdBaseType)); - assert(intrinsicId != NI_Illegal); - return gtNewSimdHWIntrinsicNode(type, op1, intrinsicId, simdBaseType, simdSize); + op1 = gtNewSimdBinOpNode(GT_AND, type, op1, gtNewOneConNode(type, simdBaseType), simdBaseType, simdSize); + return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseType, simdSize); } 
//---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdIsEvenIntegerNode: Creates a new simd IsEvenInteger node +// Compiler::gtNewSimdIsPositiveNode: Creates a new simd IsPositive node // // Arguments: // type - The return type of SIMD node being created -// op1 - The vector to check for even integers +// op1 - The vector to check for positives // simdBaseType - The base type of SIMD type of the intrinsic // simdSize - The size of the SIMD type of the intrinsic // // Returns: -// The created IsEvenInteger node +// The created IsPositive node // -GenTree* Compiler::gtNewSimdIsEvenIntegerNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) +GenTree* Compiler::gtNewSimdIsPositiveNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -25242,25 +25214,40 @@ GenTree* Compiler::gtNewSimdIsEvenIntegerNode(var_types type, GenTree* op1, var_ assert(op1 != nullptr); assert(op1->TypeIs(type)); + if (simdBaseType == TYP_FLOAT) + { + simdBaseType = TYP_INT; + } + else if (simdBaseType == TYP_DOUBLE) + { + simdBaseType = TYP_LONG; + } + assert(varTypeIsIntegral(simdBaseType)); - op1 = gtNewSimdBinOpNode(GT_AND, type, op1, gtNewOneConNode(type, simdBaseType), simdBaseType, simdSize); - return gtNewSimdIsZeroNode(type, op1, simdBaseType, simdSize); + if (varTypeIsUnsigned(simdBaseType)) + { + return gtNewAllBitsSetConNode(type); + } + return gtNewSimdCmpOpNode(GT_GE, type, op1, gtNewZeroConNode(type), simdBaseType, simdSize); } //---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdIsFiniteNode: Creates a new simd IsFinite node +// Compiler::gtNewSimdIsPositiveInfinityNode: Creates a new simd IsPositiveInfinity node // // Arguments: // type - The return type of SIMD node being created -// op1 - The vector to check for finite 
values +// op1 - The vector to check for positive infinities // simdBaseType - The base type of SIMD type of the intrinsic // simdSize - The size of the SIMD type of the intrinsic // // Returns: -// The created IsFinite node +// The created IsPositiveInfinity node // -GenTree* Compiler::gtNewSimdIsFiniteNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) +GenTree* Compiler::gtNewSimdIsPositiveInfinityNode(var_types type, + GenTree* op1, + var_types simdBaseType, + unsigned simdSize) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -25276,39 +25263,36 @@ GenTree* Compiler::gtNewSimdIsFiniteNode(var_types type, GenTree* op1, var_types if (simdBaseType == TYP_FLOAT) { - simdBaseType = TYP_INT; + simdBaseType = TYP_UINT; cnsNode = gtNewIconNode(0x7F800000); } else { assert(simdBaseType == TYP_DOUBLE); - simdBaseType = TYP_LONG; + simdBaseType = TYP_ULONG; cnsNode = gtNewLconNode(0x7FF0000000000000); } cnsNode = gtNewSimdCreateBroadcastNode(type, cnsNode, simdBaseType, simdSize); - op1 = gtNewSimdBinOpNode(GT_AND_NOT, type, cnsNode, op1, simdBaseType, simdSize); - return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseType, simdSize); + return gtNewSimdCmpOpNode(GT_EQ, type, op1, cnsNode, simdBaseType, simdSize); } - - assert(varTypeIsIntegral(simdBaseType)); - return gtNewAllBitsSetConNode(type); + return gtNewZeroConNode(type); } //---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdIsInfinityNode: Creates a new simd IsInfinity node +// Compiler::gtNewSimdIsSubnormalNode: Creates a new simd IsSubnormal node // // Arguments: // type - The return type of SIMD node being created -// op1 - The vector to check for infinities +// op1 - The vector to check for subnormal values // simdBaseType - The base type of SIMD type of the intrinsic // simdSize - The size of the SIMD type of the intrinsic // // Returns: -// The created 
IsInfinity node +// The created IsSubnormal node // -GenTree* Compiler::gtNewSimdIsInfinityNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) +GenTree* Compiler::gtNewSimdIsSubnormalNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -25321,24 +25305,48 @@ GenTree* Compiler::gtNewSimdIsInfinityNode(var_types type, GenTree* op1, var_typ if (varTypeIsFloating(simdBaseType)) { op1 = gtNewSimdAbsNode(type, op1, simdBaseType, simdSize); - return gtNewSimdIsPositiveInfinityNode(type, op1, simdBaseType, simdSize); + + GenTree* cnsNode1; + GenTree* cnsNode2; + + if (simdBaseType == TYP_FLOAT) + { + simdBaseType = TYP_UINT; + + cnsNode2 = gtNewIconNode(0x007FFFFF); + } + else + { + assert(simdBaseType == TYP_DOUBLE); + + simdBaseType = TYP_ULONG; + + cnsNode2 = gtNewLconNode(0x000FFFFFFFFFFFFF); + } + + cnsNode1 = gtNewOneConNode(type, simdBaseType); + cnsNode2 = gtNewSimdCreateBroadcastNode(type, cnsNode2, simdBaseType, simdSize); + + op1 = gtNewSimdBinOpNode(GT_SUB, type, op1, cnsNode1, simdBaseType, simdSize); + + return gtNewSimdCmpOpNode(GT_LT, type, op1, cnsNode2, simdBaseType, simdSize); } return gtNewZeroConNode(type); } //---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdIsIntegerNode: Creates a new simd IsInteger node +// Compiler::gtNewSimdIsZeroNode: Creates a new simd IsZero node // // Arguments: // type - The return type of SIMD node being created -// op1 - The vector to check for integers +// op1 - The vector to check for Zeroes // simdBaseType - The base type of SIMD type of the intrinsic // simdSize - The size of the SIMD type of the intrinsic // // Returns: -// The created IsInteger node +// The created IsZero node // -GenTree* Compiler::gtNewSimdIsIntegerNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) +GenTree* 
Compiler::gtNewSimdIsZeroNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -25348,599 +25356,221 @@ GenTree* Compiler::gtNewSimdIsIntegerNode(var_types type, GenTree* op1, var_type assert(varTypeIsArithmetic(simdBaseType)); - if (varTypeIsFloating(simdBaseType)) - { - GenTree* op1Dup1 = fgMakeMultiUse(&op1); - GenTree* op1Dup2 = gtCloneExpr(op1Dup1); - - op1 = gtNewSimdIsFiniteNode(type, op1, simdBaseType, simdSize); - - op1Dup1 = gtNewSimdTruncNode(type, op1Dup1, simdBaseType, simdSize); - GenTree* op2 = gtNewSimdCmpOpNode(GT_EQ, type, op1Dup1, op1Dup2, simdBaseType, simdSize); - - return gtNewSimdBinOpNode(GT_AND, type, op1, op2, simdBaseType, simdSize); - } - - assert(varTypeIsIntegral(simdBaseType)); - return gtNewAllBitsSetConNode(type); + return gtNewSimdCmpOpNode(GT_EQ, type, op1, gtNewZeroConNode(type), simdBaseType, simdSize); } //---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdIsNaNNode: Creates a new simd IsNaN node +// Compiler::gtNewSimdLoadNode: Creates a new simd Load node // // Arguments: // type - The return type of SIMD node being created -// op1 - The vector to check for NaNs +// op1 - The address of the value to be loaded // simdBaseType - The base type of SIMD type of the intrinsic // simdSize - The size of the SIMD type of the intrinsic // // Returns: -// The created IsNaN node +// The created Load node // -GenTree* Compiler::gtNewSimdIsNaNNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) +GenTree* Compiler::gtNewSimdLoadNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); assert(op1 != nullptr); - assert(op1->TypeIs(type)); assert(varTypeIsArithmetic(simdBaseType)); - if (varTypeIsFloating(simdBaseType)) - { - GenTree* op1Dup = 
fgMakeMultiUse(&op1); - return gtNewSimdCmpOpNode(GT_NE, type, op1, op1Dup, simdBaseType, simdSize); - } - return gtNewZeroConNode(type); + return gtNewIndir(type, op1); } //---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdIsNegativeNode: Creates a new simd IsNegative node +// Compiler::gtNewSimdLoadAlignedNode: Creates a new simd LoadAligned node // // Arguments: // type - The return type of SIMD node being created -// op1 - The vector to check for negatives +// op1 - The address of the value to be loaded // simdBaseType - The base type of SIMD type of the intrinsic // simdSize - The size of the SIMD type of the intrinsic // // Returns: -// The created IsNegative node +// The created LoadAligned node // -GenTree* Compiler::gtNewSimdIsNegativeNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) +GenTree* Compiler::gtNewSimdLoadAlignedNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { +#if defined(TARGET_XARCH) assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); assert(op1 != nullptr); - assert(op1->TypeIs(type)); - if (simdBaseType == TYP_FLOAT) + assert(varTypeIsArithmetic(simdBaseType)); + + NamedIntrinsic intrinsic = NI_Illegal; + + if (simdSize == 64) { - simdBaseType = TYP_INT; + intrinsic = NI_AVX512_LoadAlignedVector512; } - else if (simdBaseType == TYP_DOUBLE) + else if (simdSize == 32) { - simdBaseType = TYP_LONG; + intrinsic = NI_AVX_LoadAlignedVector256; } - - assert(varTypeIsIntegral(simdBaseType)); - - if (varTypeIsUnsigned(simdBaseType)) + else { - return gtNewZeroConNode(type); + intrinsic = NI_X86Base_LoadAlignedVector128; } - return gtNewSimdCmpOpNode(GT_LT, type, op1, gtNewZeroConNode(type), simdBaseType, simdSize); + + assert(intrinsic != NI_Illegal); + return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseType, simdSize); +#elif defined(TARGET_ARM64) + // ARM64 doesn't have aligned loads, but 
aligned loads are only validated to be + // aligned when optimizations are disable, so only skip the intrinsic handling + // if optimizations are enabled + + assert(opts.OptimizationEnabled()); + return gtNewSimdLoadNode(type, op1, simdBaseType, simdSize); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 } //---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdIsNegativeInfinityNode: Creates a new simd IsNegativeInfinity node +// Compiler::gtNewSimdLoadNonTemporalNode: Creates a new simd LoadNonTemporal node // // Arguments: // type - The return type of SIMD node being created -// op1 - The vector to check for negative infinities +// op1 - The address of the value to be loaded // simdBaseType - The base type of SIMD type of the intrinsic // simdSize - The size of the SIMD type of the intrinsic // // Returns: -// The created IsNegativeInfinity node +// The created LoadNonTemporal node // -GenTree* Compiler::gtNewSimdIsNegativeInfinityNode(var_types type, - GenTree* op1, - var_types simdBaseType, - unsigned simdSize) +GenTree* Compiler::gtNewSimdLoadNonTemporalNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { +#if defined(TARGET_XARCH) assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); assert(op1 != nullptr); - assert(op1->TypeIs(type)); assert(varTypeIsArithmetic(simdBaseType)); - if (varTypeIsFloating(simdBaseType)) - { - GenTree* cnsNode; + NamedIntrinsic intrinsic = NI_Illegal; + bool isNonTemporal = false; - if (simdBaseType == TYP_FLOAT) + // We don't guarantee a non-temporal load will actually occur, so fallback + // to regular aligned loads if the required ISA isn't supported. 
+ + if (simdSize == 32) + { + if (compOpportunisticallyDependsOn(InstructionSet_AVX2)) { - simdBaseType = TYP_UINT; - cnsNode = gtNewIconNode(0xFF800000); + intrinsic = NI_AVX2_LoadAlignedVector256NonTemporal; + isNonTemporal = true; } else { - assert(simdBaseType == TYP_DOUBLE); - - simdBaseType = TYP_ULONG; - cnsNode = gtNewLconNode(0xFFF0000000000000); + intrinsic = NI_AVX_LoadAlignedVector256; } - cnsNode = gtNewSimdCreateBroadcastNode(type, cnsNode, simdBaseType, simdSize); + } + else if (simdSize == 64) + { + intrinsic = NI_AVX512_LoadAlignedVector512NonTemporal; + isNonTemporal = true; + } + else + { + intrinsic = NI_X86Base_LoadAlignedVector128NonTemporal; + isNonTemporal = true; + } - return gtNewSimdCmpOpNode(GT_EQ, type, op1, cnsNode, simdBaseType, simdSize); + if (isNonTemporal) + { + // float and double don't have actual instructions for non-temporal loads + // so we'll just use the equivalent integer instruction instead. + + if (simdBaseType == TYP_FLOAT) + { + simdBaseType = TYP_INT; + } + else if (simdBaseType == TYP_DOUBLE) + { + simdBaseType = TYP_LONG; + } } - return gtNewZeroConNode(type); + + assert(intrinsic != NI_Illegal); + return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseType, simdSize); +#elif defined(TARGET_ARM64) + // ARM64 doesn't have aligned loads, but aligned loads are only validated to be + // aligned when optimizations are disable, so only skip the intrinsic handling + // if optimizations are enabled + + assert(opts.OptimizationEnabled()); + return gtNewSimdLoadNode(type, op1, simdBaseType, simdSize); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 } -//---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdIsNormalNode: Creates a new simd IsNormal node +//------------------------------------------------------------------------ +// gtNewSimdMinMaxNode: Creates a new HWIntrinsic node that performs a min +// or max computation 
that follows IEEE 754 semantics // -// Arguments: -// type - The return type of SIMD node being created -// op1 - The vector to check for normal values -// simdBaseType - The base type of SIMD type of the intrinsic -// simdSize - The size of the SIMD type of the intrinsic +// Arguments: +// type -- The type of the node to generate +// op1 -- The first operand +// op2 -- The second operand +// simdBaseType -- the base type of the node +// simdSize -- the simd size of the node +// isMax -- true to compute the maximum; otherwise, false for the minimum +// isMagnitude -- true to compare the absolute values of op1/op2; otherwise false to compare op1/op2 directly +// isNumber -- true to propagate numeric values if either op1 or op2 is NaN; false to propagate NaN values // -// Returns: -// The created IsNormal node +// Return Value: +// The node representing the minimum or maximum operation // -GenTree* Compiler::gtNewSimdIsNormalNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) +GenTree* Compiler::gtNewSimdMinMaxNode(var_types type, + GenTree* op1, + GenTree* op2, + var_types simdBaseType, + unsigned simdSize, + bool isMax, + bool isMagnitude, + bool isNumber) { - assert(varTypeIsSIMD(type)); - assert(getSIMDTypeForSize(simdSize) == type); - assert(op1 != nullptr); assert(op1->TypeIs(type)); + assert(op2 != nullptr); + assert(op2->TypeIs(type)); + assert(varTypeIsArithmetic(simdBaseType)); - if (varTypeIsFloating(simdBaseType)) - { - op1 = gtNewSimdAbsNode(type, op1, simdBaseType, simdSize); + bool isScalar = false; - GenTree* cnsNode1; - GenTree* cnsNode2; + if (simdSize == 0) + { + isScalar = true; + assert(varTypeIsFloating(type)); + assert(simdBaseType == type); + } + else if (!varTypeIsLong(simdBaseType)) + { + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + } - if (simdBaseType == TYP_FLOAT) - { - simdBaseType = TYP_UINT; + NamedIntrinsic intrinsic = NI_Illegal; - cnsNode1 = gtNewIconNode(0x00800000); - 
cnsNode2 = gtNewIconNode(0x7F800000 - 0x00800000); - } - else - { - assert(simdBaseType == TYP_DOUBLE); + if (varTypeIsFloating(simdBaseType)) + { + GenTree* retNode = nullptr; - simdBaseType = TYP_ULONG; - - cnsNode1 = gtNewLconNode(0x0010000000000000); - cnsNode2 = gtNewLconNode(0x7FF0000000000000 - 0x0010000000000000); - } - - cnsNode1 = gtNewSimdCreateBroadcastNode(type, cnsNode1, simdBaseType, simdSize); - cnsNode2 = gtNewSimdCreateBroadcastNode(type, cnsNode2, simdBaseType, simdSize); - - op1 = gtNewSimdBinOpNode(GT_SUB, type, op1, cnsNode1, simdBaseType, simdSize); - return gtNewSimdCmpOpNode(GT_LT, type, op1, cnsNode2, simdBaseType, simdSize); - } - - assert(varTypeIsIntegral(simdBaseType)); - return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseType, simdSize); -} - -//---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdIsOddIntegerNode: Creates a new simd IsOddInteger node -// -// Arguments: -// type - The return type of SIMD node being created -// op1 - The vector to check for odd integers -// simdBaseType - The base type of SIMD type of the intrinsic -// simdSize - The size of the SIMD type of the intrinsic -// -// Returns: -// The created IsOddInteger node -// -GenTree* Compiler::gtNewSimdIsOddIntegerNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) -{ - assert(varTypeIsSIMD(type)); - assert(getSIMDTypeForSize(simdSize) == type); - - assert(op1 != nullptr); - assert(op1->TypeIs(type)); - - assert(varTypeIsIntegral(simdBaseType)); - - op1 = gtNewSimdBinOpNode(GT_AND, type, op1, gtNewOneConNode(type, simdBaseType), simdBaseType, simdSize); - return gtNewSimdCmpOpNode(GT_NE, type, op1, gtNewZeroConNode(type), simdBaseType, simdSize); -} - -//---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdIsPositiveNode: Creates a new simd IsPositive node -// -// Arguments: -// type - The 
return type of SIMD node being created -// op1 - The vector to check for positives -// simdBaseType - The base type of SIMD type of the intrinsic -// simdSize - The size of the SIMD type of the intrinsic -// -// Returns: -// The created IsPositive node -// -GenTree* Compiler::gtNewSimdIsPositiveNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) -{ - assert(varTypeIsSIMD(type)); - assert(getSIMDTypeForSize(simdSize) == type); - - assert(op1 != nullptr); - assert(op1->TypeIs(type)); - - if (simdBaseType == TYP_FLOAT) - { - simdBaseType = TYP_INT; - } - else if (simdBaseType == TYP_DOUBLE) - { - simdBaseType = TYP_LONG; - } - - assert(varTypeIsIntegral(simdBaseType)); - - if (varTypeIsUnsigned(simdBaseType)) - { - return gtNewAllBitsSetConNode(type); - } - return gtNewSimdCmpOpNode(GT_GE, type, op1, gtNewZeroConNode(type), simdBaseType, simdSize); -} - -//---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdIsPositiveInfinityNode: Creates a new simd IsPositiveInfinity node -// -// Arguments: -// type - The return type of SIMD node being created -// op1 - The vector to check for positive infinities -// simdBaseType - The base type of SIMD type of the intrinsic -// simdSize - The size of the SIMD type of the intrinsic -// -// Returns: -// The created IsPositiveInfinity node -// -GenTree* Compiler::gtNewSimdIsPositiveInfinityNode(var_types type, - GenTree* op1, - var_types simdBaseType, - unsigned simdSize) -{ - assert(varTypeIsSIMD(type)); - assert(getSIMDTypeForSize(simdSize) == type); - - assert(op1 != nullptr); - assert(op1->TypeIs(type)); - - assert(varTypeIsArithmetic(simdBaseType)); - - if (varTypeIsFloating(simdBaseType)) - { - GenTree* cnsNode; - - if (simdBaseType == TYP_FLOAT) - { - simdBaseType = TYP_UINT; - cnsNode = gtNewIconNode(0x7F800000); - } - else - { - assert(simdBaseType == TYP_DOUBLE); - - simdBaseType = TYP_ULONG; - cnsNode = 
gtNewLconNode(0x7FF0000000000000); - } - cnsNode = gtNewSimdCreateBroadcastNode(type, cnsNode, simdBaseType, simdSize); - - return gtNewSimdCmpOpNode(GT_EQ, type, op1, cnsNode, simdBaseType, simdSize); - } - return gtNewZeroConNode(type); -} - -//---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdIsSubnormalNode: Creates a new simd IsSubnormal node -// -// Arguments: -// type - The return type of SIMD node being created -// op1 - The vector to check for subnormal values -// simdBaseType - The base type of SIMD type of the intrinsic -// simdSize - The size of the SIMD type of the intrinsic -// -// Returns: -// The created IsSubnormal node -// -GenTree* Compiler::gtNewSimdIsSubnormalNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) -{ - assert(varTypeIsSIMD(type)); - assert(getSIMDTypeForSize(simdSize) == type); - - assert(op1 != nullptr); - assert(op1->TypeIs(type)); - - assert(varTypeIsArithmetic(simdBaseType)); - - if (varTypeIsFloating(simdBaseType)) - { - op1 = gtNewSimdAbsNode(type, op1, simdBaseType, simdSize); - - GenTree* cnsNode1; - GenTree* cnsNode2; - - if (simdBaseType == TYP_FLOAT) - { - simdBaseType = TYP_UINT; - - cnsNode2 = gtNewIconNode(0x007FFFFF); - } - else - { - assert(simdBaseType == TYP_DOUBLE); - - simdBaseType = TYP_ULONG; - - cnsNode2 = gtNewLconNode(0x000FFFFFFFFFFFFF); - } - - cnsNode1 = gtNewOneConNode(type, simdBaseType); - cnsNode2 = gtNewSimdCreateBroadcastNode(type, cnsNode2, simdBaseType, simdSize); - - op1 = gtNewSimdBinOpNode(GT_SUB, type, op1, cnsNode1, simdBaseType, simdSize); - - return gtNewSimdCmpOpNode(GT_LT, type, op1, cnsNode2, simdBaseType, simdSize); - } - return gtNewZeroConNode(type); -} - -//---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdIsZeroNode: Creates a new simd IsZero node -// -// Arguments: -// type - The return type of SIMD node being created -// 
op1 - The vector to check for Zeroes -// simdBaseType - The base type of SIMD type of the intrinsic -// simdSize - The size of the SIMD type of the intrinsic -// -// Returns: -// The created IsZero node -// -GenTree* Compiler::gtNewSimdIsZeroNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) -{ - assert(varTypeIsSIMD(type)); - assert(getSIMDTypeForSize(simdSize) == type); - - assert(op1 != nullptr); - assert(op1->TypeIs(type)); - - assert(varTypeIsArithmetic(simdBaseType)); - - return gtNewSimdCmpOpNode(GT_EQ, type, op1, gtNewZeroConNode(type), simdBaseType, simdSize); -} - -//---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdLoadNode: Creates a new simd Load node -// -// Arguments: -// type - The return type of SIMD node being created -// op1 - The address of the value to be loaded -// simdBaseType - The base type of SIMD type of the intrinsic -// simdSize - The size of the SIMD type of the intrinsic -// -// Returns: -// The created Load node -// -GenTree* Compiler::gtNewSimdLoadNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) -{ - assert(varTypeIsSIMD(type)); - assert(getSIMDTypeForSize(simdSize) == type); - - assert(op1 != nullptr); - - assert(varTypeIsArithmetic(simdBaseType)); - - return gtNewIndir(type, op1); -} - -//---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdLoadAlignedNode: Creates a new simd LoadAligned node -// -// Arguments: -// type - The return type of SIMD node being created -// op1 - The address of the value to be loaded -// simdBaseType - The base type of SIMD type of the intrinsic -// simdSize - The size of the SIMD type of the intrinsic -// -// Returns: -// The created LoadAligned node -// -GenTree* Compiler::gtNewSimdLoadAlignedNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) -{ -#if defined(TARGET_XARCH) - 
assert(varTypeIsSIMD(type)); - assert(getSIMDTypeForSize(simdSize) == type); - - assert(op1 != nullptr); - - assert(varTypeIsArithmetic(simdBaseType)); - - NamedIntrinsic intrinsic = NI_Illegal; - - if (simdSize == 64) - { - intrinsic = NI_AVX512_LoadAlignedVector512; - } - else if (simdSize == 32) - { - intrinsic = NI_AVX_LoadAlignedVector256; - } - else - { - intrinsic = NI_X86Base_LoadAlignedVector128; - } - - assert(intrinsic != NI_Illegal); - return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseType, simdSize); -#elif defined(TARGET_ARM64) - // ARM64 doesn't have aligned loads, but aligned loads are only validated to be - // aligned when optimizations are disable, so only skip the intrinsic handling - // if optimizations are enabled - - assert(opts.OptimizationEnabled()); - return gtNewSimdLoadNode(type, op1, simdBaseType, simdSize); -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 -} - -//---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdLoadNonTemporalNode: Creates a new simd LoadNonTemporal node -// -// Arguments: -// type - The return type of SIMD node being created -// op1 - The address of the value to be loaded -// simdBaseType - The base type of SIMD type of the intrinsic -// simdSize - The size of the SIMD type of the intrinsic -// -// Returns: -// The created LoadNonTemporal node -// -GenTree* Compiler::gtNewSimdLoadNonTemporalNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) -{ -#if defined(TARGET_XARCH) - assert(varTypeIsSIMD(type)); - assert(getSIMDTypeForSize(simdSize) == type); - - assert(op1 != nullptr); - - assert(varTypeIsArithmetic(simdBaseType)); - - NamedIntrinsic intrinsic = NI_Illegal; - bool isNonTemporal = false; - - // We don't guarantee a non-temporal load will actually occur, so fallback - // to regular aligned loads if the required ISA isn't supported. 
- - if (simdSize == 32) - { - if (compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - intrinsic = NI_AVX2_LoadAlignedVector256NonTemporal; - isNonTemporal = true; - } - else - { - intrinsic = NI_AVX_LoadAlignedVector256; - } - } - else if (simdSize == 64) - { - intrinsic = NI_AVX512_LoadAlignedVector512NonTemporal; - isNonTemporal = true; - } - else - { - intrinsic = NI_X86Base_LoadAlignedVector128NonTemporal; - isNonTemporal = true; - } - - if (isNonTemporal) - { - // float and double don't have actual instructions for non-temporal loads - // so we'll just use the equivalent integer instruction instead. - - if (simdBaseType == TYP_FLOAT) - { - simdBaseType = TYP_INT; - } - else if (simdBaseType == TYP_DOUBLE) - { - simdBaseType = TYP_LONG; - } - } - - assert(intrinsic != NI_Illegal); - return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseType, simdSize); -#elif defined(TARGET_ARM64) - // ARM64 doesn't have aligned loads, but aligned loads are only validated to be - // aligned when optimizations are disable, so only skip the intrinsic handling - // if optimizations are enabled - - assert(opts.OptimizationEnabled()); - return gtNewSimdLoadNode(type, op1, simdBaseType, simdSize); -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 -} - -//------------------------------------------------------------------------ -// gtNewSimdMinMaxNode: Creates a new HWIntrinsic node that performs a min -// or max computation that follows IEEE 754 semantics -// -// Arguments: -// type -- The type of the node to generate -// op1 -- The first operand -// op2 -- The second operand -// simdBaseType -- the base type of the node -// simdSize -- the simd size of the node -// isMax -- true to compute the maximum; otherwise, false for the minimum -// isMagnitude -- true to compare the absolute values of op1/op2; otherwise false to compare op1/op2 directly -// isNumber -- true to propagate numeric values if either op1 or op2 is NaN; false to propagate 
NaN values -// -// Return Value: -// The node representing the minimum or maximum operation -// -GenTree* Compiler::gtNewSimdMinMaxNode(var_types type, - GenTree* op1, - GenTree* op2, - var_types simdBaseType, - unsigned simdSize, - bool isMax, - bool isMagnitude, - bool isNumber) -{ - assert(op1 != nullptr); - assert(op1->TypeIs(type)); - - assert(op2 != nullptr); - assert(op2->TypeIs(type)); - - assert(varTypeIsArithmetic(simdBaseType)); - - bool isScalar = false; - - if (simdSize == 0) - { - isScalar = true; - assert(varTypeIsFloating(type)); - assert(simdBaseType == type); - } - else if (!varTypeIsLong(simdBaseType)) - { - assert(varTypeIsSIMD(type)); - assert(getSIMDTypeForSize(simdSize) == type); - } - - NamedIntrinsic intrinsic = NI_Illegal; - - if (varTypeIsFloating(simdBaseType)) - { - GenTree* retNode = nullptr; - -#if defined(TARGET_XARCH) - GenTree* cnsNode = nullptr; - GenTree* otherNode = nullptr; +#if defined(TARGET_XARCH) + GenTree* cnsNode = nullptr; + GenTree* otherNode = nullptr; if (isScalar) { @@ -26557,522 +26187,820 @@ GenTree* Compiler::gtNewSimdMinMaxNativeNode( assert(op2 != nullptr); assert(op2->TypeIs(type)); - assert(varTypeIsArithmetic(simdBaseType)); + assert(varTypeIsArithmetic(simdBaseType)); + + bool isScalar = false; + + if (simdSize == 0) + { + isScalar = true; + assert(varTypeIsFloating(type)); + assert(simdBaseType == type); + } + else + { + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + } + + NamedIntrinsic intrinsic = NI_Illegal; + +#if defined(TARGET_XARCH) + if (simdSize == 32) + { + if (varTypeIsFloating(simdBaseType)) + { + intrinsic = isMax ? NI_AVX_Max : NI_AVX_Min; + } + else + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); + + if (!varTypeIsLong(simdBaseType)) + { + intrinsic = isMax ? NI_AVX2_Max : NI_AVX2_Min; + } + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + intrinsic = isMax ? 
NI_AVX512_Max : NI_AVX512_Min; + } + } + } + else if (simdSize == 64) + { + intrinsic = isMax ? NI_AVX512_Max : NI_AVX512_Min; + } + else if (!varTypeIsLong(simdBaseType)) + { + if (isScalar) + { + simdSize = 16; + type = TYP_SIMD16; + + op1 = gtNewSimdCreateScalarUnsafeNode(type, op1, simdBaseType, simdSize); + op2 = gtNewSimdCreateScalarUnsafeNode(type, op2, simdBaseType, simdSize); + + intrinsic = isMax ? NI_X86Base_MaxScalar : NI_X86Base_MinScalar; + } + else + { + intrinsic = isMax ? NI_X86Base_Max : NI_X86Base_Min; + } + } + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + intrinsic = isMax ? NI_AVX512_Max : NI_AVX512_Min; + } +#elif defined(TARGET_ARM64) + if (!varTypeIsLong(simdBaseType)) + { + if (isScalar) + { + simdSize = 8; + type = TYP_SIMD8; + + op1 = gtNewSimdCreateScalarUnsafeNode(type, op1, simdBaseType, simdSize); + op2 = gtNewSimdCreateScalarUnsafeNode(type, op2, simdBaseType, simdSize); + + intrinsic = isMax ? NI_AdvSimd_Arm64_MaxScalar : NI_AdvSimd_Arm64_MinScalar; + } + else if (simdBaseType == TYP_DOUBLE) + { + if (simdSize == 8) + { + intrinsic = isMax ? NI_AdvSimd_Arm64_MaxScalar : NI_AdvSimd_Arm64_MinScalar; + } + else + { + intrinsic = isMax ? NI_AdvSimd_Arm64_Max : NI_AdvSimd_Arm64_Min; + } + } + else + { + intrinsic = isMax ? NI_AdvSimd_Max : NI_AdvSimd_Min; + } + } +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + + if (intrinsic != NI_Illegal) + { + GenTree* retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseType, simdSize); + + if (isScalar) + { + retNode = gtNewSimdToScalarNode(simdBaseType, retNode, simdBaseType, simdSize); + } + return retNode; + } + + assert(!isScalar); + + GenTree* op1Dup = fgMakeMultiUse(&op1); + GenTree* op2Dup = fgMakeMultiUse(&op2); + + // op1 = op1 < op2 + // -or- + // op1 = op1 > op2 + op1 = gtNewSimdCmpOpNode(isMax ? 
GT_GT : GT_LT, type, op1, op2, simdBaseType, simdSize); + + // result = ConditionalSelect(op1, op1Dup, op2Dup) + return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseType, simdSize); +} + +GenTree* Compiler::gtNewSimdNarrowNode( + var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize) +{ + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + + assert(op1 != nullptr); + assert(op1->TypeIs(type)); + + assert(op2 != nullptr); + assert(op2->TypeIs(type)); + + assert(varTypeIsArithmetic(simdBaseType) && !varTypeIsLong(simdBaseType)); + + GenTree* tmp1; + GenTree* tmp2; + +#if defined(TARGET_XARCH) + GenTree* tmp3; + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) + { + // This is the same in principle to the other comments below, however due to + // code formatting, its too long to reasonably display here. + + assert((simdSize == 16) || (simdSize == 32) || (simdSize == 64)); + var_types tmpSimdType = (simdSize == 64) ? 
TYP_SIMD32 : TYP_SIMD16; + + NamedIntrinsic intrinsicId; + var_types opBaseType; + + switch (simdBaseType) + { + case TYP_BYTE: + { + if (simdSize == 64) + { + intrinsicId = NI_AVX512_ConvertToVector256SByte; + } + else + { + intrinsicId = NI_AVX512_ConvertToVector128SByte; + } + + opBaseType = TYP_SHORT; + break; + } + + case TYP_UBYTE: + { + if (simdSize == 64) + { + intrinsicId = NI_AVX512_ConvertToVector256Byte; + } + else + { + intrinsicId = NI_AVX512_ConvertToVector128Byte; + } + + opBaseType = TYP_USHORT; + break; + } + + case TYP_SHORT: + { + if (simdSize == 64) + { + intrinsicId = NI_AVX512_ConvertToVector256Int16; + } + else + { + intrinsicId = NI_AVX512_ConvertToVector128Int16; + } + + opBaseType = TYP_INT; + break; + } + + case TYP_USHORT: + { + if (simdSize == 64) + { + intrinsicId = NI_AVX512_ConvertToVector256UInt16; + } + else + { + intrinsicId = NI_AVX512_ConvertToVector128UInt16; + } + + opBaseType = TYP_UINT; + break; + } - bool isScalar = false; + case TYP_INT: + { + if (simdSize == 64) + { + intrinsicId = NI_AVX512_ConvertToVector256Int32; + } + else + { + intrinsicId = NI_AVX512_ConvertToVector128Int32; + } - if (simdSize == 0) - { - isScalar = true; - assert(varTypeIsFloating(type)); - assert(simdBaseType == type); - } - else - { - assert(varTypeIsSIMD(type)); - assert(getSIMDTypeForSize(simdSize) == type); - } + opBaseType = TYP_LONG; + break; + } - NamedIntrinsic intrinsic = NI_Illegal; + case TYP_UINT: + { + if (simdSize == 64) + { + intrinsicId = NI_AVX512_ConvertToVector256UInt32; + } + else + { + intrinsicId = NI_AVX512_ConvertToVector128UInt32; + } -#if defined(TARGET_XARCH) - if (simdSize == 32) - { - if (varTypeIsFloating(simdBaseType)) - { - intrinsic = isMax ? NI_AVX_Max : NI_AVX_Min; - } - else - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); + opBaseType = TYP_ULONG; + break; + } - if (!varTypeIsLong(simdBaseType)) + case TYP_FLOAT: { - intrinsic = isMax ? 
NI_AVX2_Max : NI_AVX2_Min; + if (simdSize == 64) + { + intrinsicId = NI_AVX512_ConvertToVector256Single; + } + else if (simdSize == 32) + { + intrinsicId = NI_AVX_ConvertToVector128Single; + } + else + { + intrinsicId = NI_X86Base_ConvertToVector128Single; + } + + opBaseType = TYP_DOUBLE; + break; } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) + + default: { - intrinsic = isMax ? NI_AVX512_Max : NI_AVX512_Min; + unreached(); } } - } - else if (simdSize == 64) - { - intrinsic = isMax ? NI_AVX512_Max : NI_AVX512_Min; - } - else if (!varTypeIsLong(simdBaseType)) - { - if (isScalar) - { - simdSize = 16; - type = TYP_SIMD16; - op1 = gtNewSimdCreateScalarUnsafeNode(type, op1, simdBaseType, simdSize); - op2 = gtNewSimdCreateScalarUnsafeNode(type, op2, simdBaseType, simdSize); + tmp1 = gtNewSimdHWIntrinsicNode(tmpSimdType, op1, intrinsicId, opBaseType, simdSize); + tmp2 = gtNewSimdHWIntrinsicNode(tmpSimdType, op2, intrinsicId, opBaseType, simdSize); - intrinsic = isMax ? NI_X86Base_MaxScalar : NI_X86Base_MinScalar; - } - else + if (simdSize == 16) { - intrinsic = isMax ? NI_X86Base_Max : NI_X86Base_Min; + return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_MoveLowToHigh, TYP_FLOAT, simdSize); } + + intrinsicId = (simdSize == 64) ? NI_Vector256_ToVector512Unsafe : NI_Vector128_ToVector256Unsafe; + + tmp1 = gtNewSimdHWIntrinsicNode(type, tmp1, intrinsicId, simdBaseType, simdSize / 2); + return gtNewSimdWithUpperNode(type, tmp1, tmp2, simdBaseType, simdSize); } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) - { - intrinsic = isMax ? 
NI_AVX512_Max : NI_AVX512_Min; - } -#elif defined(TARGET_ARM64) - if (!varTypeIsLong(simdBaseType)) + else if (simdSize == 32) { - if (isScalar) + switch (simdBaseType) { - simdSize = 8; - type = TYP_SIMD8; + case TYP_BYTE: + case TYP_UBYTE: + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); - op1 = gtNewSimdCreateScalarUnsafeNode(type, op1, simdBaseType, simdSize); - op2 = gtNewSimdCreateScalarUnsafeNode(type, op2, simdBaseType, simdSize); + // This is the same in principle to the other comments below, however due to + // code formatting, its too long to reasonably display here. + GenTreeVecCon* vecCon1 = gtNewVconNode(type); - intrinsic = isMax ? NI_AdvSimd_Arm64_MaxScalar : NI_AdvSimd_Arm64_MinScalar; - } - else if (simdBaseType == TYP_DOUBLE) - { - if (simdSize == 8) - { - intrinsic = isMax ? NI_AdvSimd_Arm64_MaxScalar : NI_AdvSimd_Arm64_MinScalar; + for (unsigned i = 0; i < (simdSize / 8); i++) + { + vecCon1->gtSimdVal.u64[i] = 0x00FF00FF00FF00FF; + } + + GenTree* vecCon2 = gtCloneExpr(vecCon1); + + tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseType, simdSize); + tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseType, simdSize); + tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_AVX2_PackUnsignedSaturate, TYP_UBYTE, simdSize); + + var_types permuteBaseType = (simdBaseType == TYP_BYTE) ? TYP_LONG : TYP_ULONG; + return gtNewSimdHWIntrinsicNode(type, tmp3, gtNewIconNode(SHUFFLE_WYZX), NI_AVX2_Permute4x64, + permuteBaseType, simdSize); } - else + + case TYP_SHORT: + case TYP_USHORT: { - intrinsic = isMax ? NI_AdvSimd_Arm64_Max : NI_AdvSimd_Arm64_Min; - } - } - else - { - intrinsic = isMax ? 
NI_AdvSimd_Max : NI_AdvSimd_Min; - } - } -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 + assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); - if (intrinsic != NI_Illegal) - { - GenTree* retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseType, simdSize); + // op1 = Elements 0L, 0U, 1L, 1U, 2L, 2U, 3L, 3U | 4L, 4U, 5L, 5U, 6L, 6U, 7L, 7U + // op2 = Elements 8L, 8U, 9L, 9U, AL, AU, BL, BU | CL, CU, DL, DU, EL, EU, FL, FU + // + // tmp2 = Elements 0L, --, 1L, --, 2L, --, 3L, -- | 4L, --, 5L, --, 6L, --, 7L, -- + // tmp3 = Elements 8L, --, 9L, --, AL, --, BL, -- | CL, --, DL, --, EL, --, FL, -- + // tmp4 = Elements 0L, 1L, 2L, 3L, 8L, 9L, AL, BL | 4L, 5L, 6L, 7L, CL, DL, EL, FL + // return Elements 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L | 8L, 9L, AL, BL, CL, DL, EL, FL + // + // var vcns = Vector256.Create(0x0000FFFF).AsInt16(); + // var tmp1 = Avx2.And(op1.AsInt16(), vcns); + // var tmp2 = Avx2.And(op2.AsInt16(), vcns); + // var tmp3 = Avx2.PackUnsignedSaturate(tmp1, tmp2); + // return Avx2.Permute4x64(tmp3.AsUInt64(), SHUFFLE_WYZX).As(); - if (isScalar) - { - retNode = gtNewSimdToScalarNode(simdBaseType, retNode, simdBaseType, simdSize); - } - return retNode; - } + GenTreeVecCon* vecCon1 = gtNewVconNode(type); - assert(!isScalar); + for (unsigned i = 0; i < (simdSize / 8); i++) + { + vecCon1->gtSimdVal.u64[i] = 0x0000FFFF0000FFFF; + } - GenTree* op1Dup = fgMakeMultiUse(&op1); - GenTree* op2Dup = fgMakeMultiUse(&op2); + GenTree* vecCon2 = gtCloneExpr(vecCon1); - // op1 = op1 < op2 - // -or- - // op1 = op1 > op2 - op1 = gtNewSimdCmpOpNode(isMax ? 
GT_GT : GT_LT, type, op1, op2, simdBaseType, simdSize); + tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseType, simdSize); + tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseType, simdSize); + tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_AVX2_PackUnsignedSaturate, TYP_USHORT, simdSize); - // result = ConditionalSelect(op1, op1Dup, op2Dup) - return gtNewSimdCndSelNode(type, op1, op1Dup, op2Dup, simdBaseType, simdSize); -} + var_types permuteBaseType = (simdBaseType == TYP_BYTE) ? TYP_LONG : TYP_ULONG; + return gtNewSimdHWIntrinsicNode(type, tmp3, gtNewIconNode(SHUFFLE_WYZX), NI_AVX2_Permute4x64, + permuteBaseType, simdSize); + } -GenTree* Compiler::gtNewSimdNarrowNode( - var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize) -{ - assert(varTypeIsSIMD(type)); - assert(getSIMDTypeForSize(simdSize) == type); + case TYP_INT: + case TYP_UINT: + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); - assert(op1 != nullptr); - assert(op1->TypeIs(type)); + // op1 = Elements 0, 1 | 2, 3; 0L, 0U, 1L, 1U | 2L, 2U, 3L, 3U + // op2 = Elements 4, 5 | 6, 7; 4L, 4U, 5L, 5U | 6L, 6U, 7L, 7U + // + // tmp1 = Elements 0L, 4L, 0U, 4U | 2L, 6L, 2U, 6U + // tmp2 = Elements 1L, 5L, 1U, 5U | 3L, 7L, 3U, 7U + // tmp3 = Elements 0L, 1L, 4L, 5L | 2L, 3L, 6L, 7L + // return Elements 0L, 1L, 2L, 3L | 4L, 5L, 6L, 7L + // + // var tmp1 = Avx2.UnpackLow(op1, op2); + // var tmp2 = Avx2.UnpackHigh(op1, op2); + // var tmp3 = Avx2.UnpackLow(tmp1, tmp2); + // return Avx2.Permute4x64(tmp3.AsUInt64(), SHUFFLE_WYZX).AsUInt32(); + + var_types opBaseType = (simdBaseType == TYP_INT) ? 
TYP_LONG : TYP_ULONG; + + GenTree* op1Dup = fgMakeMultiUse(&op1); + GenTree* op2Dup = fgMakeMultiUse(&op2); + + tmp1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX2_UnpackLow, simdBaseType, simdSize); + tmp2 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_AVX2_UnpackHigh, simdBaseType, simdSize); + tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_AVX2_UnpackLow, simdBaseType, simdSize); + + return gtNewSimdHWIntrinsicNode(type, tmp3, gtNewIconNode(SHUFFLE_WYZX), NI_AVX2_Permute4x64, + opBaseType, simdSize); + } + + case TYP_FLOAT: + { + // op1 = Elements 0, 1 | 2, 3 + // op2 = Elements 4, 5 | 6, 7 + // + // tmp1 = Elements 0, 1, 2, 3 | -, -, -, - + // tmp1 = Elements 4, 5, 6, 7 + // return Elements 0, 1, 2, 3 | 4, 5, 6, 7 + // + // var tmp1 = Avx.ConvertToVector128Single(op1).ToVector256Unsafe(); + // var tmp2 = Avx.ConvertToVector128Single(op2); + // return tmp1.WithUpper(tmp2); - assert(op2 != nullptr); - assert(op2->TypeIs(type)); + var_types opBaseType = TYP_DOUBLE; - assert(varTypeIsArithmetic(simdBaseType) && !varTypeIsLong(simdBaseType)); + tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_AVX_ConvertToVector128Single, opBaseType, simdSize); + tmp2 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, NI_AVX_ConvertToVector128Single, opBaseType, simdSize); - GenTree* tmp1; - GenTree* tmp2; + tmp1 = gtNewSimdHWIntrinsicNode(type, tmp1, NI_Vector128_ToVector256Unsafe, simdBaseType, 16); + return gtNewSimdWithUpperNode(type, tmp1, tmp2, simdBaseType, simdSize); + } -#if defined(TARGET_XARCH) - GenTree* tmp3; - if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) + default: + { + unreached(); + } + } + } + else { - // This is the same in principle to the other comments below, however due to - // code formatting, its too long to reasonably display here. - - assert((simdSize == 16) || (simdSize == 32) || (simdSize == 64)); - var_types tmpSimdType = (simdSize == 64) ? 
TYP_SIMD32 : TYP_SIMD16; - - NamedIntrinsic intrinsicId; - var_types opBaseType; + assert(simdSize == 16); switch (simdBaseType) { case TYP_BYTE: + case TYP_UBYTE: { - if (simdSize == 64) - { - intrinsicId = NI_AVX512_ConvertToVector256SByte; - } - else - { - intrinsicId = NI_AVX512_ConvertToVector128SByte; - } + // op1 = Elements 0, 1, 2, 3, 4, 5, 6, 7; 0L, 0U, 1L, 1U, 2L, 2U, 3L, 3U, 4L, 4U, 5L, 5U, 6L, 6U, 7L, 7U + // op2 = Elements 8, 9, A, B, C, D, E, F; 8L, 8U, 9L, 9U, AL, AU, BL, BU, CL, CU, DL, DU, EL, EU, FL, FU + // + // tmp2 = Elements 0L, --, 1L, --, 2L, --, 3L, --, 4L, --, 5L, --, 6L, --, 7L, -- + // tmp3 = Elements 8L, --, 9L, --, AL, --, BL, --, CL, --, DL, --, EL, --, FL, -- + // return Elements 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, AL, BL, CL, DL, EL, FL + // + // var vcns = Vector128.Create((ushort)(0x00FF)).AsSByte(); + // var tmp1 = Sse2.And(op1.AsSByte(), vcns); + // var tmp2 = Sse2.And(op2.AsSByte(), vcns); + // return Sse2.PackUnsignedSaturate(tmp1, tmp2).As(); - opBaseType = TYP_SHORT; - break; - } + GenTreeVecCon* vecCon1 = gtNewVconNode(type); - case TYP_UBYTE: - { - if (simdSize == 64) - { - intrinsicId = NI_AVX512_ConvertToVector256Byte; - } - else + for (unsigned i = 0; i < (simdSize / 8); i++) { - intrinsicId = NI_AVX512_ConvertToVector128Byte; + vecCon1->gtSimdVal.u64[i] = 0x00FF00FF00FF00FF; } - opBaseType = TYP_USHORT; - break; - } + GenTree* vecCon2 = gtCloneExpr(vecCon1); - case TYP_SHORT: - { - if (simdSize == 64) - { - intrinsicId = NI_AVX512_ConvertToVector256Int16; - } - else - { - intrinsicId = NI_AVX512_ConvertToVector128Int16; - } + tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseType, simdSize); + tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseType, simdSize); - opBaseType = TYP_INT; - break; + return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_PackUnsignedSaturate, TYP_UBYTE, simdSize); } + case TYP_SHORT: case TYP_USHORT: { - if (simdSize == 64) - { - intrinsicId = 
NI_AVX512_ConvertToVector256UInt16; - } - else + // op1 = Elements 0, 1, 2, 3; 0L, 0U, 1L, 1U, 2L, 2U, 3L, 3U + // op2 = Elements 4, 5, 6, 7; 4L, 4U, 5L, 5U, 6L, 6U, 7L, 7U + // + // tmp2 = Elements 0L, --, 1L, --, 2L, --, 3L, -- + // tmp3 = Elements 4L, --, 5L, --, 6L, --, 7L, -- + // return Elements 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L + // + // var vcns = Vector128.Create(0x0000FFFF).AsInt16(); + // var tmp1 = Sse2.And(op1.AsInt16(), vcns); + // var tmp2 = Sse2.And(op2.AsInt16(), vcns); + // return Sse2.PackUnsignedSaturate(tmp1, tmp2).As(); + + GenTreeVecCon* vecCon1 = gtNewVconNode(type); + + for (unsigned i = 0; i < (simdSize / 8); i++) { - intrinsicId = NI_AVX512_ConvertToVector128UInt16; + vecCon1->gtSimdVal.u64[i] = 0x0000FFFF0000FFFF; } - opBaseType = TYP_UINT; - break; + GenTree* vecCon2 = gtCloneExpr(vecCon1); + + tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseType, simdSize); + tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseType, simdSize); + + return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_PackUnsignedSaturate, TYP_USHORT, + simdSize); } case TYP_INT: + case TYP_UINT: { - if (simdSize == 64) - { - intrinsicId = NI_AVX512_ConvertToVector256Int32; - } - else - { - intrinsicId = NI_AVX512_ConvertToVector128Int32; - } + // op1 = Elements 0, 1; 0L, 0U, 1L, 1U + // op2 = Elements 2, 3; 2L, 2U, 3L, 3U + // + // tmp1 = Elements 0L, 2L, 0U, 2U + // tmp2 = Elements 1L, 3L, 1U, 3U + // return Elements 0L, 1L, 2L, 3L + // + // var tmp1 = Sse2.UnpackLow(op1.AsUInt32(), op2.AsUInt32()); + // var tmp2 = Sse2.UnpackHigh(op1.AsUInt32(), op2.AsUInt32()); + // return Sse2.UnpackLow(tmp1, tmp2).As(); - opBaseType = TYP_LONG; - break; + GenTree* op1Dup = fgMakeMultiUse(&op1); + GenTree* op2Dup = fgMakeMultiUse(&op2); + + tmp1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_X86Base_UnpackLow, simdBaseType, simdSize); + tmp2 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_X86Base_UnpackHigh, simdBaseType, simdSize); + + return 
gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_UnpackLow, simdBaseType, simdSize); } - case TYP_UINT: + case TYP_FLOAT: { - if (simdSize == 64) - { - intrinsicId = NI_AVX512_ConvertToVector256UInt32; - } - else - { - intrinsicId = NI_AVX512_ConvertToVector128UInt32; - } + // op1 = Elements 0, 1 + // op2 = Elements 2, 3 + // + // tmp1 = Elements 0, 1, -, - + // tmp1 = Elements 2, 3, -, - + // return Elements 0, 1, 2, 3 + // + // var tmp1 = Sse2.ConvertToVector128Single(op1); + // var tmp2 = Sse2.ConvertToVector128Single(op2); + // return Sse.MoveLowToHigh(tmp1, tmp2); - opBaseType = TYP_ULONG; - break; + var_types opBaseType = TYP_DOUBLE; + + tmp1 = gtNewSimdHWIntrinsicNode(type, op1, NI_X86Base_ConvertToVector128Single, opBaseType, simdSize); + tmp2 = gtNewSimdHWIntrinsicNode(type, op2, NI_X86Base_ConvertToVector128Single, opBaseType, simdSize); + + return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_MoveLowToHigh, simdBaseType, simdSize); } - case TYP_FLOAT: + default: { - if (simdSize == 64) - { - intrinsicId = NI_AVX512_ConvertToVector256Single; - } - else if (simdSize == 32) - { - intrinsicId = NI_AVX_ConvertToVector128Single; - } - else - { - intrinsicId = NI_X86Base_ConvertToVector128Single; - } + unreached(); + } + } + } +#elif defined(TARGET_ARM64) + if (simdSize == 16) + { + if (varTypeIsFloating(simdBaseType)) + { + // var tmp1 = AdvSimd.Arm64.ConvertToSingleLower(op1); + // return AdvSimd.Arm64.ConvertToSingleUpper(tmp1, op2); + + tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, NI_AdvSimd_Arm64_ConvertToSingleLower, simdBaseType, 8); + return gtNewSimdHWIntrinsicNode(type, tmp1, op2, NI_AdvSimd_Arm64_ConvertToSingleUpper, simdBaseType, + simdSize); + } + else + { + // var tmp1 = AdvSimd.ExtractNarrowingLower(op1); + // return AdvSimd.ExtractNarrowingUpper(tmp1, op2); + + tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, NI_AdvSimd_ExtractNarrowingLower, simdBaseType, 8); + return gtNewSimdHWIntrinsicNode(type, tmp1, op2, 
NI_AdvSimd_ExtractNarrowingUpper, simdBaseType, simdSize); + } + } + else if (varTypeIsFloating(simdBaseType)) + { + // var tmp1 = op1.ToVector128Unsafe(); + // var tmp2 = AdvSimd.InsertScalar(tmp1, op2); + // return AdvSimd.Arm64.ConvertToSingleLower(tmp2); + + var_types tmp2BaseType = TYP_DOUBLE; + + tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector64_ToVector128Unsafe, simdBaseType, simdSize); + tmp2 = gtNewSimdWithUpperNode(TYP_SIMD16, tmp1, op2, tmp2BaseType, 16); + + return gtNewSimdHWIntrinsicNode(type, tmp2, NI_AdvSimd_Arm64_ConvertToSingleLower, simdBaseType, simdSize); + } + else + { + // var tmp1 = op1.ToVector128Unsafe(); + // var tmp2 = tmp1.WithUpper(op2); + // return AdvSimd.ExtractNarrowingLower(tmp2); + + tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector64_ToVector128Unsafe, simdBaseType, simdSize); + tmp2 = gtNewSimdWithUpperNode(TYP_SIMD16, tmp1, op2, simdBaseType, 16); + + return gtNewSimdHWIntrinsicNode(type, tmp2, NI_AdvSimd_ExtractNarrowingLower, simdBaseType, simdSize); + } +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 +} + +//---------------------------------------------------------------------------------------------- +// Compiler::gtNewSimdCreateGeometricSequenceNode: Creates a new simd CreateGeometricSequence node +// +// Arguments: +// type - The return type of SIMD node being created +// op1 - The initial value +// op2 - The multiplier value +// simdBaseType - The base type of SIMD type of the intrinsic +// simdSize - The size of the SIMD type of the intrinsic +// +// Returns: +// The created CreateGeometricSequence node +// +GenTree* Compiler::gtNewSimdCreateGeometricSequenceNode( + var_types type, GenTree* op1, GenTree* op2, var_types simdBaseType, unsigned simdSize) +{ + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + assert(varTypeIsArithmetic(simdBaseType)); + assert(op2->OperIsConst()); - opBaseType = TYP_DOUBLE; - break; - } + // op2 is expected to 
be constant. When op1 is also constant the whole sequence can be folded + // to a constant; otherwise build the constant multiplier vector and leave one broadcast+multiply. - default: + GenTreeVecCon* vecCon = gtNewVconNode(type); + uint32_t simdCount = getSIMDVectorLength(simdSize, simdBaseType); + bool isPartial = !op1->OperIsConst(); + + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: + { + uint64_t initial = isPartial ? 1 : static_cast(op1->AsIntConCommon()->IntegralValue()); + uint64_t multiplier = static_cast(op2->AsIntConCommon()->IntegralValue()); + + for (uint32_t index = 0; index < simdCount; index++) { - unreached(); + vecCon->gtSimdVal.u8[index] = static_cast(initial); + initial *= multiplier; } + break; } - tmp1 = gtNewSimdHWIntrinsicNode(tmpSimdType, op1, intrinsicId, opBaseType, simdSize); - tmp2 = gtNewSimdHWIntrinsicNode(tmpSimdType, op2, intrinsicId, opBaseType, simdSize); - - if (simdSize == 16) + case TYP_SHORT: + case TYP_USHORT: { - return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_MoveLowToHigh, TYP_FLOAT, simdSize); - } - - intrinsicId = (simdSize == 64) ? NI_Vector256_ToVector512Unsafe : NI_Vector128_ToVector256Unsafe; + uint64_t initial = isPartial ? 1 : static_cast(op1->AsIntConCommon()->IntegralValue()); + uint64_t multiplier = static_cast(op2->AsIntConCommon()->IntegralValue()); - tmp1 = gtNewSimdHWIntrinsicNode(type, tmp1, intrinsicId, simdBaseType, simdSize / 2); - return gtNewSimdWithUpperNode(type, tmp1, tmp2, simdBaseType, simdSize); - } - else if (simdSize == 32) - { - switch (simdBaseType) - { - case TYP_BYTE: - case TYP_UBYTE: + for (uint32_t index = 0; index < simdCount; index++) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); - - // This is the same in principle to the other comments below, however due to - // code formatting, its too long to reasonably display here. 
- GenTreeVecCon* vecCon1 = gtNewVconNode(type); - - for (unsigned i = 0; i < (simdSize / 8); i++) - { - vecCon1->gtSimdVal.u64[i] = 0x00FF00FF00FF00FF; - } - - GenTree* vecCon2 = gtCloneExpr(vecCon1); - - tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseType, simdSize); - tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseType, simdSize); - tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_AVX2_PackUnsignedSaturate, TYP_UBYTE, simdSize); - - var_types permuteBaseType = (simdBaseType == TYP_BYTE) ? TYP_LONG : TYP_ULONG; - return gtNewSimdHWIntrinsicNode(type, tmp3, gtNewIconNode(SHUFFLE_WYZX), NI_AVX2_Permute4x64, - permuteBaseType, simdSize); + vecCon->gtSimdVal.u16[index] = static_cast(initial); + initial *= multiplier; } + break; + } - case TYP_SHORT: - case TYP_USHORT: - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); - - // op1 = Elements 0L, 0U, 1L, 1U, 2L, 2U, 3L, 3U | 4L, 4U, 5L, 5U, 6L, 6U, 7L, 7U - // op2 = Elements 8L, 8U, 9L, 9U, AL, AU, BL, BU | CL, CU, DL, DU, EL, EU, FL, FU - // - // tmp2 = Elements 0L, --, 1L, --, 2L, --, 3L, -- | 4L, --, 5L, --, 6L, --, 7L, -- - // tmp3 = Elements 8L, --, 9L, --, AL, --, BL, -- | CL, --, DL, --, EL, --, FL, -- - // tmp4 = Elements 0L, 1L, 2L, 3L, 8L, 9L, AL, BL | 4L, 5L, 6L, 7L, CL, DL, EL, FL - // return Elements 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L | 8L, 9L, AL, BL, CL, DL, EL, FL - // - // var vcns = Vector256.Create(0x0000FFFF).AsInt16(); - // var tmp1 = Avx2.And(op1.AsInt16(), vcns); - // var tmp2 = Avx2.And(op2.AsInt16(), vcns); - // var tmp3 = Avx2.PackUnsignedSaturate(tmp1, tmp2); - // return Avx2.Permute4x64(tmp3.AsUInt64(), SHUFFLE_WYZX).As(); - - GenTreeVecCon* vecCon1 = gtNewVconNode(type); - - for (unsigned i = 0; i < (simdSize / 8); i++) - { - vecCon1->gtSimdVal.u64[i] = 0x0000FFFF0000FFFF; - } + case TYP_INT: + case TYP_UINT: + { + uint64_t initial = isPartial ? 
1 : static_cast(op1->AsIntConCommon()->IntegralValue()); + uint64_t multiplier = static_cast(op2->AsIntConCommon()->IntegralValue()); - GenTree* vecCon2 = gtCloneExpr(vecCon1); + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u32[index] = static_cast(initial); + initial *= multiplier; + } + break; + } - tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseType, simdSize); - tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseType, simdSize); - tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_AVX2_PackUnsignedSaturate, TYP_USHORT, simdSize); + case TYP_LONG: + case TYP_ULONG: + { + uint64_t initial = isPartial ? 1 : static_cast(op1->AsIntConCommon()->IntegralValue()); + uint64_t multiplier = static_cast(op2->AsIntConCommon()->IntegralValue()); - var_types permuteBaseType = (simdBaseType == TYP_BYTE) ? TYP_LONG : TYP_ULONG; - return gtNewSimdHWIntrinsicNode(type, tmp3, gtNewIconNode(SHUFFLE_WYZX), NI_AVX2_Permute4x64, - permuteBaseType, simdSize); + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u64[index] = initial; + initial *= multiplier; } + break; + } - case TYP_INT: - case TYP_UINT: + case TYP_FLOAT: + { + float initial = isPartial ? 
1.0f : static_cast(op1->AsDblCon()->DconValue()); + float multiplier = static_cast(op2->AsDblCon()->DconValue()); + + for (uint32_t index = 0; index < simdCount; index++) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); + vecCon->gtSimdVal.f32[index] = initial; + initial *= multiplier; + } + break; + } - // op1 = Elements 0, 1 | 2, 3; 0L, 0U, 1L, 1U | 2L, 2U, 3L, 3U - // op2 = Elements 4, 5 | 6, 7; 4L, 4U, 5L, 5U | 6L, 6U, 7L, 7U - // - // tmp1 = Elements 0L, 4L, 0U, 4U | 2L, 6L, 2U, 6U - // tmp2 = Elements 1L, 5L, 1U, 5U | 3L, 7L, 3U, 7U - // tmp3 = Elements 0L, 1L, 4L, 5L | 2L, 3L, 6L, 7L - // return Elements 0L, 1L, 2L, 3L | 4L, 5L, 6L, 7L - // - // var tmp1 = Avx2.UnpackLow(op1, op2); - // var tmp2 = Avx2.UnpackHigh(op1, op2); - // var tmp3 = Avx2.UnpackLow(tmp1, tmp2); - // return Avx2.Permute4x64(tmp3.AsUInt64(), SHUFFLE_WYZX).AsUInt32(); + case TYP_DOUBLE: + { + double initial = isPartial ? 1.0 : op1->AsDblCon()->DconValue(); + double multiplier = op2->AsDblCon()->DconValue(); - var_types opBaseType = (simdBaseType == TYP_INT) ? 
TYP_LONG : TYP_ULONG; + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.f64[index] = initial; + initial *= multiplier; + } + break; + } - GenTree* op1Dup = fgMakeMultiUse(&op1); - GenTree* op2Dup = fgMakeMultiUse(&op2); + default: + { + unreached(); + } + } - tmp1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX2_UnpackLow, simdBaseType, simdSize); - tmp2 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_AVX2_UnpackHigh, simdBaseType, simdSize); - tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_AVX2_UnpackLow, simdBaseType, simdSize); + GenTree* result = vecCon; - return gtNewSimdHWIntrinsicNode(type, tmp3, gtNewIconNode(SHUFFLE_WYZX), NI_AVX2_Permute4x64, - opBaseType, simdSize); - } + if (isPartial) + { + GenTree* initial = gtNewSimdCreateBroadcastNode(type, op1, simdBaseType, simdSize); + result = gtNewSimdBinOpNode(GT_MUL, type, result, initial, simdBaseType, simdSize); + } - case TYP_FLOAT: - { - // op1 = Elements 0, 1 | 2, 3 - // op2 = Elements 4, 5 | 6, 7 - // - // tmp1 = Elements 0, 1, 2, 3 | -, -, -, - - // tmp1 = Elements 4, 5, 6, 7 - // return Elements 0, 1, 2, 3 | 4, 5, 6, 7 - // - // var tmp1 = Avx.ConvertToVector128Single(op1).ToVector256Unsafe(); - // var tmp2 = Avx.ConvertToVector128Single(op2); - // return tmp1.WithUpper(tmp2); + return result; +} - var_types opBaseType = TYP_DOUBLE; +//---------------------------------------------------------------------------------------------- +// Compiler::gtNewSimdCreateAlternatingSequenceNode: Creates a new simd CreateAlternatingSequence node +// +// Arguments: +// type - The return type of SIMD node being created +// op1 - The even-indexed value +// op2 - The odd-indexed value +// simdBaseType - The base type of SIMD type of the intrinsic +// simdSize - The size of the SIMD type of the intrinsic +// +// Returns: +// The created CreateAlternatingSequence node +// +GenTree* Compiler::gtNewSimdCreateAlternatingSequenceNode( + var_types type, GenTree* op1, GenTree* op2, 
var_types simdBaseType, unsigned simdSize) +{ + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + assert(varTypeIsArithmetic(simdBaseType)); - tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_AVX_ConvertToVector128Single, opBaseType, simdSize); - tmp2 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, NI_AVX_ConvertToVector128Single, opBaseType, simdSize); + // Fold constant pairs directly. Otherwise build two broadcasts and zip them, except where + // the target has a better way to broadcast the two-lane pattern directly. - tmp1 = gtNewSimdHWIntrinsicNode(type, tmp1, NI_Vector128_ToVector256Unsafe, simdBaseType, 16); - return gtNewSimdWithUpperNode(type, tmp1, tmp2, simdBaseType, simdSize); - } + uint32_t simdCount = getSIMDVectorLength(simdSize, simdBaseType); - default: - { - unreached(); - } - } - } - else + if (simdCount == 1) { - assert(simdSize == 16); + GenTree* result = gtNewSimdCreateBroadcastNode(type, op1, simdBaseType, simdSize); + return gtWrapWithSideEffects(result, op2, GTF_ALL_EFFECT); + } - switch (simdBaseType) - { - case TYP_BYTE: - case TYP_UBYTE: - { - // op1 = Elements 0, 1, 2, 3, 4, 5, 6, 7; 0L, 0U, 1L, 1U, 2L, 2U, 3L, 3U, 4L, 4U, 5L, 5U, 6L, 6U, 7L, 7U - // op2 = Elements 8, 9, A, B, C, D, E, F; 8L, 8U, 9L, 9U, AL, AU, BL, BU, CL, CU, DL, DU, EL, EU, FL, FU - // - // tmp2 = Elements 0L, --, 1L, --, 2L, --, 3L, --, 4L, --, 5L, --, 6L, --, 7L, -- - // tmp3 = Elements 8L, --, 9L, --, AL, --, BL, --, CL, --, DL, --, EL, --, FL, -- - // return Elements 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, AL, BL, CL, DL, EL, FL - // - // var vcns = Vector128.Create((ushort)(0x00FF)).AsSByte(); - // var tmp1 = Sse2.And(op1.AsSByte(), vcns); - // var tmp2 = Sse2.And(op2.AsSByte(), vcns); - // return Sse2.PackUnsignedSaturate(tmp1, tmp2).As(); + if (op1->OperIsConst() && op2->OperIsConst()) + { + GenTreeVecCon* vecCon = gtNewVconNode(type); - GenTreeVecCon* vecCon1 = gtNewVconNode(type); + switch (simdBaseType) + { + case TYP_BYTE: + 
case TYP_UBYTE: + { + uint64_t even = static_cast(op1->AsIntConCommon()->IntegralValue()); + uint64_t odd = static_cast(op2->AsIntConCommon()->IntegralValue()); - for (unsigned i = 0; i < (simdSize / 8); i++) + for (uint32_t index = 0; index < simdCount; index++) { - vecCon1->gtSimdVal.u64[i] = 0x00FF00FF00FF00FF; + vecCon->gtSimdVal.u8[index] = static_cast(((index & 1) == 0) ? even : odd); } - - GenTree* vecCon2 = gtCloneExpr(vecCon1); - - tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseType, simdSize); - tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseType, simdSize); - - return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_PackUnsignedSaturate, TYP_UBYTE, simdSize); + break; } case TYP_SHORT: case TYP_USHORT: { - // op1 = Elements 0, 1, 2, 3; 0L, 0U, 1L, 1U, 2L, 2U, 3L, 3U - // op2 = Elements 4, 5, 6, 7; 4L, 4U, 5L, 5U, 6L, 6U, 7L, 7U - // - // tmp2 = Elements 0L, --, 1L, --, 2L, --, 3L, -- - // tmp3 = Elements 4L, --, 5L, --, 6L, --, 7L, -- - // return Elements 0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L - // - // var vcns = Vector128.Create(0x0000FFFF).AsInt16(); - // var tmp1 = Sse2.And(op1.AsInt16(), vcns); - // var tmp2 = Sse2.And(op2.AsInt16(), vcns); - // return Sse2.PackUnsignedSaturate(tmp1, tmp2).As(); - - GenTreeVecCon* vecCon1 = gtNewVconNode(type); + uint64_t even = static_cast(op1->AsIntConCommon()->IntegralValue()); + uint64_t odd = static_cast(op2->AsIntConCommon()->IntegralValue()); - for (unsigned i = 0; i < (simdSize / 8); i++) + for (uint32_t index = 0; index < simdCount; index++) { - vecCon1->gtSimdVal.u64[i] = 0x0000FFFF0000FFFF; + vecCon->gtSimdVal.u16[index] = static_cast(((index & 1) == 0) ? 
even : odd); } - - GenTree* vecCon2 = gtCloneExpr(vecCon1); - - tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseType, simdSize); - tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseType, simdSize); - - return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_PackUnsignedSaturate, TYP_USHORT, - simdSize); + break; } case TYP_INT: case TYP_UINT: { - // op1 = Elements 0, 1; 0L, 0U, 1L, 1U - // op2 = Elements 2, 3; 2L, 2U, 3L, 3U - // - // tmp1 = Elements 0L, 2L, 0U, 2U - // tmp2 = Elements 1L, 3L, 1U, 3U - // return Elements 0L, 1L, 2L, 3L - // - // var tmp1 = Sse2.UnpackLow(op1.AsUInt32(), op2.AsUInt32()); - // var tmp2 = Sse2.UnpackHigh(op1.AsUInt32(), op2.AsUInt32()); - // return Sse2.UnpackLow(tmp1, tmp2).As(); + uint64_t even = static_cast(op1->AsIntConCommon()->IntegralValue()); + uint64_t odd = static_cast(op2->AsIntConCommon()->IntegralValue()); - GenTree* op1Dup = fgMakeMultiUse(&op1); - GenTree* op2Dup = fgMakeMultiUse(&op2); + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u32[index] = static_cast(((index & 1) == 0) ? even : odd); + } + break; + } - tmp1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_X86Base_UnpackLow, simdBaseType, simdSize); - tmp2 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_X86Base_UnpackHigh, simdBaseType, simdSize); + case TYP_LONG: + case TYP_ULONG: + { + uint64_t even = static_cast(op1->AsIntConCommon()->IntegralValue()); + uint64_t odd = static_cast(op2->AsIntConCommon()->IntegralValue()); - return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_UnpackLow, simdBaseType, simdSize); + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u64[index] = ((index & 1) == 0) ? 
even : odd; + } + break; } case TYP_FLOAT: { - // op1 = Elements 0, 1 - // op2 = Elements 2, 3 - // - // tmp1 = Elements 0, 1, -, - - // tmp1 = Elements 2, 3, -, - - // return Elements 0, 1, 2, 3 - // - // var tmp1 = Sse2.ConvertToVector128Single(op1); - // var tmp2 = Sse2.ConvertToVector128Single(op2); - // return Sse.MoveLowToHigh(tmp1, tmp2); + double even = op1->AsDblCon()->DconValue(); + double odd = op2->AsDblCon()->DconValue(); - var_types opBaseType = TYP_DOUBLE; + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.f32[index] = static_cast(((index & 1) == 0) ? even : odd); + } + break; + } - tmp1 = gtNewSimdHWIntrinsicNode(type, op1, NI_X86Base_ConvertToVector128Single, opBaseType, simdSize); - tmp2 = gtNewSimdHWIntrinsicNode(type, op2, NI_X86Base_ConvertToVector128Single, opBaseType, simdSize); + case TYP_DOUBLE: + { + double even = op1->AsDblCon()->DconValue(); + double odd = op2->AsDblCon()->DconValue(); - return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_MoveLowToHigh, simdBaseType, simdSize); + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.f64[index] = ((index & 1) == 0) ? 
even : odd; + } + break; } default: @@ -27080,55 +27008,127 @@ GenTree* Compiler::gtNewSimdNarrowNode( unreached(); } } + + return vecCon; } -#elif defined(TARGET_ARM64) - if (simdSize == 16) + +#if defined(TARGET_XARCH) + if (((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)) && + ((compOpportunisticallyDependsOn(InstructionSet_AVX2) && ((simdSize == 16) || (simdSize == 32))) || + (compOpportunisticallyDependsOn(InstructionSet_AVX512) && (simdSize == 64)))) { - if (varTypeIsFloating(simdBaseType)) - { - // var tmp1 = AdvSimd.Arm64.ConvertToSingleLower(op1); - // return AdvSimd.Arm64.ConvertToSingleUpper(tmp1, op2); + // var pattern = Vector128.CreateScalarUnsafe(op1).WithElement(1, op2); + // return Broadcast(pattern.AsInt64()).As(); - tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, NI_AdvSimd_Arm64_ConvertToSingleLower, simdBaseType, 8); - return gtNewSimdHWIntrinsicNode(type, tmp1, op2, NI_AdvSimd_Arm64_ConvertToSingleUpper, simdBaseType, + GenTree* pattern = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, simdBaseType, 16); + pattern = gtNewSimdWithElementNode(TYP_SIMD16, pattern, gtNewIconNode(1), op2, simdBaseType, 16); + + if (simdSize == 64) + { + return gtNewSimdHWIntrinsicNode(type, pattern, NI_AVX512_BroadcastPairScalarToVector512, simdBaseType, simdSize); } - else - { - // var tmp1 = AdvSimd.ExtractNarrowingLower(op1); - // return AdvSimd.ExtractNarrowingUpper(tmp1, op2); - tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, NI_AdvSimd_ExtractNarrowingLower, simdBaseType, 8); - return gtNewSimdHWIntrinsicNode(type, tmp1, op2, NI_AdvSimd_ExtractNarrowingUpper, simdBaseType, simdSize); - } + var_types broadcastBaseType = (simdBaseType == TYP_INT) ? TYP_LONG : TYP_ULONG; + NamedIntrinsic broadcast = + (simdSize == 16) ? 
NI_AVX2_BroadcastScalarToVector128 : NI_AVX2_BroadcastScalarToVector256; + return gtNewSimdHWIntrinsicNode(type, pattern, broadcast, broadcastBaseType, simdSize); } - else if (varTypeIsFloating(simdBaseType)) - { - // var tmp1 = op1.ToVector128Unsafe(); - // var tmp2 = AdvSimd.InsertScalar(tmp1, op2); - // return AdvSimd.Arm64.ConvertToSingleLower(tmp2); +#endif // TARGET_XARCH - var_types tmp2BaseType = TYP_DOUBLE; + GenTree* even = gtNewSimdCreateBroadcastNode(type, op1, simdBaseType, simdSize); + GenTree* odd = gtNewSimdCreateBroadcastNode(type, op2, simdBaseType, simdSize); - tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector64_ToVector128Unsafe, simdBaseType, simdSize); - tmp2 = gtNewSimdWithUpperNode(TYP_SIMD16, tmp1, op2, tmp2BaseType, 16); + return gtNewSimdZipNode(type, even, odd, simdBaseType, simdSize, false); +} - return gtNewSimdHWIntrinsicNode(type, tmp2, NI_AdvSimd_Arm64_ConvertToSingleLower, simdBaseType, simdSize); - } - else +//---------------------------------------------------------------------------------------------- +// Compiler::gtNewSimdGetSignSequenceNode: Creates a new simd SignSequence node +// +// Arguments: +// type - The return type of SIMD node being created +// simdBaseType - The base type of SIMD type of the intrinsic +// simdSize - The size of the SIMD type of the intrinsic +// +// Returns: +// The created SignSequence node +// +GenTree* Compiler::gtNewSimdGetSignSequenceNode(var_types type, var_types simdBaseType, unsigned simdSize) +{ + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + assert(varTypeIsArithmetic(simdBaseType)); + + GenTreeVecCon* vecCon = gtNewVconNode(type); + uint32_t simdCount = getSIMDVectorLength(simdSize, simdBaseType); + + switch (simdBaseType) { - // var tmp1 = op1.ToVector128Unsafe(); - // var tmp2 = tmp1.WithUpper(op2); - // return AdvSimd.ExtractNarrowingLower(tmp2); + case TYP_BYTE: + case TYP_UBYTE: + { + for (uint32_t index = 0; index < simdCount; index++) + { + 
vecCon->gtSimdVal.u8[index] = ((index & 1) == 0) ? 1 : UINT8_MAX; + } + break; + } - tmp1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector64_ToVector128Unsafe, simdBaseType, simdSize); - tmp2 = gtNewSimdWithUpperNode(TYP_SIMD16, tmp1, op2, simdBaseType, 16); + case TYP_SHORT: + case TYP_USHORT: + { + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u16[index] = ((index & 1) == 0) ? 1 : UINT16_MAX; + } + break; + } - return gtNewSimdHWIntrinsicNode(type, tmp2, NI_AdvSimd_ExtractNarrowingLower, simdBaseType, simdSize); + case TYP_INT: + case TYP_UINT: + { + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u32[index] = ((index & 1) == 0) ? 1 : UINT32_MAX; + } + break; + } + + case TYP_LONG: + case TYP_ULONG: + { + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.u64[index] = ((index & 1) == 0) ? 1 : UINT64_MAX; + } + break; + } + + case TYP_FLOAT: + { + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.f32[index] = ((index & 1) == 0) ? 1.0f : -1.0f; + } + break; + } + + case TYP_DOUBLE: + { + for (uint32_t index = 0; index < simdCount; index++) + { + vecCon->gtSimdVal.f64[index] = ((index & 1) == 0) ? 
1.0 : -1.0; + } + break; + } + + default: + { + unreached(); + } } -#else -#error Unsupported platform -#endif // !TARGET_XARCH && !TARGET_ARM64 + + return vecCon; } //---------------------------------------------------------------------------------------------- From db983ebc28e48b9c15720cb15e3e35b92fddd1c5 Mon Sep 17 00:00:00 2001 From: Steven He Date: Sun, 3 May 2026 23:49:59 +0900 Subject: [PATCH 11/24] Resolve feedbacks --- src/coreclr/jit/hwintrinsicxarch.cpp | 5 ----- .../System.Private.CoreLib/src/System/Numerics/Vector_1.cs | 5 ++++- .../src/System/Runtime/Intrinsics/Vector128_1.cs | 5 ++++- .../src/System/Runtime/Intrinsics/Vector256_1.cs | 5 ++++- .../src/System/Runtime/Intrinsics/Vector512_1.cs | 5 ++++- 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 7a1a220f5698ef..821bda88cf3223 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -2269,11 +2269,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - if (!impStackTop(1).val->OperIsConst() && varTypeIsFloating(simdBaseType)) - { - break; - } - if (!impStackTop(1).val->OperIsConst() && (simdSize == 32) && varTypeIsIntegral(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs index ef2e9f6bd2f55b..6744f5886ea099 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_1.cs @@ -164,7 +164,10 @@ public static Vector Indices } } - /// Gets a new with elements that alternate between one and negative one, starting with one. 
+ /// + /// Gets a new with elements that alternate between one and negative one, starting with one; + /// for unsigned element types, the negative-one value is represented as all bits set. + /// /// The type of the vector () is not supported. public static Vector SignSequence { diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs index 39ca308a3cfc4e..b1a16747d3ee81 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs @@ -74,7 +74,10 @@ public static Vector128 Indices } } - /// Gets a new with elements that alternate between one and negative one, starting with one. + /// + /// Gets a new with elements that alternate between one and negative one, starting with one; + /// for unsigned element types, the negative-one value is represented as all bits set. + /// /// The type of the vector () is not supported. public static Vector128 SignSequence { diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs index 1f64922bc77eb1..1b0b371ab1c710 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs @@ -73,7 +73,10 @@ public static Vector256 Indices } } - /// Gets a new with elements that alternate between one and negative one, starting with one. + /// + /// Gets a new with elements that alternate between one and negative one, starting with one; + /// for unsigned element types, the negative-one value is represented as all bits set. + /// /// The type of the vector () is not supported. 
public static Vector256 SignSequence { diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512_1.cs index abbd5a3f9561d3..cb0b30cc0d8906 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512_1.cs @@ -73,7 +73,10 @@ public static Vector512 Indices } } - /// Gets a new with elements that alternate between one and negative one, starting with one. + /// + /// Gets a new with elements that alternate between one and negative one, starting with one; + /// for unsigned element types, the negative-one value is represented as all bits set. + /// /// The type of the vector () is not supported. public static Vector512 SignSequence { From be101f187b84ec1464df792a5045363ff90a2669 Mon Sep 17 00:00:00 2001 From: Steven He Date: Mon, 4 May 2026 00:01:00 +0900 Subject: [PATCH 12/24] Address more feedback issues --- src/coreclr/jit/hwintrinsicarm64.cpp | 5 ----- .../src/System/Runtime/Intrinsics/Vector64_1.cs | 5 ++++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index a0085e9c807c98..7554c4b44b9c72 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1344,11 +1344,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - if (!impStackTop(1).val->OperIsConst() && varTypeIsFloating(simdBaseType)) - { - break; - } - if (varTypeIsLong(simdBaseType) && !impStackTop(1).val->OperIsConst() && (simdSize != 8)) { // TODO-ARM64-CQ: We should support long/ulong multiplication. 
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs index 5818dce386f591..9df3a302d09f3c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs @@ -73,7 +73,10 @@ public static Vector64 Indices } } - /// Gets a new with elements that alternate between one and negative one, starting with one. + /// + /// Gets a new with elements that alternate between one and negative one, starting with one; + /// for unsigned element types, the negative-one value is represented as all bits set. + /// /// The type of the vector () is not supported. public static Vector64 SignSequence { From 0f9e7eab72c3c0c7b44fb9a6deb8f244d17f6d32 Mon Sep 17 00:00:00 2001 From: Steven He Date: Mon, 4 May 2026 00:16:26 +0900 Subject: [PATCH 13/24] Make sure op1 is evaluated before op2 --- src/coreclr/jit/gentree.cpp | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 53a2240a3b2b11..57ab6d788ba28a 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -26917,8 +26917,9 @@ GenTree* Compiler::gtNewSimdCreateAlternatingSequenceNode( if (simdCount == 1) { - GenTree* result = gtNewSimdCreateBroadcastNode(type, op1, simdBaseType, simdSize); - return gtWrapWithSideEffects(result, op2, GTF_ALL_EFFECT); + GenTree* result = gtNewSimdCreateBroadcastNode(type, op1, simdBaseType, simdSize); + GenTree* resultDup = fgMakeMultiUse(&result); + return gtNewOperNode(GT_COMMA, type, result, gtWrapWithSideEffects(resultDup, op2, GTF_ALL_EFFECT)); } if (op1->OperIsConst() && op2->OperIsConst()) @@ -27163,7 +27164,9 @@ GenTree* Compiler::gtNewSimdConcatNode(var_types type, if (simdCount == 1) { - return gtWrapWithSideEffects(op1, op2, 
GTF_ALL_EFFECT); + GenTree* result = op1; + GenTree* resultDup = fgMakeMultiUse(&result); + return gtNewOperNode(GT_COMMA, type, result, gtWrapWithSideEffects(resultDup, op2, GTF_ALL_EFFECT)); } #if defined(TARGET_ARM64) @@ -27222,7 +27225,7 @@ GenTree* Compiler::gtNewSimdConcatNode(var_types type, #endif // !TARGET_XARCH && !TARGET_ARM64 { #if defined(TARGET_XARCH) - if ((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)) + if ((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT) || (simdBaseType == TYP_FLOAT)) { // return Sse.Shuffle(op1.AsSingle(), op2.AsSingle(), immediate).As(); @@ -27354,7 +27357,9 @@ GenTree* Compiler::gtNewSimdZipNode( if (getSIMDVectorLength(simdSize, simdBaseType) == 1) { - return gtWrapWithSideEffects(op1, op2, GTF_ALL_EFFECT); + GenTree* result = op1; + GenTree* resultDup = fgMakeMultiUse(&result); + return gtNewOperNode(GT_COMMA, type, result, gtWrapWithSideEffects(resultDup, op2, GTF_ALL_EFFECT)); } #if defined(TARGET_XARCH) @@ -27442,9 +27447,15 @@ GenTree* Compiler::gtNewSimdUnzipNode( if (simdCount == 1) { - GenTree* result = odd ? gtNewZeroConNode(type) : op1; - result = gtWrapWithSideEffects(result, odd ? op1 : op2, GTF_ALL_EFFECT); - return odd ? gtWrapWithSideEffects(result, op2, GTF_ALL_EFFECT) : result; + if (odd) + { + GenTree* result = gtWrapWithSideEffects(gtNewZeroConNode(type), op2, GTF_ALL_EFFECT); + return gtWrapWithSideEffects(result, op1, GTF_ALL_EFFECT); + } + + GenTree* result = op1; + GenTree* resultDup = fgMakeMultiUse(&result); + return gtNewOperNode(GT_COMMA, type, result, gtWrapWithSideEffects(resultDup, op2, GTF_ALL_EFFECT)); } #if defined(TARGET_ARM64) @@ -27455,7 +27466,7 @@ GenTree* Compiler::gtNewSimdUnzipNode( #elif defined(TARGET_XARCH) if (simdSize == 16) { - if ((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT)) + if ((simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT) || (simdBaseType == TYP_FLOAT)) { // return Sse.Shuffle(op1.AsSingle(), op2.AsSingle(), odd ? 
0xDD : 0x88).As(); From 76b04d2d29068dbbbb301fb0e1cb5cf5fe6ce3bf Mon Sep 17 00:00:00 2001 From: Steven He Date: Mon, 4 May 2026 00:16:38 +0900 Subject: [PATCH 14/24] Relax the gate --- src/coreclr/jit/hwintrinsicxarch.cpp | 36 +++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 821bda88cf3223..ca314dc4629550 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -4242,9 +4242,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 2); - if ((simdSize == 16) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + if (simdSize == 16) { - break; + bool supportsX86BaseShuffle = + (simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT) || (simdBaseType == TYP_FLOAT); + + if (!supportsX86BaseShuffle && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } } op2 = impSIMDPopStack(); @@ -4296,9 +4302,23 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 2); - if (!compOpportunisticallyDependsOn(InstructionSet_AVX2)) + if (simdSize == 16) { - break; + bool supportsX86BaseShuffle = + (simdBaseType == TYP_INT) || (simdBaseType == TYP_UINT) || (simdBaseType == TYP_FLOAT); + + if (!supportsX86BaseShuffle && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + break; + } + } + else if (simdSize > 16) + { + if (!compOpportunisticallyDependsOn(varTypeIsFloating(simdBaseType) ? InstructionSet_AVX + : InstructionSet_AVX2)) + { + break; + } } op2 = impSIMDPopStack(); @@ -4316,9 +4336,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); - if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) + if (simdSize == 32) { - break; + if (!compOpportunisticallyDependsOn(varTypeIsFloating(simdBaseType) ? 
InstructionSet_AVX + : InstructionSet_AVX2)) + { + break; + } } if ((simdSize == 64) && varTypeIsByte(simdBaseType) && From 2439729a2c5e1e4feca50eb6fb70e69ca73bf2d6 Mon Sep 17 00:00:00 2001 From: Steven He Date: Mon, 4 May 2026 00:16:58 +0900 Subject: [PATCH 15/24] Fix test float-point number comparison --- .../tests/GenericVectorTests.cs | 4 +-- .../tests/Vectors/Vector128Tests.cs | 6 ++-- .../tests/Vectors/Vector256Tests.cs | 6 ++-- .../tests/Vectors/Vector512Tests.cs | 6 ++-- .../tests/Vectors/Vector64Tests.cs | 32 +++++++++++++++++-- 5 files changed, 40 insertions(+), 14 deletions(-) diff --git a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs index acc586a3ccaadc..5e2af2151c2a1d 100644 --- a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs +++ b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs @@ -4563,7 +4563,7 @@ public void CreateHarmonicSequenceDoubleTest() for (int index = 0; index < Vector.Count; index++) { - Assert.Equal(1.0 / expected, sequence.GetElement(index)); + AssertExtensions.Equal(1.0 / expected, sequence.GetElement(index), 1e-15); expected += 1.0; } } @@ -4576,7 +4576,7 @@ public void CreateCauchySequenceDoubleTest() for (int index = 0; index < Vector.Count; index++) { - Assert.Equal(Math.Sqrt(expected), sequence.GetElement(index)); + AssertExtensions.Equal(Math.Sqrt(expected), sequence.GetElement(index), 1e-15); expected += 1.0; } } diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs index 599e74a3463f9e..c8f358e8c32374 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. 
+// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; @@ -5337,7 +5337,7 @@ public void CreateHarmonicSequenceDoubleTest() for (int index = 0; index < Vector128.Count; index++) { - Assert.Equal(1.0 / expected, sequence.GetElement(index)); + AssertExtensions.Equal(1.0 / expected, sequence.GetElement(index), 1e-15); expected += 1.0; } } @@ -5350,7 +5350,7 @@ public void CreateCauchySequenceDoubleTest() for (int index = 0; index < Vector128.Count; index++) { - Assert.Equal(Math.Sqrt(expected), sequence.GetElement(index)); + AssertExtensions.Equal(Math.Sqrt(expected), sequence.GetElement(index), 1e-15); expected += 1.0; } } diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs index 9656040071ae69..0dea01225c59f1 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
using System.Diagnostics; @@ -6513,7 +6513,7 @@ public void CreateHarmonicSequenceDoubleTest() for (int index = 0; index < Vector256.Count; index++) { - Assert.Equal(1.0 / expected, sequence.GetElement(index)); + AssertExtensions.Equal(1.0 / expected, sequence.GetElement(index), 1e-15); expected += 1.0; } } @@ -6526,7 +6526,7 @@ public void CreateCauchySequenceDoubleTest() for (int index = 0; index < Vector256.Count; index++) { - Assert.Equal(Math.Sqrt(expected), sequence.GetElement(index)); + AssertExtensions.Equal(Math.Sqrt(expected), sequence.GetElement(index), 1e-15); expected += 1.0; } } diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs index 73f5529810e721..b18e6ac7b9635e 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
using System.Diagnostics; @@ -6296,7 +6296,7 @@ public void CreateHarmonicSequenceDoubleTest() for (int index = 0; index < Vector512.Count; index++) { - Assert.Equal(1.0 / expected, sequence.GetElement(index)); + AssertExtensions.Equal(1.0 / expected, sequence.GetElement(index), 1e-15); expected += 1.0; } } @@ -6309,7 +6309,7 @@ public void CreateCauchySequenceDoubleTest() for (int index = 0; index < Vector512.Count; index++) { - Assert.Equal(Math.Sqrt(expected), sequence.GetElement(index)); + AssertExtensions.Equal(Math.Sqrt(expected), sequence.GetElement(index), 1e-15); expected += 1.0; } } diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs index fa332d75809c4e..14b0268aa704e1 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
using System.Diagnostics; @@ -4611,7 +4611,7 @@ public void CreateHarmonicSequenceDoubleTest() for (int index = 0; index < Vector64.Count; index++) { - Assert.Equal(1.0 / expected, sequence.GetElement(index)); + AssertExtensions.Equal(1.0 / expected, sequence.GetElement(index), 1e-15); expected += 1.0; } } @@ -4624,7 +4624,7 @@ public void CreateCauchySequenceDoubleTest() for (int index = 0; index < Vector64.Count; index++) { - Assert.Equal(Math.Sqrt(expected), sequence.GetElement(index)); + AssertExtensions.Equal(Math.Sqrt(expected), sequence.GetElement(index), 1e-15); expected += 1.0; } } @@ -4671,6 +4671,32 @@ public void LaneOperationsInt32Test() AssertVectorEqual(CreateVector64(index => left.GetElement(count - 1 - index)), Vector64.Reverse(left)); } + [Fact] + public void LaneOperationsInt64CountOneTest() + { + Vector64 left = Vector64.Create(10L); + Vector64 right = Vector64.Create(20L); + + AssertVectorEqual(left, Vector64.ZipLower(left, right)); + AssertVectorEqual(left, Vector64.ZipUpper(left, right)); + + (Vector64 lower, Vector64 upper) = Vector64.Zip(left, right); + AssertVectorEqual(left, lower); + AssertVectorEqual(left, upper); + + AssertVectorEqual(left, Vector64.UnzipEven(left, right)); + AssertVectorEqual(Vector64.Zero, Vector64.UnzipOdd(left, right)); + + (Vector64 even, Vector64 odd) = Vector64.Unzip(left, right); + AssertVectorEqual(left, even); + AssertVectorEqual(Vector64.Zero, odd); + + AssertVectorEqual(left, Vector64.ConcatLowerLower(left, right)); + AssertVectorEqual(left, Vector64.ConcatUpperLower(left, right)); + AssertVectorEqual(left, Vector64.ConcatUpperUpper(left, right)); + AssertVectorEqual(left, Vector64.ConcatLowerUpper(left, right)); + } + private static Vector64 CreateVector64(Func elementSelector) { int[] values = new int[Vector64.Count]; From 762a59768155807b6658498d9c639adf3cd38cff Mon Sep 17 00:00:00 2001 From: Steven He Date: Mon, 4 May 2026 00:27:33 +0900 Subject: [PATCH 16/24] Meh --- 
.../System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs | 2 +- .../System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs | 2 +- .../System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs | 2 +- .../System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs index c8f358e8c32374..606448c8c49004 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs index 0dea01225c59f1..c9f32f5d1d4515 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs index b18e6ac7b9635e..ac4804cb8586e8 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. 
+// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs index 14b0268aa704e1..517490b831b1af 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; From 28337a944b65dae7ccde15f22c40f2fd45c088dd Mon Sep 17 00:00:00 2001 From: Steven He Date: Mon, 4 May 2026 00:46:31 +0900 Subject: [PATCH 17/24] Minor enhancements --- .../System/Runtime/Intrinsics/Vector256.cs | 33 +++++++------- .../System/Runtime/Intrinsics/Vector512.cs | 44 +++++++++---------- 2 files changed, 35 insertions(+), 42 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index a758fcb983a313..27cba5857f241f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -1671,25 +1671,22 @@ public static Vector256 CreateGeometricSequence(T initial, [ConstantExpect if (Vector128.Count >= 2) { - T multiplier2 = Scalar.Multiply(multiplier, multiplier); - upperMultiplier = multiplier2; + upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); + } - if (Vector128.Count >= 4) - { - T multiplier4 = Scalar.Multiply(multiplier2, multiplier2); - upperMultiplier = multiplier4; - - if (Vector128.Count >= 8) - { - T multiplier8 = 
Scalar.Multiply(multiplier4, multiplier4); - upperMultiplier = multiplier8; - - if (Vector128.Count >= 16) - { - upperMultiplier = Scalar.Multiply(multiplier8, multiplier8); - } - } - } + if (Vector128.Count >= 4) + { + upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); + } + + if (Vector128.Count >= 8) + { + upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); + } + + if (Vector128.Count >= 16) + { + upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); } T upperInitial = Scalar.Multiply(initial, upperMultiplier); diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs index 77fd2af26f4c13..cb0a7b8ab9f284 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs @@ -1704,31 +1704,27 @@ public static Vector512 CreateGeometricSequence(T initial, [ConstantExpect if (Vector256.Count >= 2) { - T multiplier2 = Scalar.Multiply(multiplier, multiplier); - upperMultiplier = multiplier2; + upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); + } - if (Vector256.Count >= 4) - { - T multiplier4 = Scalar.Multiply(multiplier2, multiplier2); - upperMultiplier = multiplier4; - - if (Vector256.Count >= 8) - { - T multiplier8 = Scalar.Multiply(multiplier4, multiplier4); - upperMultiplier = multiplier8; - - if (Vector256.Count >= 16) - { - T multiplier16 = Scalar.Multiply(multiplier8, multiplier8); - upperMultiplier = multiplier16; - - if (Vector256.Count >= 32) - { - upperMultiplier = Scalar.Multiply(multiplier16, multiplier16); - } - } - } - } + if (Vector256.Count >= 4) + { + upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); + } + + if (Vector256.Count >= 8) + { + upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); + } + + if (Vector256.Count >= 
16) + { + upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); + } + + if (Vector256.Count >= 32) + { + upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); } T upperInitial = Scalar.Multiply(initial, upperMultiplier); From 345bac87de93c859a3c80326761a8669f0fc2eca Mon Sep 17 00:00:00 2001 From: Steven He Date: Mon, 4 May 2026 01:36:22 +0900 Subject: [PATCH 18/24] Address exception document issues --- .../System.Private.CoreLib/src/System/Numerics/Vector.cs | 2 ++ .../src/System/Runtime/Intrinsics/Vector128.cs | 1 + .../src/System/Runtime/Intrinsics/Vector256.cs | 1 + .../src/System/Runtime/Intrinsics/Vector512.cs | 1 + .../src/System/Runtime/Intrinsics/Vector64.cs | 1 + 5 files changed, 6 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs index 3672c452de311f..416ad3cf608500 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs @@ -936,6 +936,7 @@ public static Vector CreateAlternatingSequence(T even, T odd) /// The value that element 0 of the arithmetic sequence will be initialized to. /// The value that indicates how far apart each element of the arithmetic sequence should be from the previous. /// A new instance whose elements are initialized to the square root of the corresponding element of the arithmetic sequence. + /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector CreateCauchySequence(T start, T step) => SquareRoot(CreateSequence(start, step)); @@ -3206,6 +3207,7 @@ public static (Vector Sin, Vector Cos) SinCos(Vector vector /// The vector whose square root is to be computed. /// The type of the elements in the vector. /// A vector whose elements are the square root of the corresponding elements in . + /// The type of () is not supported. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector SquareRoot(Vector value) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index c9bb5a48f9aa83..dffbbec75917c6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -1635,6 +1635,7 @@ public static Vector128 CreateAlternatingSequence(T even, T odd) /// The value that element 0 of the arithmetic sequence will be initialized to. /// The value that indicates how far apart each element of the arithmetic sequence should be from the previous. /// A new instance whose elements are initialized to the square root of the corresponding element of the arithmetic sequence. + /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 CreateCauchySequence(T start, T step) => Sqrt(CreateSequence(start, step)); diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index 27cba5857f241f..9f342aa3c82501 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -1741,6 +1741,7 @@ public static Vector256 CreateHarmonicSequence(T start, T step) /// The value that element 0 of the arithmetic sequence will be initialized to. /// The value that indicates how far apart each element of the arithmetic sequence should be from the previous. /// A new instance whose elements are initialized to the square root of the corresponding element of the arithmetic sequence. + /// The type of and () is not supported. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateCauchySequence(T start, T step) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs index cb0a7b8ab9f284..3fcdf9849707ae 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs @@ -1779,6 +1779,7 @@ public static Vector512 CreateHarmonicSequence(T start, T step) /// The value that element 0 of the arithmetic sequence will be initialized to. /// The value that indicates how far apart each element of the arithmetic sequence should be from the previous. /// A new instance whose elements are initialized to the square root of the corresponding element of the arithmetic sequence. + /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 CreateCauchySequence(T start, T step) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index c449a7128f30db..06499db6daf8de 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -1443,6 +1443,7 @@ public static Vector64 CreateAlternatingSequence(T even, T odd) /// The value that element 0 of the arithmetic sequence will be initialized to. /// The value that indicates how far apart each element of the arithmetic sequence should be from the previous. /// A new instance whose elements are initialized to the square root of the corresponding element of the arithmetic sequence. + /// The type of and () is not supported. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 CreateCauchySequence(T start, T step) => Sqrt(CreateSequence(start, step)); From 9cf287f3ee3075544108b1cdeb5271f4d94db062 Mon Sep 17 00:00:00 2001 From: Steven He Date: Mon, 4 May 2026 01:41:42 +0900 Subject: [PATCH 19/24] Exercise more code in tests --- .../tests/GenericVectorTests.cs | 22 ++++++++++++++++ .../tests/Vectors/Vector64Tests.cs | 26 +++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs index 5e2af2151c2a1d..2695a157671808 100644 --- a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs +++ b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs @@ -4592,6 +4592,28 @@ public void SignSequenceInt32Test() } } + [Fact] + public void SignSequenceUInt32Test() + { + Vector sequence = Vector.SignSequence; + + for (int index = 0; index < Vector.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 1u : uint.MaxValue, sequence.GetElement(index)); + } + } + + [Fact] + public void SignSequenceSingleTest() + { + Vector sequence = Vector.SignSequence; + + for (int index = 0; index < Vector.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 
1.0f : -1.0f, sequence.GetElement(index)); + } + } + [Fact] public void LaneOperationsInt32Test() { diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs index 517490b831b1af..0926e3bcb58c3f 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs @@ -4697,6 +4697,32 @@ public void LaneOperationsInt64CountOneTest() AssertVectorEqual(left, Vector64.ConcatLowerUpper(left, right)); } + [Fact] + public void LaneOperationsDoubleCountOneTest() + { + Vector64 left = Vector64.Create(10.0); + Vector64 right = Vector64.Create(20.0); + + AssertVectorEqual(left, Vector64.ZipLower(left, right)); + AssertVectorEqual(left, Vector64.ZipUpper(left, right)); + + (Vector64 lower, Vector64 upper) = Vector64.Zip(left, right); + AssertVectorEqual(left, lower); + AssertVectorEqual(left, upper); + + AssertVectorEqual(left, Vector64.UnzipEven(left, right)); + AssertVectorEqual(Vector64.Zero, Vector64.UnzipOdd(left, right)); + + (Vector64 even, Vector64 odd) = Vector64.Unzip(left, right); + AssertVectorEqual(left, even); + AssertVectorEqual(Vector64.Zero, odd); + + AssertVectorEqual(left, Vector64.ConcatLowerLower(left, right)); + AssertVectorEqual(left, Vector64.ConcatUpperLower(left, right)); + AssertVectorEqual(left, Vector64.ConcatUpperUpper(left, right)); + AssertVectorEqual(left, Vector64.ConcatLowerUpper(left, right)); + } + private static Vector64 CreateVector64(Func elementSelector) { int[] values = new int[Vector64.Count]; From f2764ba5a8f26b7f371fee46ef7135c838b9fcd7 Mon Sep 17 00:00:00 2001 From: Steven He Date: Mon, 4 May 2026 01:49:52 +0900 Subject: [PATCH 20/24] Nit --- .../src/System/Runtime/Intrinsics/Vector256.cs | 8 ++------ .../src/System/Runtime/Intrinsics/Vector512.cs | 8 ++------ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git 
a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index 9f342aa3c82501..d018b0f3d1c361 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -1706,12 +1706,8 @@ public static Vector256 CreateGeometricSequence(T initial, [ConstantExpect [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateAlternatingSequence(T even, T odd) { - Vector128 lower = Vector128.CreateAlternatingSequence(even, odd); - Vector128 upper = ((Vector128.Count & 1) == 0) - ? Vector128.CreateAlternatingSequence(even, odd) - : Vector128.CreateAlternatingSequence(odd, even); - - return Create(lower, upper); + Vector128 sequence = Vector128.CreateAlternatingSequence(even, odd); + return Create(sequence, sequence); } /// Creates a new instance whose elements are the reciprocal of an arithmetic sequence. diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs index 3fcdf9849707ae..3cb27b04d8b1f7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs @@ -1744,12 +1744,8 @@ public static Vector512 CreateGeometricSequence(T initial, [ConstantExpect [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 CreateAlternatingSequence(T even, T odd) { - Vector256 lower = Vector256.CreateAlternatingSequence(even, odd); - Vector256 upper = ((Vector256.Count & 1) == 0) - ? 
Vector256.CreateAlternatingSequence(even, odd) - : Vector256.CreateAlternatingSequence(odd, even); - - return Create(lower, upper); + Vector256 sequence = Vector256.CreateAlternatingSequence(even, odd); + return Create(sequence, sequence); } /// Creates a new instance whose elements are the reciprocal of an arithmetic sequence. From bd4adcbee732137039a0b06be64bf820f4690788 Mon Sep 17 00:00:00 2001 From: Steven He Date: Mon, 4 May 2026 02:02:33 +0900 Subject: [PATCH 21/24] Add more tests --- .../tests/Vectors/Vector128Tests.cs | 33 +++++++++++++++++++ .../tests/Vectors/Vector256Tests.cs | 33 +++++++++++++++++++ .../tests/Vectors/Vector512Tests.cs | 33 +++++++++++++++++++ .../tests/Vectors/Vector64Tests.cs | 33 +++++++++++++++++++ 4 files changed, 132 insertions(+) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs index 606448c8c49004..3f8e3387d31f38 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs @@ -5366,6 +5366,39 @@ public void SignSequenceInt32Test() } } + [Fact] + public void SignSequenceUInt32Test() + { + Vector128 sequence = Vector128.SignSequence; + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 1u : uint.MaxValue, sequence.GetElement(index)); + } + } + + [Fact] + public void SignSequenceSingleTest() + { + Vector128 sequence = Vector128.SignSequence; + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 1.0f : -1.0f, sequence.GetElement(index)); + } + } + + [Fact] + public void SignSequenceDoubleTest() + { + Vector128 sequence = Vector128.SignSequence; + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 
1.0 : -1.0, sequence.GetElement(index)); + } + } + [Fact] public void LaneOperationsInt32Test() { diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs index c9f32f5d1d4515..c10e5fab628c37 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs @@ -6542,6 +6542,39 @@ public void SignSequenceInt32Test() } } + [Fact] + public void SignSequenceUInt32Test() + { + Vector256 sequence = Vector256.SignSequence; + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 1u : uint.MaxValue, sequence.GetElement(index)); + } + } + + [Fact] + public void SignSequenceSingleTest() + { + Vector256 sequence = Vector256.SignSequence; + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 1.0f : -1.0f, sequence.GetElement(index)); + } + } + + [Fact] + public void SignSequenceDoubleTest() + { + Vector256 sequence = Vector256.SignSequence; + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 1.0 : -1.0, sequence.GetElement(index)); + } + } + [Fact] public void LaneOperationsInt32Test() { diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs index ac4804cb8586e8..cdd4e6eb1598d8 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs @@ -6325,6 +6325,39 @@ public void SignSequenceInt32Test() } } + [Fact] + public void SignSequenceUInt32Test() + { + Vector512 sequence = Vector512.SignSequence; + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 
1u : uint.MaxValue, sequence.GetElement(index)); + } + } + + [Fact] + public void SignSequenceSingleTest() + { + Vector512 sequence = Vector512.SignSequence; + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 1.0f : -1.0f, sequence.GetElement(index)); + } + } + + [Fact] + public void SignSequenceDoubleTest() + { + Vector512 sequence = Vector512.SignSequence; + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 1.0 : -1.0, sequence.GetElement(index)); + } + } + [Fact] public void LaneOperationsInt32Test() { diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs index 0926e3bcb58c3f..f5f7160bbc082d 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs @@ -4640,6 +4640,39 @@ public void SignSequenceInt32Test() } } + [Fact] + public void SignSequenceUInt32Test() + { + Vector64 sequence = Vector64.SignSequence; + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 1u : uint.MaxValue, sequence.GetElement(index)); + } + } + + [Fact] + public void SignSequenceSingleTest() + { + Vector64 sequence = Vector64.SignSequence; + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 1.0f : -1.0f, sequence.GetElement(index)); + } + } + + [Fact] + public void SignSequenceDoubleTest() + { + Vector64 sequence = Vector64.SignSequence; + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 
1.0 : -1.0, sequence.GetElement(index)); + } + } + [Fact] public void LaneOperationsInt32Test() { From c5b38ca8e47f631484b5b45082a09176bae7ac8a Mon Sep 17 00:00:00 2001 From: Steven He Date: Mon, 4 May 2026 03:32:34 +0900 Subject: [PATCH 22/24] Fix floating-point multiplication association issue --- src/coreclr/jit/hwintrinsicarm64.cpp | 7 ++++ src/coreclr/jit/hwintrinsicxarch.cpp | 7 ++++ .../System/Runtime/Intrinsics/Vector256.cs | 23 ++----------- .../System/Runtime/Intrinsics/Vector512.cs | 28 ++-------------- .../tests/Vectors/Vector128Tests.cs | 18 ++++++++++ .../tests/Vectors/Vector256Tests.cs | 33 +++++++++++++++++++ .../tests/Vectors/Vector512Tests.cs | 33 +++++++++++++++++++ 7 files changed, 104 insertions(+), 45 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 7554c4b44b9c72..9a3b47ac45de47 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1344,6 +1344,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + if (varTypeIsFloating(simdBaseType) && !impStackTop(1).val->OperIsConst() && + (getSIMDVectorLength(simdSize, simdBaseType) > 2)) + { + // Floating-point multiplication is not associative; use the fallback to preserve recurrence order. + break; + } + if (varTypeIsLong(simdBaseType) && !impStackTop(1).val->OperIsConst() && (simdSize != 8)) { // TODO-ARM64-CQ: We should support long/ulong multiplication. 
diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index ca314dc4629550..c82264f387ccd3 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -2269,6 +2269,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + if (varTypeIsFloating(simdBaseType) && !impStackTop(1).val->OperIsConst() && + (getSIMDVectorLength(simdSize, simdBaseType) > 2)) + { + // Floating-point multiplication is not associative; use the fallback to preserve recurrence order. + break; + } + if (!impStackTop(1).val->OperIsConst() && (simdSize == 32) && varTypeIsIntegral(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX2)) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index d018b0f3d1c361..77e5bd1e499cfa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -1667,30 +1667,13 @@ public static Vector256 CreateScalarUnsafe(T value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateGeometricSequence(T initial, [ConstantExpected] T multiplier) { - T upperMultiplier = multiplier; + T upperInitial = initial; - if (Vector128.Count >= 2) + for (int index = 0; index < Vector128.Count; index++) { - upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); + upperInitial = Scalar.Multiply(upperInitial, multiplier); } - if (Vector128.Count >= 4) - { - upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); - } - - if (Vector128.Count >= 8) - { - upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); - } - - if (Vector128.Count >= 16) - { - upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); - } - - T upperInitial = Scalar.Multiply(initial, 
upperMultiplier); - return Create( Vector128.CreateGeometricSequence(initial, multiplier), Vector128.CreateGeometricSequence(upperInitial, multiplier) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs index 3cb27b04d8b1f7..483633c140dc2e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs @@ -1700,35 +1700,13 @@ public static Vector512 CreateScalarUnsafe(T value) [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 CreateGeometricSequence(T initial, [ConstantExpected] T multiplier) { - T upperMultiplier = multiplier; + T upperInitial = initial; - if (Vector256.Count >= 2) + for (int index = 0; index < Vector256.Count; index++) { - upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); + upperInitial = Scalar.Multiply(upperInitial, multiplier); } - if (Vector256.Count >= 4) - { - upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); - } - - if (Vector256.Count >= 8) - { - upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); - } - - if (Vector256.Count >= 16) - { - upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); - } - - if (Vector256.Count >= 32) - { - upperMultiplier = Scalar.Multiply(upperMultiplier, upperMultiplier); - } - - T upperInitial = Scalar.Multiply(initial, upperMultiplier); - return Create( Vector256.CreateGeometricSequence(initial, multiplier), Vector256.CreateGeometricSequence(upperInitial, multiplier) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs index 3f8e3387d31f38..ab9e3c93d98624 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs +++ 
b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs @@ -5318,6 +5318,21 @@ public void CreateGeometricSequenceInt32Test() } } + [Fact] + public void CreateGeometricSequenceSingleNonConstantInitialTest() + { + const float multiplier = 1.0064822f; + float initial = GetNonConstant(1.0059024f); + Vector128 sequence = Vector128.CreateGeometricSequence(initial, multiplier); + float expected = initial; + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal(expected, sequence.GetElement(index)); + expected *= multiplier; + } + } + [Fact] public void CreateAlternatingSequenceInt32Test() { @@ -5451,6 +5466,9 @@ private static void AssertVectorEqual(Vector128 expected, Vector128 act } } + [MethodImpl(MethodImplOptions.NoInlining)] + private static T GetNonConstant(T value) => value; + [Theory] [MemberData(nameof(GenericMathTestMemberData.AsinDouble), MemberType = typeof(GenericMathTestMemberData))] public void AsinDoubleTest(double value, double expectedResult, double variance) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs index c10e5fab628c37..33649743726a39 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs @@ -6494,6 +6494,36 @@ public void CreateGeometricSequenceInt32Test() } } + [Fact] + public void CreateGeometricSequenceSingleNonConstantInitialTest() + { + const float multiplier = 1.0064822f; + float initial = GetNonConstant(1.0059024f); + Vector256 sequence = Vector256.CreateGeometricSequence(initial, multiplier); + float expected = initial; + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal(expected, sequence.GetElement(index)); + expected *= multiplier; + } + } + + [Fact] + public void CreateGeometricSequenceDoubleNonConstantInitialTest() + { + const double multiplier = 1e-50; 
+ double initial = GetNonConstant(1e-154); + Vector256 sequence = Vector256.CreateGeometricSequence(initial, multiplier); + double expected = initial; + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal(expected, sequence.GetElement(index)); + expected *= multiplier; + } + } + [Fact] public void CreateAlternatingSequenceInt32Test() { @@ -6627,6 +6657,9 @@ private static void AssertVectorEqual(Vector256 expected, Vector256 act } } + [MethodImpl(MethodImplOptions.NoInlining)] + private static T GetNonConstant(T value) => value; + [Theory] [MemberData(nameof(GenericMathTestMemberData.AsinDouble), MemberType = typeof(GenericMathTestMemberData))] public void AsinDoubleTest(double value, double expectedResult, double variance) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs index cdd4e6eb1598d8..d020b66f60c2c1 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs @@ -6277,6 +6277,36 @@ public void CreateGeometricSequenceInt32Test() } } + [Fact] + public void CreateGeometricSequenceSingleNonConstantInitialTest() + { + const float multiplier = 1.0064822f; + float initial = GetNonConstant(1.0059024f); + Vector512 sequence = Vector512.CreateGeometricSequence(initial, multiplier); + float expected = initial; + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal(expected, sequence.GetElement(index)); + expected *= multiplier; + } + } + + [Fact] + public void CreateGeometricSequenceDoubleNonConstantInitialTest() + { + const double multiplier = 1e-50; + double initial = GetNonConstant(1e-154); + Vector512 sequence = Vector512.CreateGeometricSequence(initial, multiplier); + double expected = initial; + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal(expected, sequence.GetElement(index)); + 
expected *= multiplier; + } + } + [Fact] public void CreateAlternatingSequenceInt32Test() { @@ -6410,6 +6440,9 @@ private static void AssertVectorEqual(Vector512 expected, Vector512 act } } + [MethodImpl(MethodImplOptions.NoInlining)] + private static T GetNonConstant(T value) => value; + [Theory] [MemberData(nameof(GenericMathTestMemberData.AsinDouble), MemberType = typeof(GenericMathTestMemberData))] public void AsinDoubleTest(double value, double expectedResult, double variance) From 8cb01870a849a5652f38133ae2d4a11af38d1031 Mon Sep 17 00:00:00 2001 From: Steven He Date: Mon, 4 May 2026 03:47:13 +0900 Subject: [PATCH 23/24] Adding missing tests and comments --- .../tests/GenericVectorTests.cs | 11 +++++++++++ .../src/System/Numerics/Vector.cs | 1 + .../src/System/Runtime/Intrinsics/Vector128.cs | 1 + .../src/System/Runtime/Intrinsics/Vector256.cs | 1 + .../src/System/Runtime/Intrinsics/Vector512.cs | 1 + .../src/System/Runtime/Intrinsics/Vector64.cs | 1 + .../tests/Vectors/Vector128Tests.cs | 15 +++++++++++++++ 7 files changed, 31 insertions(+) diff --git a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs index 2695a157671808..ea809b1851c167 100644 --- a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs +++ b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs @@ -4614,6 +4614,17 @@ public void SignSequenceSingleTest() } } + [Fact] + public void SignSequenceDoubleTest() + { + Vector sequence = Vector.SignSequence; + + for (int index = 0; index < Vector.Count; index++) + { + Assert.Equal(((index & 1) == 0) ? 
1.0 : -1.0, sequence.GetElement(index)); + } + } + [Fact] public void LaneOperationsInt32Test() { diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs index 416ad3cf608500..6dd60a9a58b994 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs @@ -884,6 +884,7 @@ public static Vector CreateScalarUnsafe(T value) /// The value that element 0 will be initialized to. /// The value that indicates how each element should be scaled from the previous. /// A new instance with the first element initialized to and each subsequent element initialized to the value of the previous element multiplied by . + /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector CreateGeometricSequence(T initial, [ConstantExpected] T multiplier) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index dffbbec75917c6..cbac312e44db73 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -1583,6 +1583,7 @@ public static Vector128 CreateScalarUnsafe(T value) /// The value that element 0 will be initialized to. /// The value that indicates how each element should be scaled from the previous. /// A new instance with the first element initialized to and each subsequent element initialized to the value of the previous element multiplied by . + /// The type of and () is not supported. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 CreateGeometricSequence(T initial, [ConstantExpected] T multiplier) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index 77e5bd1e499cfa..fbbf02c28f731b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -1663,6 +1663,7 @@ public static Vector256 CreateScalarUnsafe(T value) /// The value that element 0 will be initialized to. /// The value that indicates how each element should be scaled from the previous. /// A new instance with the first element initialized to and each subsequent element initialized to the value of the previous element multiplied by . + /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 CreateGeometricSequence(T initial, [ConstantExpected] T multiplier) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs index 483633c140dc2e..5b6bd969079906 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs @@ -1696,6 +1696,7 @@ public static Vector512 CreateScalarUnsafe(T value) /// The value that element 0 will be initialized to. /// The value that indicates how each element should be scaled from the previous. /// A new instance with the first element initialized to and each subsequent element initialized to the value of the previous element multiplied by . + /// The type of and () is not supported. 
[Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 CreateGeometricSequence(T initial, [ConstantExpected] T multiplier) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index 06499db6daf8de..d25feac61ba53e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -1391,6 +1391,7 @@ public static Vector64 CreateScalarUnsafe(T value) /// The value that element 0 will be initialized to. /// The value that indicates how each element should be scaled from the previous. /// A new instance with the first element initialized to and each subsequent element initialized to the value of the previous element multiplied by . + /// The type of and () is not supported. [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector64 CreateGeometricSequence(T initial, [ConstantExpected] T multiplier) diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs index ab9e3c93d98624..ac20fe9f0adf7f 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs @@ -5333,6 +5333,21 @@ public void CreateGeometricSequenceSingleNonConstantInitialTest() } } + [Fact] + public void CreateGeometricSequenceDoubleNonConstantInitialTest() + { + const double multiplier = 1e-50; + double initial = GetNonConstant(1e-154); + Vector128 sequence = Vector128.CreateGeometricSequence(initial, multiplier); + double expected = initial; + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal(expected, sequence.GetElement(index)); + expected *= multiplier; + } + } + [Fact] public void 
CreateAlternatingSequenceInt32Test() { From 6296ef8cc2ebe9402d2f58929f824815c7e8c171 Mon Sep 17 00:00:00 2001 From: Steven He Date: Mon, 4 May 2026 12:05:45 +0900 Subject: [PATCH 24/24] Use alternating sequence to implement sign sequence --- src/coreclr/jit/compiler.h | 2 - src/coreclr/jit/gentree.cpp | 90 +--------------------------- src/coreclr/jit/hwintrinsicarm64.cpp | 8 ++- src/coreclr/jit/hwintrinsicxarch.cpp | 8 ++- 4 files changed, 15 insertions(+), 93 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f7d7a36ba23c94..25ca1a7e764016 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3452,8 +3452,6 @@ class Compiler GenTree* gtNewSimdGetIndicesNode(var_types type, var_types simdBaseType, unsigned simdSize); - GenTree* gtNewSimdGetSignSequenceNode(var_types type, var_types simdBaseType, unsigned simdSize); - GenTree* gtNewSimdGetLowerNode(var_types type, GenTree* op1, var_types simdBaseType, diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 57ab6d788ba28a..742094d4a0dd10 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -26917,6 +26917,7 @@ GenTree* Compiler::gtNewSimdCreateAlternatingSequenceNode( if (simdCount == 1) { + // Only the even-indexed value contributes to the result, but op2 still needs to be evaluated for side effects. 
GenTree* result = gtNewSimdCreateBroadcastNode(type, op1, simdBaseType, simdSize); GenTree* resultDup = fgMakeMultiUse(&result); return gtNewOperNode(GT_COMMA, type, result, gtWrapWithSideEffects(resultDup, op2, GTF_ALL_EFFECT)); @@ -27043,95 +27044,6 @@ GenTree* Compiler::gtNewSimdCreateAlternatingSequenceNode( return gtNewSimdZipNode(type, even, odd, simdBaseType, simdSize, false); } -//---------------------------------------------------------------------------------------------- -// Compiler::gtNewSimdGetSignSequenceNode: Creates a new simd SignSequence node -// -// Arguments: -// type - The return type of SIMD node being created -// simdBaseType - The base type of SIMD type of the intrinsic -// simdSize - The size of the SIMD type of the intrinsic -// -// Returns: -// The created SignSequence node -// -GenTree* Compiler::gtNewSimdGetSignSequenceNode(var_types type, var_types simdBaseType, unsigned simdSize) -{ - assert(varTypeIsSIMD(type)); - assert(getSIMDTypeForSize(simdSize) == type); - assert(varTypeIsArithmetic(simdBaseType)); - - GenTreeVecCon* vecCon = gtNewVconNode(type); - uint32_t simdCount = getSIMDVectorLength(simdSize, simdBaseType); - - switch (simdBaseType) - { - case TYP_BYTE: - case TYP_UBYTE: - { - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.u8[index] = ((index & 1) == 0) ? 1 : UINT8_MAX; - } - break; - } - - case TYP_SHORT: - case TYP_USHORT: - { - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.u16[index] = ((index & 1) == 0) ? 1 : UINT16_MAX; - } - break; - } - - case TYP_INT: - case TYP_UINT: - { - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.u32[index] = ((index & 1) == 0) ? 1 : UINT32_MAX; - } - break; - } - - case TYP_LONG: - case TYP_ULONG: - { - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.u64[index] = ((index & 1) == 0) ? 
1 : UINT64_MAX; - } - break; - } - - case TYP_FLOAT: - { - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.f32[index] = ((index & 1) == 0) ? 1.0f : -1.0f; - } - break; - } - - case TYP_DOUBLE: - { - for (uint32_t index = 0; index < simdCount; index++) - { - vecCon->gtSimdVal.f64[index] = ((index & 1) == 0) ? 1.0 : -1.0; - } - break; - } - - default: - { - unreached(); - } - } - - return vecCon; -} - //---------------------------------------------------------------------------------------------- // Compiler::gtNewSimdConcatNode: Creates a new simd ConcatLowerLower/... node // diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 9a3b47ac45de47..ad82209de9c9c9 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1571,7 +1571,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_get_SignSequence: { assert(sig->numArgs == 0); - retNode = gtNewSimdGetSignSequenceNode(retType, simdBaseType, simdSize); + + var_types scalarType = genActualType(simdBaseType); + GenTree* one = gtNewOneConNode(scalarType); + GenTree* negativeOne = varTypeIsFloating(simdBaseType) ? 
gtNewDconNode(-1.0, simdBaseType) + : gtNewAllBitsSetConNode(scalarType); + + retNode = gtNewSimdCreateAlternatingSequenceNode(retType, one, negativeOne, simdBaseType, simdSize); break; } diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index c82264f387ccd3..cdf8dcda42186f 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -2608,7 +2608,13 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector512_get_SignSequence: { assert(sig->numArgs == 0); - retNode = gtNewSimdGetSignSequenceNode(retType, simdBaseType, simdSize); + + var_types scalarType = genActualType(simdBaseType); + GenTree* one = gtNewOneConNode(scalarType); + GenTree* negativeOne = varTypeIsFloating(simdBaseType) ? gtNewDconNode(-1.0, simdBaseType) + : gtNewAllBitsSetConNode(scalarType); + + retNode = gtNewSimdCreateAlternatingSequenceNode(retType, one, negativeOne, simdBaseType, simdSize); break; }