From ffaa18a0d09564875c74c17d9bac84e1096b57e2 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 20 Sep 2023 19:17:22 -0700 Subject: [PATCH 01/10] Added OperIsHWIntrinsicSIMDScalar. Do not remove CAST on SIMD scalar operations for stores. --- src/coreclr/jit/gentree.cpp | 35 +++++++++++++++++++++++++++++++++++ src/coreclr/jit/gentree.h | 2 ++ src/coreclr/jit/morph.cpp | 10 ++++++++++ 3 files changed, 47 insertions(+) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index d6d524f375ea73..fa2072f8cc4486 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -26403,3 +26403,38 @@ bool GenTree::CanDivOrModPossiblyOverflow(Compiler* comp) const // Not enough known information; therefore we might overflow. return true; } + +//------------------------------------------------------------------------ +// OperIsHWIntrinsicSIMDScalar: returns true if the given tree is GT_HWINTRINSIC and +// is a SIMD scalar operation +// +// Return Value: +// true if the given tree is a hwintrinsic and a SIMD scalar operation +// +bool GenTree::OperIsHWIntrinsicSIMDScalar() +{ + if (!this->OperIsHWIntrinsic()) + return false; + +#if defined(FEATURE_HW_INTRINSICS) + GenTreeHWIntrinsic* hwintrinsic = this->AsHWIntrinsic(); + NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId(); + +#if defined(TARGET_AMD64) + HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(hwintrinsic->GetHWIntrinsicId()); + + switch (category) + { + case HW_Category_SIMDScalar: + return true; + + default: + break; + } +#elif defined(TARGET_ARM64) + return HWIntrinsicInfo::SIMDScalar(intrinsicId); +#endif // TARGET_ARM64 && !TARGET_AMD64 +#endif // FEATURE_HW_INTRINSICS + + return false; +} diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 109da6a15c30d8..c893f6ca8bd9d9 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1653,6 +1653,8 @@ struct GenTree bool OperIsHWIntrinsic(NamedIntrinsic intrinsicId) const; + bool OperIsHWIntrinsicSIMDScalar(); + // This is here for cleaner GT_LONG #ifdefs. static bool OperIsLong(genTreeOps gtOper) { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 3deada8eec085b..2d5a548428f541 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -10096,6 +10096,11 @@ GenTree* Compiler::fgOptimizeCastOnStore(GenTree* store) if (!src->OperIs(GT_CAST)) return store; + // SIMD scalar operations may be specialized for stores, but not when the store expects a small type. + // Therefore, we keep the CAST. + if (src->gtGetOp1()->OperIsHWIntrinsicSIMDScalar() && varTypeIsSmall(store)) + return store; + if (store->OperIs(GT_STORE_LCL_VAR)) { LclVarDsc* varDsc = lvaGetDesc(store->AsLclVarCommon()->GetLclNum()); @@ -11804,6 +11809,11 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree, bool* optAssertionPropD if (op2->gtOper == GT_CAST && !op2->gtOverflow()) { + // SIMD scalar operations may be specialized for stores, but not when the store expects a small type. + // Therefore, we keep the CAST. + if (op2->gtGetOp1()->OperIsHWIntrinsicSIMDScalar() && varTypeIsSmall(tree)) + break; + var_types srct; var_types cast; var_types dstt; From af6d1bf6f41c684b33f91bc291d385d1c7dc45f2 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 20 Sep 2023 19:37:11 -0700 Subject: [PATCH 02/10] Minor cleanup --- src/coreclr/jit/gentree.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index fa2072f8cc4486..23b1d7d39a7d10 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -26421,9 +26421,7 @@ bool GenTree::OperIsHWIntrinsicSIMDScalar() NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId(); #if defined(TARGET_AMD64) - HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(hwintrinsic->GetHWIntrinsicId()); - - switch (category) + switch (HWIntrinsicInfo::lookupCategory(intrinsicId)) { case HW_Category_SIMDScalar: return true; From 8d1d10cf0ca34d6a7f6ca7bff6a612b6577a8701 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 20 Sep 2023 19:38:46 -0700 Subject: [PATCH 03/10] Minor cleanup --- src/coreclr/jit/gentree.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 23b1d7d39a7d10..873168ed497d69 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -26413,25 +26413,20 @@ bool GenTree::CanDivOrModPossiblyOverflow(Compiler* comp) const // bool GenTree::OperIsHWIntrinsicSIMDScalar() { +#if defined(FEATURE_HW_INTRINSICS) + if (!this->OperIsHWIntrinsic()) return false; -#if defined(FEATURE_HW_INTRINSICS) GenTreeHWIntrinsic* hwintrinsic = this->AsHWIntrinsic(); NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId(); #if defined(TARGET_AMD64) - switch (HWIntrinsicInfo::lookupCategory(intrinsicId)) - { - case HW_Category_SIMDScalar: - return true; - - default: - break; - } + return HWIntrinsicInfo::lookupCategory(intrinsicId) == HW_Category_SIMDScalar; #elif defined(TARGET_ARM64) return HWIntrinsicInfo::SIMDScalar(intrinsicId); #endif // TARGET_ARM64 && !TARGET_AMD64 + #endif // FEATURE_HW_INTRINSICS return false; From 807810c863143d35c07e07abafd0b91d6b7cc44b Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 21 Sep 2023 03:48:17 -0700 Subject: [PATCH 04/10] Feedback --- src/coreclr/jit/gentree.cpp | 28 ---------------------------- src/coreclr/jit/gentree.h | 2 -- src/coreclr/jit/lowerxarch.cpp | 2 +- src/coreclr/jit/morph.cpp | 10 ---------- 4 files changed, 1 insertion(+), 41 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 873168ed497d69..d6d524f375ea73 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -26403,31 +26403,3 @@ bool GenTree::CanDivOrModPossiblyOverflow(Compiler* comp) const // Not enough known information; therefore we might overflow. return true; } - -//------------------------------------------------------------------------ -// OperIsHWIntrinsicSIMDScalar: returns true if the given tree is GT_HWINTRINSIC and -// is a SIMD scalar operation -// -// Return Value: -// true if the given tree is a hwintrinsic and a SIMD scalar operation -// -bool GenTree::OperIsHWIntrinsicSIMDScalar() -{ -#if defined(FEATURE_HW_INTRINSICS) - - if (!this->OperIsHWIntrinsic()) - return false; - - GenTreeHWIntrinsic* hwintrinsic = this->AsHWIntrinsic(); - NamedIntrinsic intrinsicId = hwintrinsic->GetHWIntrinsicId(); - -#if defined(TARGET_AMD64) - return HWIntrinsicInfo::lookupCategory(intrinsicId) == HW_Category_SIMDScalar; -#elif defined(TARGET_ARM64) - return HWIntrinsicInfo::SIMDScalar(intrinsicId); -#endif // TARGET_ARM64 && !TARGET_AMD64 - -#endif // FEATURE_HW_INTRINSICS - - return false; -} diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index c893f6ca8bd9d9..109da6a15c30d8 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1653,8 +1653,6 @@ struct GenTree bool OperIsHWIntrinsic(NamedIntrinsic intrinsicId) const; - bool OperIsHWIntrinsicSIMDScalar(); - // This is here for cleaner GT_LONG #ifdefs. static bool OperIsLong(genTreeOps gtOper) { diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 319238aaec628f..c8c7d406908e61 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6504,7 +6504,7 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) case NI_AVX2_ConvertToUInt32: { // These intrinsics are "ins reg/mem, xmm" - isContainable = varTypeIsIntegral(simdBaseType); + isContainable = varTypeIsIntegral(simdBaseType) && (genTypeSize(src) == genTypeSize(node)); break; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 2d5a548428f541..3deada8eec085b 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -10096,11 +10096,6 @@ GenTree* Compiler::fgOptimizeCastOnStore(GenTree* store) if (!src->OperIs(GT_CAST)) return store; - // SIMD scalar operations may be specialized for stores, but not when the store expects a small type. - // Therefore, we keep the CAST. - if (src->gtGetOp1()->OperIsHWIntrinsicSIMDScalar() && varTypeIsSmall(store)) - return store; - if (store->OperIs(GT_STORE_LCL_VAR)) { LclVarDsc* varDsc = lvaGetDesc(store->AsLclVarCommon()->GetLclNum()); @@ -11809,11 +11804,6 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree, bool* optAssertionPropD if (op2->gtOper == GT_CAST && !op2->gtOverflow()) { - // SIMD scalar operations may be specialized for stores, but not when the store expects a small type. - // Therefore, we keep the CAST. - if (op2->gtGetOp1()->OperIsHWIntrinsicSIMDScalar() && varTypeIsSmall(tree)) - break; - var_types srct; var_types cast; var_types dstt; From a7e0e934b3ad957f925a50141bb614b0ae62b4e7 Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 21 Sep 2023 13:03:38 -0700 Subject: [PATCH 05/10] Added test case --- .../JitBlue/Runtime_92349/Runtime_92349.cs | 24 +++++++++++++++++++ .../Runtime_92349/Runtime_92349.csproj | 9 +++++++ 2 files changed, 33 insertions(+) create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.csproj diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs b/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs new file mode 100644 index 00000000000000..27a029881c4ad0 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs @@ -0,0 +1,24 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Threading; +using Xunit; + +public static class Runtime_92349 +{ + [MethodImpl(MethodImplOptions.AggressiveOptimization)] + unsafe static void Test(byte* pValue) + { + *pValue = (byte)Sse2.ConvertToInt32(Vector128.Create(-10, 0, 0, 0)); + } + + [Fact] + unsafe static void EntryPoint() + { + ulong value = 0; + Test((byte*)Unsafe.AsPointer(ref value)); + Assert.True(value == 246); + } +} \ No newline at end of file diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.csproj new file mode 100644 index 00000000000000..6bb210527e0797 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.csproj @@ -0,0 +1,9 @@ + + + True + True + + + + + \ No newline at end of file From ba92e22726089965d2c9240a55ffb561a8c107b0 Mon Sep 17 00:00:00 2001 From: Will Smith Date: Thu, 21 Sep 2023 15:18:48 -0700 Subject: [PATCH 06/10] Update Runtime_92349.cs --- .../JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs b/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs index 27a029881c4ad0..ddfe33cafffa4a 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs @@ -9,16 +9,16 @@ public static class Runtime_92349 { [MethodImpl(MethodImplOptions.AggressiveOptimization)] - unsafe static void Test(byte* pValue) + public unsafe static void Test(byte* pValue) { *pValue = (byte)Sse2.ConvertToInt32(Vector128.Create(-10, 0, 0, 0)); } [Fact] - unsafe static void EntryPoint() + public unsafe static void EntryPoint() { ulong value = 0; Test((byte*)Unsafe.AsPointer(ref value)); Assert.True(value == 246); } -} \ No newline at end of file +} From 83d2d4612c8d10207bc070c8552c987be48750d4 Mon Sep 17 00:00:00 2001 From: Will Smith Date: Fri, 22 Sep 2023 11:17:17 -0700 Subject: [PATCH 07/10] Update Runtime_92349.cs --- src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs b/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs index ddfe33cafffa4a..69a82bdc6f588f 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs @@ -2,6 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. using System; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; using System.Runtime.CompilerServices; using System.Threading; using Xunit; From 6f96aa5fdef7b87c901ed7b5d8e46488fdb78f63 Mon Sep 17 00:00:00 2001 From: Will Smith Date: Fri, 22 Sep 2023 12:30:57 -0700 Subject: [PATCH 08/10] Update Runtime_92349.cs --- src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs b/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs index 69a82bdc6f588f..79769714c394ab 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs @@ -11,7 +11,7 @@ public static class Runtime_92349 { [MethodImpl(MethodImplOptions.AggressiveOptimization)] - public unsafe static void Test(byte* pValue) + unsafe static void Test(byte* pValue) { *pValue = (byte)Sse2.ConvertToInt32(Vector128.Create(-10, 0, 0, 0)); } From 78a2165e22cc4aeba0cde030a9be27d7e23787ae Mon Sep 17 00:00:00 2001 From: Will Smith Date: Fri, 22 Sep 2023 14:29:14 -0700 Subject: [PATCH 09/10] Update Runtime_92349.cs --- .../Regression/JitBlue/Runtime_92349/Runtime_92349.cs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs b/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs index 79769714c394ab..5de0a28895b268 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_92349/Runtime_92349.cs @@ -19,8 +19,11 @@ unsafe static void Test(byte* pValue) [Fact] public unsafe static void EntryPoint() { - ulong value = 0; - Test((byte*)Unsafe.AsPointer(ref value)); - Assert.True(value == 246); + if (Sse2.IsSupported) + { + ulong value = 0; + Test((byte*)Unsafe.AsPointer(ref value)); + Assert.True(value == 246); + } } } From 11fe3ea667e95d313b8c90f066a21372d73487ff Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 26 Sep 2023 12:23:04 +0200 Subject: [PATCH 10/10] JIT: Fix containment of extract intrinsics as STOREIND sources Fix #92590 --- src/coreclr/jit/lowerxarch.cpp | 3 +- .../JitBlue/Runtime_92590/Runtime_92590.cs | 63 +++++++++++++++++++ .../Runtime_92590/Runtime_92590.csproj | 8 +++ 3 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_92590/Runtime_92590.cs create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_92590/Runtime_92590.csproj diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index c8c7d406908e61..150ad04a55d99f 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6568,7 +6568,8 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) size_t numArgs = hwintrinsic->GetOperandCount(); GenTree* lastOp = hwintrinsic->Op(numArgs); - isContainable = HWIntrinsicInfo::isImmOp(intrinsicId, lastOp) && lastOp->IsCnsIntOrI(); + isContainable = HWIntrinsicInfo::isImmOp(intrinsicId, lastOp) && lastOp->IsCnsIntOrI() && + (genTypeSize(simdBaseType) == genTypeSize(node)); if (isContainable && (intrinsicId == NI_SSE2_Extract)) { diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_92590/Runtime_92590.cs b/src/tests/JIT/Regression/JitBlue/Runtime_92590/Runtime_92590.cs new file mode 100644 index 00000000000000..99a5ef2ee5d18d --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_92590/Runtime_92590.cs @@ -0,0 +1,63 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using Xunit; + +public class Runtime_92590 +{ + [Fact] + public static void TestEntryPoint() + { + Span bytes = stackalloc byte[4]; + bytes.Fill(0xff); + TestByteByte(ref bytes[0], 0, Vector256.Create((byte)1)); + + Assert.True(bytes.SequenceEqual(stackalloc byte[] { 0x2, 0xff, 0xff, 0xff })); + + bytes.Fill(0xff); + TestByteInt(ref bytes[0], 0, Vector256.Create(1)); + + Assert.True(bytes.SequenceEqual(stackalloc byte[] { 0x2, 0xff, 0xff, 0xff })); + + int i = int.MaxValue; + TestIntByte(ref i, 0, Vector256.Create((byte)1)); + + Assert.Equal(2, i); + + i = int.MaxValue; + TestIntInt(ref i, 0, Vector256.Create(1)); + + Assert.Equal(2, i); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void TestByteByte(ref byte b, int x, Vector256 vin) + { + Vector256 v = vin + vin; + Unsafe.Add(ref b, x) = v[3]; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void TestByteInt(ref byte b, int x, Vector256 vin) + { + Vector256 v = vin + vin; + Unsafe.Add(ref b, x) = (byte)v[3]; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void TestIntByte(ref int b, int x, Vector256 vin) + { + Vector256 v = vin + vin; + Unsafe.Add(ref b, x) = v[3]; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void TestIntInt(ref int b, int x, Vector256 vin) + { + Vector256 v = vin + vin; + Unsafe.Add(ref b, x) = v[3]; + } +} diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_92590/Runtime_92590.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_92590/Runtime_92590.csproj new file mode 100644 index 00000000000000..15edd99711a1a4 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_92590/Runtime_92590.csproj @@ -0,0 +1,8 @@ + + + True + + + + + \ No newline at end of file