From 5e6c6a90b4a1faf2cc13ccb44668df17729a7e17 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 27 Aug 2024 16:43:12 +0100 Subject: [PATCH 01/18] ARM64-SVE: Delay free all ops within conditional select --- src/coreclr/jit/lsraarm64.cpp | 22 ++++---------- .../JitBlue/Runtime_106864/Runtime_106864.cs | 30 +++++++++++++++++++ .../Runtime_106864/Runtime_106864.csproj | 9 ++++++ 3 files changed, 45 insertions(+), 16 deletions(-) create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_106864/Runtime_106864.cs create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_106864/Runtime_106864.csproj diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 5dcf005afd7405..aeb550e91e13b1 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -2040,26 +2040,16 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou buildInternalIntRegisterDefForNode(embOp2Node); } - size_t prefUseOpNum = 1; - if (intrinEmb.id == NI_Sve_CreateBreakPropagateMask) - { - prefUseOpNum = 2; - } - GenTree* prefUseNode = embOp2Node->Op(prefUseOpNum); + // The embedded op may be prefixed by a MOVPFRX instruction. If so, then the embedded + // op can't use the reuse a source register as the destination. Ensure all the inputs + // are marked as delay free. + for (size_t argNum = 1; argNum <= numArgs; argNum++) { - if (argNum == prefUseOpNum) - { - tgtPrefUse = BuildUse(prefUseNode); - srcCount += 1; - } - else - { - srcCount += BuildDelayFreeUses(embOp2Node->Op(argNum), prefUseNode); - } + srcCount += BuildDelayFreeUses(embOp2Node->Op(argNum)); } - srcCount += BuildDelayFreeUses(intrin.op3, prefUseNode); + srcCount += BuildDelayFreeUses(intrin.op3); } } else if (intrin.op2 != nullptr) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_106864/Runtime_106864.cs b/src/tests/JIT/Regression/JitBlue/Runtime_106864/Runtime_106864.cs new file mode 100644 index 00000000000000..a0a7c3cb2d3ee3 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_106864/Runtime_106864.cs @@ -0,0 +1,30 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Xunit; +using System; +using System.Numerics; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; + +public class C1 +{ + public Vector F1; +} + +public class Runtime_106864 +{ + public static C1 s_2 = new C1(); + + [Fact] + public static void TestEntryPoint() + { + if (Sve.IsSupported) + { + C1 vr2 = s_2; + var vr3 = vr2.F1; + var vr4 = vr2.F1; + vr2.F1 = Sve.Max(vr3, vr4); + } + } +} \ No newline at end of file diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_106864/Runtime_106864.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_106864/Runtime_106864.csproj new file mode 100644 index 00000000000000..1352ebe3277bc7 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_106864/Runtime_106864.csproj @@ -0,0 +1,9 @@ + + + True + $(NoWarn),SYSLIB5003 + + + + + From 23fe687f328562b105450cb03fc4d747713aef24 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 28 Aug 2024 10:04:43 +0100 Subject: [PATCH 02/18] Fix comment --- src/coreclr/jit/lsraarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index aeb550e91e13b1..4ee9e6d800bc89 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -2041,7 +2041,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } // The embedded op may be prefixed by a MOVPFRX instruction. If so, then the embedded - // op can't use the reuse a source register as the destination. Ensure all the inputs + // op can't reuse a source register as the destination. Ensure all the inputs // are marked as delay free. for (size_t argNum = 1; argNum <= numArgs; argNum++) From 62a2ce6e945e9d4cc912f02d032d04342ce3a288 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 28 Aug 2024 10:36:05 +0100 Subject: [PATCH 03/18] Add test header --- .../JitBlue/Runtime_106864/Runtime_106864.cs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_106864/Runtime_106864.cs b/src/tests/JIT/Regression/JitBlue/Runtime_106864/Runtime_106864.cs index a0a7c3cb2d3ee3..1faf1dd9a2a273 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_106864/Runtime_106864.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_106864/Runtime_106864.cs @@ -2,6 +2,16 @@ // The .NET Foundation licenses this file to you under the MIT license. using Xunit; + +// Generated by Fuzzlyn v2.3 on 2024-08-23 09:12:06 +// Run on Arm64 Windows +// Seed: 9639718980642677114-vectort,vector64,vector128,armsve +// Reduced from 52.6 KiB to 0.4 KiB in 00:00:26 +// Hits JIT assert in Release: +// Assertion failed 'targetReg != embMaskOp2Reg' in 'Program:Main(Fuzzlyn.ExecutionServer.IRuntime)' during 'Generate code' (IL size 32; hash 0xade6b36b; FullOpts) +// +// File: C:\dev\dotnet\runtime2\src\coreclr\jit\hwintrinsiccodegenarm64.cpp Line: 818 +// using System; using System.Numerics; using System.Runtime.Intrinsics; From a730062ac0494e649aebba54fe534b7b6bcceafa Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 28 Aug 2024 10:40:20 +0100 Subject: [PATCH 04/18] don't delay prefUseOpNum --- src/coreclr/jit/lsraarm64.cpp | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 4ee9e6d800bc89..820dc995b974ff 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -2040,16 +2040,26 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou buildInternalIntRegisterDefForNode(embOp2Node); } - // The embedded op may be prefixed by a MOVPFRX instruction. If so, then the embedded - // op can't reuse a source register as the destination. Ensure all the inputs - // are marked as delay free. - + size_t prefUseOpNum = 1; + if (intrinEmb.id == NI_Sve_CreateBreakPropagateMask) + { + prefUseOpNum = 2; + } + GenTree* prefUseNode = embOp2Node->Op(prefUseOpNum); for (size_t argNum = 1; argNum <= numArgs; argNum++) { - srcCount += BuildDelayFreeUses(embOp2Node->Op(argNum)); + if (argNum == prefUseOpNum) + { + tgtPrefUse = BuildUse(prefUseNode); + srcCount += 1; + } + else + { + srcCount += BuildDelayFreeUses(embOp2Node->Op(argNum)); + } } - srcCount += BuildDelayFreeUses(intrin.op3); + srcCount += BuildDelayFreeUses(intrin.op3, prefUseNode); } } else if (intrin.op2 != nullptr) From 472b40a2bc702dce83a6f6626a7fc6e4834dbe7e Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 28 Aug 2024 14:37:35 +0100 Subject: [PATCH 05/18] Fix FMA --- src/coreclr/jit/lsraarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 820dc995b974ff..2b27b36314e085 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1994,7 +1994,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else if (op == emitOp2 || op == emitOp3) { - srcCount += BuildDelayFreeUses(op, emitOp1); + srcCount += BuildDelayFreeUses(op); } } From 851b66da44ce861d0bb6711953a8194c67d16f5f Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 29 Aug 2024 11:20:05 +0100 Subject: [PATCH 06/18] Add assert checks for delay free --- src/coreclr/jit/lsraarm64.cpp | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 2b27b36314e085..e0761e9d269d76 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1994,7 +1994,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else if (op == emitOp2 || op == emitOp3) { - srcCount += BuildDelayFreeUses(op); + RefPosition* useRefPosition = nullptr; + + srcCount += BuildDelayFreeUses(op, nullptr, RBM_NONE, &useRefPosition); + +#if defined(DEBUG) + // Ensure that if this node and the RMW node refer to the same local variable, then this + // node must be marked as delay free. + if (isCandidateLocalRef(op) && isCandidateLocalRef(emitOp1) && + (getIntervalForLocalVarNode(op->AsLclVar()) == getIntervalForLocalVarNode(emitOp1->AsLclVar()))) + { + assert(useRefPosition->delayRegFree); + } +#endif // defined(DEBUG) } } @@ -2055,7 +2067,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - srcCount += BuildDelayFreeUses(embOp2Node->Op(argNum)); + RefPosition* useRefPosition = nullptr; + + srcCount += BuildDelayFreeUses(embOp2Node->Op(argNum), nullptr, RBM_NONE, &useRefPosition); + +#if defined(DEBUG) + // Ensure that if this node and the RMW node refer to the same local variable, then this + // node must be marked as delay free. + if (isCandidateLocalRef(embOp2Node->Op(argNum)) && isCandidateLocalRef(prefUseNode) && + (getIntervalForLocalVarNode(embOp2Node->Op(argNum)->AsLclVar()) == getIntervalForLocalVarNode(prefUseNode->AsLclVar()))) + { + assert(useRefPosition->delayRegFree); + } +#endif // defined(DEBUG) } } From 7455bc0056066955c178a235eec454e7279b3599 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 29 Aug 2024 11:29:19 +0100 Subject: [PATCH 07/18] Merge embedded op build code --- src/coreclr/jit/lsraarm64.cpp | 74 ++++++++++++----------------------- 1 file changed, 26 insertions(+), 48 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index e0761e9d269d76..cd5fed5e6bfb4e 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1945,6 +1945,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou size_t numArgs = embOp2Node->GetOperandCount(); const HWIntrinsic intrinEmb(embOp2Node); numArgs = embOp2Node->GetOperandCount(); + GenTree* prefUseNode = nullptr; if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmb.id)) { @@ -1984,33 +1985,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // Nothing needs to be done } - GenTree* ops[] = {intrinEmb.op1, intrinEmb.op2, intrinEmb.op3}; - for (GenTree* op : ops) - { - if (op == emitOp1) - { - tgtPrefUse = BuildUse(op); - srcCount++; - } - else if (op == emitOp2 || op == emitOp3) - { - RefPosition* useRefPosition = nullptr; - - srcCount += BuildDelayFreeUses(op, nullptr, RBM_NONE, &useRefPosition); - -#if defined(DEBUG) - // Ensure that if this node and the RMW node refer to the same local variable, then this - // node must be marked as delay free. - if (isCandidateLocalRef(op) && isCandidateLocalRef(emitOp1) && - (getIntervalForLocalVarNode(op->AsLclVar()) == getIntervalForLocalVarNode(emitOp1->AsLclVar()))) - { - assert(useRefPosition->delayRegFree); - } -#endif // defined(DEBUG) - } - } - - srcCount += BuildDelayFreeUses(intrin.op3, emitOp1); + prefUseNode = emitOp1; } else { @@ -2057,34 +2032,37 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { prefUseOpNum = 2; } - GenTree* prefUseNode = embOp2Node->Op(prefUseOpNum); - for (size_t argNum = 1; argNum <= numArgs; argNum++) + prefUseNode = embOp2Node->Op(prefUseOpNum); + } + + for (size_t argNum = 1; argNum <= numArgs; argNum++) + { + GenTree* node = embOp2Node->Op(argNum); + + if (node == prefUseNode) + { + tgtPrefUse = BuildUse(node); + srcCount++; + } + else { - if (argNum == prefUseOpNum) - { - tgtPrefUse = BuildUse(prefUseNode); - srcCount += 1; - } - else - { - RefPosition* useRefPosition = nullptr; + RefPosition* useRefPosition = nullptr; - srcCount += BuildDelayFreeUses(embOp2Node->Op(argNum), nullptr, RBM_NONE, &useRefPosition); + srcCount += BuildDelayFreeUses(node, nullptr, RBM_NONE, &useRefPosition); #if defined(DEBUG) - // Ensure that if this node and the RMW node refer to the same local variable, then this - // node must be marked as delay free. - if (isCandidateLocalRef(embOp2Node->Op(argNum)) && isCandidateLocalRef(prefUseNode) && - (getIntervalForLocalVarNode(embOp2Node->Op(argNum)->AsLclVar()) == getIntervalForLocalVarNode(prefUseNode->AsLclVar()))) - { - assert(useRefPosition->delayRegFree); - } -#endif // defined(DEBUG) + // Ensure that if this node and the RMW node refer to the same local variable, then this + // node must be marked as delay free. + if (isCandidateLocalRef(node) && isCandidateLocalRef(prefUseNode) && + (getIntervalForLocalVarNode(node->AsLclVar()) == getIntervalForLocalVarNode(prefUseNode->AsLclVar()))) + { + assert(useRefPosition->delayRegFree); } +#endif // defined(DEBUG) } - - srcCount += BuildDelayFreeUses(intrin.op3, prefUseNode); } + + srcCount += BuildDelayFreeUses(intrin.op3, prefUseNode); } else if (intrin.op2 != nullptr) { From 06cbe55636de310fff2c6553adfbbb8b6f4ebd4d Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 29 Aug 2024 11:43:22 +0100 Subject: [PATCH 08/18] fix formatting --- src/coreclr/jit/lsraarm64.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index cd5fed5e6bfb4e..748f68663d2da6 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1944,7 +1944,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou GenTreeHWIntrinsic* embOp2Node = intrin.op2->AsHWIntrinsic(); size_t numArgs = embOp2Node->GetOperandCount(); const HWIntrinsic intrinEmb(embOp2Node); - numArgs = embOp2Node->GetOperandCount(); + numArgs = embOp2Node->GetOperandCount(); GenTree* prefUseNode = nullptr; if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmb.id)) @@ -2054,7 +2054,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // Ensure that if this node and the RMW node refer to the same local variable, then this // node must be marked as delay free. if (isCandidateLocalRef(node) && isCandidateLocalRef(prefUseNode) && - (getIntervalForLocalVarNode(node->AsLclVar()) == getIntervalForLocalVarNode(prefUseNode->AsLclVar()))) + (getIntervalForLocalVarNode(node->AsLclVar()) == + getIntervalForLocalVarNode(prefUseNode->AsLclVar()))) { assert(useRefPosition->delayRegFree); } From c99f44f8da202988045cdde0303ac3c052cf9577 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 29 Aug 2024 12:25:47 +0100 Subject: [PATCH 09/18] simplify assert --- src/coreclr/jit/lsraarm64.cpp | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 748f68663d2da6..183f4bccbcabef 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -2048,18 +2048,12 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { RefPosition* useRefPosition = nullptr; - srcCount += BuildDelayFreeUses(node, nullptr, RBM_NONE, &useRefPosition); - -#if defined(DEBUG) - // Ensure that if this node and the RMW node refer to the same local variable, then this - // node must be marked as delay free. - if (isCandidateLocalRef(node) && isCandidateLocalRef(prefUseNode) && - (getIntervalForLocalVarNode(node->AsLclVar()) == - getIntervalForLocalVarNode(prefUseNode->AsLclVar()))) - { - assert(useRefPosition->delayRegFree); - } -#endif // defined(DEBUG) + int uses = BuildDelayFreeUses(node, nullptr, RBM_NONE, &useRefPosition); + srcCount += uses; + + // It is a hard requirement that these are not allocated to the same register as the destination, + // so verify no optimizations kicked in to skip setting the delay-free. + assert((useRefPosition != nullptr && useRefPosition->delayRegFree) || (uses == 0)); } } From c6b58c38265ef12ddba6ed11ce95c83809893733 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 29 Aug 2024 16:03:16 +0100 Subject: [PATCH 10/18] simplify FMA code --- src/coreclr/jit/lsraarm64.cpp | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 183f4bccbcabef..79bf3c16778df6 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1962,30 +1962,15 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou unsigned resultOpNum = embOp2Node->GetResultOpNumForRmwIntrinsic(user, intrinEmb.op1, intrinEmb.op2, intrinEmb.op3); - GenTree* emitOp1 = intrinEmb.op1; - GenTree* emitOp2 = intrinEmb.op2; - GenTree* emitOp3 = intrinEmb.op3; - - if (resultOpNum == 2) - { - // op2 = op1 + (op2 * op3) - std::swap(emitOp1, emitOp3); - std::swap(emitOp1, emitOp2); - // op1 = (op1 * op2) + op3 - } - else if (resultOpNum == 3) + if (resultOpNum == 0) { - // op3 = op1 + (op2 * op3) - std::swap(emitOp1, emitOp3); - // op1 = (op1 * op2) + op3 + prefUseNode = embOp2Node->Op(1); } else { - // op1 = op1 + (op2 * op3) - // Nothing needs to be done + assert(resultOpNum >= 1 && resultOpNum <= 3); + prefUseNode = embOp2Node->Op(resultOpNum); } - - prefUseNode = emitOp1; } else { From e21db13f03110f470219dda8e58eb0a788950089 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 30 Aug 2024 09:34:09 +0100 Subject: [PATCH 11/18] Add tests for 106867 --- .../JitBlue/Runtime_106867/Runtime_106867.cs | 51 +++++++++++++++++++ .../Runtime_106867/Runtime_106867.csproj | 9 ++++ .../Runtime_106867/Runtime_106867_1.cs | 39 ++++++++++++++ .../Runtime_106867/Runtime_106867_1.csproj | 9 ++++ 4 files changed, 108 insertions(+) create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867.cs create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867.csproj create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867_1.cs create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867_1.csproj diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867.cs b/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867.cs new file mode 100644 index 00000000000000..051d3f9d027612 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867.cs @@ -0,0 +1,51 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Xunit; + +// Generated by Fuzzlyn v2.3 on 2024-08-23 10:10:06 +// Run on Arm64 Windows +// Seed: 13584223539078280353-vectort,vector64,vector128,armsve +// Reduced from 87.4 KiB to 0.8 KiB in 00:00:52 +// Hits JIT assert in Release: +// Assertion failed 'secondId->idReg1() != secondId->idReg4()' in 'S0:M6(ubyte,double):this' during 'Emit code' (IL size 81; hash 0x596acd7c; FullOpts) +// +// File: C:\dev\dotnet\runtime2\src\coreclr\jit\emitarm64sve.cpp Line: 18601 +// +using System; +using System.Numerics; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; + +public struct S0 +{ + public void M6(byte arg0, double arg1) + { + var vr0 = Vector128.CreateScalar(119.12962f).AsVector(); + var vr3 = Runtime_106867.s_2; + var vr4 = Vector128.CreateScalar(1f).AsVector(); + var vr5 = Runtime_106867.s_2; + var vr2 = Sve.FusedMultiplySubtractNegated(vr3, vr4, vr5); + if ((Sve.ConditionalExtractLastActiveElement(vr0, 0, vr2) < 0)) + { + this = this; + } + } +} + +public class Runtime_106867 +{ + public static Vector s_2; + public static double[] s_5 = new double[] + { + 0 + }; + public static byte s_16; + + [Fact] + public static void TestEntryPoint() + { + var vr6 = s_5[0]; + new S0().M6(s_16, vr6); + } +} diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867.csproj new file mode 100644 index 00000000000000..1352ebe3277bc7 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867.csproj @@ -0,0 +1,9 @@ + + + True + $(NoWarn),SYSLIB5003 + + + + + diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867_1.cs b/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867_1.cs new file mode 100644 index 00000000000000..238d39196b2760 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867_1.cs @@ -0,0 +1,39 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Xunit; +using System.Runtime.CompilerServices; + +// Generated by Fuzzlyn v2.3 on 2024-08-23 10:12:51 +// Run on Arm64 Windows +// Seed: 4576767951799510057-vectort,vector64,vector128,armsve +// Reduced from 32.2 KiB to 0.5 KiB in 00:00:25 +// Hits JIT assert in Release: +// Assertion failed 'secondId->idReg1() != secondId->idReg3()' in 'Program:Main(Fuzzlyn.ExecutionServer.IRuntime)' during 'Emit code' (IL size 55; hash 0xade6b36b; FullOpts) +// +// File: C:\dev\dotnet\runtime2\src\coreclr\jit\emitarm64sve.cpp Line: 18600 +// +using System; +using System.Numerics; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; + +public class Runtime_106866_2 +{ + [Fact] + public static void TestEntryPoint() + { + Vector vr4 = default(Vector); + vr4 = Sve.MultiplyAdd(vr4, vr4, vr4); + var vr5 = (short)0; + var vr6 = Vector128.CreateScalar(vr5).AsVector(); + var vr7 = (short)0; + var vr8 = Sve.ConditionalExtractLastActiveElement(vr6, vr7, vr4); + Consume(vr8); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void Consume(T val) + { + } +} diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867_1.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867_1.csproj new file mode 100644 index 00000000000000..1352ebe3277bc7 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867_1.csproj @@ -0,0 +1,9 @@ + + + True + $(NoWarn),SYSLIB5003 + + + + + From c0e460331d68d1af78df847800c2380ec23b2988 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 30 Aug 2024 11:31:28 +0100 Subject: [PATCH 12/18] ARM64-SVE: Allow op inside conditionselect to be non HWintrinsic TEST_IMG: ubuntu/dotnet-build TEST_CMD: safe ./projects/dotnet/build-runtime.sh Jira: ENTLLT-7634 Change-Id: I337a291be6661f104fe90c7cdc27150eede43647 --- src/coreclr/jit/lowerarmarch.cpp | 66 ++++++++++--------- .../JitBlue/Runtime_106869/Runtime_106869.cs | 63 ++++++++++++++++++ .../Runtime_106869/Runtime_106869.csproj | 9 +++ 3 files changed, 106 insertions(+), 32 deletions(-) create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.cs create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.csproj diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index e0bf56e0eeb3fc..34440ce792b84a 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -4062,46 +4062,48 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode) GenTree* nestedOp1 = nestedCndSel->Op(1); GenTree* nestedOp2 = nestedCndSel->Op(2); assert(varTypeIsMask(nestedOp1)); - assert(nestedOp2->OperIsHWIntrinsic()); - NamedIntrinsic nestedOp2Id = nestedOp2->AsHWIntrinsic()->GetHWIntrinsicId(); - - // If the nested op uses Pg/Z, then inactive lanes will result in zeros, so can only transform if - // op3 is all zeros. - - if (nestedOp1->IsMaskAllBitsSet() && - (!HWIntrinsicInfo::IsZeroingMaskedOperation(nestedOp2Id) || op3->IsVectorZero())) + if (nestedOp2->OperIsHWIntrinsic()) { - GenTree* nestedOp2 = nestedCndSel->Op(2); - GenTree* nestedOp3 = nestedCndSel->Op(3); + NamedIntrinsic nestedOp2Id = nestedOp2->AsHWIntrinsic()->GetHWIntrinsicId(); - JITDUMP("lowering nested ConditionalSelect HWIntrinisic (before):\n"); - DISPTREERANGE(BlockRange(), cndSelNode); - JITDUMP("\n"); + // If the nested op uses Pg/Z, then inactive lanes will result in zeros, so can only transform if + // op3 is all zeros. - // Transform: - // - // CndSel(mask, CndSel(AllTrue, embeddedMask(trueValOp2), trueValOp3), op3) to - // CndSel(mask, embedded(trueValOp2), op3) - // - cndSelNode->Op(2) = nestedCndSel->Op(2); - if (nestedOp3->IsMaskZero()) - { - BlockRange().Remove(nestedOp3); - } - else + if (nestedOp1->IsMaskAllBitsSet() && + (!HWIntrinsicInfo::IsZeroingMaskedOperation(nestedOp2Id) || op3->IsVectorZero())) { - nestedOp3->SetUnusedValue(); - } + GenTree* nestedOp2 = nestedCndSel->Op(2); + GenTree* nestedOp3 = nestedCndSel->Op(3); + + JITDUMP("lowering nested ConditionalSelect HWIntrinisic (before):\n"); + DISPTREERANGE(BlockRange(), cndSelNode); + JITDUMP("\n"); + + // Transform: + // + // CndSel(mask, CndSel(AllTrue, embeddedMask(trueValOp2), trueValOp3), op3) to + // CndSel(mask, embedded(trueValOp2), op3) + // + cndSelNode->Op(2) = nestedCndSel->Op(2); + if (nestedOp3->IsMaskZero()) + { + BlockRange().Remove(nestedOp3); + } + else + { + nestedOp3->SetUnusedValue(); + } - BlockRange().Remove(nestedOp1); - BlockRange().Remove(nestedCndSel); + BlockRange().Remove(nestedOp1); + BlockRange().Remove(nestedCndSel); - JITDUMP("lowering nested ConditionalSelect HWIntrinisic (after):\n"); - DISPTREERANGE(BlockRange(), cndSelNode); - JITDUMP("\n"); + JITDUMP("lowering nested ConditionalSelect HWIntrinisic (after):\n"); + DISPTREERANGE(BlockRange(), cndSelNode); + JITDUMP("\n"); - return cndSelNode; + return cndSelNode; + } } } else if (op1->IsMaskAllBitsSet()) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.cs b/src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.cs new file mode 100644 index 00000000000000..006d14a1dde9e9 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.cs @@ -0,0 +1,63 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Xunit; +using System.Runtime.CompilerServices; + +// Generated by Fuzzlyn v2.3 on 2024-08-23 10:25:51 +// Run on Arm64 Windows +// Seed: 13938901376337307772-vectort,vector64,vector128,armsve +// Reduced from 210.5 KiB to 1.1 KiB in 00:02:19 +// Hits JIT assert in Release: +// Assertion failed 'nestedOp2->OperIsHWIntrinsic()' in 'Program:Main(Fuzzlyn.ExecutionServer.IRuntime)' during 'Lowering nodeinfo' (IL size 119; hash 0xade6b36b; FullOpts) +// +// File: C:\dev\dotnet\runtime2\src\coreclr\jit\lowerarmarch.cpp Line: 4062 +// +using System; +using System.Numerics; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; + +public struct S0 +{ + public ulong F5; +} + +public class C0 +{ + public int F1; +} + +public class Runtime_1068867 +{ + public static S0 s_7; + public static byte s_14; + + [Fact] + public static void TestEntryPoint() + { + var vr12 = new C0(); + var vr14 = vr12.F1; + var vr15 = Vector128.CreateScalar(vr14).AsVector(); + var vr16 = Vector128.CreateScalar(0).AsVector(); + var vr17 = Vector128.CreateScalar(0).AsVector(); + var vr18 = Vector128.CreateScalar(0).AsVector(); + var vr19 = Vector128.CreateScalar(1).AsVector(); + var vr20 = Sve.ConditionalSelect(vr17, vr18, vr19); + var vr21 = Vector128.CreateScalar(0).AsVector(); + var vr22 = Sve.ConditionalSelect(vr16, vr20, vr21); + // var vr23 = (uint)Sve.GetActiveElementCount(vr15, vr22); + // M17(s_7, vr20 + Consume(vr22); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void Consume(T val) + { + } + // public static byte M17(S0 argThis, uint arg0) + // { + // var vr0 = argThis.F5; + // return (byte)(4294967295U | (sbyte)Sve.SaturatingDecrementByActiveElementCount(vr0, Vector.Create(s_14))); + // } +} diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.csproj new file mode 100644 index 00000000000000..1352ebe3277bc7 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.csproj @@ -0,0 +1,9 @@ + + + True + $(NoWarn),SYSLIB5003 + + + + + From 64a671d455dbc4396b805f159e8e1eb409401095 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 4 Sep 2024 12:14:09 +0100 Subject: [PATCH 13/18] Add Sve.IsSupported to tests --- .../JitBlue/Runtime_106867/Runtime_106867.cs | 7 +++++-- .../JitBlue/Runtime_106867/Runtime_106867_1.cs | 17 ++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867.cs b/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867.cs index 051d3f9d027612..1d106ce17aa3bd 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867.cs @@ -45,7 +45,10 @@ public class Runtime_106867 [Fact] public static void TestEntryPoint() { - var vr6 = s_5[0]; - new S0().M6(s_16, vr6); + if (Sve.IsSupported) + { + var vr6 = s_5[0]; + new S0().M6(s_16, vr6); + } } } diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867_1.cs b/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867_1.cs index 238d39196b2760..e5e05cc6553bcc 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867_1.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_106867/Runtime_106867_1.cs @@ -23,13 +23,16 @@ public class Runtime_106866_2 [Fact] public static void TestEntryPoint() { - Vector vr4 = default(Vector); - vr4 = Sve.MultiplyAdd(vr4, vr4, vr4); - var vr5 = (short)0; - var vr6 = Vector128.CreateScalar(vr5).AsVector(); - var vr7 = (short)0; - var vr8 = Sve.ConditionalExtractLastActiveElement(vr6, vr7, vr4); - Consume(vr8); + if (Sve.IsSupported) + { + Vector vr4 = default(Vector); + vr4 = Sve.MultiplyAdd(vr4, vr4, vr4); + var vr5 = (short)0; + var vr6 = Vector128.CreateScalar(vr5).AsVector(); + var vr7 = (short)0; + var vr8 = Sve.ConditionalExtractLastActiveElement(vr6, vr7, vr4); + Consume(vr8); + } } [MethodImpl(MethodImplOptions.NoInlining)] From 3e6f109a3a936ec6661c7f80a69eb71dc4af824f Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 4 Sep 2024 12:29:55 +0100 Subject: [PATCH 14/18] Add Sve.IsSupported to test --- .../JitBlue/Runtime_106869/Runtime_106869.cs | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.cs b/src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.cs index 006d14a1dde9e9..b4b9856c795950 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.cs @@ -36,28 +36,24 @@ public class Runtime_1068867 [Fact] public static void TestEntryPoint() { - var vr12 = new C0(); - var vr14 = vr12.F1; - var vr15 = Vector128.CreateScalar(vr14).AsVector(); - var vr16 = Vector128.CreateScalar(0).AsVector(); - var vr17 = Vector128.CreateScalar(0).AsVector(); - var vr18 = Vector128.CreateScalar(0).AsVector(); - var vr19 = Vector128.CreateScalar(1).AsVector(); - var vr20 = Sve.ConditionalSelect(vr17, vr18, vr19); - var vr21 = Vector128.CreateScalar(0).AsVector(); - var vr22 = Sve.ConditionalSelect(vr16, vr20, vr21); - // var vr23 = (uint)Sve.GetActiveElementCount(vr15, vr22); - // M17(s_7, vr20 - Consume(vr22); + if (Sve.IsSupported) + { + var vr12 = new C0(); + var vr14 = vr12.F1; + var vr15 = Vector128.CreateScalar(vr14).AsVector(); + var vr16 = Vector128.CreateScalar(0).AsVector(); + var vr17 = Vector128.CreateScalar(0).AsVector(); + var vr18 = Vector128.CreateScalar(0).AsVector(); + var vr19 = Vector128.CreateScalar(1).AsVector(); + var vr20 = Sve.ConditionalSelect(vr17, vr18, vr19); + var vr21 = Vector128.CreateScalar(0).AsVector(); + var vr22 = Sve.ConditionalSelect(vr16, vr20, vr21); + Consume(vr22); + } } [MethodImpl(MethodImplOptions.NoInlining)] static void Consume(T val) { } - // public static byte M17(S0 argThis, uint arg0) - // { - // var vr0 = argThis.F5; - // return (byte)(4294967295U | (sbyte)Sve.SaturatingDecrementByActiveElementCount(vr0, Vector.Create(s_14))); - // } } From d27ebe24ef914bbe15e5546a6380014938272616 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 4 Sep 2024 15:19:54 +0100 Subject: [PATCH 15/18] fix formatting --- src/coreclr/jit/lowerarmarch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 7625dbabbd1119..6b96564af8668b 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -4056,8 +4056,8 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode) NamedIntrinsic nestedOp2Id = nestedOp2->AsHWIntrinsic()->GetHWIntrinsicId(); // If the nested op uses Pg/Z, then inactive lanes will result in zeros, so can only transform if - // op3 is all zeros. Such a Csel operation is absorbed into the instruction when emitted. Skip this optimisation - // when the nestedOp is a reduce operation. + // op3 is all zeros. Such a Csel operation is absorbed into the instruction when emitted. Skip this + // optimisation when the nestedOp is a reduce operation. if (nestedOp1->IsMaskAllBitsSet() && !HWIntrinsicInfo::IsReduceOperation(nestedOp2Id) && (!HWIntrinsicInfo::IsZeroingMaskedOperation(nestedOp2Id) || op3->IsVectorZero())) From fe6080fa91e8259b8e189adb27f100e175dcc1e2 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 4 Sep 2024 15:23:26 +0100 Subject: [PATCH 16/18] Revert "ARM64-SVE: Allow op inside conditionselect to be non HWintrinsic" --- src/coreclr/jit/lowerarmarch.cpp | 67 +++++++++---------- .../JitBlue/Runtime_106869/Runtime_106869.cs | 59 ---------------- .../Runtime_106869/Runtime_106869.csproj | 9 --- 3 files changed, 32 insertions(+), 103 deletions(-) delete mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.cs delete mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.csproj diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 6b96564af8668b..978c066be998a5 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -4050,49 +4050,46 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode) GenTree* nestedOp1 = nestedCndSel->Op(1); GenTree* nestedOp2 = nestedCndSel->Op(2); assert(varTypeIsMask(nestedOp1)); + assert(nestedOp2->OperIsHWIntrinsic()); - if (nestedOp2->OperIsHWIntrinsic()) + NamedIntrinsic nestedOp2Id = nestedOp2->AsHWIntrinsic()->GetHWIntrinsicId(); + + // If the nested op uses Pg/Z, then inactive lanes will result in zeros, so can only transform if + // op3 is all zeros. + + if (nestedOp1->IsMaskAllBitsSet() && + (!HWIntrinsicInfo::IsZeroingMaskedOperation(nestedOp2Id) || op3->IsVectorZero())) { - NamedIntrinsic nestedOp2Id = nestedOp2->AsHWIntrinsic()->GetHWIntrinsicId(); + GenTree* nestedOp2 = nestedCndSel->Op(2); + GenTree* nestedOp3 = nestedCndSel->Op(3); - // If the nested op uses Pg/Z, then inactive lanes will result in zeros, so can only transform if - // op3 is all zeros. Such a Csel operation is absorbed into the instruction when emitted. Skip this - // optimisation when the nestedOp is a reduce operation. + JITDUMP("lowering nested ConditionalSelect HWIntrinisic (before):\n"); + DISPTREERANGE(BlockRange(), cndSelNode); + JITDUMP("\n"); - if (nestedOp1->IsMaskAllBitsSet() && !HWIntrinsicInfo::IsReduceOperation(nestedOp2Id) && - (!HWIntrinsicInfo::IsZeroingMaskedOperation(nestedOp2Id) || op3->IsVectorZero())) + // Transform: + // + // CndSel(mask, CndSel(AllTrue, embeddedMask(trueValOp2), trueValOp3), op3) to + // CndSel(mask, embedded(trueValOp2), op3) + // + cndSelNode->Op(2) = nestedCndSel->Op(2); + if (nestedOp3->IsMaskZero()) { - GenTree* nestedOp2 = nestedCndSel->Op(2); - GenTree* nestedOp3 = nestedCndSel->Op(3); - - JITDUMP("lowering nested ConditionalSelect HWIntrinisic (before):\n"); - DISPTREERANGE(BlockRange(), cndSelNode); - JITDUMP("\n"); - - // Transform: - // - // CndSel(mask, CndSel(AllTrue, embeddedMask(trueValOp2), trueValOp3), op3) to - // CndSel(mask, embedded(trueValOp2), op3) - // - cndSelNode->Op(2) = nestedCndSel->Op(2); - if (nestedOp3->IsMaskZero()) - { - BlockRange().Remove(nestedOp3); - } - else - { - nestedOp3->SetUnusedValue(); - } + BlockRange().Remove(nestedOp3); + } + else + { + nestedOp3->SetUnusedValue(); + } - BlockRange().Remove(nestedOp1); - BlockRange().Remove(nestedCndSel); + BlockRange().Remove(nestedOp1); + BlockRange().Remove(nestedCndSel); - JITDUMP("lowering nested ConditionalSelect HWIntrinisic (after):\n"); - DISPTREERANGE(BlockRange(), cndSelNode); - JITDUMP("\n"); + JITDUMP("lowering nested ConditionalSelect HWIntrinisic (after):\n"); + DISPTREERANGE(BlockRange(), cndSelNode); + JITDUMP("\n"); - return cndSelNode; - } + return cndSelNode; } } else if (op1->IsMaskAllBitsSet()) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.cs b/src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.cs deleted file mode 100644 index b4b9856c795950..00000000000000 --- a/src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.cs +++ /dev/null @@ -1,59 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using Xunit; -using System.Runtime.CompilerServices; - -// Generated by Fuzzlyn v2.3 on 2024-08-23 10:25:51 -// Run on Arm64 Windows -// Seed: 13938901376337307772-vectort,vector64,vector128,armsve -// Reduced from 210.5 KiB to 1.1 KiB in 00:02:19 -// Hits JIT assert in Release: -// Assertion failed 'nestedOp2->OperIsHWIntrinsic()' in 'Program:Main(Fuzzlyn.ExecutionServer.IRuntime)' during 'Lowering nodeinfo' (IL size 119; hash 0xade6b36b; FullOpts) -// -// File: C:\dev\dotnet\runtime2\src\coreclr\jit\lowerarmarch.cpp Line: 4062 -// -using System; -using System.Numerics; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.Arm; - -public struct S0 -{ - public ulong F5; -} - -public class C0 -{ - public int F1; -} - -public class Runtime_1068867 -{ - public static S0 s_7; - public static byte s_14; - - [Fact] - public static void TestEntryPoint() - { - if (Sve.IsSupported) - { - var vr12 = new C0(); - var vr14 = vr12.F1; - var vr15 = Vector128.CreateScalar(vr14).AsVector(); - var vr16 = Vector128.CreateScalar(0).AsVector(); - var vr17 = Vector128.CreateScalar(0).AsVector(); - var vr18 = Vector128.CreateScalar(0).AsVector(); - var vr19 = Vector128.CreateScalar(1).AsVector(); - var vr20 = Sve.ConditionalSelect(vr17, vr18, vr19); - var vr21 = Vector128.CreateScalar(0).AsVector(); - var vr22 = Sve.ConditionalSelect(vr16, vr20, vr21); - Consume(vr22); - } - } - - [MethodImpl(MethodImplOptions.NoInlining)] - static void Consume(T val) - { - } -} diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.csproj deleted file mode 100644 index 1352ebe3277bc7..00000000000000 --- a/src/tests/JIT/Regression/JitBlue/Runtime_106869/Runtime_106869.csproj +++ /dev/null @@ -1,9 +0,0 @@ - - - True - $(NoWarn),SYSLIB5003 - - - - - From 97e528b35aae0ed7ee71b743e3bb162b6b6fb22f Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 4 Sep 2024 15:25:27 +0100 Subject: [PATCH 17/18] Revert "ARM64-SVE: Allow op inside conditionselect to be non HWintrinsic" --- src/coreclr/jit/lowerarmarch.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 978c066be998a5..6c430cafbeb59d 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -4054,10 +4054,10 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode) NamedIntrinsic nestedOp2Id = nestedOp2->AsHWIntrinsic()->GetHWIntrinsicId(); - // If the nested op uses Pg/Z, then inactive lanes will result in zeros, so can only transform if - // op3 is all zeros. + // op3 is all zeros. Such a Csel operation is absorbed into the instruction when emitted. Skip this optimisation + // when the nestedOp is a reduce operation. - if (nestedOp1->IsMaskAllBitsSet() && + if (nestedOp1->IsMaskAllBitsSet() && !HWIntrinsicInfo::IsReduceOperation(nestedOp2Id) && (!HWIntrinsicInfo::IsZeroingMaskedOperation(nestedOp2Id) || op3->IsVectorZero())) { GenTree* nestedOp2 = nestedCndSel->Op(2); From 1044604b11dddf91b53b922046aa0cf6d0621309 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 4 Sep 2024 15:25:58 +0100 Subject: [PATCH 18/18] Revert "ARM64-SVE: Allow op inside conditionselect to be non HWintrinsic" --- src/coreclr/jit/lowerarmarch.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 6c430cafbeb59d..a74bb3651c88f9 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -4054,6 +4054,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode) NamedIntrinsic nestedOp2Id = nestedOp2->AsHWIntrinsic()->GetHWIntrinsicId(); + // If the nested op uses Pg/Z, then inactive lanes will result in zeros, so can only transform if // op3 is all zeros. Such a Csel operation is absorbed into the instruction when emitted. Skip this optimisation // when the nestedOp is a reduce operation.