From 5513cad8d76674a7ad23a70f3d82ccad765a02a7 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 10 Jul 2024 18:43:23 -0700 Subject: [PATCH 01/11] Added Sve.CreateBreakPropagateMask --- src/coreclr/jit/hwintrinsic.cpp | 47 ++++++++++-------- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 35 ++++++++++--- src/coreclr/jit/hwintrinsiclistarm64sve.h | 1 + src/coreclr/jit/lsraarm64.cpp | 25 +++++++--- .../Arm/Sve.PlatformNotSupported.cs | 49 +++++++++++++++++++ .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 49 +++++++++++++++++++ .../ref/System.Runtime.Intrinsics.cs | 9 ++++ .../GenerateHWIntrinsicTests_Arm.cs | 11 ++++- .../HardwareIntrinsics/Arm/Shared/Helpers.cs | 20 ++++++++ 9 files changed, 211 insertions(+), 35 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 4f016940840b2c..3d46e2f585d809 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1904,10 +1904,16 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } #if defined(TARGET_ARM64) + auto convertToMaskIfPossible = [&](GenTree*& op) { + if (!varTypeIsMask(op)) + { + op = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op, simdBaseJitType, simdSize); + } + }; + if (HWIntrinsicInfo::IsExplicitMaskedOperation(intrinsic)) { assert(numArgs > 0); - GenTree* op1 = retNode->AsHWIntrinsic()->Op(1); switch (intrinsic) { @@ -1922,14 +1928,8 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, case NI_Sve_TestFirstTrue: case NI_Sve_TestLastTrue: { - GenTree* op2 = retNode->AsHWIntrinsic()->Op(2); - // HWInstrinsic requires a mask for op2 - if (!varTypeIsMask(op2)) - { - retNode->AsHWIntrinsic()->Op(2) = - gtNewSimdCvtVectorToMaskNode(TYP_MASK, op2, simdBaseJitType, simdSize); - } + convertToMaskIfPossible(retNode->AsHWIntrinsic()->Op(2)); break; } @@ -1942,14 +1942,8 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, case NI_Sve_CreateBreakAfterPropagateMask: case NI_Sve_CreateBreakBeforePropagateMask: { - GenTree* op3 = retNode->AsHWIntrinsic()->Op(3); - // HWInstrinsic requires a mask for op3 - if (!varTypeIsMask(op3)) - { - retNode->AsHWIntrinsic()->Op(3) = - gtNewSimdCvtVectorToMaskNode(TYP_MASK, op3, simdBaseJitType, simdSize); - } + convertToMaskIfPossible(retNode->AsHWIntrinsic()->Op(3)); break; } @@ -1957,11 +1951,8 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, break; } - if (!varTypeIsMask(op1)) - { - // Op1 input is a vector. HWInstrinsic requires a mask. - retNode->AsHWIntrinsic()->Op(1) = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseJitType, simdSize); - } + // HWInstrinsic requires a mask for op1 + convertToMaskIfPossible(retNode->AsHWIntrinsic()->Op(1)); if (HWIntrinsicInfo::IsMultiReg(intrinsic)) { @@ -1972,6 +1963,22 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } } + if (HWIntrinsicInfo::IsEmbeddedMaskedOperation(intrinsic)) + { + switch (intrinsic) + { + case NI_Sve_CreateBreakPropagateMask: + { + convertToMaskIfPossible(retNode->AsHWIntrinsic()->Op(1)); + convertToMaskIfPossible(retNode->AsHWIntrinsic()->Op(2)); + break; + } + + default: + break; + } + } + if (retType != nodeRetType) { // HWInstrinsic returns a mask, but all returns must be vectors, so convert mask to vector. diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 2fa052fcbdcadc..e0a2c939d95463 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -598,6 +598,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) insScalableOpts sopt = INS_SCALABLE_OPTS_NONE; bool hasShift = false; + insOpts embOpt = opt; switch (intrinEmbMask.id) { case NI_Sve_ShiftLeftLogical: @@ -617,6 +618,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) hasShift = true; break; + case NI_Sve_CreateBreakPropagateMask: + embOpt = INS_OPTS_SCALABLE_B; + break; + default: break; } @@ -627,13 +632,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic()); for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) { - GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, reg1, reg2, helper.ImmValue(), opt, + GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, reg1, reg2, helper.ImmValue(), embOpt, sopt); } } else { - GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, reg1, reg2, reg3, opt, sopt); + GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, reg1, reg2, reg3, embOpt, sopt); } }; @@ -642,12 +647,28 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // If `falseReg` is zero, then move the first operand of `intrinEmbMask` in the // destination using /Z. - assert(targetReg != embMaskOp2Reg); - GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, opt); + switch (intrinEmbMask.id) + { + case NI_Sve_CreateBreakPropagateMask: + if (targetReg != embMaskOp2Reg) + { + assert(targetReg != embMaskOp1Reg); + GetEmitter()->emitIns_Mov(INS_sve_mov, emitSize, targetReg, embMaskOp2Reg, + /* canSkip */ true); + } + emitInsHelper(targetReg, maskReg, embMaskOp1Reg); + break; - // Finally, perform the actual "predicated" operation so that `targetReg` is the first operand - // and `embMaskOp2Reg` is the second operand. - emitInsHelper(targetReg, maskReg, embMaskOp2Reg); + default: + assert(targetReg != embMaskOp2Reg); + GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, + embMaskOp1Reg, opt); + + // Finally, perform the actual "predicated" operation so that `targetReg` is the first + // operand and `embMaskOp2Reg` is the second operand. + emitInsHelper(targetReg, maskReg, embMaskOp2Reg); + break; + } } else if (targetReg != falseReg) { diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 0dcee81c83e4d7..41b6261a5b5616 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -58,6 +58,7 @@ HARDWARE_INTRINSIC(Sve, CreateBreakAfterMask, HARDWARE_INTRINSIC(Sve, CreateBreakAfterPropagateMask, -1, 3, true, {INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_sve_brkpa, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, CreateBreakBeforeMask, -1, 2, true, {INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, CreateBreakBeforePropagateMask, -1, 3, true, {INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, CreateBreakPropagateMask, -1, -1, false, {INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, CreateFalseMaskByte, -1, 0, false, {INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateFalseMaskDouble, -1, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt16, -1, 0, false, {INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index d1fb48fc16d939..4e2da68fffc71a 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1933,15 +1933,26 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } } - tgtPrefUse = BuildUse(embOp2Node->Op(1)); - srcCount += 1; - - for (size_t argNum = 2; argNum <= numArgs; argNum++) + switch (intrinEmb.id) { - srcCount += BuildDelayFreeUses(embOp2Node->Op(argNum), embOp2Node->Op(1)); - } + case NI_Sve_CreateBreakPropagateMask: + tgtPrefUse = BuildUse(embOp2Node->Op(2)); + srcCount += 1; + srcCount += BuildDelayFreeUses(embOp2Node->Op(1), embOp2Node->Op(2)); + srcCount += BuildDelayFreeUses(intrin.op3, embOp2Node->Op(2)); + break; + + default: + tgtPrefUse = BuildUse(embOp2Node->Op(1)); + srcCount += 1; + for (size_t argNum = 2; argNum <= numArgs; argNum++) + { + srcCount += BuildDelayFreeUses(embOp2Node->Op(argNum), embOp2Node->Op(1)); + } - srcCount += BuildDelayFreeUses(intrin.op3, embOp2Node->Op(1)); + srcCount += BuildDelayFreeUses(intrin.op3, embOp2Node->Op(1)); + break; + } } } else if (intrin.op2 != nullptr) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index 079541962cc9f1..29339019e8e40e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -2157,6 +2157,55 @@ internal Arm64() { } public static unsafe Vector CreateBreakBeforePropagateMask(Vector mask, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + /// + /// svbool_t svbrkn[_b]_z(svbool_t pg, svbool_t op1, svbool_t op2) + /// BRKN Ptied2.B, Pg/Z, Pop1.B, Ptied2.B + /// + public static unsafe Vector CreateBreakPropagateMask(Vector totalMask, Vector fromMask) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svbrkn[_b]_z(svbool_t pg, svbool_t op1, svbool_t op2) + /// BRKN Ptied2.B, Pg/Z, Pop1.B, Ptied2.B + /// + public static unsafe Vector CreateBreakPropagateMask(Vector totalMask, Vector fromMask) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svbrkn[_b]_z(svbool_t pg, svbool_t op1, svbool_t op2) + /// BRKN Ptied2.B, Pg/Z, Pop1.B, Ptied2.B + /// + public static unsafe Vector CreateBreakPropagateMask(Vector totalMask, Vector fromMask) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svbrkn[_b]_z(svbool_t pg, svbool_t op1, svbool_t op2) + /// BRKN Ptied2.B, Pg/Z, Pop1.B, Ptied2.B + /// + public static unsafe Vector CreateBreakPropagateMask(Vector totalMask, Vector fromMask) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svbrkn[_b]_z(svbool_t pg, svbool_t op1, svbool_t op2) + /// BRKN Ptied2.B, Pg/Z, Pop1.B, Ptied2.B + /// + public static unsafe Vector CreateBreakPropagateMask(Vector totalMask, Vector fromMask) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svbrkn[_b]_z(svbool_t pg, svbool_t op1, svbool_t op2) + /// BRKN Ptied2.B, Pg/Z, Pop1.B, Ptied2.B + /// + public static unsafe Vector CreateBreakPropagateMask(Vector totalMask, Vector fromMask) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svbrkn[_b]_z(svbool_t pg, svbool_t op1, svbool_t op2) + /// BRKN Ptied2.B, Pg/Z, Pop1.B, Ptied2.B + /// + public static unsafe Vector CreateBreakPropagateMask(Vector totalMask, Vector fromMask) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svbrkn[_b]_z(svbool_t pg, svbool_t op1, svbool_t op2) + /// BRKN Ptied2.B, Pg/Z, Pop1.B, Ptied2.B + /// + public static unsafe Vector CreateBreakPropagateMask(Vector totalMask, Vector fromMask) { throw new PlatformNotSupportedException(); } + + /// Set all predicate elements to false /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index ae978c4362f24d..6315791a0e925f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -2216,6 +2216,55 @@ internal Arm64() { } public static unsafe Vector CreateBreakBeforePropagateMask(Vector mask, Vector left, Vector right) => CreateBreakBeforePropagateMask(mask, left, right); + /// + /// svbool_t svbrkn[_b]_z(svbool_t pg, svbool_t op1, svbool_t op2) + /// BRKN Ptied2.B, Pg/Z, Pop1.B, Ptied2.B + /// + public static unsafe Vector CreateBreakPropagateMask(Vector totalMask, Vector fromMask) => CreateBreakPropagateMask(totalMask, fromMask); + + /// + /// svbool_t svbrkn[_b]_z(svbool_t pg, svbool_t op1, svbool_t op2) + /// BRKN Ptied2.B, Pg/Z, Pop1.B, Ptied2.B + /// + public static unsafe Vector CreateBreakPropagateMask(Vector totalMask, Vector fromMask) => CreateBreakPropagateMask(totalMask, fromMask); + + /// + /// svbool_t svbrkn[_b]_z(svbool_t pg, svbool_t op1, svbool_t op2) + /// BRKN Ptied2.B, Pg/Z, Pop1.B, Ptied2.B + /// + public static unsafe Vector CreateBreakPropagateMask(Vector totalMask, Vector fromMask) => CreateBreakPropagateMask(totalMask, fromMask); + + /// + /// svbool_t svbrkn[_b]_z(svbool_t pg, svbool_t op1, svbool_t op2) + /// BRKN Ptied2.B, Pg/Z, Pop1.B, Ptied2.B + /// + public static unsafe Vector CreateBreakPropagateMask(Vector totalMask, Vector fromMask) => CreateBreakPropagateMask(totalMask, fromMask); + + /// + /// svbool_t svbrkn[_b]_z(svbool_t pg, svbool_t op1, svbool_t op2) + /// BRKN Ptied2.B, Pg/Z, Pop1.B, Ptied2.B + /// + public static unsafe Vector CreateBreakPropagateMask(Vector totalMask, Vector fromMask) => CreateBreakPropagateMask(totalMask, fromMask); + + /// + /// svbool_t svbrkn[_b]_z(svbool_t pg, svbool_t op1, svbool_t op2) + /// BRKN Ptied2.B, Pg/Z, Pop1.B, Ptied2.B + /// + public static unsafe Vector CreateBreakPropagateMask(Vector totalMask, Vector fromMask) => CreateBreakPropagateMask(totalMask, fromMask); + + /// + /// svbool_t svbrkn[_b]_z(svbool_t pg, svbool_t op1, svbool_t op2) + /// BRKN Ptied2.B, Pg/Z, Pop1.B, Ptied2.B + /// + public static unsafe Vector CreateBreakPropagateMask(Vector totalMask, Vector fromMask) => CreateBreakPropagateMask(totalMask, fromMask); + + /// + /// svbool_t svbrkn[_b]_z(svbool_t pg, svbool_t op1, svbool_t op2) + /// BRKN Ptied2.B, Pg/Z, Pop1.B, Ptied2.B + /// + public static unsafe Vector CreateBreakPropagateMask(Vector totalMask, Vector fromMask) => CreateBreakPropagateMask(totalMask, fromMask); + + /// Set all predicate elements to false /// diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 3df9a05b405ba2..d45ad10f20ef14 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4531,6 +4531,15 @@ internal Arm64() { } public static System.Numerics.Vector CreateBreakBeforePropagateMask(System.Numerics.Vector mask, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector CreateBreakBeforePropagateMask(System.Numerics.Vector mask, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static unsafe System.Numerics.Vector CreateBreakPropagateMask(System.Numerics.Vector totalMask, System.Numerics.Vector fromMask) { throw null; } + public static unsafe System.Numerics.Vector CreateBreakPropagateMask(System.Numerics.Vector totalMask, System.Numerics.Vector fromMask) { throw null; } + public static unsafe System.Numerics.Vector CreateBreakPropagateMask(System.Numerics.Vector totalMask, System.Numerics.Vector fromMask) { throw null; } + public static unsafe System.Numerics.Vector CreateBreakPropagateMask(System.Numerics.Vector totalMask, System.Numerics.Vector fromMask) { throw null; } + public static unsafe System.Numerics.Vector CreateBreakPropagateMask(System.Numerics.Vector totalMask, System.Numerics.Vector fromMask) { throw null; } + public static unsafe System.Numerics.Vector CreateBreakPropagateMask(System.Numerics.Vector totalMask, System.Numerics.Vector fromMask) { throw null; } + public static unsafe System.Numerics.Vector CreateBreakPropagateMask(System.Numerics.Vector totalMask, System.Numerics.Vector fromMask) { throw null; } + public static unsafe System.Numerics.Vector CreateBreakPropagateMask(System.Numerics.Vector totalMask, System.Numerics.Vector fromMask) { throw null; } + public static System.Numerics.Vector CreateFalseMaskByte() { throw null; } public static System.Numerics.Vector CreateFalseMaskDouble() { throw null; } public static System.Numerics.Vector CreateFalseMaskInt16() { throw null; } diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index 7455aebc0ad324..15dd60f450965a 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -3310,7 +3310,16 @@ ("SveVecTernOpVecTest.template", new Dictionary { ["TestName"] = "Sve_CreateBreakBeforePropagateMask_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateBreakBeforePropagateMask", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["Op2BaseType"] = "Int16", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "Helpers.getMaskInt16()", ["NextValueOp2"] = "Helpers.getMaskInt16()", ["NextValueOp3"] = "Helpers.getMaskInt16()", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.CreateBreakBeforePropagateMask(firstOp, secondOp, thirdOp))", ["GetVectorResult"] = "Helpers.CreateBreakBeforePropagateMask(first, second, third)"}), ("SveVecTernOpVecTest.template", new Dictionary { ["TestName"] = "Sve_CreateBreakBeforePropagateMask_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateBreakBeforePropagateMask", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2BaseType"] = "Int32", ["Op3BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "Helpers.getMaskInt32()", ["NextValueOp2"] = "Helpers.getMaskInt32()", ["NextValueOp3"] = "Helpers.getMaskInt32()", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.CreateBreakBeforePropagateMask(firstOp, secondOp, thirdOp))", ["GetVectorResult"] = "Helpers.CreateBreakBeforePropagateMask(first, second, third)"}), ("SveVecTernOpVecTest.template", new Dictionary { ["TestName"] = "Sve_CreateBreakBeforePropagateMask_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateBreakBeforePropagateMask", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["Op2BaseType"] = "Int64", ["Op3BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "Helpers.getMaskInt64()", ["NextValueOp2"] = "Helpers.getMaskInt64()", ["NextValueOp3"] = "Helpers.getMaskInt64()", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.CreateBreakBeforePropagateMask(firstOp, secondOp, thirdOp))", ["GetVectorResult"] = "Helpers.CreateBreakBeforePropagateMask(first, second, third)"}), - + + ("SveVecBinOpVecTest.template", new Dictionary { ["TestName"] = "Sve_CreateBreakPropagateMask_byte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateBreakPropagateMask", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "Helpers.getMaskByte()", ["NextValueOp2"] = "Helpers.getMaskByte()", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.CreateBreakPropagateMask(left, right))", ["GetVectorResult"] = "Helpers.CreateBreakPropagateMask(left, right)"}), + ("SveVecBinOpVecTest.template", new Dictionary { ["TestName"] = "Sve_CreateBreakPropagateMask_ushort", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateBreakPropagateMask", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "Helpers.getMaskUInt16()", ["NextValueOp2"] = "Helpers.getMaskUInt16()", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.CreateBreakPropagateMask(left, right))", ["GetVectorResult"] = "Helpers.CreateBreakPropagateMask(left, right)"}), + ("SveVecBinOpVecTest.template", new Dictionary { ["TestName"] = "Sve_CreateBreakPropagateMask_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateBreakPropagateMask", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "Helpers.getMaskUInt32()", ["NextValueOp2"] = "Helpers.getMaskUInt32()", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.CreateBreakPropagateMask(left, right))", ["GetVectorResult"] = "Helpers.CreateBreakPropagateMask(left, right)"}), + ("SveVecBinOpVecTest.template", new Dictionary { ["TestName"] = "Sve_CreateBreakPropagateMask_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateBreakPropagateMask", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "Helpers.getMaskUInt64()", ["NextValueOp2"] = "Helpers.getMaskUInt64()", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.CreateBreakPropagateMask(left, right))", ["GetVectorResult"] = "Helpers.CreateBreakPropagateMask(left, right)"}), + ("SveVecBinOpVecTest.template", new Dictionary { ["TestName"] = "Sve_CreateBreakPropagateMask_sbyte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateBreakPropagateMask", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "Helpers.getMaskSByte()", ["NextValueOp2"] = "Helpers.getMaskSByte()", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.CreateBreakPropagateMask(left, right))", ["GetVectorResult"] = "Helpers.CreateBreakPropagateMask(left, right)"}), + ("SveVecBinOpVecTest.template", new Dictionary { ["TestName"] = "Sve_CreateBreakPropagateMask_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateBreakPropagateMask", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "Helpers.getMaskInt16()", ["NextValueOp2"] = "Helpers.getMaskInt16()", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.CreateBreakPropagateMask(left, right))", ["GetVectorResult"] = "Helpers.CreateBreakPropagateMask(left, right)"}), + ("SveVecBinOpVecTest.template", new Dictionary { ["TestName"] = "Sve_CreateBreakPropagateMask_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateBreakPropagateMask", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "Helpers.getMaskInt32()", ["NextValueOp2"] = "Helpers.getMaskInt32()", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.CreateBreakPropagateMask(left, right))", ["GetVectorResult"] = "Helpers.CreateBreakPropagateMask(left, right)"}), + ("SveVecBinOpVecTest.template", new Dictionary { ["TestName"] = "Sve_CreateBreakPropagateMask_long", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateBreakPropagateMask", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "Helpers.getMaskInt64()", ["NextValueOp2"] = "Helpers.getMaskInt64()", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.CreateBreakPropagateMask(left, right))", ["GetVectorResult"] = "Helpers.CreateBreakPropagateMask(left, right)"}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask16Bit_Int32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask16Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1BaseType"] = "Int32", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (Int32)i, right) != (Int32)result[i]",}), ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask16Bit_Int64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask16Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1BaseType"] = "Int64", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (Int64)i, right) != (Int64)result[i]",}), ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask16Bit_UInt32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask16Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1BaseType"] = "UInt32", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (UInt32)i, right) != (UInt32)result[i]",}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs index 1c9268fa84e3ef..76848611f4b9c6 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs @@ -7962,6 +7962,26 @@ public static T[] CreateBreakBeforePropagateMask(T[] mask, T[] op1, T[] op2) return result; } + public static T[] CreateBreakPropagateMask(T[] op1, T[] op2) where T : IBinaryInteger + { + var count = op1.Length; + var result = new T[count]; + + // embedded true mask + var mask = new T[count]; + for (var i = 0; i < count; i++) + { + mask[i] = T.One; + } + + if (LastActive(mask, op1) != T.Zero) + { + Array.Copy(op2, result, count); + } + + return result; + } + private static byte ConditionalExtract(byte[] op1, byte op2, byte[] op3, bool after) { int last = LastActiveElement(op1); From dd2275c78a1e63ee8aced55c5fc3ae4e2a386415 Mon Sep 17 00:00:00 2001 From: TIHan Date: Wed, 10 Jul 2024 20:13:23 -0700 Subject: [PATCH 02/11] Added assert --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index e0a2c939d95463..dd3c4967f49be6 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -650,6 +650,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinEmbMask.id) { case NI_Sve_CreateBreakPropagateMask: + assert(targetReg != maskReg); if (targetReg != embMaskOp2Reg) { assert(targetReg != embMaskOp1Reg); From 2b8ec026cee45011f3051817b69e9cbb9df72bf9 Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 11 Jul 2024 12:27:38 -0700 Subject: [PATCH 03/11] Fixed targetReg and maskReg using the same register --- src/coreclr/jit/lsraarm64.cpp | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 4e2da68fffc71a..44abd1fd0a6dc1 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1528,6 +1528,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou bool tgtPrefOp1 = false; bool tgtPrefOp2 = false; + bool tgtPrefHWIntrinsicOfOp2 = false; bool delayFreeMultiple = false; if (intrin.op1 != nullptr) { @@ -1634,6 +1635,16 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { predMask = RBM_LOWMASK.GetPredicateRegSet(); } + + // Special-case + if (intrinEmb.id == NI_Sve_CreateBreakPropagateMask) + { + assert(embOp2Node->isRMWHWIntrinsic(compiler)); + assert(!tgtPrefOp1); + assert(!tgtPrefOp2); + assert(!tgtPrefHWIntrinsicOfOp2); + tgtPrefHWIntrinsicOfOp2 = true; + } } } else if (HWIntrinsicInfo::IsLowMaskedOperation(intrin.id)) @@ -1641,8 +1652,15 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou predMask = RBM_LOWMASK.GetPredicateRegSet(); } - if (tgtPrefOp2) + if (tgtPrefHWIntrinsicOfOp2) + { + assert(!tgtPrefOp1); + assert(!tgtPrefOp2); + srcCount += BuildDelayFreeUses(intrin.op1, intrin.op2->AsHWIntrinsic()->Op(2), predMask); + } + else if (tgtPrefOp2) { + assert(!tgtPrefOp1); srcCount += BuildDelayFreeUses(intrin.op1, intrin.op2, predMask); } else @@ -1935,7 +1953,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou switch (intrinEmb.id) { + // Special-case case NI_Sve_CreateBreakPropagateMask: + assert(tgtPrefHWIntrinsicOfOp2); tgtPrefUse = BuildUse(embOp2Node->Op(2)); srcCount += 1; srcCount += BuildDelayFreeUses(embOp2Node->Op(1), embOp2Node->Op(2)); From b88f664f3e1f161de93a89b5062807c6643e357b Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 11 Jul 2024 12:29:07 -0700 Subject: [PATCH 04/11] Minor rename --- src/coreclr/jit/lsraarm64.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 44abd1fd0a6dc1..3759ddd5b0b3f7 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1528,7 +1528,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou bool tgtPrefOp1 = false; bool tgtPrefOp2 = false; - bool tgtPrefHWIntrinsicOfOp2 = false; + bool tgtPrefEmbOp2OfOp2 = false; bool delayFreeMultiple = false; if (intrin.op1 != nullptr) { @@ -1642,8 +1642,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou assert(embOp2Node->isRMWHWIntrinsic(compiler)); assert(!tgtPrefOp1); assert(!tgtPrefOp2); - assert(!tgtPrefHWIntrinsicOfOp2); - tgtPrefHWIntrinsicOfOp2 = true; + assert(!tgtPrefEmbOp2OfOp2); + tgtPrefEmbOp2OfOp2 = true; } } } @@ -1652,7 +1652,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou predMask = RBM_LOWMASK.GetPredicateRegSet(); } - if (tgtPrefHWIntrinsicOfOp2) + if (tgtPrefEmbOp2OfOp2) { assert(!tgtPrefOp1); assert(!tgtPrefOp2); @@ -1955,7 +1955,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { // Special-case case NI_Sve_CreateBreakPropagateMask: - assert(tgtPrefHWIntrinsicOfOp2); + assert(tgtPrefEmbOp2OfOp2); tgtPrefUse = BuildUse(embOp2Node->Op(2)); srcCount += 1; srcCount += BuildDelayFreeUses(embOp2Node->Op(1), embOp2Node->Op(2)); From 244a08ac3a8b347038d3e839e8439b28fa135692 Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 11 Jul 2024 12:31:22 -0700 Subject: [PATCH 05/11] Formatting --- src/coreclr/jit/lsraarm64.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 3759ddd5b0b3f7..d9655b8348a6f8 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1526,10 +1526,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // is not allocated the same register as the target. const bool isRMW = intrinsicTree->isRMWHWIntrinsic(compiler); - bool tgtPrefOp1 = false; - bool tgtPrefOp2 = false; - bool tgtPrefEmbOp2OfOp2 = false; - bool delayFreeMultiple = false; + bool tgtPrefOp1 = false; + bool tgtPrefOp2 = false; + bool tgtPrefEmbOp2OfOp2 = false; + bool delayFreeMultiple = false; if (intrin.op1 != nullptr) { bool simdRegToSimdRegMove = false; From 71b1034ae45b01e356e9df3372c48c2ec5df6c20 Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 11 Jul 2024 12:33:26 -0700 Subject: [PATCH 06/11] No need to use predMask --- src/coreclr/jit/lsraarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index d9655b8348a6f8..e9be073aaaf47b 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1656,7 +1656,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { assert(!tgtPrefOp1); assert(!tgtPrefOp2); - srcCount += BuildDelayFreeUses(intrin.op1, intrin.op2->AsHWIntrinsic()->Op(2), predMask); + srcCount += BuildDelayFreeUses(intrin.op1, intrin.op2->AsHWIntrinsic()->Op(2)); } else if (tgtPrefOp2) { From 09b3601dddcc157a843538d499b73a15a3004757 Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 11 Jul 2024 12:51:58 -0700 Subject: [PATCH 07/11] More formatting --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 841bf18308058d..e23f93526b0b59 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -632,8 +632,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op2, op2->AsHWIntrinsic()); for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) { - GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, reg1, reg2, helper.ImmValue(), embOpt, - sopt); + GetEmitter()->emitInsSve_R_R_I(insEmbMask, emitSize, reg1, reg2, helper.ImmValue(), + embOpt, sopt); } } else From 3e9cf639c9e59bbf5bf8db6ecb21a5152db3955f Mon Sep 17 00:00:00 2001 From: TIHan Date: Thu, 11 Jul 2024 14:01:57 -0700 Subject: [PATCH 08/11] Add additional comment --- src/coreclr/jit/lsraarm64.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index e9be073aaaf47b..0833a09ef38524 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1636,7 +1636,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou predMask = RBM_LOWMASK.GetPredicateRegSet(); } - // Special-case + // Special-case, CreateBreakPropagateMask's op2 is the RMW node. if (intrinEmb.id == NI_Sve_CreateBreakPropagateMask) { assert(embOp2Node->isRMWHWIntrinsic(compiler)); @@ -1953,7 +1953,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou switch (intrinEmb.id) { - // Special-case + // Special-case, CreateBreakPropagateMask's op2 is the RMW node. case NI_Sve_CreateBreakPropagateMask: assert(tgtPrefEmbOp2OfOp2); tgtPrefUse = BuildUse(embOp2Node->Op(2)); From 556bf928ef0ce8549c1db5203c3fe91251787f15 Mon Sep 17 00:00:00 2001 From: TIHan Date: Fri, 12 Jul 2024 14:17:54 -0700 Subject: [PATCH 09/11] Feedback --- src/coreclr/jit/hwintrinsic.cpp | 12 ++++++------ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 10 +++------- src/coreclr/jit/lsraarm64.cpp | 2 ++ 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 6b5bb38bda5908..fdea3514519e18 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1925,7 +1925,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } #if defined(TARGET_ARM64) - auto convertToMaskIfPossible = [&](GenTree*& op) { + auto convertToMaskIfNeeded = [&](GenTree*& op) { if (!varTypeIsMask(op)) { op = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op, simdBaseJitType, simdSize); @@ -1950,7 +1950,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, case NI_Sve_TestLastTrue: { // HWInstrinsic requires a mask for op2 - convertToMaskIfPossible(retNode->AsHWIntrinsic()->Op(2)); + convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(2)); break; } @@ -1964,7 +1964,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, case NI_Sve_CreateBreakBeforePropagateMask: { // HWInstrinsic requires a mask for op3 - convertToMaskIfPossible(retNode->AsHWIntrinsic()->Op(3)); + convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(3)); break; } @@ -1973,7 +1973,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } // HWInstrinsic requires a mask for op1 - convertToMaskIfPossible(retNode->AsHWIntrinsic()->Op(1)); + convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(1)); if (HWIntrinsicInfo::IsMultiReg(intrinsic)) { @@ -1990,8 +1990,8 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, { case NI_Sve_CreateBreakPropagateMask: { - convertToMaskIfPossible(retNode->AsHWIntrinsic()->Op(1)); - convertToMaskIfPossible(retNode->AsHWIntrinsic()->Op(2)); + convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(1)); + convertToMaskIfNeeded(retNode->AsHWIntrinsic()->Op(2)); break; } diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index e23f93526b0b59..8c480adedcaf64 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -650,13 +650,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinEmbMask.id) { case NI_Sve_CreateBreakPropagateMask: - assert(targetReg != maskReg); - if (targetReg != embMaskOp2Reg) - { - assert(targetReg != embMaskOp1Reg); - GetEmitter()->emitIns_Mov(INS_sve_mov, emitSize, targetReg, embMaskOp2Reg, - /* canSkip */ true); - } + assert(targetReg != embMaskOp1Reg); + GetEmitter()->emitIns_Mov(INS_sve_mov, emitSize, targetReg, embMaskOp2Reg, + /* canSkip */ true); emitInsHelper(targetReg, maskReg, embMaskOp1Reg); break; diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 0833a09ef38524..9c017872f10382 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1956,6 +1956,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // Special-case, CreateBreakPropagateMask's op2 is the RMW node. case NI_Sve_CreateBreakPropagateMask: assert(tgtPrefEmbOp2OfOp2); + assert(intrin.op3->isContained()); + assert(intrin.op3->IsVectorZero()); tgtPrefUse = BuildUse(embOp2Node->Op(2)); srcCount += 1; srcCount += BuildDelayFreeUses(embOp2Node->Op(1), embOp2Node->Op(2)); From def0a43d1704bd4db2c8a6a4c1bb87694e5ab1d2 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 16 Jul 2024 11:28:36 -0700 Subject: [PATCH 10/11] fix lsra --- src/coreclr/jit/lsraarm64.cpp | 61 ++++++++++++++--------------------- 1 file changed, 25 insertions(+), 36 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 9c017872f10382..cb395a42541b83 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1526,10 +1526,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // is not allocated the same register as the target. const bool isRMW = intrinsicTree->isRMWHWIntrinsic(compiler); - bool tgtPrefOp1 = false; - bool tgtPrefOp2 = false; - bool tgtPrefEmbOp2OfOp2 = false; - bool delayFreeMultiple = false; + bool tgtPrefOp1 = false; + bool tgtPrefOp2 = false; + bool delayFreeMultiple = false; if (intrin.op1 != nullptr) { bool simdRegToSimdRegMove = false; @@ -1619,7 +1618,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - SingleTypeRegSet predMask = RBM_ALLMASK.GetPredicateRegSet(); + bool tgtPrefEmbOp2 = false; + SingleTypeRegSet predMask = RBM_ALLMASK.GetPredicateRegSet(); if (intrin.id == NI_Sve_ConditionalSelect) { // If this is conditional select, make sure to check the embedded @@ -1642,8 +1642,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou assert(embOp2Node->isRMWHWIntrinsic(compiler)); assert(!tgtPrefOp1); assert(!tgtPrefOp2); - assert(!tgtPrefEmbOp2OfOp2); - tgtPrefEmbOp2OfOp2 = true; + tgtPrefEmbOp2 = true; } } } @@ -1652,16 +1651,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou predMask = RBM_LOWMASK.GetPredicateRegSet(); } - if (tgtPrefEmbOp2OfOp2) + if (tgtPrefOp2 || tgtPrefEmbOp2) { assert(!tgtPrefOp1); - assert(!tgtPrefOp2); - srcCount += BuildDelayFreeUses(intrin.op1, intrin.op2->AsHWIntrinsic()->Op(2)); - } - else if (tgtPrefOp2) - { - assert(!tgtPrefOp1); - srcCount += BuildDelayFreeUses(intrin.op1, intrin.op2, predMask); + srcCount += BuildDelayFreeUses(intrin.op1, nullptr, predMask); } else { @@ -1951,30 +1944,26 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } } - switch (intrinEmb.id) + int prefUseOpNum = 1; + if (intrinEmb.id == NI_Sve_CreateBreakPropagateMask) { - // Special-case, CreateBreakPropagateMask's op2 is the RMW node. - case NI_Sve_CreateBreakPropagateMask: - assert(tgtPrefEmbOp2OfOp2); - assert(intrin.op3->isContained()); - assert(intrin.op3->IsVectorZero()); - tgtPrefUse = BuildUse(embOp2Node->Op(2)); - srcCount += 1; - srcCount += BuildDelayFreeUses(embOp2Node->Op(1), embOp2Node->Op(2)); - srcCount += BuildDelayFreeUses(intrin.op3, embOp2Node->Op(2)); - break; - - default: - tgtPrefUse = BuildUse(embOp2Node->Op(1)); + prefUseOpNum = 2; + } + GenTree* prefUseNode = embOp2Node->Op(prefUseOpNum); + for (size_t argNum = 1; argNum <= numArgs; argNum++) + { + if (argNum == prefUseOpNum) + { + tgtPrefUse = BuildUse(prefUseNode); srcCount += 1; - for (size_t argNum = 2; argNum <= numArgs; argNum++) - { - srcCount += BuildDelayFreeUses(embOp2Node->Op(argNum), embOp2Node->Op(1)); - } - - srcCount += BuildDelayFreeUses(intrin.op3, embOp2Node->Op(1)); - break; + } + else + { + srcCount += BuildDelayFreeUses(embOp2Node->Op(argNum), prefUseNode); + } } + + srcCount += BuildDelayFreeUses(intrin.op3, prefUseNode); } } else if (intrin.op2 != nullptr) From 38a7347a6ce13b118f421c32f39b14355688e3c2 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 17 Jul 2024 07:29:20 -0700 Subject: [PATCH 11/11] fix build error --- src/coreclr/jit/lsraarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index ed94d54ea86498..c76cfe2031f581 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1994,7 +1994,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou break; } - int prefUseOpNum = 1; + size_t prefUseOpNum = 1; if (intrinEmb.id == NI_Sve_CreateBreakPropagateMask) { prefUseOpNum = 2;