From 8640766e0728700d9c417dd91a2e01f7e87c0a32 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 20 Nov 2023 22:55:11 -0800 Subject: [PATCH 1/2] Add StoreSelectedScalarVectorNxM intrinsic ids --- src/coreclr/jit/hwintrinsicarm64.cpp | 23 ++++++++ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 62 +++++++++------------ src/coreclr/jit/hwintrinsiclistarm64.h | 10 +++- src/coreclr/jit/lowerarmarch.cpp | 6 ++ src/coreclr/jit/lsraarm64.cpp | 24 ++++++++ 5 files changed, 86 insertions(+), 39 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 83eeb02409eee1..482f3589e2a875 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -260,7 +260,13 @@ void HWIntrinsicInfo::lookupImmBounds( case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3: case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4: case NI_AdvSimd_StoreSelectedScalar: + case NI_AdvSimd_StoreSelectedScalarVector64x2: + case NI_AdvSimd_StoreSelectedScalarVector64x3: + case NI_AdvSimd_StoreSelectedScalarVector64x4: case NI_AdvSimd_Arm64_StoreSelectedScalar: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4: case NI_AdvSimd_Arm64_DuplicateSelectedScalarToVector128: case NI_AdvSimd_Arm64_InsertSelectedScalar: immUpperBound = Compiler::getSIMDVectorLength(simdSize, baseType) - 1; @@ -1819,6 +1825,23 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (op2->TypeGet() == TYP_STRUCT) { info.compNeedsConsecutiveRegisters = true; + switch (fieldCount) + { + case 2: + intrinsic = simdSize == 8 ? NI_AdvSimd_StoreSelectedScalarVector64x2 + : NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2; + break; + case 3: + intrinsic = simdSize == 8 ? NI_AdvSimd_StoreSelectedScalarVector64x3 + : NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3; + break; + case 4: + intrinsic = simdSize == 8 ? 
NI_AdvSimd_StoreSelectedScalarVector64x4 + : NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4; + break; + default: + assert("unsupported"); + } if (!op2->OperIs(GT_LCL_VAR)) { diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 7b903a2deb0e4b..03e7de9135c445 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -493,50 +493,32 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) ins = varTypeIsUnsigned(intrin.baseType) ? INS_umsubl : INS_smsubl; break; - case NI_AdvSimd_StoreSelectedScalar: - case NI_AdvSimd_Arm64_StoreSelectedScalar: + case NI_AdvSimd_StoreSelectedScalarVector64x2: + case NI_AdvSimd_StoreSelectedScalarVector64x3: + case NI_AdvSimd_StoreSelectedScalarVector64x4: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4: { unsigned regCount = 0; - if (intrin.op2->OperIsFieldList()) - { - GenTreeFieldList* fieldList = intrin.op2->AsFieldList(); - GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); - op2Reg = firstField->GetRegNum(); + assert(intrin.op2->OperIsFieldList()); + GenTreeFieldList* fieldList = intrin.op2->AsFieldList(); + GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); + op2Reg = firstField->GetRegNum(); - INDEBUG(regNumber argReg = op2Reg); - for (GenTreeFieldList::Use& use : fieldList->Uses()) - { - regCount++; #ifdef DEBUG - GenTree* argNode = use.GetNode(); - assert(argReg == argNode->GetRegNum()); - argReg = REG_NEXT(argReg); -#endif - } - } - else + regNumber argReg = op2Reg; + for (GenTreeFieldList::Use& use : fieldList->Uses()) { - regCount = 1; - } + regCount++; - switch (regCount) - { - case 1: - ins = INS_st1; - break; - case 2: - ins = INS_st2; - break; - case 3: - ins = INS_st3; - break; - case 4: - ins = INS_st4; - break; - default: - unreached(); + GenTree* argNode = 
use.GetNode(); + assert(argReg == argNode->GetRegNum()); + argReg = REG_NEXT(argReg); } - break; + assert((ins == INS_st2 && regCount == 2) || (ins == INS_st3 && regCount == 3) || + (ins == INS_st4 && regCount == 4)); +#endif } default: @@ -830,7 +812,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; case NI_AdvSimd_StoreSelectedScalar: + case NI_AdvSimd_StoreSelectedScalarVector64x2: + case NI_AdvSimd_StoreSelectedScalarVector64x3: + case NI_AdvSimd_StoreSelectedScalarVector64x4: case NI_AdvSimd_Arm64_StoreSelectedScalar: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4: { HWIntrinsicImmOpHelper helper(this, intrin.op3, node); diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index 1a6c8cd48081b9..0bf81992d4933c 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -478,7 +478,10 @@ HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningLower, HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningUpper, 16, 1, true, {INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AdvSimd, SqrtScalar, 8, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsqrt, INS_fsqrt}, HW_Category_SIMD, HW_Flag_SIMDScalar) HARDWARE_INTRINSIC(AdvSimd, Store, -1, 2, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, 8, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, 8, 3, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalarVector64x2, 8, 3, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_invalid, INS_invalid, INS_st2, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalarVector64x3, 8, 3, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_invalid, INS_invalid, INS_st3, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalarVector64x4, 8, 3, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_invalid, INS_invalid, INS_st4, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd, StoreVector64x2AndZip, 8, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_invalid, INS_invalid, INS_st2, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd, StoreVector64x3AndZip, 8, 2, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_invalid, INS_invalid, INS_st3, INS_invalid}, 
HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd, StoreVector64x4AndZip, 8, 2, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_invalid, INS_invalid, INS_st4, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) @@ -676,7 +679,10 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePair, HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalar, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stp, INS_stp, INS_invalid, INS_invalid, INS_stp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalarNonTemporal, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stnp, INS_stnp, INS_invalid, INS_invalid, INS_stnp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairNonTemporal, -1, 3, true, {INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stp}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalar, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalar, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalarVector128x2, 16, 3, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_invalid, INS_invalid, INS_st2, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalarVector128x3, 16, 3, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_invalid, INS_invalid, INS_st3, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalarVector128x4, 16, 3, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_invalid, INS_invalid, INS_st4, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x2AndZip, 16, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x3AndZip, 16, 2, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x4AndZip, 16, 2, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4}, HW_Category_MemoryStore, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 9a314539fd9f9b..4facf33a15d9a8 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -3035,7 +3035,13 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AdvSimd_ExtractVector64: case NI_AdvSimd_ExtractVector128: case NI_AdvSimd_StoreSelectedScalar: + case NI_AdvSimd_StoreSelectedScalarVector64x2: + case NI_AdvSimd_StoreSelectedScalarVector64x3: + case NI_AdvSimd_StoreSelectedScalarVector64x4: case NI_AdvSimd_Arm64_StoreSelectedScalar: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4: assert(hasImmediateOperand); assert(varTypeIsIntegral(intrin.op3)); if (intrin.op3->IsCnsIntOrI()) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 0b4b383c491b50..9df74f06253bbf 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1412,7 +1412,13 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou case NI_AdvSimd_ExtractVector64: case NI_AdvSimd_ExtractVector128: case NI_AdvSimd_StoreSelectedScalar: + case NI_AdvSimd_StoreSelectedScalarVector64x2: + case NI_AdvSimd_StoreSelectedScalarVector64x3: + case NI_AdvSimd_StoreSelectedScalarVector64x4: case NI_AdvSimd_Arm64_StoreSelectedScalar: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4: needBranchTargetReg = !intrin.op3->isContainedIntOrIImmed(); break; @@ -1605,6 +1611,24 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou case NI_AdvSimd_StoreSelectedScalar: case NI_AdvSimd_Arm64_StoreSelectedScalar: + assert(intrin.op1 != 
nullptr); + assert(intrin.op3 != nullptr); + srcCount += BuildOperandUses(intrin.op2); + if (!intrin.op3->isContainedIntOrIImmed()) + { + srcCount += BuildOperandUses(intrin.op3); + } + assert(dstCount == 0); + buildInternalRegisterUses(); + *pDstCount = 0; + break; + + case NI_AdvSimd_StoreSelectedScalarVector64x2: + case NI_AdvSimd_StoreSelectedScalarVector64x3: + case NI_AdvSimd_StoreSelectedScalarVector64x4: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4: { assert(intrin.op1 != nullptr); assert(intrin.op3 != nullptr); From f0ed720b604dc358d595231571686ef728eefbc0 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 20 Nov 2023 23:23:58 -0800 Subject: [PATCH 2/2] reorganize code --- src/coreclr/jit/hwintrinsicarm64.cpp | 2 +- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 54 +++++++++------------ src/coreclr/jit/hwintrinsiclistarm64.h | 6 +-- src/coreclr/jit/lsraarm64.cpp | 2 +- 4 files changed, 29 insertions(+), 35 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 482f3589e2a875..7cd273a0bc0e8e 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1829,7 +1829,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { case 2: intrinsic = simdSize == 8 ? NI_AdvSimd_StoreSelectedScalarVector64x2 - : NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2; + : NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2; break; case 3: intrinsic = simdSize == 8 ? 
NI_AdvSimd_StoreSelectedScalarVector64x3 diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 03e7de9135c445..eba1b6f33a09c4 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -493,34 +493,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) ins = varTypeIsUnsigned(intrin.baseType) ? INS_umsubl : INS_smsubl; break; - case NI_AdvSimd_StoreSelectedScalarVector64x2: - case NI_AdvSimd_StoreSelectedScalarVector64x3: - case NI_AdvSimd_StoreSelectedScalarVector64x4: - case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2: - case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3: - case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4: - { - unsigned regCount = 0; - assert(intrin.op2->OperIsFieldList()); - GenTreeFieldList* fieldList = intrin.op2->AsFieldList(); - GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); - op2Reg = firstField->GetRegNum(); - -#ifdef DEBUG - regNumber argReg = op2Reg; - for (GenTreeFieldList::Use& use : fieldList->Uses()) - { - regCount++; - - GenTree* argNode = use.GetNode(); - assert(argReg == argNode->GetRegNum()); - argReg = REG_NEXT(argReg); - } - assert((ins == INS_st2 && regCount == 2) || (ins == INS_st3 && regCount == 3) || - (ins == INS_st4 && regCount == 4)); -#endif - } - default: ins = HWIntrinsicInfo::lookupIns(intrin.id, intrin.baseType); break; @@ -811,14 +783,36 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GetEmitter()->emitIns_R_R_R(ins, emitTypeSize(intrin.baseType), op2Reg, op3Reg, op1Reg); break; - case NI_AdvSimd_StoreSelectedScalar: case NI_AdvSimd_StoreSelectedScalarVector64x2: case NI_AdvSimd_StoreSelectedScalarVector64x3: case NI_AdvSimd_StoreSelectedScalarVector64x4: - case NI_AdvSimd_Arm64_StoreSelectedScalar: case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2: case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3: case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4: + { + 
assert(intrin.op2->OperIsFieldList()); + GenTreeFieldList* fieldList = intrin.op2->AsFieldList(); + GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); + op2Reg = firstField->GetRegNum(); + +#ifdef DEBUG + unsigned regCount = 0; + regNumber argReg = op2Reg; + for (GenTreeFieldList::Use& use : fieldList->Uses()) + { + regCount++; + + GenTree* argNode = use.GetNode(); + assert(argReg == argNode->GetRegNum()); + argReg = REG_NEXT(argReg); + } + assert((ins == INS_st2 && regCount == 2) || (ins == INS_st3 && regCount == 3) || + (ins == INS_st4 && regCount == 4)); +#endif + FALLTHROUGH; + } + case NI_AdvSimd_StoreSelectedScalar: + case NI_AdvSimd_Arm64_StoreSelectedScalar: { HWIntrinsicImmOpHelper helper(this, intrin.op3, node); diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index 0bf81992d4933c..cb4c8269d61171 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -680,9 +680,9 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalar, HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalarNonTemporal, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stnp, INS_stnp, INS_invalid, INS_invalid, INS_stnp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairNonTemporal, -1, 3, true, {INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stp}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalar, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalarVector128x2, 16, 3, true, 
{INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_invalid, INS_invalid, INS_st2, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalarVector128x3, 16, 3, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_invalid, INS_invalid, INS_st3, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalarVector128x4, 16, 3, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_invalid, INS_invalid, INS_st4, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalarVector128x2, 16, 3, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalarVector128x3, 16, 3, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalarVector128x4, 16, 3, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4}, HW_Category_MemoryStore, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x2AndZip, 16, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x3AndZip, 16, 2, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x4AndZip, 16, 2, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 9df74f06253bbf..3914af3fa5e18b 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1418,7 +1418,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou case NI_AdvSimd_Arm64_StoreSelectedScalar: case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2: case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3: - case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4: needBranchTargetReg = !intrin.op3->isContainedIntOrIImmed(); break;