diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 83eeb02409eee1..7cd273a0bc0e8e 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -260,7 +260,13 @@ void HWIntrinsicInfo::lookupImmBounds( case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3: case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4: case NI_AdvSimd_StoreSelectedScalar: + case NI_AdvSimd_StoreSelectedScalarVector64x2: + case NI_AdvSimd_StoreSelectedScalarVector64x3: + case NI_AdvSimd_StoreSelectedScalarVector64x4: case NI_AdvSimd_Arm64_StoreSelectedScalar: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4: case NI_AdvSimd_Arm64_DuplicateSelectedScalarToVector128: case NI_AdvSimd_Arm64_InsertSelectedScalar: immUpperBound = Compiler::getSIMDVectorLength(simdSize, baseType) - 1; @@ -1819,6 +1825,23 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (op2->TypeGet() == TYP_STRUCT) { info.compNeedsConsecutiveRegisters = true; + switch (fieldCount) + { + case 2: + intrinsic = simdSize == 8 ? NI_AdvSimd_StoreSelectedScalarVector64x2 + : NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2; + break; + case 3: + intrinsic = simdSize == 8 ? NI_AdvSimd_StoreSelectedScalarVector64x3 + : NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3; + break; + case 4: + intrinsic = simdSize == 8 ? NI_AdvSimd_StoreSelectedScalarVector64x4 + : NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4; + break; + default: + assert("unsupported"); + } if (!op2->OperIs(GT_LCL_VAR)) { diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 7b903a2deb0e4b..eba1b6f33a09c4 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -493,52 +493,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) ins = varTypeIsUnsigned(intrin.baseType) ? INS_umsubl : INS_smsubl; break; - case NI_AdvSimd_StoreSelectedScalar: - case NI_AdvSimd_Arm64_StoreSelectedScalar: - { - unsigned regCount = 0; - if (intrin.op2->OperIsFieldList()) - { - GenTreeFieldList* fieldList = intrin.op2->AsFieldList(); - GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); - op2Reg = firstField->GetRegNum(); - - INDEBUG(regNumber argReg = op2Reg); - for (GenTreeFieldList::Use& use : fieldList->Uses()) - { - regCount++; -#ifdef DEBUG - GenTree* argNode = use.GetNode(); - assert(argReg == argNode->GetRegNum()); - argReg = REG_NEXT(argReg); -#endif - } - } - else - { - regCount = 1; - } - - switch (regCount) - { - case 1: - ins = INS_st1; - break; - case 2: - ins = INS_st2; - break; - case 3: - ins = INS_st3; - break; - case 4: - ins = INS_st4; - break; - default: - unreached(); - } - break; - } - default: ins = HWIntrinsicInfo::lookupIns(intrin.id, intrin.baseType); break; @@ -829,6 +783,34 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GetEmitter()->emitIns_R_R_R(ins, emitTypeSize(intrin.baseType), op2Reg, op3Reg, op1Reg); break; + case NI_AdvSimd_StoreSelectedScalarVector64x2: + case NI_AdvSimd_StoreSelectedScalarVector64x3: + case NI_AdvSimd_StoreSelectedScalarVector64x4: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4: + { + assert(intrin.op2->OperIsFieldList()); + GenTreeFieldList* fieldList = intrin.op2->AsFieldList(); + GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); + op2Reg = firstField->GetRegNum(); + +#ifdef DEBUG + unsigned regCount = 0; + regNumber argReg = op2Reg; + for (GenTreeFieldList::Use& use : fieldList->Uses()) + { + regCount++; + + GenTree* argNode = use.GetNode(); + assert(argReg == argNode->GetRegNum()); + argReg = REG_NEXT(argReg); + } + assert((ins == INS_st2 && regCount == 2) || (ins == INS_st3 && regCount == 3) || + (ins == INS_st4 && regCount == 4)); +#endif + FALLTHROUGH; + } case NI_AdvSimd_StoreSelectedScalar: case NI_AdvSimd_Arm64_StoreSelectedScalar: { diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index 1a6c8cd48081b9..cb4c8269d61171 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -478,7 +478,10 @@ HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningLower, HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningUpper, 16, 1, true, {INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AdvSimd, SqrtScalar, 8, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsqrt, INS_fsqrt}, HW_Category_SIMD, HW_Flag_SIMDScalar) HARDWARE_INTRINSIC(AdvSimd, Store, -1, 2, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, 8, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, 8, 3, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalarVector64x2, 8, 3, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_invalid, INS_invalid, INS_st2, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalarVector64x3, 8, 3, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_invalid, INS_invalid, INS_st3, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalarVector64x4, 8, 3, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_invalid, INS_invalid, INS_st4, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd, StoreVector64x2AndZip, 8, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_invalid, INS_invalid, INS_st2, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd, StoreVector64x3AndZip, 8, 2, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_invalid, INS_invalid, INS_st3, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd, StoreVector64x4AndZip, 8, 2, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_invalid, INS_invalid, INS_st4, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) @@ -676,7 +679,10 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePair, HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalar, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stp, INS_stp, INS_invalid, INS_invalid, INS_stp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalarNonTemporal, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stnp, INS_stnp, INS_invalid, INS_invalid, INS_stnp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairNonTemporal, -1, 3, true, {INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stp}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalar, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalar, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalarVector128x2, 16, 3, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalarVector128x3, 16, 3, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalarVector128x4, 16, 3, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x2AndZip, 16, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x3AndZip, 16, 2, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x4AndZip, 16, 2, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 9a314539fd9f9b..4facf33a15d9a8 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -3035,7 +3035,13 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AdvSimd_ExtractVector64: case NI_AdvSimd_ExtractVector128: case NI_AdvSimd_StoreSelectedScalar: + case NI_AdvSimd_StoreSelectedScalarVector64x2: + case NI_AdvSimd_StoreSelectedScalarVector64x3: + case NI_AdvSimd_StoreSelectedScalarVector64x4: case NI_AdvSimd_Arm64_StoreSelectedScalar: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4: assert(hasImmediateOperand); assert(varTypeIsIntegral(intrin.op3)); if (intrin.op3->IsCnsIntOrI()) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 0b4b383c491b50..3914af3fa5e18b 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1412,7 +1412,13 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou case NI_AdvSimd_ExtractVector64: case NI_AdvSimd_ExtractVector128: case NI_AdvSimd_StoreSelectedScalar: + case NI_AdvSimd_StoreSelectedScalarVector64x2: + case NI_AdvSimd_StoreSelectedScalarVector64x3: + case NI_AdvSimd_StoreSelectedScalarVector64x4: case NI_AdvSimd_Arm64_StoreSelectedScalar: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4: needBranchTargetReg = !intrin.op3->isContainedIntOrIImmed(); break; @@ -1605,6 +1611,24 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou case NI_AdvSimd_StoreSelectedScalar: case NI_AdvSimd_Arm64_StoreSelectedScalar: + assert(intrin.op1 != nullptr); + assert(intrin.op3 != nullptr); + srcCount += BuildOperandUses(intrin.op2); + if (!intrin.op3->isContainedIntOrIImmed()) + { + srcCount += BuildOperandUses(intrin.op3); + } + assert(dstCount == 0); + buildInternalRegisterUses(); + *pDstCount = 0; + break; + + case NI_AdvSimd_StoreSelectedScalarVector64x2: + case NI_AdvSimd_StoreSelectedScalarVector64x3: + case NI_AdvSimd_StoreSelectedScalarVector64x4: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3: + case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4: { assert(intrin.op1 != nullptr); assert(intrin.op3 != nullptr);