Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,13 @@ void HWIntrinsicInfo::lookupImmBounds(
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
case NI_AdvSimd_StoreSelectedScalar:
case NI_AdvSimd_StoreSelectedScalarVector64x2:
case NI_AdvSimd_StoreSelectedScalarVector64x3:
case NI_AdvSimd_StoreSelectedScalarVector64x4:
case NI_AdvSimd_Arm64_StoreSelectedScalar:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4:
case NI_AdvSimd_Arm64_DuplicateSelectedScalarToVector128:
case NI_AdvSimd_Arm64_InsertSelectedScalar:
immUpperBound = Compiler::getSIMDVectorLength(simdSize, baseType) - 1;
Expand Down Expand Up @@ -1819,6 +1825,23 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
if (op2->TypeGet() == TYP_STRUCT)
{
info.compNeedsConsecutiveRegisters = true;
switch (fieldCount)
{
case 2:
intrinsic = simdSize == 8 ? NI_AdvSimd_StoreSelectedScalarVector64x2
: NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2;
break;
case 3:
intrinsic = simdSize == 8 ? NI_AdvSimd_StoreSelectedScalarVector64x3
: NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3;
break;
case 4:
intrinsic = simdSize == 8 ? NI_AdvSimd_StoreSelectedScalarVector64x4
: NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4;
break;
default:
assert("unsupported");
}

if (!op2->OperIs(GT_LCL_VAR))
{
Expand Down
74 changes: 28 additions & 46 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -493,52 +493,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
ins = varTypeIsUnsigned(intrin.baseType) ? INS_umsubl : INS_smsubl;
break;

case NI_AdvSimd_StoreSelectedScalar:
case NI_AdvSimd_Arm64_StoreSelectedScalar:
{
unsigned regCount = 0;
if (intrin.op2->OperIsFieldList())
{
GenTreeFieldList* fieldList = intrin.op2->AsFieldList();
GenTree* firstField = fieldList->Uses().GetHead()->GetNode();
op2Reg = firstField->GetRegNum();

INDEBUG(regNumber argReg = op2Reg);
for (GenTreeFieldList::Use& use : fieldList->Uses())
{
regCount++;
#ifdef DEBUG
GenTree* argNode = use.GetNode();
assert(argReg == argNode->GetRegNum());
argReg = REG_NEXT(argReg);
#endif
}
}
else
{
regCount = 1;
}

switch (regCount)
{
case 1:
ins = INS_st1;
break;
case 2:
ins = INS_st2;
break;
case 3:
ins = INS_st3;
break;
case 4:
ins = INS_st4;
break;
default:
unreached();
}
break;
}

default:
ins = HWIntrinsicInfo::lookupIns(intrin.id, intrin.baseType);
break;
Expand Down Expand Up @@ -829,6 +783,34 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
GetEmitter()->emitIns_R_R_R(ins, emitTypeSize(intrin.baseType), op2Reg, op3Reg, op1Reg);
break;

case NI_AdvSimd_StoreSelectedScalarVector64x2:
case NI_AdvSimd_StoreSelectedScalarVector64x3:
case NI_AdvSimd_StoreSelectedScalarVector64x4:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4:
{
assert(intrin.op2->OperIsFieldList());
GenTreeFieldList* fieldList = intrin.op2->AsFieldList();
GenTree* firstField = fieldList->Uses().GetHead()->GetNode();
op2Reg = firstField->GetRegNum();

#ifdef DEBUG
unsigned regCount = 0;
regNumber argReg = op2Reg;
for (GenTreeFieldList::Use& use : fieldList->Uses())
{
regCount++;

GenTree* argNode = use.GetNode();
assert(argReg == argNode->GetRegNum());
argReg = REG_NEXT(argReg);
}
assert((ins == INS_st2 && regCount == 2) || (ins == INS_st3 && regCount == 3) ||
(ins == INS_st4 && regCount == 4));
#endif
FALLTHROUGH;
}
case NI_AdvSimd_StoreSelectedScalar:
case NI_AdvSimd_Arm64_StoreSelectedScalar:
{
Expand Down
10 changes: 8 additions & 2 deletions src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,10 @@ HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningLower,
HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningUpper, 16, 1, true, {INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AdvSimd, SqrtScalar, 8, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsqrt, INS_fsqrt}, HW_Category_SIMD, HW_Flag_SIMDScalar)
HARDWARE_INTRINSIC(AdvSimd, Store, -1, 2, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, 8, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, 8, 3, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalarVector64x2, 8, 3, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_invalid, INS_invalid, INS_st2, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalarVector64x3, 8, 3, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_invalid, INS_invalid, INS_st3, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalarVector64x4, 8, 3, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_invalid, INS_invalid, INS_st4, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x2AndZip, 8, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_invalid, INS_invalid, INS_st2, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x3AndZip, 8, 2, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_invalid, INS_invalid, INS_st3, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x4AndZip, 8, 2, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_invalid, INS_invalid, INS_st4, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
Expand Down Expand Up @@ -676,7 +679,10 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePair,
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalar, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stp, INS_stp, INS_invalid, INS_invalid, INS_stp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalarNonTemporal, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stnp, INS_stnp, INS_invalid, INS_invalid, INS_stnp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairNonTemporal, -1, 3, true, {INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stp}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalar, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalar, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalarVector128x2, 16, 3, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalarVector128x3, 16, 3, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreSelectedScalarVector128x4, 16, 3, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x2AndZip, 16, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x3AndZip, 16, 2, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x4AndZip, 16, 2, true, {INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4, INS_st4}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3035,7 +3035,13 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_AdvSimd_ExtractVector64:
case NI_AdvSimd_ExtractVector128:
case NI_AdvSimd_StoreSelectedScalar:
case NI_AdvSimd_StoreSelectedScalarVector64x2:
case NI_AdvSimd_StoreSelectedScalarVector64x3:
case NI_AdvSimd_StoreSelectedScalarVector64x4:
case NI_AdvSimd_Arm64_StoreSelectedScalar:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4:
assert(hasImmediateOperand);
assert(varTypeIsIntegral(intrin.op3));
if (intrin.op3->IsCnsIntOrI())
Expand Down
24 changes: 24 additions & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1412,7 +1412,13 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
case NI_AdvSimd_ExtractVector64:
case NI_AdvSimd_ExtractVector128:
case NI_AdvSimd_StoreSelectedScalar:
case NI_AdvSimd_StoreSelectedScalarVector64x2:
case NI_AdvSimd_StoreSelectedScalarVector64x3:
case NI_AdvSimd_StoreSelectedScalarVector64x4:
case NI_AdvSimd_Arm64_StoreSelectedScalar:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4:
needBranchTargetReg = !intrin.op3->isContainedIntOrIImmed();
break;

Expand Down Expand Up @@ -1605,6 +1611,24 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou

case NI_AdvSimd_StoreSelectedScalar:
case NI_AdvSimd_Arm64_StoreSelectedScalar:
assert(intrin.op1 != nullptr);
assert(intrin.op3 != nullptr);
srcCount += BuildOperandUses(intrin.op2);
if (!intrin.op3->isContainedIntOrIImmed())
{
srcCount += BuildOperandUses(intrin.op3);
}
assert(dstCount == 0);
buildInternalRegisterUses();
*pDstCount = 0;
break;

case NI_AdvSimd_StoreSelectedScalarVector64x2:
case NI_AdvSimd_StoreSelectedScalarVector64x3:
case NI_AdvSimd_StoreSelectedScalarVector64x4:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x2:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x3:
case NI_AdvSimd_Arm64_StoreSelectedScalarVector128x4:
{
assert(intrin.op1 != nullptr);
assert(intrin.op3 != nullptr);
Expand Down