Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 13 additions & 16 deletions src/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3929,17 +3929,15 @@ void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTre
void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival)
{
noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));

assert(IsSSEOrAVXInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));

ssize_t offs = indir->Offset();
instrDesc* id = emitNewInstrAmdCns(attr, offs, ival);

id->idIns(ins);
id->idReg1(reg1);

emitHandleMemOp(indir, id, IF_RWR_ARD_CNS, ins);
emitHandleMemOp(indir, id, IF_RRW_ARD_CNS, ins);

// Plus one for the 1-byte immediate (ival)
UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
Expand All @@ -3952,22 +3950,20 @@ void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenT
void emitter::emitIns_R_C_I(
instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
{
noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));

assert(IsSSEOrAVXInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));

// Static always need relocs
if (!jitStaticFldIsGlobAddr(fldHnd))
{
attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
}

noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
assert(IsSSEOrAVXInstruction(ins));

instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;

id->idIns(ins);
id->idInsFmt(IF_RWR_RRD_MRD_CNS);
id->idInsFmt(IF_RRW_MRD_CNS);
id->idReg1(reg1);
id->idAddr()->iiaFieldHnd = fldHnd;

Expand All @@ -3979,16 +3975,14 @@ void emitter::emitIns_R_C_I(
void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival)
{
noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));

assert(IsSSEOrAVXInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));

instrDesc* id = emitNewInstrCns(attr, ival);
UNATIVE_OFFSET sz =
emitInsSizeSV(insCodeRM(ins), varx, offs) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;

id->idIns(ins);
id->idInsFmt(IF_RWR_RRD_SRD_CNS);
id->idInsFmt(IF_RRW_SRD_CNS);
id->idReg1(reg1);
id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);

Expand Down Expand Up @@ -5190,7 +5184,7 @@ void emitter::emitIns_SIMD_R_R_A(
{
emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
}
emitIns_R_A(ins, emitTypeSize(simdtype), reg, indir, IF_RWR_ARD);
emitIns_R_A(ins, emitTypeSize(simdtype), reg, indir, IF_RRW_ARD);
}
}

Expand Down Expand Up @@ -7341,6 +7335,7 @@ void emitter::emitDispIns(
emitDispAddrMode(id);
break;

case IF_RRW_ARD_CNS:
case IF_RWR_ARD_CNS:
{
printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
Expand Down Expand Up @@ -7531,6 +7526,7 @@ void emitter::emitDispIns(

break;

case IF_RRW_SRD_CNS:
case IF_RWR_SRD_CNS:
{
printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
Expand Down Expand Up @@ -7707,6 +7703,7 @@ void emitter::emitDispIns(
emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
break;

case IF_RRW_MRD_CNS:
case IF_RWR_MRD_CNS:
{
printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
Expand Down Expand Up @@ -11750,6 +11747,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
break;

case IF_RRW_ARD_CNS:
case IF_RWR_ARD_CNS:
emitGetInsAmdCns(id, &cnsVal);
code = insCodeRM(ins);

Expand Down Expand Up @@ -11779,7 +11777,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
sz = emitSizeOfInsDsc(id);
break;

case IF_RWR_ARD_CNS:
case IF_RWR_RRD_ARD_CNS:
{
emitGetInsAmdCns(id, &cnsVal);
Expand Down Expand Up @@ -11876,6 +11873,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
break;

case IF_RRW_SRD_CNS:
case IF_RWR_SRD_CNS:
emitGetInsCns(id, &cnsVal);
code = insCodeRM(ins);

Expand Down Expand Up @@ -11941,7 +11939,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
}
break;

case IF_RWR_SRD_CNS:
case IF_RWR_RRD_SRD_CNS:
{
emitGetInsCns(id, &cnsVal);
Expand Down Expand Up @@ -12021,6 +12018,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
break;

case IF_RRW_MRD_CNS:
case IF_RWR_MRD_CNS:
emitGetInsDcmCns(id, &cnsVal);
code = insCodeRM(ins);

Expand Down Expand Up @@ -12085,7 +12083,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
sz = emitSizeOfInsDsc(id);
break;

case IF_RWR_MRD_CNS:
case IF_RWR_RRD_MRD_CNS:
{
emitGetInsCns(id, &cnsVal);
Expand Down
45 changes: 34 additions & 11 deletions src/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7319,7 +7319,11 @@ void Compiler::gtBlockOpInit(GenTreePtr result, GenTreePtr dst, GenTreePtr srcOr
{
src = src->AsIndir()->Addr()->gtGetOp1();
}
#ifdef FEATURE_HW_INTRINSICS
if ((src->OperGet() == GT_SIMD) || (src->OperGet() == GT_HWIntrinsic))
#else
if (src->OperGet() == GT_SIMD)
#endif // FEATURE_HW_INTRINSICS
{
if (dst->OperIsBlk() && (dst->AsIndir()->Addr()->OperGet() == GT_ADDR))
{
Expand Down Expand Up @@ -17850,10 +17854,7 @@ GenTreeSIMD* Compiler::gtNewSIMDNode(
{
assert(op1 != nullptr);
SetOpLclRelatedToSIMDIntrinsic(op1);
if (op2 != nullptr)
{
SetOpLclRelatedToSIMDIntrinsic(op2);
}
SetOpLclRelatedToSIMDIntrinsic(op2);

return new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, baseType, size);
}
Expand All @@ -17867,14 +17868,17 @@ GenTreeSIMD* Compiler::gtNewSIMDNode(
//
void Compiler::SetOpLclRelatedToSIMDIntrinsic(GenTreePtr op)
{
if (op->OperIsLocal())
{
setLclRelatedToSIMDIntrinsic(op);
}
else if ((op->OperGet() == GT_OBJ) && (op->gtOp.gtOp1->OperGet() == GT_ADDR) &&
op->gtOp.gtOp1->gtOp.gtOp1->OperIsLocal())
if (op != nullptr)
{
setLclRelatedToSIMDIntrinsic(op->gtOp.gtOp1->gtOp.gtOp1);
if (op->OperIsLocal())
{
setLclRelatedToSIMDIntrinsic(op);
}
else if ((op->OperGet() == GT_OBJ) && (op->gtOp.gtOp1->OperGet() == GT_ADDR) &&
op->gtOp.gtOp1->gtOp.gtOp1->OperIsLocal())
{
setLclRelatedToSIMDIntrinsic(op->gtOp.gtOp1->gtOp.gtOp1);
}
}
}

Expand Down Expand Up @@ -17912,12 +17916,17 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type,
GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(
var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size)
{
SetOpLclRelatedToSIMDIntrinsic(op1);

return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, op1, hwIntrinsicID, baseType, size);
}

GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(
var_types type, GenTree* op1, GenTree* op2, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size)
{
SetOpLclRelatedToSIMDIntrinsic(op1);
SetOpLclRelatedToSIMDIntrinsic(op2);

return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, op1, op2, hwIntrinsicID, baseType, size);
}

Expand All @@ -17929,6 +17938,10 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type,
var_types baseType,
unsigned size)
{
SetOpLclRelatedToSIMDIntrinsic(op1);
SetOpLclRelatedToSIMDIntrinsic(op2);
SetOpLclRelatedToSIMDIntrinsic(op3);

return new (this, GT_HWIntrinsic)
GenTreeHWIntrinsic(type, gtNewArgList(op1, op2, op3), hwIntrinsicID, baseType, size);
}
Expand All @@ -17942,12 +17955,19 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type,
var_types baseType,
unsigned size)
{
SetOpLclRelatedToSIMDIntrinsic(op1);
SetOpLclRelatedToSIMDIntrinsic(op2);
SetOpLclRelatedToSIMDIntrinsic(op3);
SetOpLclRelatedToSIMDIntrinsic(op4);

return new (this, GT_HWIntrinsic)
GenTreeHWIntrinsic(type, gtNewArgList(op1, op2, op3, op4), hwIntrinsicID, baseType, size);
}

GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID)
{
SetOpLclRelatedToSIMDIntrinsic(op1);

return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, op1, hwIntrinsicID, TYP_UNKNOWN, 0);
}

Expand All @@ -17956,6 +17976,9 @@ GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type,
GenTree* op2,
NamedIntrinsic hwIntrinsicID)
{
SetOpLclRelatedToSIMDIntrinsic(op1);
SetOpLclRelatedToSIMDIntrinsic(op2);

return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, op1, op2, hwIntrinsicID, TYP_UNKNOWN, 0);
}

Expand Down
3 changes: 2 additions & 1 deletion src/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19127,7 +19127,8 @@ bool Compiler::IsTargetIntrinsic(CorInfoIntrinsics intrinsicId)
case CORINFO_INTRINSIC_Round:
case CORINFO_INTRINSIC_Ceiling:
case CORINFO_INTRINSIC_Floor:
return compSupports(InstructionSet_SSE41);
// TODO-XArch-CQ: Update to work on non-AVX machines: https://github.com/dotnet/coreclr/issues/15908
return compSupports(InstructionSet_SSE41) && canUseVexEncoding();

default:
return false;
Expand Down
13 changes: 12 additions & 1 deletion tests/src/JIT/HardwareIntrinsics/X86/Sse/LoadAlignedVector128.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ static unsafe int Main(string[] args)

if (Sse.IsSupported)
{
float* inArray = stackalloc float[4];
byte* inBuffer = stackalloc byte[32];
float* inArray = Align(inBuffer, 16);
float* outArray = stackalloc float[4];

var vf = Sse.LoadAlignedVector128(inArray);
Expand All @@ -47,5 +48,15 @@ static unsafe int Main(string[] args)

return testResult;
}

static unsafe float* Align(byte* buffer, byte expectedAlignment)
{
// Compute how bad the misalignment is, which is at most (expectedAlignment - 1).
// Then subtract that from the expectedAlignment and add it to the original address
// to compute the aligned address.

var misalignment = expectedAlignment - ((ulong)(buffer) % expectedAlignment);
return (float*)(buffer + misalignment);
}
}
}