Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions src/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
case INS_andnps:
case INS_andpd:
case INS_andps:
case INS_blendpd:
case INS_blendps:
case INS_cmppd:
case INS_cmpps:
case INS_cmpsd:
Expand Down Expand Up @@ -114,6 +116,7 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
case INS_minss:
case INS_movhlps:
case INS_movlhps:
case INS_mpsadbw:
case INS_mulpd:
case INS_mulps:
case INS_mulsd:
Expand All @@ -137,6 +140,7 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
case INS_pandn:
case INS_pavgb:
case INS_pavgw:
case INS_pblendw:
case INS_pcmpeqb:
case INS_pcmpeqd:
case INS_pcmpeqq:
Expand Down Expand Up @@ -5486,6 +5490,9 @@ void emitter::emitIns_SIMD_R_R_R(instruction ins, emitAttr attr, regNumber reg,
{
if (reg1 != reg)
{
// Ensure we aren't overwriting op2
assert(reg2 != reg);

emitIns_R_R(INS_movaps, attr, reg, reg1);
}
emitIns_R_R(ins, attr, reg, reg2);
Expand Down Expand Up @@ -5565,10 +5572,18 @@ void emitter::emitIns_SIMD_R_R_R_R(
// SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
if (reg3 != REG_XMM0)
{
// Ensure we aren't overwriting op1 or op2
assert(reg1 != REG_XMM0);
assert(reg2 != REG_XMM0);

emitIns_R_R(INS_movaps, attr, REG_XMM0, reg3);
}
if (reg1 != reg)
{
// Ensure we aren't overwriting op2 or op3
assert(reg2 != reg);
assert((reg3 == REG_XMM0) || (reg != REG_XMM0));

emitIns_R_R(INS_movaps, attr, reg, reg1);
}
emitIns_R_R(ins, attr, reg, reg2);
Expand Down Expand Up @@ -5653,6 +5668,9 @@ void emitter::emitIns_SIMD_R_R_R_I(
{
if (reg1 != reg)
{
// Ensure we aren't overwriting op2
assert(reg2 != reg);

emitIns_R_R(INS_movaps, attr, reg, reg1);
}
emitIns_R_R_I(ins, attr, reg, reg2, ival);
Expand Down
97 changes: 96 additions & 1 deletion src/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
{
emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op1Reg);
}
else if ((ival != -1) && varTypeIsFloating(baseType))
{
emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, ival);
}
else
{
emit->emitIns_R_R(ins, simdSize, targetReg, op1Reg);
Expand Down Expand Up @@ -1027,7 +1031,98 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
//
void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node)
{
    // Emits code for the SSE4.1 intrinsics handled in the switch below: the
    // scalar rounding family and the ptest-based Test* predicates. Any other
    // intrinsic ID reaching this function is treated as unreachable.
    NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
    GenTree*       op1         = node->gtGetOp1();
    GenTree*       op2         = node->gtGetOp2();
    regNumber      targetReg   = node->gtRegNum;
    var_types      baseType    = node->gtSIMDBaseType;
    regNumber      op1Reg      = REG_NA;
    emitter*       emit        = getEmitter();

    // Operands are only consumed here when op1 is a plain (non-list) operand.
    if ((op1 != nullptr) && !op1->OperIsList())
    {
        op1Reg = op1->gtRegNum;
        genConsumeOperands(node);
    }

    switch (intrinsicID)
    {
        case NI_SSE41_CeilingScalar:
        case NI_SSE41_FloorScalar:
        case NI_SSE41_RoundCurrentDirectionScalar:
        case NI_SSE41_RoundToNearestIntegerScalar:
        case NI_SSE41_RoundToNegativeInfinityScalar:
        case NI_SSE41_RoundToPositiveInfinityScalar:
        case NI_SSE41_RoundToZeroScalar:
        {
            assert((baseType == TYP_FLOAT) || (baseType == TYP_DOUBLE));
            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);

            if (op2 == nullptr)
            {
                // Single-operand overload: op1 is passed as both source operands,
                // together with the immediate looked up for this intrinsic
                // (presumably the rounding-mode control byte -- confirm against
                // the intrinsic table).
                int ival = Compiler::ivalOfHWIntrinsic(intrinsicID);
                emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op1Reg, ival);
            }
            else
            {
                // Two-operand overload is delegated to the shared reg/reg-or-mem/imm helper.
                genHWIntrinsic_R_R_RM_I(node, ins);
            }
            break;
        }

        case NI_SSE41_TestAllOnes:
        {
            regNumber tmpReg = node->GetSingleTempReg();
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            // Comparing a register with itself for equality fills tmpReg with all-one bits.
            emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, emitTypeSize(TYP_SIMD16), tmpReg, tmpReg, tmpReg);
            // Zero the result register before ptest: xor modifies the flags, so it
            // cannot sit between ptest and the setcc that reads them.
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, tmpReg);
            // ptest sets CF when (NOT op1) AND tmp is zero, i.e. op1 is all ones.
            emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_TestAllZeros:
        case NI_SSE41_TestZ:
        {
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            // Clear the result first; xor would otherwise clobber the ptest flags.
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
            // Result is the ZF produced by ptest (op1 AND op2 is zero).
            emit->emitIns_R(INS_sete, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_TestC:
        {
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            // Clear the result first; xor would otherwise clobber the ptest flags.
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
            // Result is the CF produced by ptest ((NOT op1) AND op2 is zero).
            emit->emitIns_R(INS_setb, EA_1BYTE, targetReg);
            break;
        }

        case NI_SSE41_TestMixOnesZeros:
        case NI_SSE41_TestNotZAndNotC:
        {
            assert(Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType) == INS_ptest);
            // Clear the result first; xor would otherwise clobber the ptest flags.
            emit->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
            emit->emitIns_R_R(INS_ptest, emitTypeSize(TYP_SIMD16), op1Reg, op2->gtRegNum);
            // seta yields 1 only when both ZF and CF are clear.
            emit->emitIns_R(INS_seta, EA_1BYTE, targetReg);
            break;
        }

        default:
            unreached();
            break;
    }

    genProduceReg(node);
}

//------------------------------------------------------------------------
Expand Down
Loading