Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7451,6 +7451,18 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
noway_assert(srcType != TYP_UINT);
noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT));

if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
{
if (srcType == TYP_ULONG && (dstType == TYP_DOUBLE || dstType == TYP_FLOAT))
{
genConsumeOperands(treeNode->AsOp());
instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType));
GetEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1);
genProduceReg(treeNode);
return;
}
}

// To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used
// which does a partial write to lower 4/8 bytes of xmm register keeping the other
// upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop,
Expand Down Expand Up @@ -7562,8 +7574,10 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));

// We shouldn't be seeing uint64 here as it should have been converted
// into a helper call by either front-end or lowering phase.
noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));
// into a helper call by either front-end or lowering phase, unless we have AVX512F
// accelerated conversions.
noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));

// If the dstType is TYP_UINT, we have 32-bits to encode the
// float number. Any of 33rd or above bits can be the sign bit.
Expand All @@ -7576,7 +7590,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
// Note that we need to specify dstType here so that it will determine
// the size of destination integer register and also the rex.w prefix.
genConsumeOperands(treeNode->AsOp());
instruction ins = ins_FloatConv(TYP_INT, srcType, emitTypeSize(srcType));
instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType));
GetEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
genProduceReg(treeNode);
}
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -3857,6 +3857,15 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id) const
return EA_32BYTE;
}

case INS_vcvttss2usi64:
{
if (defaultSize == 8)
{
return EA_4BYTE;
}
return defaultSize;
}

case INS_movddup:
{
if (defaultSize == 64)
Expand Down
35 changes: 29 additions & 6 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1294,7 +1294,6 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
case INS_vcvtsd2usi:
case INS_vcvtss2usi:
case INS_vcvttsd2usi:
case INS_vcvttss2usi:
{
if (attr == EA_8BYTE)
{
Expand Down Expand Up @@ -2518,7 +2517,8 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
case INS_vcvtsd2usi:
case INS_vcvtss2usi:
case INS_vcvttsd2usi:
case INS_vcvttss2usi:
case INS_vcvttss2usi32:
case INS_vcvttss2usi64:
{
// These SSE instructions write to a general purpose integer register.
return false;
Expand Down Expand Up @@ -11234,12 +11234,18 @@ void emitter::emitDispIns(
case INS_vcvtsd2usi:
case INS_vcvtss2usi:
case INS_vcvttsd2usi:
case INS_vcvttss2usi:
{
printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
break;
}

case INS_vcvttss2usi32:
case INS_vcvttss2usi64:
{
printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_4BYTE));
break;
}

#ifdef TARGET_AMD64
case INS_movsxd:
{
Expand Down Expand Up @@ -18157,23 +18163,40 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_cvtsi2sd64:
case INS_cvtsi2ss64:
case INS_vcvtsd2usi:
case INS_vcvttsd2usi:
case INS_vcvtusi2sd32:
case INS_vcvtusi2sd64:
case INS_vcvtusi2ss32:
case INS_vcvtusi2ss64:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_7C;
break;

case INS_vcvttsd2usi:
result.insLatency += PERFSCORE_LATENCY_6C;
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
break;

case INS_vcvtusi2sd64:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_5C;
break;

case INS_cvttss2si:
case INS_cvtss2si:
case INS_vcvtss2usi:
case INS_vcvttss2usi:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += opSize == EA_8BYTE ? PERFSCORE_LATENCY_8C : PERFSCORE_LATENCY_7C;
break;

case INS_vcvttss2usi32:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_7C;
break;

case INS_vcvttss2usi64:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_8C;
break;

case INS_cvtss2sd:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_5C;
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -836,7 +836,7 @@ HARDWARE_INTRINSIC(AVX512F, Ceiling,
HARDWARE_INTRINSIC(AVX512F, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtusi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(AVX512F, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtusi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi, INS_vcvtsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi, INS_vcvttsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi32, INS_vcvttsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Byte, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector128ByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Int16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
Expand Down Expand Up @@ -948,7 +948,7 @@ HARDWARE_INTRINSIC(AVX512F_VL, ShiftRightArithmeticVariable,
HARDWARE_INTRINSIC(AVX512F_X64, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtusi2sd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F_X64, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtusi2ss64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi, INS_vcvtsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi, INS_vcvttsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi64, INS_vcvttsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
Expand Down
16 changes: 16 additions & 0 deletions src/coreclr/jit/instr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2165,6 +2165,9 @@ instruction CodeGen::ins_MathOp(genTreeOps oper, var_types type)
instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
{
// AVX: For now we support only conversion from Int/Long -> float
// AVX512: Supports following conversions
// srcType = float/double castToType = ulong
// srcType = ulong castToType = double

switch (from)
{
Expand Down Expand Up @@ -2213,6 +2216,8 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
return ins_Move_Extend(TYP_FLOAT, false);
case TYP_DOUBLE:
return INS_cvtss2sd;
case TYP_ULONG:
return INS_vcvttss2usi64;
default:
unreached();
}
Expand All @@ -2225,6 +2230,8 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
return INS_cvttsd2si;
case TYP_LONG:
return INS_cvttsd2si;
case TYP_ULONG:
return INS_vcvttsd2usi;
case TYP_FLOAT:
return INS_cvtsd2ss;
case TYP_DOUBLE:
Expand All @@ -2234,6 +2241,15 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr)
}
break;

case TYP_ULONG:
switch (to)
{
case TYP_DOUBLE:
return INS_vcvtusi2sd64;
default:
unreached();
}

default:
unreached();
}
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -626,7 +626,8 @@ INST3(vcvtss2usi, "cvtss2usi", IUM_WR, BAD_CODE, BAD_
INST3(vcvttpd2udq, "cvttpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned DWORDs
INST3(vcvttps2udq, "cvttps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned DWORDs
INST3(vcvttsd2usi, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned DWORD/QWORD
INST3(vcvttss2usi, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_WX | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD
INST3(vcvttss2usi32, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD
INST3(vcvttss2usi64, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned QWORD
INST3(vcvtudq2pd, "cvtudq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt packed unsigned DWORDs to doubles
INST3(vcvtudq2ps, "cvtudq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt packed unsigned DWORDs to singles
INST3(vcvtusi2sd32, "cvtusi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7B), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned DWORD to double
Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -795,15 +795,15 @@ void Lowering::LowerCast(GenTree* tree)
// srcType = float/double castToType = * and overflow detecting cast
// Reason: must be converted to a helper call
// srcType = float/double, castToType = ulong
// Reason: must be converted to a helper call
// Reason: must be converted to a helper call unless we have AVX512F
// srcType = uint castToType = float/double
// Reason: uint -> float/double = uint -> long -> float/double
// srcType = ulong castToType = float
// Reason: ulong -> float = ulong -> double -> float
if (varTypeIsFloating(srcType))
if (srcType == TYP_FLOAT)
{
noway_assert(!tree->gtOverflow());
noway_assert(castToType != TYP_ULONG);
noway_assert(!tree->gtOverflow() || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
noway_assert(castToType != TYP_ULONG || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
}
else if (srcType == TYP_UINT)
{
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,10 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
#endif // !TARGET_AMD64

case TYP_ULONG:
#ifdef TARGET_AMD64
if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
return nullptr;
#endif
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
default:
unreached();
Expand Down
Loading