Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 78 additions & 20 deletions src/coreclr/jit/decomposelongs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,12 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
}
}

#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_X86)
if (!tree->TypeIs(TYP_LONG) &&
!(tree->OperIs(GT_CAST) && varTypeIsLong(tree->AsCast()->CastOp()) && varTypeIsFloating(tree)))
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: negated conditions like this can be hard to read. A small comment explaining that we want to handle nodes that produce a long, or a GT_CAST long->float, would be beneficial IMO.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. I actually plan on extending this to handle casts in the opposite direction as well, and that will make this check even more hairy. I'll do something to simplify it then.

#else
if (!tree->TypeIs(TYP_LONG))
#endif // FEATURE_HW_INTRINSICS && TARGET_X86
{
return tree->gtNext;
}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fact that, from this point onwards, `tree` can now also be a GT_CAST that produces a floating-point value, rather than only a node that produces a long, seems like a tricky thing that might trip people up in the future.

Expand All @@ -157,15 +162,18 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)

GenTree* user = use.User();

if (user->OperIsHWIntrinsic())
if (tree->TypeIs(TYP_LONG) && (user->OperIsHWIntrinsic() || (user->OperIs(GT_CAST) && varTypeIsFloating(user))))
{
if (tree->OperIs(GT_CNS_LNG) ||
(tree->OperIs(GT_IND, GT_LCL_FLD) && m_lowering->IsSafeToContainMem(user, tree)))
{
NamedIntrinsic intrinsicId = user->AsHWIntrinsic()->GetHWIntrinsicId();
assert(HWIntrinsicInfo::IsVectorCreate(intrinsicId) ||
HWIntrinsicInfo::IsVectorCreateScalar(intrinsicId) ||
HWIntrinsicInfo::IsVectorCreateScalarUnsafe(intrinsicId));
if (user->OperIsHWIntrinsic())
{
NamedIntrinsic intrinsicId = user->AsHWIntrinsic()->GetHWIntrinsicId();
assert(HWIntrinsicInfo::IsVectorCreate(intrinsicId) ||
HWIntrinsicInfo::IsVectorCreateScalar(intrinsicId) ||
HWIntrinsicInfo::IsVectorCreateScalarUnsafe(intrinsicId));
}

return tree->gtNext;
}
Expand Down Expand Up @@ -562,28 +570,78 @@ GenTree* DecomposeLongs::DecomposeStoreLclFld(LIR::Use& use)
GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
{
assert(use.IsInitialized());
assert(use.Def()->OperGet() == GT_CAST);
assert(use.Def()->OperIs(GT_CAST));

GenTree* cast = use.Def()->AsCast();
GenTree* loResult = nullptr;
GenTree* hiResult = nullptr;
GenTreeCast* cast = use.Def()->AsCast();
var_types srcType = cast->CastFromType();
var_types dstType = cast->CastToType();

var_types srcType = cast->CastFromType();
var_types dstType = cast->CastToType();

if ((cast->gtFlags & GTF_UNSIGNED) != 0)
if (cast->IsUnsigned())
{
srcType = varTypeToUnsigned(srcType);
}

bool skipDecomposition = false;
#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_X86)
if (varTypeIsFloating(dstType))
{
// We will reach this path only if morph did not convert the cast to a helper call,
// meaning we can perform the cast using SIMD instructions.
// The sequence this creates is simply:
// AVX512DQ.VL.ConvertToVector128Single(Vector128.CreateScalarUnsafe(LONG)).ToScalar()

NamedIntrinsic intrinsicId = NI_Illegal;
GenTree* srcOp = cast->CastOp();
var_types dstType = cast->CastToType();
CorInfoType baseFloatingType = (dstType == TYP_FLOAT) ? CORINFO_TYPE_FLOAT : CORINFO_TYPE_DOUBLE;
CorInfoType baseIntegralType = cast->IsUnsigned() ? CORINFO_TYPE_ULONG : CORINFO_TYPE_LONG;

assert(!cast->gtOverflow());

if (m_compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ_VL))
{
intrinsicId = (dstType == TYP_FLOAT) ? NI_AVX512DQ_VL_ConvertToVector128Single
: NI_AVX512DQ_VL_ConvertToVector128Double;
}
else
{
assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v1));
intrinsicId =
(dstType == TYP_FLOAT) ? NI_AVX10v1_ConvertToVector128Single : NI_AVX10v1_ConvertToVector128Double;
}
Comment thread
saucecontrol marked this conversation as resolved.

GenTree* createScalar = m_compiler->gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, srcOp, baseIntegralType, 16);
GenTree* convert =
m_compiler->gtNewSimdHWIntrinsicNode(TYP_SIMD16, createScalar, intrinsicId, baseIntegralType, 16);
GenTree* toScalar = m_compiler->gtNewSimdToScalarNode(dstType, convert, baseFloatingType, 16);

Range().InsertAfter(cast, createScalar, convert, toScalar);
Range().Remove(cast);

if (createScalar->IsCnsVec())
{
Range().Remove(srcOp);
}

if (use.IsDummyUse())
{
toScalar->SetUnusedValue();
}
use.ReplaceWith(toScalar);

return toScalar->gtNext;
}
#endif // FEATURE_HW_INTRINSICS && TARGET_X86

bool skipDecomposition = false;
GenTree* loResult = nullptr;
GenTree* hiResult = nullptr;

if (varTypeIsLong(srcType))
{
if (cast->gtOverflow() && (varTypeIsUnsigned(srcType) != varTypeIsUnsigned(dstType)))
{
GenTree* srcOp = cast->gtGetOp1();
noway_assert(srcOp->OperGet() == GT_LONG);
GenTree* srcOp = cast->CastOp();
noway_assert(srcOp->OperIs(GT_LONG));
GenTree* loSrcOp = srcOp->gtGetOp1();
GenTree* hiSrcOp = srcOp->gtGetOp2();

Expand All @@ -595,13 +653,13 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
// check provided by codegen.
//

const bool signExtend = (cast->gtFlags & GTF_UNSIGNED) == 0;
const bool signExtend = !cast->IsUnsigned();
loResult = EnsureIntSized(loSrcOp, signExtend);

hiResult = cast;
hiResult->gtType = TYP_INT;
hiResult->AsCast()->gtCastType = TYP_UINT;
hiResult->gtFlags &= ~GTF_UNSIGNED;
hiResult->ClearUnsigned();
hiResult->AsOp()->gtOp1 = hiSrcOp;

Range().Remove(srcOp);
Expand Down Expand Up @@ -631,7 +689,7 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
}
else
{
if (!use.IsDummyUse() && (use.User()->OperGet() == GT_MUL))
if (!use.IsDummyUse() && use.User()->OperIs(GT_MUL))
{
//
// This int->long cast is used by a GT_MUL that will be transformed by DecomposeMul into a
Expand All @@ -646,7 +704,7 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
}
else if (varTypeIsUnsigned(srcType))
{
const bool signExtend = (cast->gtFlags & GTF_UNSIGNED) == 0;
const bool signExtend = !cast->IsUnsigned();
loResult = EnsureIntSized(cast->gtGetOp1(), signExtend);

hiResult = m_compiler->gtNewZeroConNode(TYP_INT);
Expand Down
18 changes: 16 additions & 2 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -417,11 +417,16 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
// Because there is no IL instruction conv.r4.un, uint/ulong -> float
// casts are always imported as CAST(float <- CAST(double <- uint/ulong)).
// We can usually eliminate the redundant intermediate cast as an optimization.
//
// AArch and xarch+EVEX have instructions that can cast directly from
// all integers (except for longs on 32-bit of course) to floats.
// all integers (except for longs on ARM32) to floats.
// On x64, we also have the option of widening uint -> long and
// using the signed conversion instructions, and ulong -> float/double
// is handled directly in codegen, so we can allow all casts.
//
// This logic will also catch CAST(float <- CAST(double <- float))
// and reduce it to CAST(float <- float), which is handled in codegen as
// an optional mov.
else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && oper->OperIs(GT_CAST)
#ifndef TARGET_64BIT
&& !varTypeIsLong(oper->AsCast()->CastOp())
Expand Down Expand Up @@ -481,6 +486,15 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
#endif // TARGET_AMD64

#ifdef TARGET_X86
#ifdef FEATURE_HW_INTRINSICS
else if (varTypeIsLong(srcType) && varTypeIsFloating(dstType) && canUseEvexEncoding())
{
// We can handle these casts directly using SIMD instructions.
// The transform to SIMD is done in DecomposeLongs.
return nullptr;
}
#endif // FEATURE_HW_INTRINSICS

// Do we have to do two step U4/8 -> R4/8 ?
else if (tree->IsUnsigned() && varTypeIsFloating(dstType))
{
Expand All @@ -494,7 +508,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
{
oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG);
oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
tree->gtFlags &= ~GTF_UNSIGNED;
tree->ClearUnsigned();
return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
}
}
Expand Down
Loading