Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ namespace System.Runtime.Intrinsics
/// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
public static int Count
{
[Intrinsic]
get
{
ThrowHelper.ThrowForUnsupportedVectorBaseType<T>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ namespace System.Runtime.Intrinsics
/// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
public static int Count
{
[Intrinsic]
get
{
ThrowHelper.ThrowForUnsupportedVectorBaseType<T>();
Expand Down
4 changes: 3 additions & 1 deletion src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3527,6 +3527,7 @@ class Compiler

#ifdef FEATURE_HW_INTRINSICS
GenTree* impHWIntrinsic(NamedIntrinsic intrinsic,
CORINFO_CLASS_HANDLE clsHnd,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
bool mustExpand);
Expand All @@ -3540,6 +3541,7 @@ class Compiler

#ifdef _TARGET_XARCH_
GenTree* impBaseIntrinsic(NamedIntrinsic intrinsic,
CORINFO_CLASS_HANDLE clsHnd,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
bool mustExpand);
Expand Down Expand Up @@ -5770,7 +5772,7 @@ class Compiler
#define LPFLG_VAR_LIMIT 0x0100 // iterator is compared with a local var (var # found in lpVarLimit)
#define LPFLG_CONST_LIMIT 0x0200 // iterator is compared with a constant (found in lpConstLimit)
#define LPFLG_ARRLEN_LIMIT 0x0400 // iterator is compared with a.len or a[i].len (found in lpArrLenLimit)
#define LPFLG_SIMD_LIMIT 0x0080 // iterator is compared with Vector<T>.Count (found in lpConstLimit)
#define LPFLG_SIMD_LIMIT 0x0080 // iterator is compared with vector element count (found in lpConstLimit)

#define LPFLG_HAS_PREHEAD 0x0800 // lpHead is known to be a preHead for this loop
#define LPFLG_REMOVED 0x1000 // has been removed from the loop table (unrolled or optimized away)
Expand Down
2 changes: 1 addition & 1 deletion src/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10196,7 +10196,7 @@ void Compiler::gtDispConst(GenTree* tree)
#ifdef FEATURE_SIMD
if ((tree->gtFlags & GTF_ICON_SIMD_COUNT) != 0)
{
printf(" Vector<T>.Count");
printf(" vector element count");
}
#endif

Expand Down
1 change: 1 addition & 0 deletions src/jit/hwintrinsicArm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ int HWIntrinsicInfo::lookupNumArgs(const GenTreeHWIntrinsic* node)
// the expanded intrinsic.
//
GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
CORINFO_CLASS_HANDLE clsHnd,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
bool mustExpand)
Expand Down
2 changes: 2 additions & 0 deletions src/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ HARDWARE_INTRINSIC(Vector128_AsSingle, "AsSingle",
HARDWARE_INTRINSIC(Vector128_AsUInt16, "AsUInt16", Vector128, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128_AsUInt32, "AsUInt32", Vector128, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128_AsUInt64, "AsUInt64", Vector128, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128_Count, "get_Count", Vector128, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128_CreateScalarUnsafe, "CreateScalarUnsafe", Vector128, -1, 16, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128_GetElement, "GetElement", Vector128, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128_WithElement, "WithElement", Vector128, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg)
Expand All @@ -62,6 +63,7 @@ HARDWARE_INTRINSIC(Vector256_AsSingle, "AsSingle",
HARDWARE_INTRINSIC(Vector256_AsUInt16, "AsUInt16", Vector256, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256_AsUInt32, "AsUInt32", Vector256, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256_AsUInt64, "AsUInt64", Vector256, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256_Count, "get_Count", Vector256, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256_CreateScalarUnsafe, "CreateScalarUnsafe", Vector256, -1, 32, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256_GetElement, "GetElement", Vector256, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector256_WithElement, "WithElement", Vector256, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg)
Expand Down
22 changes: 19 additions & 3 deletions src/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,7 @@ static bool impIsTableDrivenHWIntrinsic(NamedIntrinsic intrinsicId, HWIntrinsicC
// the expanded intrinsic.
//
GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
CORINFO_CLASS_HANDLE clsHnd,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
bool mustExpand)
Expand Down Expand Up @@ -954,7 +955,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
{
case InstructionSet_Vector128:
case InstructionSet_Vector256:
return impBaseIntrinsic(intrinsic, method, sig, mustExpand);
return impBaseIntrinsic(intrinsic, clsHnd, method, sig, mustExpand);
case InstructionSet_SSE:
return impSSEIntrinsic(intrinsic, method, sig, mustExpand);
case InstructionSet_SSE2:
Expand Down Expand Up @@ -1002,6 +1003,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
// the expanded intrinsic.
//
GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
CORINFO_CLASS_HANDLE clsHnd,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
bool mustExpand)
Expand Down Expand Up @@ -1035,12 +1037,15 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
retType = getSIMDTypeForSize(retSimdSize);
}
}
else
else if (retType == TYP_STRUCT)
{
assert(retType == TYP_STRUCT);
baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &simdSize);
retType = getSIMDTypeForSize(simdSize);
}
else
{
baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize);
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Had to move this here because the check right below (`if (!varTypeIsArithmetic(baseType))`) is what handles an unsupported `T`, and we were returning `nullptr`.

Validated that we now return the integer constant node, that the loop unrolling functionality works (cc. @gfoidl), and the codegen for the known cases is now "efficient".

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe for 5.0, it would be nice to make the loop unrolling work for "real world" scenarios, such as `for (int i = 0; i < data.Length; i += Vector128<T>.Count)`...

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps you could file an issue?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will file one before merging.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like this is largely covered by both https://github.com/dotnet/coreclr/issues/11606 and https://github.com/dotnet/coreclr/issues/20486.

I've added comments to both of these instead.

}

if (!varTypeIsArithmetic(baseType))
{
Expand Down Expand Up @@ -1094,6 +1099,17 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_Vector128_Count:
case NI_Vector256_Count:
{
assert(sig->numArgs == 0);

GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, baseType), TYP_INT);
countNode->gtFlags |= GTF_ICON_SIMD_COUNT;
retNode = countNode;
break;
}

case NI_Vector128_CreateScalarUnsafe:
{
assert(sig->numArgs == 1);
Expand Down
2 changes: 1 addition & 1 deletion src/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3500,7 +3500,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,

if ((ni > NI_HW_INTRINSIC_START) && (ni < NI_HW_INTRINSIC_END))
{
GenTree* hwintrinsic = impHWIntrinsic(ni, method, sig, mustExpand);
GenTree* hwintrinsic = impHWIntrinsic(ni, clsHnd, method, sig, mustExpand);

if (mustExpand && (hwintrinsic == nullptr))
{
Expand Down
2 changes: 1 addition & 1 deletion src/jit/optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3553,7 +3553,7 @@ void Compiler::optUnrollLoops()
if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
{
// In stress mode, quadruple the size limit, and drop
// the restriction that loop limit must be Vector<T>.Count.
// the restriction that loop limit must be vector element count.

unrollLimitSz *= 4;
requiredFlags &= ~LPFLG_SIMD_LIMIT;
Expand Down