From c667f2ed71196b9a65f0686585d76a708ca94ff1 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 5 Jun 2019 18:21:07 -0700 Subject: [PATCH 1/5] Marking Vector128.Count and Vector256.Count as [Intrinsic] --- .../shared/System/Runtime/Intrinsics/Vector128_1.cs | 1 + .../shared/System/Runtime/Intrinsics/Vector256_1.cs | 1 + src/jit/compiler.h | 2 +- src/jit/gentree.cpp | 2 +- src/jit/hwintrinsiclistxarch.h | 2 ++ src/jit/hwintrinsicxarch.cpp | 11 +++++++++++ src/jit/optimizer.cpp | 2 +- 7 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs index cdcddcb49e26..9f609eabe490 100644 --- a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs +++ b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs @@ -39,6 +39,7 @@ namespace System.Runtime.Intrinsics /// The type of the current instance (<typeparamref name="T" />) is not supported. public static int Count { + [Intrinsic] get { ThrowHelper.ThrowForUnsupportedVectorBaseType(); diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs index 2ed5516455f1..0a9b99347bd7 100644 --- a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs +++ b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs @@ -41,6 +41,7 @@ namespace System.Runtime.Intrinsics /// The type of the current instance (<typeparamref name="T" />) is not supported. 
public static int Count { + [Intrinsic] get { ThrowHelper.ThrowForUnsupportedVectorBaseType(); diff --git a/src/jit/compiler.h b/src/jit/compiler.h index c73ca30c16c1..a07b5c1f46b4 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -5770,7 +5770,7 @@ class Compiler #define LPFLG_VAR_LIMIT 0x0100 // iterator is compared with a local var (var # found in lpVarLimit) #define LPFLG_CONST_LIMIT 0x0200 // iterator is compared with a constant (found in lpConstLimit) #define LPFLG_ARRLEN_LIMIT 0x0400 // iterator is compared with a.len or a[i].len (found in lpArrLenLimit) -#define LPFLG_SIMD_LIMIT 0x0080 // iterator is compared with Vector.Count (found in lpConstLimit) +#define LPFLG_SIMD_LIMIT 0x0080 // iterator is compared with Vector, Vector64, Vector128, or Vector256.Count (found in lpConstLimit) #define LPFLG_HAS_PREHEAD 0x0800 // lpHead is known to be a preHead for this loop #define LPFLG_REMOVED 0x1000 // has been removed from the loop table (unrolled or optimized away) diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index a6bbf8152b99..9f671503b368 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -10196,7 +10196,7 @@ void Compiler::gtDispConst(GenTree* tree) #ifdef FEATURE_SIMD if ((tree->gtFlags & GTF_ICON_SIMD_COUNT) != 0) { - printf(" Vector.Count"); + printf(" Vector, Vector64, Vector128, or Vector256.Count"); } #endif diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h index 447d401f253e..c00a92e21cda 100644 --- a/src/jit/hwintrinsiclistxarch.h +++ b/src/jit/hwintrinsiclistxarch.h @@ -38,6 +38,7 @@ HARDWARE_INTRINSIC(Vector128_AsSingle, "AsSingle", HARDWARE_INTRINSIC(Vector128_AsUInt16, "AsUInt16", Vector128, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128_AsUInt32, 
"AsUInt32", Vector128, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128_AsUInt64, "AsUInt64", Vector128, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector128_Count, "get_Count", Vector128, -1, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128_CreateScalarUnsafe, "CreateScalarUnsafe", Vector128, -1, 16, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128_GetElement, "GetElement", Vector128, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128_WithElement, "WithElement", Vector128, -1, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg) @@ -62,6 +63,7 @@ HARDWARE_INTRINSIC(Vector256_AsSingle, "AsSingle", HARDWARE_INTRINSIC(Vector256_AsUInt16, "AsUInt16", Vector256, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256_AsUInt32, "AsUInt32", Vector256, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256_AsUInt64, "AsUInt64", Vector256, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector256_Count, "get_Count", Vector256, -1, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256_CreateScalarUnsafe, "CreateScalarUnsafe", Vector256, -1, 32, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector256_GetElement, "GetElement", Vector256, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256_WithElement, "WithElement", Vector256, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg) diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp index 7835454e70ad..114de7d0d358 100644 --- a/src/jit/hwintrinsicxarch.cpp +++ b/src/jit/hwintrinsicxarch.cpp @@ -1094,6 +1094,17 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Vector128_Count: + case NI_Vector256_Count: + { + assert(sig->numArgs == 0); + + GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, baseType), TYP_INT); + countNode->gtFlags |= GTF_ICON_SIMD_COUNT; + retNode = countNode; + break; + } + case NI_Vector128_CreateScalarUnsafe: { assert(sig->numArgs == 1); diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp index 7749e928ba5f..665d2561f5cf 100644 --- a/src/jit/optimizer.cpp +++ b/src/jit/optimizer.cpp @@ -3553,7 +3553,7 @@ void Compiler::optUnrollLoops() if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) { // In stress mode, quadruple the size limit, and drop - // the restriction that loop limit must be Vector.Count. + // the restriction that loop limit must be Vector, Vector64, Vector128, or Vector256.Count. 
unrollLimitSz *= 4; requiredFlags &= ~LPFLG_SIMD_LIMIT; From 991ca78a1eb5d2c2571ebf3cba40101c2fc7a848 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 5 Jun 2019 18:42:42 -0700 Subject: [PATCH 2/5] Fixing NI_Vector128_Count and NI_Vector256_Count to use clsHnd when getting the simdSize and baseType --- src/jit/compiler.h | 2 ++ src/jit/hwintrinsicArm64.cpp | 1 + src/jit/hwintrinsicxarch.cpp | 9 +++++---- src/jit/importer.cpp | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/jit/compiler.h b/src/jit/compiler.h index a07b5c1f46b4..27640e02e0f3 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -3527,6 +3527,7 @@ class Compiler #ifdef FEATURE_HW_INTRINSICS GenTree* impHWIntrinsic(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig, bool mustExpand); @@ -3540,6 +3541,7 @@ class Compiler #ifdef _TARGET_XARCH_ GenTree* impBaseIntrinsic(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig, bool mustExpand); diff --git a/src/jit/hwintrinsicArm64.cpp b/src/jit/hwintrinsicArm64.cpp index dec60383bd2e..1cd9024e8822 100644 --- a/src/jit/hwintrinsicArm64.cpp +++ b/src/jit/hwintrinsicArm64.cpp @@ -321,6 +321,7 @@ int HWIntrinsicInfo::lookupNumArgs(const GenTreeHWIntrinsic* node) // the expanded intrinsic. // GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig, bool mustExpand) diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp index 114de7d0d358..70bb643a25c1 100644 --- a/src/jit/hwintrinsicxarch.cpp +++ b/src/jit/hwintrinsicxarch.cpp @@ -757,6 +757,7 @@ static bool impIsTableDrivenHWIntrinsic(NamedIntrinsic intrinsicId, HWIntrinsicC // the expanded intrinsic. 
// GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig, bool mustExpand) @@ -954,7 +955,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, { case InstructionSet_Vector128: case InstructionSet_Vector256: - return impBaseIntrinsic(intrinsic, method, sig, mustExpand); + return impBaseIntrinsic(intrinsic, clsHnd, method, sig, mustExpand); case InstructionSet_SSE: return impSSEIntrinsic(intrinsic, method, sig, mustExpand); case InstructionSet_SSE2: @@ -1002,6 +1003,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, // the expanded intrinsic. // GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig, bool mustExpand) @@ -1035,9 +1037,8 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, retType = getSIMDTypeForSize(retSimdSize); } } - else + else if (retType == TYP_STRUCT) { - assert(retType == TYP_STRUCT); baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &simdSize); retType = getSIMDTypeForSize(simdSize); } @@ -1099,7 +1100,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 0); - GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, baseType), TYP_INT); + GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(clsHnd), TYP_INT); countNode->gtFlags |= GTF_ICON_SIMD_COUNT; retNode = countNode; break; diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index 993889d0db68..83942d6005d7 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -3500,7 +3500,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, if ((ni > NI_HW_INTRINSIC_START) && (ni < NI_HW_INTRINSIC_END)) { - GenTree* hwintrinsic = impHWIntrinsic(ni, method, sig, mustExpand); + GenTree* hwintrinsic = impHWIntrinsic(ni, clsHnd, method, sig, mustExpand); if (mustExpand && (hwintrinsic == nullptr)) { From 
71d966ce362efa5e7d0c9b5b076db30a29adcd17 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 5 Jun 2019 18:59:57 -0700 Subject: [PATCH 3/5] Applying the formatting patch. --- src/jit/compiler.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 27640e02e0f3..f201629032ea 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -5772,7 +5772,9 @@ class Compiler #define LPFLG_VAR_LIMIT 0x0100 // iterator is compared with a local var (var # found in lpVarLimit) #define LPFLG_CONST_LIMIT 0x0200 // iterator is compared with a constant (found in lpConstLimit) #define LPFLG_ARRLEN_LIMIT 0x0400 // iterator is compared with a.len or a[i].len (found in lpArrLenLimit) -#define LPFLG_SIMD_LIMIT 0x0080 // iterator is compared with Vector, Vector64, Vector128, or Vector256.Count (found in lpConstLimit) +#define LPFLG_SIMD_LIMIT \ + 0x0080 // iterator is compared with Vector, Vector64, Vector128, or Vector256.Count (found in + // lpConstLimit) #define LPFLG_HAS_PREHEAD 0x0800 // lpHead is known to be a preHead for this loop #define LPFLG_REMOVED 0x1000 // has been removed from the loop table (unrolled or optimized away) From bc25fb101ffadb53653bfad094f42a537bfc19eb Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 6 Jun 2019 08:10:39 -0700 Subject: [PATCH 4/5] Changing some comments to just be "vector element count". 
--- src/jit/compiler.h | 4 +--- src/jit/gentree.cpp | 2 +- src/jit/optimizer.cpp | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/jit/compiler.h b/src/jit/compiler.h index f201629032ea..da42aa14ceec 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -5772,9 +5772,7 @@ class Compiler #define LPFLG_VAR_LIMIT 0x0100 // iterator is compared with a local var (var # found in lpVarLimit) #define LPFLG_CONST_LIMIT 0x0200 // iterator is compared with a constant (found in lpConstLimit) #define LPFLG_ARRLEN_LIMIT 0x0400 // iterator is compared with a.len or a[i].len (found in lpArrLenLimit) -#define LPFLG_SIMD_LIMIT \ - 0x0080 // iterator is compared with Vector, Vector64, Vector128, or Vector256.Count (found in - // lpConstLimit) +#define LPFLG_SIMD_LIMIT 0x0080 // iterator is compared with vector element count (found in lpConstLimit) #define LPFLG_HAS_PREHEAD 0x0800 // lpHead is known to be a preHead for this loop #define LPFLG_REMOVED 0x1000 // has been removed from the loop table (unrolled or optimized away) diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 9f671503b368..df2bdfef06cd 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -10196,7 +10196,7 @@ void Compiler::gtDispConst(GenTree* tree) #ifdef FEATURE_SIMD if ((tree->gtFlags & GTF_ICON_SIMD_COUNT) != 0) { - printf(" Vector, Vector64, Vector128, or Vector256.Count"); + printf(" vector element count"); } #endif diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp index 665d2561f5cf..00e518a890bb 100644 --- a/src/jit/optimizer.cpp +++ b/src/jit/optimizer.cpp @@ -3553,7 +3553,7 @@ void Compiler::optUnrollLoops() if (compStressCompile(STRESS_UNROLL_LOOPS, 50)) { // In stress mode, quadruple the size limit, and drop - // the restriction that loop limit must be Vector, Vector64, Vector128, or Vector256.Count. + // the restriction that loop limit must be vector element count. 
unrollLimitSz *= 4; requiredFlags &= ~LPFLG_SIMD_LIMIT; From fb32383e8cbd9dfac0d5ceca5cf5aabe4a6fb379 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 6 Jun 2019 08:34:00 -0700 Subject: [PATCH 5/5] Fixing impBaseIntrinsic to set the baseType so Vector128_Count and Vector256_Count don't return nullptr --- src/jit/hwintrinsicxarch.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp index 70bb643a25c1..3b0c6f8ef00b 100644 --- a/src/jit/hwintrinsicxarch.cpp +++ b/src/jit/hwintrinsicxarch.cpp @@ -1042,6 +1042,10 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeClass, &simdSize); retType = getSIMDTypeForSize(simdSize); } + else + { + baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize); + } if (!varTypeIsArithmetic(baseType)) { @@ -1100,7 +1104,7 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 0); - GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(clsHnd), TYP_INT); + GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, baseType), TYP_INT); countNode->gtFlags |= GTF_ICON_SIMD_COUNT; retNode = countNode; break;