From 703ab93d2679086fe31635e2d2369f78a1236ade Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 23 Apr 2020 10:20:40 -0700 Subject: [PATCH 01/40] Adding basic support for recognizing and handling SIMD intrinsics as HW intrinsics --- src/coreclr/src/jit/CMakeLists.txt | 10 +- src/coreclr/src/jit/compiler.h | 18 +- src/coreclr/src/jit/hwintrinsic.cpp | 15 +- .../src/jit/hwintrinsiccodegenxarch.cpp | 18 +- src/coreclr/src/jit/hwintrinsiclistxarch.h | 2 - src/coreclr/src/jit/importer.cpp | 20 + src/coreclr/src/jit/lower.cpp | 3 +- src/coreclr/src/jit/lowerarmarch.cpp | 9 + src/coreclr/src/jit/lowerxarch.cpp | 10 +- src/coreclr/src/jit/namedintrinsiclist.h | 12 + src/coreclr/src/jit/simdashwintrinsic.cpp | 402 ++++++++++++++++++ src/coreclr/src/jit/simdashwintrinsic.h | 134 ++++++ .../src/jit/simdashwintrinsiclistarm64.h | 92 ++++ .../src/jit/simdashwintrinsiclistxarch.h | 111 +++++ 14 files changed, 832 insertions(+), 24 deletions(-) create mode 100644 src/coreclr/src/jit/simdashwintrinsic.cpp create mode 100644 src/coreclr/src/jit/simdashwintrinsic.h create mode 100644 src/coreclr/src/jit/simdashwintrinsiclistarm64.h create mode 100644 src/coreclr/src/jit/simdashwintrinsiclistxarch.h diff --git a/src/coreclr/src/jit/CMakeLists.txt b/src/coreclr/src/jit/CMakeLists.txt index 5093ce9a0161f8..8ba2c47973fde4 100644 --- a/src/coreclr/src/jit/CMakeLists.txt +++ b/src/coreclr/src/jit/CMakeLists.txt @@ -170,6 +170,7 @@ if (CLR_CMAKE_TARGET_WIN32) regset.h sideeffects.h simd.h + simdashwintrinsic.h simdintrinsiclist.h sm.h smallhash.h @@ -204,14 +205,16 @@ if (CLR_CMAKE_TARGET_WIN32) instrsarm.h instrsarm64.h registerarm.h - registerarm64.h) + registerarm64.h + simdashwintrinsiclistarm64.h) elseif (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386) list (APPEND JIT_HEADERS emitfmtsxarch.h emitxarch.h hwintrinsiclistxarch.h hwintrinsic.h - instrsxarch.h) + instrsxarch.h + simdashwintrinsiclistxarch.h) endif () endif(CLR_CMAKE_TARGET_WIN32) @@ -223,6 +226,7 @@ set( 
JIT_AMD64_SOURCES lowerxarch.cpp lsraxarch.cpp simd.cpp + simdashwintrinsic.cpp simdcodegenxarch.cpp targetamd64.cpp unwindamd64.cpp @@ -249,6 +253,7 @@ set( JIT_I386_SOURCES lowerxarch.cpp lsraxarch.cpp simd.cpp + simdashwintrinsic.cpp simdcodegenxarch.cpp targetx86.cpp unwindx86.cpp @@ -264,6 +269,7 @@ set( JIT_ARM64_SOURCES lsraarmarch.cpp lsraarm64.cpp simd.cpp + simdashwintrinsic.cpp targetarm64.cpp unwindarm.cpp unwindarm64.cpp diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 4ec9095c25ee41..f9a6651e846923 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -61,6 +61,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "hwintrinsic.h" #include "simd.h" +#include "simdashwintrinsic.h" // This is only used locally in the JIT to indicate that // a verification block should be inserted @@ -3679,16 +3680,27 @@ class Compiler CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig, bool mustExpand); + GenTree* impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_METHOD_HANDLE method, + CORINFO_SIG_INFO* sig, + bool mustExpand); protected: bool compSupportsHWIntrinsic(CORINFO_InstructionSet isa); + GenTree* impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_METHOD_HANDLE method, + CORINFO_SIG_INFO* sig, + bool mustExpand); + GenTree* impSpecialIntrinsic(NamedIntrinsic intrinsic, CORINFO_CLASS_HANDLE clsHnd, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig); - GenTree* getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass); + GenTree* getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass, bool expectAddr = false); GenTree* impNonConstFallback(NamedIntrinsic intrinsic, var_types simdType, var_types baseType); GenTree* addRangeCheckIfNeeded(NamedIntrinsic intrinsic, GenTree* lastOp, bool mustExpand, int immUpperBound); @@ -8154,8 +8166,9 @@ 
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return emitTypeSize(TYP_SIMD8); } +public: // Returns the codegen type for a given SIMD size. - var_types getSIMDTypeForSize(unsigned size) + static var_types getSIMDTypeForSize(unsigned size) { var_types simdType = TYP_UNDEF; if (size == 8) @@ -8181,6 +8194,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX return simdType; } +private: unsigned getSIMDInitTempVarNum() { if (lvaSIMDInitTempVarNum == BAD_VAR_NUM) diff --git a/src/coreclr/src/jit/hwintrinsic.cpp b/src/coreclr/src/jit/hwintrinsic.cpp index e376d06a9dab51..bef3b265060419 100644 --- a/src/coreclr/src/jit/hwintrinsic.cpp +++ b/src/coreclr/src/jit/hwintrinsic.cpp @@ -494,16 +494,17 @@ bool HWIntrinsicInfo::isImmOp(NamedIntrinsic id, const GenTree* op) } //------------------------------------------------------------------------ -// // getArgForHWIntrinsic: pop an argument from the stack and validate its type +// getArgForHWIntrinsic: pop an argument from the stack and validate its type // // Arguments: -// argType -- the required type of argument -// argClass -- the class handle of argType +// argType -- the required type of argument +// argClass -- the class handle of argType +// expectAddr -- if true indicates we are expecting type stack entry to be a TYP_BYREF. 
// // Return Value: // the validated argument // -GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass) +GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass, bool expectAddr) { GenTree* arg = nullptr; if (argType == TYP_STRUCT) @@ -511,9 +512,9 @@ GenTree* Compiler::getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE unsigned int argSizeBytes; var_types base = getBaseTypeAndSizeOfSIMDType(argClass, &argSizeBytes); argType = getSIMDTypeForSize(argSizeBytes); - assert((argType == TYP_SIMD8) || (argType == TYP_SIMD16) || (argType == TYP_SIMD32)); - arg = impSIMDPopStack(argType); - assert((arg->TypeGet() == TYP_SIMD8) || (arg->TypeGet() == TYP_SIMD16) || (arg->TypeGet() == TYP_SIMD32)); + assert(varTypeIsSIMD(argType)); + arg = impSIMDPopStack(argType, expectAddr); + assert(varTypeIsSIMD(arg->TypeGet())); } else { diff --git a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp index 240244ff20569c..2be9f5ced23ca0 100644 --- a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp @@ -102,7 +102,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(numArgs >= 0); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); assert(ins != INS_invalid); - emitAttr simdSize = EA_ATTR(node->gtSIMDSize); + emitAttr simdSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize)); assert(simdSize != 0); switch (numArgs) @@ -254,11 +254,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else if (node->TypeGet() == TYP_VOID) { - genHWIntrinsic_R_RM(node, ins, EA_ATTR(node->gtSIMDSize), op1Reg, op2); + genHWIntrinsic_R_RM(node, ins, simdSize, op1Reg, op2); } else { - genHWIntrinsic_R_R_RM(node, ins, EA_ATTR(node->gtSIMDSize)); + genHWIntrinsic_R_R_RM(node, ins, simdSize); } break; } @@ -546,7 +546,7 @@ void CodeGen::genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* 
node, instruction ins, i var_types targetType = node->TypeGet(); regNumber targetReg = node->GetRegNum(); GenTree* op1 = node->gtGetOp1(); - emitAttr simdSize = EA_ATTR(node->gtSIMDSize); + emitAttr simdSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize)); emitter* emit = GetEmitter(); // TODO-XArch-CQ: Commutative operations can have op1 be contained @@ -628,7 +628,7 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, regNumber targetReg = node->GetRegNum(); GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2(); - emitAttr simdSize = EA_ATTR(node->gtSIMDSize); + emitAttr simdSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize)); emitter* emit = GetEmitter(); // TODO-XArch-CQ: Commutative operations can have op1 be contained @@ -792,7 +792,7 @@ void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins) GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2(); GenTree* op3 = nullptr; - emitAttr simdSize = EA_ATTR(node->gtSIMDSize); + emitAttr simdSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize)); emitter* emit = GetEmitter(); assert(op1->OperIsList()); @@ -1146,7 +1146,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) assert(node->gtGetOp2() == nullptr); emitter* emit = GetEmitter(); - emitAttr attr = EA_ATTR(node->gtSIMDSize); + emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize)); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); switch (intrinsicId) @@ -1621,7 +1621,7 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node) { NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; var_types baseType = node->gtSIMDBaseType; - emitAttr attr = EA_ATTR(node->gtSIMDSize); + emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize)); var_types targetType = node->TypeGet(); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); int 
numArgs = HWIntrinsicInfo::lookupNumArgs(node); @@ -1934,7 +1934,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) { NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; var_types baseType = node->gtSIMDBaseType; - emitAttr attr = EA_ATTR(node->gtSIMDSize); + emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize)); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); GenTree* op1 = node->gtGetOp1(); regNumber targetReg = node->GetRegNum(); diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index c2b1aee82c635c..31c164bebc5dcd 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -408,7 +408,6 @@ HARDWARE_INTRINSIC(AVX, BroadcastScalarToVector128, HARDWARE_INTRINSIC(AVX, BroadcastScalarToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, BroadcastVector128ToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastf128, INS_vbroadcastf128}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, Compare, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_IMM, HW_Flag_NoFlag) - HARDWARE_INTRINSIC(AVX, CompareEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX, CompareGreaterThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, CompareGreaterThanOrEqual, 32, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) @@ -421,7 +420,6 @@ HARDWARE_INTRINSIC(AVX, CompareNotLessThan, HARDWARE_INTRINSIC(AVX, CompareNotLessThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, CompareOrdered, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, CompareUnordered, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) - HARDWARE_INTRINSIC(AVX, CompareScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_IMM, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX, ConvertToVector128Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index 973541ef5b4710..dd71f04f5f5101 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -3506,6 +3506,11 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, return hwintrinsic; } + + if ((ni > NI_SIMD_AS_HWINTRINSIC_START) && (ni < NI_SIMD_AS_HWINTRINSIC_END)) + { + return 
impSimdAsHWIntrinsic(ni, clsHnd, method, sig, mustExpand); + } #endif // FEATURE_HW_INTRINSICS } } @@ -4453,6 +4458,21 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) } } #ifdef FEATURE_HW_INTRINSICS + else if (strcmp(namespaceName, "System.Numerics") == 0) + { + CORINFO_SIG_INFO sig; + info.compCompHnd->getMethodSig(method, &sig); + + int sizeOfVectorT = 16; +#if defined(TARGET_XARCH) + if (compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + sizeOfVectorT = 32; + } +#endif // TARGET_XARCH + + result = SimdAsHWIntrinsicInfo::lookupId(&sig, className, methodName, enclosingClassName, sizeOfVectorT); + } else if (strncmp(namespaceName, "System.Runtime.Intrinsics", 25) == 0) { namespaceName += 25; diff --git a/src/coreclr/src/jit/lower.cpp b/src/coreclr/src/jit/lower.cpp index 422abdef34689b..4479a84bf8944d 100644 --- a/src/coreclr/src/jit/lower.cpp +++ b/src/coreclr/src/jit/lower.cpp @@ -1331,7 +1331,7 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) LclVarDsc* varDsc = &comp->lvaTable[varNum]; type = varDsc->lvType; } - else if (arg->OperGet() == GT_SIMD) + else if (arg->OperIs(GT_SIMD, GT_HWINTRINSIC)) { assert((arg->AsSIMD()->gtSIMDSize == 16) || (arg->AsSIMD()->gtSIMDSize == 12)); @@ -5360,6 +5360,7 @@ void Lowering::CheckNode(Compiler* compiler, GenTree* node) #ifdef FEATURE_SIMD case GT_SIMD: + case GT_HWINTRINSIC: assert(node->TypeGet() != TYP_SIMD12); break; #ifdef TARGET_64BIT diff --git a/src/coreclr/src/jit/lowerarmarch.cpp b/src/coreclr/src/jit/lowerarmarch.cpp index 6eee592dc949dc..7e4810eaf9606d 100644 --- a/src/coreclr/src/jit/lowerarmarch.cpp +++ b/src/coreclr/src/jit/lowerarmarch.cpp @@ -526,6 +526,15 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode) // void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { + assert(node->TypeGet() != TYP_SIMD32); + + if (node->TypeGet() == TYP_SIMD12) + { + // GT_HWINTRINSIC node requiring to produce TYP_SIMD12 in fact + // produces a TYP_SIMD16 result + 
node->gtType = TYP_SIMD16; + } + ContainCheckHWIntrinsic(node); } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index 92e9965f17c818..1e39664a6c81b1 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -920,6 +920,13 @@ void Lowering::LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { + if (node->TypeGet() == TYP_SIMD12) + { + // GT_HWINTRINSIC node requiring to produce TYP_SIMD12 in fact + // produces a TYP_SIMD16 result + node->gtType = TYP_SIMD16; + } + switch (node->gtHWIntrinsicId) { case NI_SSE_CompareScalarOrderedEqual: @@ -2975,12 +2982,13 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsicId); int numArgs = HWIntrinsicInfo::lookupNumArgs(node); var_types baseType = node->gtSIMDBaseType; + unsigned simdSize = node->gtSIMDSize; GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2(); GenTree* op3 = nullptr; - if (!HWIntrinsicInfo::SupportsContainment(intrinsicId)) + if (!HWIntrinsicInfo::SupportsContainment(intrinsicId) || (simdSize == 8) || (simdSize == 12)) { // AVX2 gather are not containable and always have constant IMM argument if (HWIntrinsicInfo::isAVX2GatherIntrinsic(intrinsicId)) diff --git a/src/coreclr/src/jit/namedintrinsiclist.h b/src/coreclr/src/jit/namedintrinsiclist.h index f4969a57ea2984..b3c6a7510eb7f2 100644 --- a/src/coreclr/src/jit/namedintrinsiclist.h +++ b/src/coreclr/src/jit/namedintrinsiclist.h @@ -38,6 +38,18 @@ enum NamedIntrinsic : unsigned short #include "hwintrinsiclistarm64.h" #endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) NI_HW_INTRINSIC_END, + + NI_SIMD_AS_HWINTRINSIC_START, +#if defined(TARGET_XARCH) +#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + NI_##classId##_##name, +#include 
"simdashwintrinsiclistxarch.h" +#elif defined(TARGET_ARM64) +#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + NI_##classId##_##name, +#include "simdashwintrinsiclistarm64.h" +#endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) + NI_SIMD_AS_HWINTRINSIC_END, #endif // FEATURE_HW_INTRINSICS }; diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp new file mode 100644 index 00000000000000..2b359f286f73bb --- /dev/null +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -0,0 +1,402 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#include "jitpch.h" +#include "simdashwintrinsic.h" + +#ifdef FEATURE_HW_INTRINSICS + +static const SimdAsHWIntrinsicInfo simdAsHWIntrinsicInfoArray[] = { +// clang-format off +#if defined(TARGET_XARCH) +#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + {NI_##classId##_##name, #name, SimdAsHWIntrinsicClassId::##classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast(flag)}, +#include "simdashwintrinsiclistxarch.h" +#elif defined(TARGET_ARM64) +#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ + {NI_##classId##_##name, #name, SimdAsHWIntrinsicClassId::##classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast(flag)}, +#include "simdashwintrinsiclistarm64.h" +#else +#error Unsupported platform +#endif + // clang-format on +}; + +//------------------------------------------------------------------------ +// lookup: Gets the SimdAsHWIntrinsicInfo associated with a given NamedIntrinsic +// +// Arguments: +// id -- The NamedIntrinsic associated with the SimdAsHWIntrinsic to lookup +// +// Return Value: +// The SimdAsHWIntrinsicInfo associated with id +const 
SimdAsHWIntrinsicInfo& SimdAsHWIntrinsicInfo::lookup(NamedIntrinsic id) +{ + assert(id != NI_Illegal); + + assert(id > NI_SIMD_AS_HWINTRINSIC_START); + assert(id < NI_SIMD_AS_HWINTRINSIC_END); + + return simdAsHWIntrinsicInfoArray[id - NI_SIMD_AS_HWINTRINSIC_START - 1]; +} + +//------------------------------------------------------------------------ +// lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet +// +// Arguments: +// className -- The name of the class associated with the SimdIntrinsic to lookup +// methodName -- The name of the method associated with the SimdIntrinsic to lookup +// enclosingClassName -- The name of the enclosing class +// sizeOfVectorT -- The size of Vector in bytes +// +// Return Value: +// The NamedIntrinsic associated with methodName and classId +NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(CORINFO_SIG_INFO* sig, + const char* className, + const char* methodName, + const char* enclosingClassName, + int sizeOfVectorT) +{ + SimdAsHWIntrinsicClassId classId = lookupClassId(className, enclosingClassName, sizeOfVectorT); + + if (classId == SimdAsHWIntrinsicClassId::Unknown) + { + return NI_Illegal; + } + + for (int i = 0; i < (NI_SIMD_AS_HWINTRINSIC_END - NI_SIMD_AS_HWINTRINSIC_START - 1); i++) + { + const SimdAsHWIntrinsicInfo& intrinsicInfo = simdAsHWIntrinsicInfoArray[i]; + + if (classId != intrinsicInfo.classId) + { + continue; + } + + if (sig->numArgs != static_cast(intrinsicInfo.numArgs)) + { + continue; + } + + if (sig->hasThis() != SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsicInfo.id)) + { + continue; + } + + if (strcmp(methodName, intrinsicInfo.name) != 0) + { + continue; + } + + return intrinsicInfo.id; + } + + return NI_Illegal; +} + +//------------------------------------------------------------------------ +// lookupClassId: Gets the SimdAsHWIntrinsicClassId for a given class name and enclsoing class name +// +// Arguments: +// className -- The name of the class associated with the 
SimdAsHWIntrinsicClassId to lookup +// enclosingClassName -- The name of the enclosing class +// sizeOfVectorT -- The size of Vector in bytes +// +// Return Value: +// The SimdAsHWIntrinsicClassId associated with className and enclosingClassName +SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(const char* className, + const char* enclosingClassName, + int sizeOfVectorT) +{ + assert(className != nullptr); + + if ((enclosingClassName != nullptr) || (className[0] != 'V')) + { + return SimdAsHWIntrinsicClassId::Unknown; + } + if (strcmp(className, "Vector2") == 0) + { + return SimdAsHWIntrinsicClassId::Vector2; + } + if (strcmp(className, "Vector3") == 0) + { + return SimdAsHWIntrinsicClassId::Vector3; + } + if (strcmp(className, "Vector4") == 0) + { + return SimdAsHWIntrinsicClassId::Vector4; + } + if (strcmp(className, "Vector`1") == 0) + { +#if defined(TARGET_XARCH) + if (sizeOfVectorT == 32) + { + return SimdAsHWIntrinsicClassId::VectorT256; + } +#endif // TARGET_XARCH + + assert(sizeOfVectorT == 16); + return SimdAsHWIntrinsicClassId::VectorT128; + } + + return SimdAsHWIntrinsicClassId::Unknown; +} + +GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_METHOD_HANDLE method, + CORINFO_SIG_INFO* sig, + bool mustExpand) +{ + if (!featureSIMD) + { + return nullptr; + } + + var_types retType = JITtype2varType(sig->retType); + var_types baseType = TYP_UNKNOWN; + var_types simdType = TYP_UNKNOWN; + unsigned simdSize = 0; + + if (retType == TYP_STRUCT) + { + baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &simdSize); + simdType = getSIMDTypeForSize(simdSize); + retType = simdType; + } + else + { + assert(!"Unexpected SimdAsHWIntrinsic"); + return nullptr; + } + + if (!varTypeIsArithmetic(baseType)) + { + return nullptr; + } + + NamedIntrinsic hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + CORINFO_InstructionSet hwIntrinsicIsa = 
HWIntrinsicInfo::lookupIsa(hwIntrinsic); + bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic); + + if ((hwIntrinsic == NI_Illegal) || !varTypeIsSIMD(simdType)) + { + assert(!"Unexpected SimdAsHWIntrinsic"); + return nullptr; + } + + if (!compOpportunisticallyDependsOn(hwIntrinsicIsa)) + { + return nullptr; + } + + if (SimdAsHWIntrinsicInfo::IsFloatingPointUsed(intrinsic)) + { + // Set `compFloatingPointUsed` to cover the scenario where an intrinsic + // is operating on SIMD fields, but where no SIMD local vars are in use. + compFloatingPointUsed = true; + } + + if (!SimdAsHWIntrinsicInfo::IsTableDriven(intrinsic)) + { + return impSimdAsHWIntrinsicSpecial(intrinsic, clsHnd, method, sig, mustExpand); + } + + CORINFO_ARG_LIST_HANDLE argList = sig->args; + var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass; + + GenTree* op1 = nullptr; + GenTree* op2 = nullptr; + + switch (sig->numArgs) + { + case 2: + { + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList); + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + + if (SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)) + { + GenTree* tmp = op2; + op2 = op1; + op1 = tmp; + } + + return gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + } + } + + assert(!"Unexpected SimdAsHWIntrinsic"); + return nullptr; +} + +GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_METHOD_HANDLE method, + CORINFO_SIG_INFO* sig, + bool mustExpand) +{ + assert(featureSIMD); + assert(SimdAsHWIntrinsicInfo::IsTableDriven(intrinsic)); + + var_types retType = JITtype2varType(sig->retType); + var_types baseType = TYP_UNKNOWN; + var_types simdType = TYP_UNKNOWN; + 
unsigned simdSize = 0; + + if (retType == TYP_STRUCT) + { + baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &simdSize); + simdType = getSIMDTypeForSize(simdSize); + retType = simdType; + } + else + { + assert(!"Unexpected SimdAsHWIntrinsic"); + return nullptr; + } + + assert(varTypeIsArithmetic(baseType)); + + NamedIntrinsic hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + CORINFO_InstructionSet hwIntrinsicIsa = HWIntrinsicInfo::lookupIsa(hwIntrinsic); + bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic); + + assert((hwIntrinsic != NI_Illegal) && varTypeIsSIMD(simdType) && compIsaSupportedDebugOnly(hwIntrinsicIsa)); + + CORINFO_ARG_LIST_HANDLE argList = sig->args; + var_types argType = TYP_UNKNOWN; + + GenTree* op1 = nullptr; + GenTree* op2 = nullptr; + +#if defined(TARGET_XARCH) + CORINFO_CLASS_HANDLE argClass; + + switch (sig->numArgs) + { + case 2: + { + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList); + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + + if (SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)) + { + GenTree* tmp = op2; + op2 = op1; + op1 = tmp; + } + + switch (intrinsic) + { + case NI_VectorT128_LessThan: + case NI_VectorT128_LessThanOrEqual: + case NI_VectorT256_LessThan: + case NI_VectorT256_LessThanOrEqual: + { + if (varTypeIsIntegral(baseType)) + { + GenTree* tmp = op2; + op2 = op1; + op1 = tmp; + } + + __fallthrough; + } + + case NI_VectorT128_GreaterThan: + case NI_VectorT128_GreaterThanOrEqual: + case NI_VectorT256_GreaterThan: + case NI_VectorT256_GreaterThanOrEqual: + { + if (varTypeIsUnsigned(baseType)) + { + // Vector, Vector, Vector and Vector: + // Hardware supports > for signed comparison. 
Therefore, to use it for + // comparing unsigned numbers, we subtract a constant from both the + // operands such that the result fits within the corresponding signed + // type. The resulting signed numbers are compared using signed comparison. + // + // Vector: constant to be subtracted is 2^7 + // Vector constant to be subtracted is 2^15 + // Vector constant to be subtracted is 2^31 + // Vector constant to be subtracted is 2^63 + // + // We need to treat op1 and op2 as signed for comparison purpose after + // the transformation. + + GenTree* constVal = nullptr; + + switch (baseType) + { + case TYP_UBYTE: + { + constVal = gtNewIconNode(0x80808080, TYP_INT); + baseType = TYP_BYTE; + break; + } + + case TYP_USHORT: + { + constVal = gtNewIconNode(0x80008000, TYP_INT); + baseType = TYP_SHORT; + break; + } + + case TYP_UINT: + { + constVal = gtNewIconNode(0x80000000, TYP_INT); + baseType = TYP_INT; + break; + } + + case TYP_ULONG: + { + constVal = gtNewLconNode(0x8000000000000000); + baseType = TYP_LONG; + break; + } + + default: + { + unreached(); + } + } + + GenTree* constVector; + GenTree* constVectorDup; + + constVector = gtNewSIMDNode(retType, constVal, nullptr, SIMDIntrinsicInit, constVal->TypeGet(), simdSize); + constVector = impCloneExpr(constVector, &constVectorDup, clsHnd, (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("Clone for Vector unsigned comparison")); + + NamedIntrinsic subtractIntrinsic = (simdSize == 32) ? 
NI_AVX2_Subtract : NI_SSE2_Subtract; + + // op1 = op1 - constVector + op1 = gtNewSimdHWIntrinsicNode(retType, op1, constVector, subtractIntrinsic, baseType, simdSize); + + // op2 = op2 - constVector + op2 = gtNewSimdHWIntrinsicNode(retType, op2, constVectorDup, subtractIntrinsic, baseType, simdSize); + } + + return gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + } + } + } + } +#endif + + assert(!"Unexpected SimdAsHWIntrinsic"); + return nullptr; + +} +#endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/src/jit/simdashwintrinsic.h b/src/coreclr/src/jit/simdashwintrinsic.h new file mode 100644 index 00000000000000..9e561f0c319ac2 --- /dev/null +++ b/src/coreclr/src/jit/simdashwintrinsic.h @@ -0,0 +1,134 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +#ifndef _SIMD_AS_HWINTRINSIC_H_ +#define _SIMD_AS_HWINTRINSIC_H_ + +enum class SimdAsHWIntrinsicClassId { + Unknown, + Vector2, + Vector3, + Vector4, + VectorT128, + VectorT256, +}; + +enum class SimdAsHWIntrinsicFlag : unsigned int +{ + None = 0, + + // Indicates compFloatingPointUsed does not need to be set. + NoFloatingPointUsed = 0x1, + + // Indicates the intrinsic requires special handling and can't be table driven. + NotTableDriven = 0x2, + + // Indicates the intrinsic is for an instance method. + InstanceMethod = 0x04, + + // Indicates the operands should be swapped in importation. 
+ NeedsOperandsSwapped = 0x08, +}; + +inline SimdAsHWIntrinsicFlag operator ~(SimdAsHWIntrinsicFlag value) { + return static_cast(~static_cast(value)); +} + +inline SimdAsHWIntrinsicFlag operator |(SimdAsHWIntrinsicFlag lhs, SimdAsHWIntrinsicFlag rhs) { + return static_cast(static_cast(lhs) | static_cast(rhs)); +} + +inline SimdAsHWIntrinsicFlag operator &(SimdAsHWIntrinsicFlag lhs, SimdAsHWIntrinsicFlag rhs) { + return static_cast(static_cast(lhs) & static_cast(rhs)); +} + +inline SimdAsHWIntrinsicFlag operator ^(SimdAsHWIntrinsicFlag lhs, SimdAsHWIntrinsicFlag rhs) { + return static_cast(static_cast(lhs) ^ static_cast(rhs)); +} + +struct SimdAsHWIntrinsicInfo +{ + NamedIntrinsic id; + const char* name; + SimdAsHWIntrinsicClassId classId; + int numArgs; + NamedIntrinsic hwIntrinsic[10]; + SimdAsHWIntrinsicFlag flags; + + static const SimdAsHWIntrinsicInfo& lookup(NamedIntrinsic id); + + static NamedIntrinsic lookupId(CORINFO_SIG_INFO* sig, + const char* className, + const char* methodName, + const char* enclosingClassName, + int sizeOfVectorT); + static SimdAsHWIntrinsicClassId lookupClassId(const char* className, + const char* enclosingClassName, + int sizeOfVectorT); + + // Member lookup + + static NamedIntrinsic lookupId(NamedIntrinsic id) + { + return lookup(id).id; + } + + static const char* lookupName(NamedIntrinsic id) + { + return lookup(id).name; + } + + static SimdAsHWIntrinsicClassId lookupClassId(NamedIntrinsic id) + { + return lookup(id).classId; + } + + static int lookupNumArgs(NamedIntrinsic id) + { + return lookup(id).numArgs; + } + + static NamedIntrinsic lookupHWIntrinsic(NamedIntrinsic id, var_types type) + { + if ((type < TYP_BYTE) || (type > TYP_DOUBLE)) + { + assert(!"Unexpected type"); + return NI_Illegal; + } + return lookup(id).hwIntrinsic[type - TYP_BYTE]; + } + + static SimdAsHWIntrinsicFlag lookupFlags(NamedIntrinsic id) + { + return lookup(id).flags; + } + + // Flags lookup + + static bool IsFloatingPointUsed(NamedIntrinsic id) + { + 
SimdAsHWIntrinsicFlag flags = lookupFlags(id); + return (flags & SimdAsHWIntrinsicFlag::NoFloatingPointUsed) == SimdAsHWIntrinsicFlag::None; + } + + static bool IsInstanceMethod(NamedIntrinsic id) + { + SimdAsHWIntrinsicFlag flags = lookupFlags(id); + return (flags & SimdAsHWIntrinsicFlag::InstanceMethod) == SimdAsHWIntrinsicFlag::InstanceMethod; + } + + static bool IsTableDriven(NamedIntrinsic id) + { + SimdAsHWIntrinsicFlag flags = lookupFlags(id); + return (flags & SimdAsHWIntrinsicFlag::NotTableDriven) == SimdAsHWIntrinsicFlag::None; + } + + static bool NeedsOperandsSwapped(NamedIntrinsic id) + { + SimdAsHWIntrinsicFlag flags = lookupFlags(id); + return (flags & SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) == SimdAsHWIntrinsicFlag::NeedsOperandsSwapped; + } +}; + +#endif // _SIMD_AS_HWINTRINSIC_H_ diff --git a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h new file mode 100644 index 00000000000000..09c1d50aa8a1dc --- /dev/null +++ b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h @@ -0,0 +1,92 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +/*****************************************************************************/ +#ifndef SIMD_AS_HWINTRINSIC +#error Define SIMD_AS_HWINTRINSIC before including this file +#endif +/*****************************************************************************/ + +// clang-format off + +#ifdef FEATURE_HW_INTRINSICS + +/* Note + * Each intrinsic has a unique Intrinsic ID with type of `enum NamedIntrinsic` + * Each intrinsic has a `NumArg` for number of parameters + * Each intrinsic has 10 `NamedIntrinsic` fields that list the HWIntrinsic that should be generated based-on the base type + * NI_Illegal is used to represent an unsupported type + * Using the same Intrinsic ID as the represented entry is used to indicate special handling is required + * Each intrinsic has one or more flags with type of `enum SimdAsHWIntrinsicFlag` +*/ + +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector2 Intrinsics +SIMD_AS_HWINTRINSIC(Vector2, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) + +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector3 Intrinsics +SIMD_AS_HWINTRINSIC(Vector3, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_AdvSimd_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) + +// 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector4 Intrinsics +SIMD_AS_HWINTRINSIC(Vector4, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) 
+SIMD_AS_HWINTRINSIC(Vector4, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) + +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, 
TYP_DOUBLE} +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector Intrinsics +SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, GreaterThanOrEqual, 2, {NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, 
NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, LessThan, 2, {NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, LessThanOrEqual, 2, {NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Addition, 2, {NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Arm64_Add}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_AdvSimd_Arm64_Divide}, SimdAsHWIntrinsicFlag::None) 
+SIMD_AS_HWINTRINSIC(VectorT128, op_ExclusiveOr, 2, {NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor, NI_AdvSimd_Xor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_AdvSimd_Multiply, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_AdvSimd_Arm64_Multiply}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Subtract, NI_AdvSimd_Arm64_Subtract}, SimdAsHWIntrinsicFlag::None) + +#endif // FEATURE_HW_INTRINSICS + +#undef SIMD_AS_HWINTRINSIC + +// clang-format on diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h new file mode 100644 index 00000000000000..ead239560477a8 --- /dev/null +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -0,0 +1,111 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +/*****************************************************************************/ +#ifndef SIMD_AS_HWINTRINSIC +#error Define SIMD_AS_HWINTRINSIC before including this file +#endif +/*****************************************************************************/ + +// clang-format off + +#ifdef FEATURE_HW_INTRINSICS + +/* Note + * Each intrinsic has a unique Intrinsic ID with type of `enum NamedIntrinsic` + * Each intrinsic has a `NumArg` for number of parameters + * Each intrinsic has 10 `NamedIntrinsic` fields that list the HWIntrinsic that should be generated based-on the base type + * NI_Illegal is used to represent an unsupported type + * Using the same Intrinsic ID as the represented entry is used to indicate special handling is required + * Each intrinsic has one or more flags with type of `enum SimdAsHWIntrinsicFlag` +*/ + +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector2 Intrinsics +SIMD_AS_HWINTRINSIC(Vector2, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, 
NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) + +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector3 Intrinsics +SIMD_AS_HWINTRINSIC(Vector3, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, GreaterThan, 2, 
{NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) + +// 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector4 Intrinsics +SIMD_AS_HWINTRINSIC(Vector4, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, LessThan, 2, {NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) + +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector Intrinsics +SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE_AndNot, NI_SSE2_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) +SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE41_CompareEqual, NI_SSE41_CompareEqual, NI_SSE_CompareEqual, NI_SSE2_CompareEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE42_CompareGreaterThan, NI_SSE42_CompareGreaterThan, NI_SSE_CompareGreaterThan, NI_SSE2_CompareGreaterThan}, SimdAsHWIntrinsicFlag::NotTableDriven) +SIMD_AS_HWINTRINSIC(VectorT128, GreaterThanOrEqual, 2, {NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE42_CompareGreaterThan, NI_SSE42_CompareGreaterThan, NI_SSE_CompareGreaterThanOrEqual, NI_SSE2_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::NotTableDriven) +SIMD_AS_HWINTRINSIC(VectorT128, LessThan, 2, {NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, 
NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE42_CompareGreaterThan, NI_SSE42_CompareGreaterThan, NI_SSE_CompareLessThan, NI_SSE2_CompareLessThan}, SimdAsHWIntrinsicFlag::NotTableDriven) +SIMD_AS_HWINTRINSIC(VectorT128, LessThanOrEqual, 2, {NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE42_CompareGreaterThan, NI_SSE42_CompareGreaterThan, NI_SSE_CompareLessThanOrEqual, NI_SSE2_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::NotTableDriven) +SIMD_AS_HWINTRINSIC(VectorT128, op_Addition, 2, {NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE_Add, NI_SSE2_Add}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE_And, NI_SSE2_And}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE_Or, NI_SSE2_Or}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_SSE2_Divide}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_ExclusiveOr, 2, {NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE_Xor, NI_SSE2_Xor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_SSE2_MultiplyLow, NI_Illegal, NI_SSE41_MultiplyLow, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_SSE2_Multiply}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_SSE2_Subtract, NI_SSE2_Subtract, 
NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE_Subtract, NI_SSE2_Subtract}, SimdAsHWIntrinsicFlag::None) + +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// ISA Function name NumArg Instructions Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* +// Vector Intrinsics +SIMD_AS_HWINTRINSIC(VectorT256, AndNot, 2, {NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX_AndNot, NI_AVX_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) +SIMD_AS_HWINTRINSIC(VectorT256, Equals, 2, {NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX_CompareEqual, NI_AVX_CompareEqual}, 
SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, GreaterThan, 2, {NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX_CompareGreaterThan, NI_AVX_CompareGreaterThan}, SimdAsHWIntrinsicFlag::NotTableDriven) +SIMD_AS_HWINTRINSIC(VectorT256, GreaterThanOrEqual, 2, {NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX_CompareGreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::NotTableDriven) +SIMD_AS_HWINTRINSIC(VectorT256, LessThan, 2, {NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX_CompareLessThan, NI_AVX_CompareLessThan}, SimdAsHWIntrinsicFlag::NotTableDriven) +SIMD_AS_HWINTRINSIC(VectorT256, LessThanOrEqual, 2, {NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX_CompareLessThanOrEqual, NI_AVX_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::NotTableDriven) +SIMD_AS_HWINTRINSIC(VectorT256, op_Addition, 2, {NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX_Add, NI_AVX_Add}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, op_BitwiseAnd, 2, {NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX_And, NI_AVX_And}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, op_BitwiseOr, 2, 
{NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX_Or, NI_AVX_Or}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Divide, NI_AVX_Divide}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, op_ExclusiveOr, 2, {NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX2_Xor, NI_AVX_Xor, NI_AVX_Xor}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_AVX2_MultiplyLow, NI_Illegal, NI_Illegal, NI_Illegal, NI_AVX_Multiply, NI_AVX_Multiply}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, op_Subtraction, 2, {NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX2_Subtract, NI_AVX_Subtract, NI_AVX_Subtract}, SimdAsHWIntrinsicFlag::None) + +#endif // FEATURE_HW_INTRINSICS + +#undef SIMD_AS_HWINTRINSIC + +// clang-format on From 32209f35a11a447b654a1060e5bfa0f2719266fa Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 24 Apr 2020 12:46:40 -0700 Subject: [PATCH 02/40] Applying formatting patch --- src/coreclr/src/jit/namedintrinsiclist.h | 6 ++-- src/coreclr/src/jit/simdashwintrinsic.cpp | 35 ++++++++++++----------- src/coreclr/src/jit/simdashwintrinsic.h | 15 ++++++---- 3 files changed, 31 insertions(+), 25 deletions(-) diff --git a/src/coreclr/src/jit/namedintrinsiclist.h b/src/coreclr/src/jit/namedintrinsiclist.h index b3c6a7510eb7f2..d105eabdbb3dd9 100644 --- a/src/coreclr/src/jit/namedintrinsiclist.h +++ b/src/coreclr/src/jit/namedintrinsiclist.h @@ -41,12 +41,10 @@ enum NamedIntrinsic : unsigned short NI_SIMD_AS_HWINTRINSIC_START, #if defined(TARGET_XARCH) -#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) 
\ - NI_##classId##_##name, +#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) NI_##classId##_##name, #include "simdashwintrinsiclistxarch.h" #elif defined(TARGET_ARM64) -#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ - NI_##classId##_##name, +#define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) NI_##classId##_##name, #include "simdashwintrinsiclistarm64.h" #endif // !defined(TARGET_XARCH) && !defined(TARGET_ARM64) NI_SIMD_AS_HWINTRINSIC_END, diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 2b359f286f73bb..32c4859e185ff3 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -106,8 +106,8 @@ NamedIntrinsic SimdAsHWIntrinsicInfo::lookupId(CORINFO_SIG_INFO* sig, // Return Value: // The SimdAsHWIntrinsicClassId associated with className and enclosingClassName SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(const char* className, - const char* enclosingClassName, - int sizeOfVectorT) + const char* enclosingClassName, + int sizeOfVectorT) { assert(className != nullptr); @@ -203,9 +203,9 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, return impSimdAsHWIntrinsicSpecial(intrinsic, clsHnd, method, sig, mustExpand); } - CORINFO_ARG_LIST_HANDLE argList = sig->args; - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass; + CORINFO_ARG_LIST_HANDLE argList = sig->args; + var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass; GenTree* op1 = nullptr; GenTree* op2 = nullptr; @@ -215,8 +215,8 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, case 2: { CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList); - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = getArgForHWIntrinsic(argType, argClass); + 
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); @@ -270,8 +270,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, assert((hwIntrinsic != NI_Illegal) && varTypeIsSIMD(simdType) && compIsaSupportedDebugOnly(hwIntrinsicIsa)); - CORINFO_ARG_LIST_HANDLE argList = sig->args; - var_types argType = TYP_UNKNOWN; + CORINFO_ARG_LIST_HANDLE argList = sig->args; + var_types argType = TYP_UNKNOWN; GenTree* op1 = nullptr; GenTree* op2 = nullptr; @@ -284,8 +284,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, case 2: { CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList); - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = getArgForHWIntrinsic(argType, argClass); + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); @@ -376,16 +376,20 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, GenTree* constVector; GenTree* constVectorDup; - constVector = gtNewSIMDNode(retType, constVal, nullptr, SIMDIntrinsicInit, constVal->TypeGet(), simdSize); - constVector = impCloneExpr(constVector, &constVectorDup, clsHnd, (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("Clone for Vector unsigned comparison")); + constVector = + gtNewSIMDNode(retType, constVal, nullptr, SIMDIntrinsicInit, constVal->TypeGet(), simdSize); + constVector = impCloneExpr(constVector, &constVectorDup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector unsigned comparison")); NamedIntrinsic 
subtractIntrinsic = (simdSize == 32) ? NI_AVX2_Subtract : NI_SSE2_Subtract; // op1 = op1 - constVector - op1 = gtNewSimdHWIntrinsicNode(retType, op1, constVector, subtractIntrinsic, baseType, simdSize); + op1 = + gtNewSimdHWIntrinsicNode(retType, op1, constVector, subtractIntrinsic, baseType, simdSize); // op2 = op2 - constVector - op2 = gtNewSimdHWIntrinsicNode(retType, op2, constVectorDup, subtractIntrinsic, baseType, simdSize); + op2 = gtNewSimdHWIntrinsicNode(retType, op2, constVectorDup, subtractIntrinsic, baseType, + simdSize); } return gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); @@ -397,6 +401,5 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, assert(!"Unexpected SimdAsHWIntrinsic"); return nullptr; - } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/src/jit/simdashwintrinsic.h b/src/coreclr/src/jit/simdashwintrinsic.h index 9e561f0c319ac2..9fc39d7ab5573a 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.h +++ b/src/coreclr/src/jit/simdashwintrinsic.h @@ -5,7 +5,8 @@ #ifndef _SIMD_AS_HWINTRINSIC_H_ #define _SIMD_AS_HWINTRINSIC_H_ -enum class SimdAsHWIntrinsicClassId { +enum class SimdAsHWIntrinsicClassId +{ Unknown, Vector2, Vector3, @@ -31,19 +32,23 @@ enum class SimdAsHWIntrinsicFlag : unsigned int NeedsOperandsSwapped = 0x08, }; -inline SimdAsHWIntrinsicFlag operator ~(SimdAsHWIntrinsicFlag value) { +inline SimdAsHWIntrinsicFlag operator~(SimdAsHWIntrinsicFlag value) +{ return static_cast(~static_cast(value)); } -inline SimdAsHWIntrinsicFlag operator |(SimdAsHWIntrinsicFlag lhs, SimdAsHWIntrinsicFlag rhs) { +inline SimdAsHWIntrinsicFlag operator|(SimdAsHWIntrinsicFlag lhs, SimdAsHWIntrinsicFlag rhs) +{ return static_cast(static_cast(lhs) | static_cast(rhs)); } -inline SimdAsHWIntrinsicFlag operator &(SimdAsHWIntrinsicFlag lhs, SimdAsHWIntrinsicFlag rhs) { +inline SimdAsHWIntrinsicFlag operator&(SimdAsHWIntrinsicFlag lhs, SimdAsHWIntrinsicFlag rhs) +{ return static_cast(static_cast(lhs) 
& static_cast(rhs)); } -inline SimdAsHWIntrinsicFlag operator ^(SimdAsHWIntrinsicFlag lhs, SimdAsHWIntrinsicFlag rhs) { +inline SimdAsHWIntrinsicFlag operator^(SimdAsHWIntrinsicFlag lhs, SimdAsHWIntrinsicFlag rhs) +{ return static_cast(static_cast(lhs) ^ static_cast(rhs)); } From a5ad01c6a27225870d8abaf9e06d0229a6b472e5 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 24 Apr 2020 12:57:43 -0700 Subject: [PATCH 03/40] Fixing a preprocessor concatenation for non windows --- src/coreclr/src/jit/simdashwintrinsic.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 32c4859e185ff3..c7280004f7e672 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -11,11 +11,11 @@ static const SimdAsHWIntrinsicInfo simdAsHWIntrinsicInfoArray[] = { // clang-format off #if defined(TARGET_XARCH) #define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ - {NI_##classId##_##name, #name, SimdAsHWIntrinsicClassId::##classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast(flag)}, + {NI_##classId##_##name, #name, SimdAsHWIntrinsicClassId::classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast(flag)}, #include "simdashwintrinsiclistxarch.h" #elif defined(TARGET_ARM64) #define SIMD_AS_HWINTRINSIC(classId, name, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, flag) \ - {NI_##classId##_##name, #name, SimdAsHWIntrinsicClassId::##classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast(flag)}, + {NI_##classId##_##name, #name, SimdAsHWIntrinsicClassId::classId, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast(flag)}, #include "simdashwintrinsiclistarm64.h" #else #error Unsupported platform From 3b5f8f488b253f48cc6cc11f106e03d249a4a8ce Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 24 Apr 2020 13:24:54 -0700 Subject: [PATCH 
04/40] Add a default case to work around a compiler warning on FreeBSD --- src/coreclr/src/jit/simdashwintrinsic.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index c7280004f7e672..d80ceb14f9f9a8 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -394,6 +394,13 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, return gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); } + + default: + { + // Some platforms warn about unhandled switch cases + // We handle it more generally via the assert and return below. + break; + } } } } From 8744b7b56f53acb168d1487b6c5750383496d293 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 24 Apr 2020 15:42:39 -0700 Subject: [PATCH 05/40] Fixing a noway_assert to include GT_HWINTRINSIC --- src/coreclr/src/jit/flowgraph.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/coreclr/src/jit/flowgraph.cpp b/src/coreclr/src/jit/flowgraph.cpp index 1a11b13cb4ef21..5ab5036d8e44ce 100644 --- a/src/coreclr/src/jit/flowgraph.cpp +++ b/src/coreclr/src/jit/flowgraph.cpp @@ -22298,12 +22298,10 @@ void Compiler::fgNoteNonInlineCandidate(Statement* stmt, GenTreeCall* call) */ GenTree* Compiler::fgGetStructAsStructPtr(GenTree* tree) { - noway_assert((tree->gtOper == GT_LCL_VAR) || (tree->gtOper == GT_FIELD) || (tree->gtOper == GT_IND) || - (tree->gtOper == GT_BLK) || (tree->gtOper == GT_OBJ) || tree->OperIsSIMD() || - // tree->gtOper == GT_CALL || cannot get address of call. - // tree->gtOper == GT_MKREFANY || inlining should've been aborted due to mkrefany opcode. 
- // tree->gtOper == GT_RET_EXPR || cannot happen after fgUpdateInlineReturnExpressionPlaceHolder - (tree->gtOper == GT_COMMA)); + noway_assert(tree->OperIs(GT_LCL_VAR, GT_FIELD, GT_IND, GT_BLK, GT_OBJ, GT_COMMA) || tree->OperIsSIMD() || tree->OperIsHWIntrinsic()); + // GT_CALL, cannot get address of call. + // GT_MKREFANY, inlining should've been aborted due to mkrefany opcode. + // GT_RET_EXPR, cannot happen after fgUpdateInlineReturnExpressionPlaceHolder switch (tree->OperGet()) { From 4beacab2d973db3aa478fd132fbd9e0080799e4f Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 24 Apr 2020 16:11:34 -0700 Subject: [PATCH 06/40] Fixing some asserts that were being triggered --- src/coreclr/src/jit/simd.cpp | 44 ++++++++++++----------- src/coreclr/src/jit/simdashwintrinsic.cpp | 10 +++--- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp index 9077971e67cf30..bfd8c04f76789c 100644 --- a/src/coreclr/src/jit/simd.cpp +++ b/src/coreclr/src/jit/simd.cpp @@ -162,11 +162,13 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u if (typeHnd == m_simdHandleCache->SIMDFloatHandle) { simdBaseType = TYP_FLOAT; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDIntHandle) { simdBaseType = TYP_INT; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDVector2Handle) @@ -192,46 +194,55 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u } else if (typeHnd == m_simdHandleCache->SIMDVectorHandle) { + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDUShortHandle) { simdBaseType = TYP_USHORT; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDUByteHandle) 
{ simdBaseType = TYP_UBYTE; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDDoubleHandle) { simdBaseType = TYP_DOUBLE; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDLongHandle) { simdBaseType = TYP_LONG; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDShortHandle) { simdBaseType = TYP_SHORT; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDByteHandle) { simdBaseType = TYP_BYTE; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDUIntHandle) { simdBaseType = TYP_UINT; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } else if (typeHnd == m_simdHandleCache->SIMDULongHandle) { simdBaseType = TYP_ULONG; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Known type SIMD Vector\n"); } @@ -253,6 +264,8 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u { if (wcsncmp(&(className[16]), W("Vector`1["), 9) == 0) { + size = getSIMDVectorRegisterByteLength(); + if (wcsncmp(&(className[25]), W("System.Single"), 13) == 0) { m_simdHandleCache->SIMDFloatHandle = typeHnd; @@ -348,6 +361,7 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u else if (wcsncmp(&(className[16]), W("Vector"), 6) == 0) { m_simdHandleCache->SIMDVectorHandle = typeHnd; + size = getSIMDVectorRegisterByteLength(); JITDUMP(" Found type Vector\n"); } else @@ -356,18 +370,6 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u } } } - if (simdBaseType != TYP_UNKNOWN && sizeBytes != nullptr) - { - // If not a fixed size vector then its size is same as SIMD vector - // register length in bytes - if 
(size == 0) - { - size = getSIMDVectorRegisterByteLength(); - } - - *sizeBytes = size; - setUsesSIMDTypes(true); - } } #ifdef FEATURE_HW_INTRINSICS else if (isIntrinsicType(typeHnd)) @@ -776,18 +778,18 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u simdBaseType = TYP_UNKNOWN; } #endif // TARGET_XARCH + } +#endif // FEATURE_HW_INTRINSICS - if (sizeBytes != nullptr) - { - *sizeBytes = size; - } + if (sizeBytes != nullptr) + { + *sizeBytes = size; + } - if (simdBaseType != TYP_UNKNOWN) - { - setUsesSIMDTypes(true); - } + if (simdBaseType != TYP_UNKNOWN) + { + setUsesSIMDTypes(true); } -#endif // FEATURE_HW_INTRINSICS return simdBaseType; } diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index d80ceb14f9f9a8..82829dd01e336c 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -176,16 +176,16 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, return nullptr; } - NamedIntrinsic hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); - CORINFO_InstructionSet hwIntrinsicIsa = HWIntrinsicInfo::lookupIsa(hwIntrinsic); - bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic); + NamedIntrinsic hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic); if ((hwIntrinsic == NI_Illegal) || !varTypeIsSIMD(simdType)) { - assert(!"Unexpected SimdAsHWIntrinsic"); return nullptr; } + CORINFO_InstructionSet hwIntrinsicIsa = HWIntrinsicInfo::lookupIsa(hwIntrinsic); + if (!compOpportunisticallyDependsOn(hwIntrinsicIsa)) { return nullptr; @@ -243,7 +243,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, bool mustExpand) { assert(featureSIMD); - assert(SimdAsHWIntrinsicInfo::IsTableDriven(intrinsic)); + assert(!SimdAsHWIntrinsicInfo::IsTableDriven(intrinsic)); var_types 
retType = JITtype2varType(sig->retType); var_types baseType = TYP_UNKNOWN; From 3af99b289736b46238c3f47e0847d12a4210dc32 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 24 Apr 2020 18:17:28 -0700 Subject: [PATCH 07/40] Use getSIMDVectorRegisterByteLength --- src/coreclr/src/jit/importer.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index dd71f04f5f5101..e05b7397cbeb8e 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -4463,13 +4463,7 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) CORINFO_SIG_INFO sig; info.compCompHnd->getMethodSig(method, &sig); - int sizeOfVectorT = 16; -#if defined(TARGET_XARCH) - if (compOpportunisticallyDependsOn(InstructionSet_AVX2)) - { - sizeOfVectorT = 32; - } -#endif // TARGET_XARCH + int sizeOfVectorT = getSIMDVectorRegisterByteLength(); result = SimdAsHWIntrinsicInfo::lookupId(&sig, className, methodName, enclosingClassName, sizeOfVectorT); } From e229ca0b60eea3a81bb73c674c0500e46edfe747 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 24 Apr 2020 18:39:26 -0700 Subject: [PATCH 08/40] Applying formatting patch --- src/coreclr/src/jit/flowgraph.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/flowgraph.cpp b/src/coreclr/src/jit/flowgraph.cpp index 5ab5036d8e44ce..689fa7f0606e89 100644 --- a/src/coreclr/src/jit/flowgraph.cpp +++ b/src/coreclr/src/jit/flowgraph.cpp @@ -22298,7 +22298,8 @@ void Compiler::fgNoteNonInlineCandidate(Statement* stmt, GenTreeCall* call) */ GenTree* Compiler::fgGetStructAsStructPtr(GenTree* tree) { - noway_assert(tree->OperIs(GT_LCL_VAR, GT_FIELD, GT_IND, GT_BLK, GT_OBJ, GT_COMMA) || tree->OperIsSIMD() || tree->OperIsHWIntrinsic()); + noway_assert(tree->OperIs(GT_LCL_VAR, GT_FIELD, GT_IND, GT_BLK, GT_OBJ, GT_COMMA) || tree->OperIsSIMD() || + tree->OperIsHWIntrinsic()); // GT_CALL, cannot 
get address of call. // GT_MKREFANY, inlining should've been aborted due to mkrefany opcode. // GT_RET_EXPR, cannot happen after fgUpdateInlineReturnExpressionPlaceHolder From 92ec83c98296eacf6d5ac762807b5aed7ba0fb1a Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 24 Apr 2020 18:44:11 -0700 Subject: [PATCH 09/40] Fixing ARM64 to use the actual type size --- src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp index 70fe9c540babe9..d29213fb9bfb73 100644 --- a/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/src/jit/hwintrinsiccodegenarm64.cpp @@ -207,7 +207,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else { - emitSize = EA_SIZE(node->gtSIMDSize); + emitSize = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->gtSIMDSize)); opt = genGetSimdInsOpt(emitSize, intrin.baseType); if ((opt == INS_OPTS_1D) && (intrin.category == HW_Category_SimpleSIMD)) From e9e7b89a852193be7dcc0c2d00c9d272844ae6e3 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 24 Apr 2020 19:30:58 -0700 Subject: [PATCH 10/40] Removing the [Intrinsic] attribute from some Vector2/3/4 methods which aren't intrinsic --- .../src/System/Numerics/Vector2_Intrinsics.cs | 2 -- .../src/System/Numerics/Vector3_Intrinsics.cs | 2 -- .../src/System/Numerics/Vector4_Intrinsics.cs | 2 -- 3 files changed, 6 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs index b2d58347904e5f..27d3469bfb92c5 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector2_Intrinsics.cs @@ -211,7 +211,6 @@ public static Vector2 SquareRoot(Vector2 value) /// The scalar value. /// The source vector. 
/// The scaled vector. - [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector2 operator *(float left, Vector2 right) { @@ -224,7 +223,6 @@ public static Vector2 SquareRoot(Vector2 value) /// The source vector. /// The scalar value. /// The scaled vector. - [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector2 operator *(Vector2 left, float right) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs index c41baa46aa8f71..df32e8331d70c0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector3_Intrinsics.cs @@ -230,7 +230,6 @@ public static Vector3 SquareRoot(Vector3 value) /// The source vector. /// The scalar value. /// The scaled vector. - [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector3 operator *(Vector3 left, float right) { @@ -243,7 +242,6 @@ public static Vector3 SquareRoot(Vector3 value) /// The scalar value. /// The source vector. /// The scaled vector. - [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector3 operator *(float left, Vector3 right) { diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs index 440c78882d44a4..70d692457e1a50 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector4_Intrinsics.cs @@ -265,7 +265,6 @@ public static Vector4 SquareRoot(Vector4 value) /// The source vector. /// The scalar value. /// The scaled vector. 
- [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 operator *(Vector4 left, float right) { @@ -278,7 +277,6 @@ public static Vector4 SquareRoot(Vector4 value) /// The scalar value. /// The source vector. /// The scaled vector. - [Intrinsic] [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector4 operator *(float left, Vector4 right) { From f788049e1f183862f9f8f314faf27d08494021dc Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 25 Apr 2020 08:56:39 -0700 Subject: [PATCH 11/40] Updating SSE/SSE2 CompareGreaterThan and related functions to be table driven --- src/coreclr/src/jit/hwintrinsic.h | 67 ++++++++---- .../src/jit/hwintrinsiccodegenxarch.cpp | 21 +--- src/coreclr/src/jit/hwintrinsiclistxarch.h | 19 ++-- src/coreclr/src/jit/hwintrinsicxarch.cpp | 101 +----------------- src/coreclr/src/jit/lowerxarch.cpp | 66 ++++++++---- 5 files changed, 104 insertions(+), 170 deletions(-) diff --git a/src/coreclr/src/jit/hwintrinsic.h b/src/coreclr/src/jit/hwintrinsic.h index 91b2eb07ec422c..f3eae092d02ee8 100644 --- a/src/coreclr/src/jit/hwintrinsic.h +++ b/src/coreclr/src/jit/hwintrinsic.h @@ -308,7 +308,7 @@ struct HWIntrinsicInfo } #ifdef TARGET_XARCH - static int lookupIval(NamedIntrinsic id) + static int lookupIval(NamedIntrinsic id, bool opportunisticallyDependsOnAVX) { switch (id) { @@ -325,6 +325,17 @@ struct HWIntrinsicInfo case NI_SSE_CompareScalarGreaterThan: case NI_SSE2_CompareGreaterThan: case NI_SSE2_CompareScalarGreaterThan: + case NI_AVX_CompareGreaterThan: + { + if (opportunisticallyDependsOnAVX) + { + return static_cast(FloatComparisonMode::OrderedGreaterThanSignaling); + } + + assert(id != NI_AVX_CompareGreaterThan); + return static_cast(FloatComparisonMode::OrderedLessThanSignaling); + } + case NI_SSE_CompareLessThan: case NI_SSE_CompareScalarLessThan: case NI_SSE2_CompareLessThan: @@ -338,6 +349,17 @@ struct HWIntrinsicInfo case NI_SSE_CompareScalarGreaterThanOrEqual: case 
NI_SSE2_CompareGreaterThanOrEqual: case NI_SSE2_CompareScalarGreaterThanOrEqual: + case NI_AVX_CompareGreaterThanOrEqual: + { + if (opportunisticallyDependsOnAVX) + { + return static_cast(FloatComparisonMode::OrderedGreaterThanOrEqualSignaling); + } + + assert(id != NI_AVX_CompareGreaterThanOrEqual); + return static_cast(FloatComparisonMode::OrderedLessThanOrEqualSignaling); + } + case NI_SSE_CompareLessThanOrEqual: case NI_SSE_CompareScalarLessThanOrEqual: case NI_SSE2_CompareLessThanOrEqual: @@ -360,6 +382,17 @@ struct HWIntrinsicInfo case NI_SSE_CompareScalarNotGreaterThan: case NI_SSE2_CompareNotGreaterThan: case NI_SSE2_CompareScalarNotGreaterThan: + case NI_AVX_CompareNotGreaterThan: + { + if (opportunisticallyDependsOnAVX) + { + return static_cast(FloatComparisonMode::UnorderedNotGreaterThanSignaling); + } + + assert(id != NI_AVX_CompareNotGreaterThan); + return static_cast(FloatComparisonMode::UnorderedNotLessThanSignaling); + } + case NI_SSE_CompareNotLessThan: case NI_SSE_CompareScalarNotLessThan: case NI_SSE2_CompareNotLessThan: @@ -373,6 +406,18 @@ struct HWIntrinsicInfo case NI_SSE_CompareScalarNotGreaterThanOrEqual: case NI_SSE2_CompareNotGreaterThanOrEqual: case NI_SSE2_CompareScalarNotGreaterThanOrEqual: + case NI_AVX_CompareNotGreaterThanOrEqual: + { + if (opportunisticallyDependsOnAVX) + { + return static_cast(FloatComparisonMode::UnorderedNotGreaterThanOrEqualSignaling); + } + + assert(id != NI_AVX_CompareNotGreaterThanOrEqual); + return static_cast(FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling); + + } + case NI_SSE_CompareNotLessThanOrEqual: case NI_SSE_CompareScalarNotLessThanOrEqual: case NI_SSE2_CompareNotLessThanOrEqual: @@ -441,26 +486,6 @@ struct HWIntrinsicInfo return static_cast(FloatRoundingMode::ToZero); } - case NI_AVX_CompareGreaterThan: - { - return static_cast(FloatComparisonMode::OrderedGreaterThanSignaling); - } - - case NI_AVX_CompareGreaterThanOrEqual: - { - return 
static_cast(FloatComparisonMode::OrderedGreaterThanOrEqualSignaling); - } - - case NI_AVX_CompareNotGreaterThan: - { - return static_cast(FloatComparisonMode::UnorderedNotGreaterThanSignaling); - } - - case NI_AVX_CompareNotGreaterThanOrEqual: - { - return static_cast(FloatComparisonMode::UnorderedNotGreaterThanOrEqualSignaling); - } - default: { return -1; diff --git a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp index 2be9f5ced23ca0..99936b265202f5 100644 --- a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp @@ -82,7 +82,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; CORINFO_InstructionSet isa = HWIntrinsicInfo::lookupIsa(intrinsicId); HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsicId); - int ival = HWIntrinsicInfo::lookupIval(intrinsicId); + int ival = HWIntrinsicInfo::lookupIval(intrinsicId, compOpportunisticallyDependsOn(InstructionSet_AVX)); int numArgs = HWIntrinsicInfo::lookupNumArgs(node); assert(HWIntrinsicInfo::RequiresCodegen(intrinsicId)); @@ -1352,25 +1352,6 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { - // All integer overloads are handled by table codegen - case NI_SSE2_CompareLessThan: - { - assert(op1 != nullptr); - assert(op2 != nullptr); - - assert(baseType == TYP_DOUBLE); - - int ival = HWIntrinsicInfo::lookupIval(intrinsicId); - assert((ival >= 0) && (ival <= 127)); - - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - op1Reg = op1->GetRegNum(); - op2Reg = op2->GetRegNum(); - emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival); - - break; - } - case NI_SSE2_X64_ConvertScalarToVector128Double: { assert(baseType == TYP_LONG); diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index 
31c164bebc5dcd..e1be06c90858e2 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -91,11 +91,11 @@ HARDWARE_INTRINSIC(SSE, CompareEqual, HARDWARE_INTRINSIC(SSE, CompareScalarOrderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, CompareScalarEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE, CompareScalarOrderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, CompareScalarGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE, 
CompareScalarUnorderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE, CompareScalarOrderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, CompareScalarGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) @@ -111,9 +111,9 @@ HARDWARE_INTRINSIC(SSE, CompareNotEqual, HARDWARE_INTRINSIC(SSE, CompareScalarOrderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, CompareScalarNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE, CompareScalarNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE, CompareScalarNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE, CompareNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE, CompareScalarNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) @@ -192,15 +192,15 @@ HARDWARE_INTRINSIC(SSE2, CompareEqual, HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareGreaterThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE2, CompareGreaterThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarGreaterThan, 16, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE2, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_Special, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(SSE2, 
CompareLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) @@ -212,9 +212,9 @@ HARDWARE_INTRINSIC(SSE2, CompareNotEqual, HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE2, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2, CompareScalarNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(SSE2, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2, CompareScalarNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, CompareNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2, CompareScalarNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) @@ -489,6 +489,7 @@ HARDWARE_INTRINSIC(AVX2, BroadcastScalarToVector256, HARDWARE_INTRINSIC(AVX2, BroadcastVector128ToVector256, 32, 1, {INS_vbroadcasti128, INS_vbroadcasti128, 
INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_vbroadcasti128, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2, CompareEqual, 32, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX2, CompareGreaterThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, CompareLessThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2, ExtractVector128, 32, 2, {INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX2, ConvertToInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX2, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) diff --git a/src/coreclr/src/jit/hwintrinsicxarch.cpp b/src/coreclr/src/jit/hwintrinsicxarch.cpp index 286a07937ca0e2..97d28b9426ee62 100644 --- a/src/coreclr/src/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/src/jit/hwintrinsicxarch.cpp @@ -1276,35 +1276,6 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic 
intrinsic, CORINFO_METHOD_HAND switch (intrinsic) { - case NI_SSE_CompareGreaterThan: - case NI_SSE_CompareGreaterThanOrEqual: - case NI_SSE_CompareNotGreaterThan: - case NI_SSE_CompareNotGreaterThanOrEqual: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass); - assert(baseType == TYP_FLOAT); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - // These intrinsics are "special import" because the non-AVX path isn't directly - // hardware supported. Instead, they start with "swapped operands" and we fix that here. - - FloatComparisonMode comparison = - static_cast(HWIntrinsicInfo::lookupIval(intrinsic)); - comparison = HWIntrinsicInfo::lookupFloatComparisonModeForSwappedArgs(comparison); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast(comparison)), - NI_AVX_Compare, baseType, simdSize); - } - else - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, baseType, simdSize); - } - break; - } - case NI_SSE_CompareScalarGreaterThan: case NI_SSE_CompareScalarGreaterThanOrEqual: case NI_SSE_CompareScalarNotGreaterThan: @@ -1322,9 +1293,8 @@ GenTree* Compiler::impSSEIntrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAND // hardware supported. Instead, they start with "swapped operands" and we fix that here. 
FloatComparisonMode comparison = - static_cast(HWIntrinsicInfo::lookupIval(intrinsic)); - comparison = HWIntrinsicInfo::lookupFloatComparisonModeForSwappedArgs(comparison); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast(comparison)), + static_cast(HWIntrinsicInfo::lookupIval(intrinsic, true)); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast(comparison)), NI_AVX_CompareScalar, baseType, simdSize); } else @@ -1383,68 +1353,6 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAN switch (intrinsic) { - case NI_SSE2_CompareGreaterThan: - { - if (baseType != TYP_DOUBLE) - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, NI_SSE2_CompareGreaterThan, baseType, simdSize); - - break; - } - - __fallthrough; - } - - case NI_SSE2_CompareGreaterThanOrEqual: - case NI_SSE2_CompareNotGreaterThan: - case NI_SSE2_CompareNotGreaterThanOrEqual: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - assert(baseType == TYP_DOUBLE); - - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - // These intrinsics are "special import" because the non-AVX path isn't directly - // hardware supported. Instead, they start with "swapped operands" and we fix that here. 
- - FloatComparisonMode comparison = - static_cast(HWIntrinsicInfo::lookupIval(intrinsic)); - comparison = HWIntrinsicInfo::lookupFloatComparisonModeForSwappedArgs(comparison); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast(comparison)), - NI_AVX_Compare, baseType, simdSize); - } - else - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, baseType, simdSize); - } - break; - } - - case NI_SSE2_CompareLessThan: - { - assert(sig->numArgs == 2); - op2 = impSIMDPopStack(TYP_SIMD16); - op1 = impSIMDPopStack(TYP_SIMD16); - - if (baseType == TYP_DOUBLE) - { - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, intrinsic, baseType, simdSize); - } - else - { - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, NI_SSE2_CompareGreaterThan, baseType, simdSize); - } - break; - } - case NI_SSE2_CompareScalarGreaterThan: case NI_SSE2_CompareScalarGreaterThanOrEqual: case NI_SSE2_CompareScalarNotGreaterThan: @@ -1461,9 +1369,8 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HAN // hardware supported. Instead, they start with "swapped operands" and we fix that here. 
FloatComparisonMode comparison = - static_cast(HWIntrinsicInfo::lookupIval(intrinsic)); - comparison = HWIntrinsicInfo::lookupFloatComparisonModeForSwappedArgs(comparison); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast(comparison)), + static_cast(HWIntrinsicInfo::lookupIval(intrinsic, true)); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(static_cast(comparison)), NI_AVX_CompareScalar, baseType, simdSize); } else diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index 1e39664a6c81b1..8d0cc0684b8768 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -929,6 +929,49 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) switch (node->gtHWIntrinsicId) { + case NI_SSE2_CompareGreaterThan: + { + if (node->gtBaseType != TYP_DOUBLE) + { + break; + } + + __fallthrough; + } + + case NI_SSE_CompareGreaterThan: + case NI_SSE_CompareGreaterThanOrEqual: + case NI_SSE_CompareNotGreaterThan: + case NI_SSE_CompareNotGreaterThanOrEqual: + case NI_SSE2_CompareGreaterThanOrEqual: + case NI_SSE2_CompareNotGreaterThan: + case NI_SSE2_CompareNotGreaterThanOrEqual: + { + assert((node->gtBaseType == TYP_FLOAT) || (node->gtBaseType == TYP_DOUBLE)); + + if (compOpportunisticallyDependsOn(InstructionSet_AVX)) + { + break; + } + + // pre-AVX doesn't actually support these intrinsics in hardware so we need to swap the operands around + std::swap(node->gtOp1, node->gtOp2) + break; + } + + case NI_SSE2_CompareLessThan: + case NI_AVX2_CompareLessThan: + { + if (node->gtBaseType == TYP_DOUBLE) + { + break; + } + + // this isn't actually supported in hardware so we need to swap the operands around + std::swap(node->gtOp1, node->gtOp2) + break; + } + case NI_SSE_CompareScalarOrderedEqual: LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FEQ); break; @@ -2662,7 +2705,6 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge 
switch (containingIntrinsicId) { case NI_SSE_Shuffle: - case NI_SSE2_CompareLessThan: case NI_SSE2_ShiftLeftLogical: case NI_SSE2_ShiftRightArithmetic: case NI_SSE2_ShiftRightLogical: @@ -3278,28 +3320,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case HW_Category_Special: - { - if (intrinsicId == NI_SSE2_CompareLessThan) - { - bool supportsRegOptional = false; - - if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) - { - MakeSrcContained(node, op2); - } - else if (supportsRegOptional) - { - op2->SetRegOptional(); - } - } - else - { - unreached(); - } - break; - } - default: { unreached(); From 0cf2a0bf4b6243c9037645b546ca56a2a090d92e Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 25 Apr 2020 12:03:55 -0700 Subject: [PATCH 12/40] Fixing the SimdAsHWIntrinsic relational operations to match the GT_SIMD behavior --- src/coreclr/src/jit/compiler.h | 18 +- .../src/jit/hwintrinsiccodegenxarch.cpp | 3 +- src/coreclr/src/jit/lowerxarch.cpp | 12 +- src/coreclr/src/jit/simdashwintrinsic.cpp | 504 +++++++++++++----- src/coreclr/src/jit/simdashwintrinsic.h | 13 +- .../src/jit/simdashwintrinsiclistxarch.h | 18 +- 6 files changed, 400 insertions(+), 168 deletions(-) diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index f9a6651e846923..f30b233276a395 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -3689,11 +3689,12 @@ class Compiler protected: bool compSupportsHWIntrinsic(CORINFO_InstructionSet isa); - GenTree* impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, - CORINFO_CLASS_HANDLE clsHnd, - CORINFO_METHOD_HANDLE method, - CORINFO_SIG_INFO* sig, - bool mustExpand); + GenTree* impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_SIG_INFO* sig, + var_types retType, + var_types baseType, + unsigned simdSize); GenTree* impSpecialIntrinsic(NamedIntrinsic intrinsic, CORINFO_CLASS_HANDLE clsHnd, @@ -3714,6 +3715,13 @@ 
class Compiler GenTree* impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig); GenTree* impBMI1OrBMI2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig); + GenTree* impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, + var_types retType, + var_types baseType, + unsigned simdSize, + GenTree* op1, + GenTree* op2); #endif // TARGET_XARCH #endif // FEATURE_HW_INTRINSICS GenTree* impArrayAccessIntrinsic(CORINFO_CLASS_HANDLE clsHnd, diff --git a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp index 99936b265202f5..e8d5ddc82886d6 100644 --- a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp @@ -82,9 +82,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; CORINFO_InstructionSet isa = HWIntrinsicInfo::lookupIsa(intrinsicId); HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsicId); - int ival = HWIntrinsicInfo::lookupIval(intrinsicId, compOpportunisticallyDependsOn(InstructionSet_AVX)); int numArgs = HWIntrinsicInfo::lookupNumArgs(node); + int ival = HWIntrinsicInfo::lookupIval(intrinsicId, compiler->compOpportunisticallyDependsOn(InstructionSet_AVX)); + assert(HWIntrinsicInfo::RequiresCodegen(intrinsicId)); if (genIsTableDrivenHWIntrinsic(intrinsicId, category)) diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index 8d0cc0684b8768..0eb278c13018c5 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -931,7 +931,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { case NI_SSE2_CompareGreaterThan: { - if (node->gtBaseType != TYP_DOUBLE) + if (node->gtSIMDBaseType != TYP_DOUBLE) { break; } @@ -947,28 +947,28 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) case 
NI_SSE2_CompareNotGreaterThan: case NI_SSE2_CompareNotGreaterThanOrEqual: { - assert((node->gtBaseType == TYP_FLOAT) || (node->gtBaseType == TYP_DOUBLE)); + assert((node->gtSIMDBaseType == TYP_FLOAT) || (node->gtSIMDBaseType == TYP_DOUBLE)); - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX)) { break; } // pre-AVX doesn't actually support these intrinsics in hardware so we need to swap the operands around - std::swap(node->gtOp1, node->gtOp2) + std::swap(node->gtOp1, node->gtOp2); break; } case NI_SSE2_CompareLessThan: case NI_AVX2_CompareLessThan: { - if (node->gtBaseType == TYP_DOUBLE) + if (node->gtSIMDBaseType == TYP_DOUBLE) { break; } // this isn't actually supported in hardware so we need to swap the operands around - std::swap(node->gtOp1, node->gtOp2) + std::swap(node->gtOp1, node->gtOp2); break; } diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 82829dd01e336c..3cdd2ddbf3df7f 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -149,8 +149,11 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_SIG_INFO* sig, bool mustExpand) { + assert(!mustExpand); + if (!featureSIMD) { + // We can't support SIMD intrinsics if the JIT doesn't support the feature return nullptr; } @@ -173,21 +176,15 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, if (!varTypeIsArithmetic(baseType)) { + // We only support intrinsics on the 10 primitive arithmetic types return nullptr; } - NamedIntrinsic hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); - bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic); + NamedIntrinsic hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); if ((hwIntrinsic == NI_Illegal) || !varTypeIsSIMD(simdType)) { - return nullptr; - } - - CORINFO_InstructionSet 
hwIntrinsicIsa = HWIntrinsicInfo::lookupIsa(hwIntrinsic); - - if (!compOpportunisticallyDependsOn(hwIntrinsicIsa)) - { + // The baseType isn't supported by the intrinsic return nullptr; } @@ -198,9 +195,18 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, compFloatingPointUsed = true; } - if (!SimdAsHWIntrinsicInfo::IsTableDriven(intrinsic)) + if (hwIntrinsic == intrinsic) { - return impSimdAsHWIntrinsicSpecial(intrinsic, clsHnd, method, sig, mustExpand); + // The SIMD intrinsic requires special handling outside the normal code path + return impSimdAsHWIntrinsicSpecial(intrinsic, clsHnd, sig, retType, baseType, simdSize); + } + + CORINFO_InstructionSet hwIntrinsicIsa = HWIntrinsicInfo::lookupIsa(hwIntrinsic); + + if (!compOpportunisticallyDependsOn(hwIntrinsicIsa)) + { + // The JIT doesn't support the required ISA + return nullptr; } CORINFO_ARG_LIST_HANDLE argList = sig->args; @@ -210,6 +216,8 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, GenTree* op1 = nullptr; GenTree* op2 = nullptr; + bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic); + switch (sig->numArgs) { case 2: @@ -236,39 +244,19 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, return nullptr; } -GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, - CORINFO_CLASS_HANDLE clsHnd, - CORINFO_METHOD_HANDLE method, - CORINFO_SIG_INFO* sig, - bool mustExpand) +GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, + CORINFO_SIG_INFO* sig, + var_types retType, + var_types baseType, + unsigned simdSize) { assert(featureSIMD); - assert(!SimdAsHWIntrinsicInfo::IsTableDriven(intrinsic)); - - var_types retType = JITtype2varType(sig->retType); - var_types baseType = TYP_UNKNOWN; - var_types simdType = TYP_UNKNOWN; - unsigned simdSize = 0; - - if (retType == TYP_STRUCT) - { - baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &simdSize); - simdType = 
getSIMDTypeForSize(simdSize); - retType = simdType; - } - else - { - assert(!"Unexpected SimdAsHWIntrinsic"); - return nullptr; - } - + assert(retType != TYP_UNKNOWN); assert(varTypeIsArithmetic(baseType)); - - NamedIntrinsic hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); - CORINFO_InstructionSet hwIntrinsicIsa = HWIntrinsicInfo::lookupIsa(hwIntrinsic); - bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic); - - assert((hwIntrinsic != NI_Illegal) && varTypeIsSIMD(simdType) && compIsaSupportedDebugOnly(hwIntrinsicIsa)); + assert(simdSize != 0); + assert(varTypeIsSIMD(getSIMDTypeForSize(simdSize))); + assert(SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType) == intrinsic); CORINFO_ARG_LIST_HANDLE argList = sig->args; var_types argType = TYP_UNKNOWN; @@ -276,7 +264,24 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, GenTree* op1 = nullptr; GenTree* op2 = nullptr; + SimdAsHWIntrinsicClassId classId = SimdAsHWIntrinsicInfo::lookupClassId(intrinsic); + bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic); + #if defined(TARGET_XARCH) + if ((baseType != TYP_FLOAT) && !compOpportunisticallyDependsOn(InstructionSet_SSE2)) + { + // Vector, for everything but float, requires at least SSE2 + return nullptr; + } + else if (!compOpportunisticallyDependsOn(InstructionSet_SSE)) + { + // Vector requires at least SSE + return nullptr; + } + + // Vector, when 32-bytes, requires at least AVX2 + assert((classId != SimdAsHWIntrinsicClassId::VectorT256) || compIsaSupportedDebugOnly(InstructionSet_AVX2)); + CORINFO_CLASS_HANDLE argClass; switch (sig->numArgs) @@ -290,123 +295,350 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); - if (SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)) - { - 
GenTree* tmp = op2; - op2 = op1; - op1 = tmp; - } + assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); switch (intrinsic) { + case NI_VectorT128_GreaterThan: + case NI_VectorT128_GreaterThanOrEqual: case NI_VectorT128_LessThan: case NI_VectorT128_LessThanOrEqual: + case NI_VectorT256_GreaterThan: + case NI_VectorT256_GreaterThanOrEqual: case NI_VectorT256_LessThan: case NI_VectorT256_LessThanOrEqual: { - if (varTypeIsIntegral(baseType)) - { - GenTree* tmp = op2; - op2 = op1; - op1 = tmp; - } + return impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, baseType, simdSize, op1, op2); + } - __fallthrough; + default: + { + // Some platforms warn about unhandled switch cases + // We handle it more generally via the assert and nullptr return below. + break; } + } + } + } +#endif - case NI_VectorT128_GreaterThan: + assert(!"Unexpected SimdAsHWIntrinsic"); + return nullptr; +} + +#if defined(TARGET_XARCH) +GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, + CORINFO_CLASS_HANDLE clsHnd, + var_types retType, + var_types baseType, + unsigned simdSize, + GenTree* op1, + GenTree* op2) +{ + assert(featureSIMD); + assert(retType != TYP_UNKNOWN); + assert(varTypeIsIntegral(baseType)); + assert(simdSize != 0); + assert(varTypeIsSIMD(getSIMDTypeForSize(simdSize))); + assert(op1 != nullptr); + assert(op2 != nullptr); + assert(!SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic)); + + bool isVectorT256 = (SimdAsHWIntrinsicInfo::lookupClassId(intrinsic) == SimdAsHWIntrinsicClassId::VectorT256); + + // Vector for the rel-ops covered here requires at least SSE2 + assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); + + // Vector, when 32-bytes, requires at least AVX2 + assert(!isVectorT256 || compIsaSupportedDebugOnly(InstructionSet_AVX2)); + + switch (intrinsic) + { + case NI_VectorT128_Equals: + case NI_VectorT256_Equals: + { + // These ones aren't "special", but they are used by the other + // relational operators and so are defined for 
convenience. + + NamedIntrinsic hwIntrinsic = NI_Illegal; + + if (isVectorT256 || (baseType != TYP_LONG)) + { + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + assert(hwIntrinsic != intrinsic); + } + else if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + hwIntrinsic = NI_SSE41_CompareEqual; + } + else + { + // There is no direct SSE2 support for comparing TYP_LONG vectors. + // These have to be implemented in terms of TYP_INT vector comparison operations. + // + // t = (op1 == op2) i.e. compare for equality as if op1 and op2 are Vector + // op1 = t + // op2 = Shuffle(t, (2, 3, 0, 1)) + // result = BitwiseAnd(op1, op2) + // + // Shuffle is meant to swap the comparison results of low-32-bits and high 32-bits of + // respective long elements. + + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, TYP_INT); + assert(hwIntrinsic != intrinsic); + + GenTree* t = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + + t = impCloneExpr(t, &op1, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector equality comparison")); + + op2 = gtNewSimdHWIntrinsicNode(retType, t, gtNewIconNode(SHUFFLE_ZWXY, TYP_INT), hwIntrinsic, baseType, + simdSize); + + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseAnd, TYP_INT); + assert(hwIntrinsic != NI_VectorT128_op_BitwiseAnd); + } + assert(hwIntrinsic != NI_Illegal); + + return gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + } + + case NI_VectorT128_GreaterThanOrEqual: + case NI_VectorT128_LessThanOrEqual: + case NI_VectorT256_GreaterThanOrEqual: + case NI_VectorT256_LessThanOrEqual: + { + // There is no direct support for doing a combined comparison and equality for integral types. + // These have to be implemented by performing both halves and combining their results. 
+ // + // op1Dup = op1 + // op2Dup = op2 + // + // op1 = GreaterThan(op1, op2) + // op2 = Equals(op1Dup, op2Dup) + // + // result = BitwiseOr(op1, op2) + // + // Where the GreaterThan(op1, op2) comparison could also be LessThan(op1, op2) + + GenTree* op1Dup; + op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector equality comparison")); + + GenTree* op2Dup; + op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector equality comparison")); + + switch (intrinsic) + { case NI_VectorT128_GreaterThanOrEqual: - case NI_VectorT256_GreaterThan: + { + intrinsic = NI_VectorT128_GreaterThan; + break; + } + + case NI_VectorT128_LessThanOrEqual: + { + intrinsic = NI_VectorT128_LessThan; + break; + } + case NI_VectorT256_GreaterThanOrEqual: { - if (varTypeIsUnsigned(baseType)) - { - // Vector, Vector, Vector and Vector: - // Hardware supports > for signed comparison. Therefore, to use it for - // comparing unsigned numbers, we subtract a constant from both the - // operands such that the result fits within the corresponding signed - // type. The resulting signed numbers are compared using signed comparison. - // - // Vector: constant to be subtracted is 2^7 - // Vector constant to be subtracted is 2^15 - // Vector constant to be subtracted is 2^31 - // Vector constant to be subtracted is 2^63 - // - // We need to treat op1 and op2 as signed for comparison purpose after - // the transformation. 
- - GenTree* constVal = nullptr; - - switch (baseType) - { - case TYP_UBYTE: - { - constVal = gtNewIconNode(0x80808080, TYP_INT); - baseType = TYP_BYTE; - break; - } - - case TYP_USHORT: - { - constVal = gtNewIconNode(0x80008000, TYP_INT); - baseType = TYP_SHORT; - break; - } - - case TYP_UINT: - { - constVal = gtNewIconNode(0x80000000, TYP_INT); - baseType = TYP_INT; - break; - } - - case TYP_ULONG: - { - constVal = gtNewLconNode(0x8000000000000000); - baseType = TYP_LONG; - break; - } - - default: - { - unreached(); - } - } - - GenTree* constVector; - GenTree* constVectorDup; - - constVector = - gtNewSIMDNode(retType, constVal, nullptr, SIMDIntrinsicInit, constVal->TypeGet(), simdSize); - constVector = impCloneExpr(constVector, &constVectorDup, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector unsigned comparison")); - - NamedIntrinsic subtractIntrinsic = (simdSize == 32) ? NI_AVX2_Subtract : NI_SSE2_Subtract; - - // op1 = op1 - constVector - op1 = - gtNewSimdHWIntrinsicNode(retType, op1, constVector, subtractIntrinsic, baseType, simdSize); - - // op2 = op2 - constVector - op2 = gtNewSimdHWIntrinsicNode(retType, op2, constVectorDup, subtractIntrinsic, baseType, - simdSize); - } + intrinsic = NI_VectorT256_GreaterThan; + break; + } - return gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + case NI_VectorT256_LessThanOrEqual: + { + intrinsic = NI_VectorT256_LessThan; + break; } default: { - // Some platforms warn about unhandled switch cases - // We handle it more generally via the assert and return below. - break; + unreached(); } } + + op1 = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, baseType, simdSize, op1, op2); + + intrinsic = isVectorT256 ? NI_VectorT256_Equals : NI_VectorT128_Equals; + op2 = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, baseType, simdSize, op1Dup, op2Dup); + intrinsic = isVectorT256 ? 
NI_VectorT256_op_BitwiseOr : NI_VectorT128_op_BitwiseOr; + + NamedIntrinsic hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + return gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); } - } -#endif - assert(!"Unexpected SimdAsHWIntrinsic"); - return nullptr; + case NI_VectorT128_GreaterThan: + case NI_VectorT128_LessThan: + case NI_VectorT256_GreaterThan: + case NI_VectorT256_LessThan: + { + NamedIntrinsic hwIntrinsic = NI_Illegal; + + if (varTypeIsUnsigned(baseType)) + { + // Vector, Vector, Vector and Vector: + // Hardware supports > for signed comparison. Therefore, to use it for + // comparing unsigned numbers, we subtract a constant from both the + // operands such that the result fits within the corresponding signed + // type. The resulting signed numbers are compared using signed comparison. + // + // Vector: constant to be subtracted is 2^7 + // Vector constant to be subtracted is 2^15 + // Vector constant to be subtracted is 2^31 + // Vector constant to be subtracted is 2^63 + // + // We need to treat op1 and op2 as signed for comparison purpose after + // the transformation. 
+ + GenTree* constVal = nullptr; + + switch (baseType) + { + case TYP_UBYTE: + { + constVal = gtNewIconNode(0x80808080, TYP_INT); + baseType = TYP_BYTE; + break; + } + + case TYP_USHORT: + { + constVal = gtNewIconNode(0x80008000, TYP_INT); + baseType = TYP_SHORT; + break; + } + + case TYP_UINT: + { + constVal = gtNewIconNode(0x80000000, TYP_INT); + baseType = TYP_INT; + break; + } + + case TYP_ULONG: + { + constVal = gtNewLconNode(0x8000000000000000); + baseType = TYP_LONG; + break; + } + + default: + { + unreached(); + } + } + + GenTree* constVector = gtNewSIMDNode(retType, constVal, nullptr, SIMDIntrinsicInit, + constVal->TypeGet(), simdSize); + + GenTree* constVectorDup; + constVector = impCloneExpr(constVector, &constVectorDup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector unsigned greater/less comparison")); + + NamedIntrinsic hwIntrinsic = isVectorT256 ? NI_AVX2_Subtract : NI_SSE2_Subtract; + + // op1 = op1 - constVector + op1 = gtNewSimdHWIntrinsicNode(retType, op1, constVector, hwIntrinsic, baseType, simdSize); + + // op2 = op2 - constVector + op2 = gtNewSimdHWIntrinsicNode(retType, op2, constVectorDup, hwIntrinsic, baseType, simdSize); + } + + // This should have been mutated by the above path + assert(varTypeIsIntegral(baseType) && !varTypeIsUnsigned(baseType)); + + if (isVectorT256 || (baseType != TYP_LONG)) + { + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + assert(hwIntrinsic != intrinsic); + } + else if (compOpportunisticallyDependsOn(InstructionSet_SSE42)) + { + hwIntrinsic = NI_SSE42_CompareGreaterThan; + } + else + { + // There is no direct SSE2 support for comparing TYP_LONG vectors. + // These have to be implemented in terms of TYP_INT vector comparison operations. + // + // Let us consider the case of single long element comparison. + // Say op1 = (x1, y1) and op2 = (x2, y2) where x1, y1, x2, and y2 are 32-bit integers that comprise the + // longs op1 and op2. 
+ // + // GreaterThan(op1, op2) can be expressed in terms of > relationship between 32-bit integers that + // comprise op1 and op2 as + // = (x1, y1) > (x2, y2) + // = (x1 > x2) || [(x1 == x2) && (y1 > y2)] - eq (1) + // + // op1Dup1 = op1 + // op1Dup2 = op1Dup1 + // op2Dup1 = op2 + // op2Dup2 = op2Dup1 + // + // t = (op1 > op2) - 32-bit signed comparison + // u = (op1Dup1 == op2Dup1) - 32-bit equality comparison + // v = (op1Dup2 > op2Dup2) - 32-bit unsigned comparison + // + // op1 = Shuffle(t, (3, 3, 1, 1)) - This corresponds to (x1 > x2) in eq(1) above + // v = Shuffle(v, (2, 2, 0, 0)) - This corresponds to (y1 > y2) in eq(1) above + // u = Shuffle(u, (3, 3, 1, 1)) - This corresponds to (x1 == x2) in eq(1) above + // op2 = BitwiseAnd(v, u) - This corresponds to [(x1 == x2) && (y1 > y2)] in eq(1) above + // + // result = BitwiseOr(op1, op2) + + GenTree* op1Dup1; + op1 = impCloneExpr(op1, &op1Dup1, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector greater/less than comparison")); + + GenTree* op1Dup2; + op1Dup1 = impCloneExpr(op1Dup1, &op1Dup2, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector greater/less than comparison")); + + GenTree* op2Dup1; + op2 = impCloneExpr(op2, &op2Dup1, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector greater/less than comparison")); + + GenTree* op2Dup2; + op2Dup1 = impCloneExpr(op2Dup1, &op2Dup2, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector greater/less than comparison")); + + NamedIntrinsic equHWIntrinsic = isVectorT256 ? 
NI_VectorT256_Equals : NI_VectorT128_Equals; + + GenTree* t = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, TYP_INT, simdSize, op1, op2); + GenTree* u = + impSimdAsHWIntrinsicRelOp(equHWIntrinsic, clsHnd, retType, TYP_INT, simdSize, op1Dup1, op2Dup1); + GenTree* v = + impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, TYP_UINT, simdSize, op1Dup2, op2Dup2); + + op1 = gtNewSimdHWIntrinsicNode(retType, t, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), hwIntrinsic, + baseType, simdSize); + + v = gtNewSimdHWIntrinsicNode(retType, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), hwIntrinsic, baseType, + simdSize); + u = gtNewSimdHWIntrinsicNode(retType, u, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), hwIntrinsic, baseType, + simdSize); + + hwIntrinsic = isVectorT256 ? NI_VectorT256_op_BitwiseAnd : NI_VectorT128_op_BitwiseAnd; + op2 = gtNewSimdHWIntrinsicNode(retType, v, u, hwIntrinsic, baseType, simdSize); + + hwIntrinsic = isVectorT256 ? NI_VectorT256_op_BitwiseOr : NI_VectorT128_op_BitwiseOr; + } + assert(hwIntrinsic != NI_Illegal); + + return gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + } + + default: + { + assert(!"Unexpected SimdAsHWIntrinsic"); + return nullptr; + } + } } +#endif // TARGET_XARCH + #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/src/jit/simdashwintrinsic.h b/src/coreclr/src/jit/simdashwintrinsic.h index 9fc39d7ab5573a..e5d951e38703d1 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.h +++ b/src/coreclr/src/jit/simdashwintrinsic.h @@ -22,14 +22,11 @@ enum class SimdAsHWIntrinsicFlag : unsigned int // Indicates compFloatingPointUsed does not need to be set. NoFloatingPointUsed = 0x1, - // Indicates the intrinsic requires special handling and can't be table driven. - NotTableDriven = 0x2, - // Indicates the intrinsic is for an instance method. - InstanceMethod = 0x04, + InstanceMethod = 0x02, // Indicates the operands should be swapped in importation. 
- NeedsOperandsSwapped = 0x08, + NeedsOperandsSwapped = 0x04, }; inline SimdAsHWIntrinsicFlag operator~(SimdAsHWIntrinsicFlag value) @@ -123,12 +120,6 @@ struct SimdAsHWIntrinsicInfo return (flags & SimdAsHWIntrinsicFlag::InstanceMethod) == SimdAsHWIntrinsicFlag::InstanceMethod; } - static bool IsTableDriven(NamedIntrinsic id) - { - SimdAsHWIntrinsicFlag flags = lookupFlags(id); - return (flags & SimdAsHWIntrinsicFlag::NotTableDriven) == SimdAsHWIntrinsicFlag::None; - } - static bool NeedsOperandsSwapped(NamedIntrinsic id) { SimdAsHWIntrinsicFlag flags = lookupFlags(id); diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h index ead239560477a8..daceee2107f905 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -72,11 +72,11 @@ SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Ille // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector Intrinsics SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE_AndNot, NI_SSE2_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) -SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE41_CompareEqual, NI_SSE41_CompareEqual, NI_SSE_CompareEqual, 
NI_SSE2_CompareEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE42_CompareGreaterThan, NI_SSE42_CompareGreaterThan, NI_SSE_CompareGreaterThan, NI_SSE2_CompareGreaterThan}, SimdAsHWIntrinsicFlag::NotTableDriven) -SIMD_AS_HWINTRINSIC(VectorT128, GreaterThanOrEqual, 2, {NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE42_CompareGreaterThan, NI_SSE42_CompareGreaterThan, NI_SSE_CompareGreaterThanOrEqual, NI_SSE2_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::NotTableDriven) -SIMD_AS_HWINTRINSIC(VectorT128, LessThan, 2, {NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE42_CompareGreaterThan, NI_SSE42_CompareGreaterThan, NI_SSE_CompareLessThan, NI_SSE2_CompareLessThan}, SimdAsHWIntrinsicFlag::NotTableDriven) -SIMD_AS_HWINTRINSIC(VectorT128, LessThanOrEqual, 2, {NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE2_CompareGreaterThan, NI_SSE42_CompareGreaterThan, NI_SSE42_CompareGreaterThan, NI_SSE_CompareLessThanOrEqual, NI_SSE2_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::NotTableDriven) +SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_VectorT128_Equals, NI_VectorT128_Equals, NI_SSE_CompareEqual, NI_SSE2_CompareEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, 
NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_SSE_CompareGreaterThan, NI_SSE2_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, GreaterThanOrEqual, 2, {NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_SSE_CompareGreaterThanOrEqual, NI_SSE2_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, LessThan, 2, {NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_SSE_CompareLessThan, NI_SSE2_CompareLessThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, LessThanOrEqual, 2, {NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_SSE_CompareLessThanOrEqual, NI_SSE2_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Addition, 2, {NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE_Add, NI_SSE2_Add}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE_And, NI_SSE2_And}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE_Or, NI_SSE2_Or}, SimdAsHWIntrinsicFlag::None) @@ -92,10 +92,10 
@@ SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_SSE2 // Vector Intrinsics SIMD_AS_HWINTRINSIC(VectorT256, AndNot, 2, {NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX_AndNot, NI_AVX_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) SIMD_AS_HWINTRINSIC(VectorT256, Equals, 2, {NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX_CompareEqual, NI_AVX_CompareEqual}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT256, GreaterThan, 2, {NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX_CompareGreaterThan, NI_AVX_CompareGreaterThan}, SimdAsHWIntrinsicFlag::NotTableDriven) -SIMD_AS_HWINTRINSIC(VectorT256, GreaterThanOrEqual, 2, {NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX_CompareGreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::NotTableDriven) -SIMD_AS_HWINTRINSIC(VectorT256, LessThan, 2, {NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX_CompareLessThan, NI_AVX_CompareLessThan}, SimdAsHWIntrinsicFlag::NotTableDriven) -SIMD_AS_HWINTRINSIC(VectorT256, LessThanOrEqual, 2, {NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, NI_AVX2_CompareGreaterThan, 
NI_AVX2_CompareGreaterThan, NI_AVX_CompareLessThanOrEqual, NI_AVX_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::NotTableDriven) +SIMD_AS_HWINTRINSIC(VectorT256, GreaterThan, 2, {NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX_CompareGreaterThan, NI_AVX_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, GreaterThanOrEqual, 2, {NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, LessThan, 2, {NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX_CompareLessThan, NI_AVX_CompareLessThan}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, LessThanOrEqual, 2, {NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_AVX_CompareLessThanOrEqual, NI_AVX_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_Addition, 2, {NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX_Add, NI_AVX_Add}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_BitwiseAnd, 2, {NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX_And, 
NI_AVX_And}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_BitwiseOr, 2, {NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX_Or, NI_AVX_Or}, SimdAsHWIntrinsicFlag::None) From 10e72356281c6a607686aa10aab91d0042f54739 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 25 Apr 2020 16:53:27 -0700 Subject: [PATCH 13/40] Ensure that GT_HWINTRINSIC fixes the type for certain TYP_SIMD8 --- src/coreclr/src/jit/rationalize.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/coreclr/src/jit/rationalize.cpp b/src/coreclr/src/jit/rationalize.cpp index 9f45bede3abbbf..9f17c9adc0813e 100644 --- a/src/coreclr/src/jit/rationalize.cpp +++ b/src/coreclr/src/jit/rationalize.cpp @@ -768,6 +768,28 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge break; #endif // FEATURE_SIMD +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + { + noway_assert(comp->supportSIMDTypes()); + + GenTreeHWIntrinsic* simdNode = node->AsHWIntrinsic(); + unsigned simdSize = simdNode->gtSIMDSize; + + // TODO-1stClassStructs: This should be handled more generally for enregistered or promoted + // structs that are passed or returned in a different register type than their enregistered + // type(s). + if (simdNode->gtType == TYP_I_IMPL && simdNode->gtSIMDSize == TARGET_POINTER_SIZE) + { + // This happens when it is consumed by a GT_RET_EXPR. + // It can only be a Vector2f or Vector2i. + assert(genTypeSize(simdNode->gtSIMDBaseType) == 4); + simdNode->gtType = TYP_SIMD8; + } + break; + } +#endif // FEATURE_HW_INTRINSICS + default: // These nodes should not be present in HIR. 
assert(!node->OperIs(GT_CMP, GT_SETCC, GT_JCC, GT_JCMP, GT_LOCKADD)); From 9022d94bf27a2343a0d4a9410fb4c2349d078e56 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 25 Apr 2020 17:16:40 -0700 Subject: [PATCH 14/40] Fixing the SimdAsHWIntrinsic Vector.op_Multiply support to match the GT_SIMD behavior --- src/coreclr/src/jit/simdashwintrinsic.cpp | 52 +++++++++++++++++++ .../src/jit/simdashwintrinsiclistxarch.h | 2 +- 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 3cdd2ddbf3df7f..0b9405b4afcb81 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -311,6 +311,58 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, return impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, baseType, simdSize, op1, op2); } + case NI_VectorT128_op_Multiply: + { + assert(baseType == TYP_INT); + + NamedIntrinsic hwIntrinsic = NI_Illegal; + + if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + hwIntrinsic = NI_SSE41_MultiplyLow; + } + else + { + // op1Dup = op1 + GenTree* op1Dup; + op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector multiply")); + + // op2Dup = op2 + GenTree* op2Dup; + op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector multiply")); + + // op1 = Sse2.ShiftRightLogical128BitLane(op1, 4) + op1 = gtNewSimdHWIntrinsicNode(retType, op1, gtNewIconNode(4, TYP_INT), + NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize); + + // op2 = Sse2.ShiftRightLogical128BitLane(op2, 4) + op2 = gtNewSimdHWIntrinsicNode(retType, op2, gtNewIconNode(4, TYP_INT), + NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize); + + // op2 = Sse2.Multiply(op2.AsUInt64(), op1.AsUInt64()).AsInt32() + op2 = gtNewSimdHWIntrinsicNode(retType, op2, op1, NI_SSE2_Multiply, TYP_ULONG, simdSize); + + // 
op2 = Sse2.Shuffle(op2, (0, 0, 2, 0)) + op2 = gtNewSimdHWIntrinsicNode(retType, op2, gtNewIconNode(SHUFFLE_XXZX, TYP_INT), + NI_SSE2_Shuffle, baseType, simdSize); + + // op1 = Sse2.Multiply(op1Dup.AsUInt64(), op2Dup.AsUInt64()).AsInt32() + op1 = gtNewSimdHWIntrinsicNode(retType, op1Dup, op2Dup, NI_SSE2_Multiply, TYP_ULONG, simdSize); + + // op1 = Sse2.Shuffle(op1, (0, 0, 2, 0)) + op1 = gtNewSimdHWIntrinsicNode(retType, op1, gtNewIconNode(SHUFFLE_XXZX, TYP_INT), + NI_SSE2_Shuffle, baseType, simdSize); + + // result = Sse2.UnpackLow(op1, op2) + hwIntrinsic = NI_SSE2_UnpackLow; + } + assert(hwIntrinsic != NI_Illegal); + + return gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + } + default: { // Some platforms warn about unhandled switch cases diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h index daceee2107f905..a298a4bc1c3a1f 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -82,7 +82,7 @@ SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_SSE2 SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE_Or, NI_SSE2_Or}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_SSE2_Divide}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_ExclusiveOr, 2, {NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE_Xor, NI_SSE2_Xor}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_SSE2_MultiplyLow, NI_Illegal, NI_SSE41_MultiplyLow, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_SSE2_Multiply}, SimdAsHWIntrinsicFlag::None) 
+SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_SSE2_MultiplyLow, NI_Illegal, NI_VectorT128_Multiply, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_SSE2_Multiply}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE_Subtract, NI_SSE2_Subtract}, SimdAsHWIntrinsicFlag::None) // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* From 16048cdce2c82ecdb538759f95a609460f7c1686 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 25 Apr 2020 17:29:50 -0700 Subject: [PATCH 15/40] Fixing the SimdAsHWIntrinsic Vector2/3 Division to match the GT_SIMD behavior --- src/coreclr/src/jit/lowerxarch.cpp | 31 ++++++++++++------- src/coreclr/src/jit/simdashwintrinsic.cpp | 26 ++++++++++++++++ .../src/jit/simdashwintrinsiclistxarch.h | 6 ++-- 3 files changed, 48 insertions(+), 15 deletions(-) diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index 0eb278c13018c5..f85c2248d089bb 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -3030,7 +3030,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) GenTree* op2 = node->gtGetOp2(); GenTree* op3 = nullptr; - if (!HWIntrinsicInfo::SupportsContainment(intrinsicId) || (simdSize == 8) || (simdSize == 12)) + if 
(!HWIntrinsicInfo::SupportsContainment(intrinsicId)) { // AVX2 gather are not containable and always have constant IMM argument if (HWIntrinsicInfo::isAVX2GatherIntrinsic(intrinsicId)) @@ -3043,6 +3043,24 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) return; } + if (HWIntrinsicInfo::lookupCategory(intrinsicId) == HW_Category_IMM) + { + GenTree* lastOp = HWIntrinsicInfo::lookupLastOp(node); + assert(lastOp != nullptr); + + if (HWIntrinsicInfo::isImmOp(intrinsicId, lastOp) && lastOp->IsCnsIntOrI()) + { + MakeSrcContained(node, lastOp); + } + } + + if ((node->gtSIMDSize == 8) || (node->gtSIMDSize == 12)) + { + // TODO-XArch-CQ: Ideally we would key this off of the size containingNode + // expects vs the size node actually is or would be if spilled to the stack + return; + } + // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained const bool isCommutative = HWIntrinsicInfo::IsCommutative(intrinsicId); @@ -3507,17 +3525,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { unreached(); } - - if (HWIntrinsicInfo::lookupCategory(intrinsicId) == HW_Category_IMM) - { - GenTree* lastOp = HWIntrinsicInfo::lookupLastOp(node); - assert(lastOp != nullptr); - - if (HWIntrinsicInfo::isImmOp(intrinsicId, lastOp) && lastOp->IsCnsIntOrI()) - { - MakeSrcContained(node, lastOp); - } - } } } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 0b9405b4afcb81..5037622eb7a10f 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -299,6 +299,32 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, switch (intrinsic) { + case NI_Vector2_op_Division: + case NI_Vector3_op_Division: + { + // Vector2/3 div: since the top-most elements will be zero, we end up + // performing 0/0 which is a NAN. Therefore, post division we need to set the + // top-most elements to zero. 
This is achieved by left logical shift followed + // by right logical shift of the result. + + // These are 16 byte operations, so we subtract from 16 bytes, not the vector register length. + unsigned shiftCount = 16 - simdSize; + assert((shiftCount > 0) && (shiftCount <= 16)); + + // retNode = Sse.Divide(op1, op2); + GenTree* retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, NI_SSE_Divide, baseType, simdSize); + + // retNode = Sse.ShiftLeftLogical128BitLane(retNode.AsInt32(), shiftCount).AsSingle() + retNode = gtNewSimdHWIntrinsicNode(retType, retNode, gtNewIconNode(shiftCount, TYP_INT), + NI_SSE2_ShiftLeftLogical128BitLane, TYP_INT, simdSize); + + // retNode = Sse.ShiftRightLogical128BitLane(retNode.AsInt32(), shiftCount).AsSingle() + retNode = gtNewSimdHWIntrinsicNode(retType, retNode, gtNewIconNode(shiftCount, TYP_INT), + NI_SSE2_ShiftRightLogical128BitLane, TYP_INT, simdSize); + + return retNode; + } + case NI_VectorT128_GreaterThan: case NI_VectorT128_GreaterThanOrEqual: case NI_VectorT128_LessThan: diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h index a298a4bc1c3a1f..934ac7aecb8ea9 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -32,7 +32,7 @@ SIMD_AS_HWINTRINSIC(Vector2, GreaterThanOrEqual, 2, {NI_Ille SIMD_AS_HWINTRINSIC(Vector2, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) 
-SIMD_AS_HWINTRINSIC(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -47,7 +47,7 @@ SIMD_AS_HWINTRINSIC(Vector3, GreaterThanOrEqual, 2, {NI_Ille SIMD_AS_HWINTRINSIC(Vector3, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Subtract, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -82,7 +82,7 @@ SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_SSE2 SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE_Or, NI_SSE2_Or}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_SSE2_Divide}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_ExclusiveOr, 2, {NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE2_Xor, NI_SSE_Xor, NI_SSE2_Xor}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_SSE2_MultiplyLow, NI_Illegal, NI_VectorT128_Multiply, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_SSE2_Multiply}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_SSE2_MultiplyLow, NI_Illegal, NI_VectorT128_op_Multiply, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_SSE2_Multiply}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE2_Subtract, NI_SSE_Subtract, NI_SSE2_Subtract}, SimdAsHWIntrinsicFlag::None) // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* From 82d646ea2691950a4a2db6338538b8e6be1f7083 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 25 Apr 2020 20:48:37 -0700 Subject: [PATCH 16/40] Porting Abs, Min, and Max to use the SimdAsHWIntrinsic support --- src/coreclr/src/jit/compiler.h | 8 + src/coreclr/src/jit/hwintrinsiclistxarch.h | 2 +- src/coreclr/src/jit/simdashwintrinsic.cpp | 333 +++++++++++++++++- .../src/jit/simdashwintrinsiclistarm64.h | 12 + .../src/jit/simdashwintrinsiclistxarch.h | 15 + 5 files changed, 358 insertions(+), 12 deletions(-) diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index f30b233276a395..a7c04f956bed7d 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -3696,6 +3696,14 @@ class Compiler var_types baseType, unsigned simdSize); + GenTree* impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd, + var_types retType, + var_types baseType, + unsigned simdSize, + GenTree* op1, + GenTree* op2, + GenTree* op3); + GenTree* impSpecialIntrinsic(NamedIntrinsic intrinsic, CORINFO_CLASS_HANDLE clsHnd, CORINFO_METHOD_HANDLE method, diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index e1be06c90858e2..32eac239096ba2 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -475,7 +475,7 @@ HARDWARE_INTRINSIC(AVX, Xor, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, 
TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX2 Intrinsics -HARDWARE_INTRINSIC(AVX2, Abs, 32, 1, {INS_pabsb, INS_pabsb, INS_pabsw, INS_pabsw, INS_pabsd, INS_pabsd, INS_paddq, INS_paddq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(AVX2, Abs, 32, 1, {INS_invalid, INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(AVX2, Add, 32, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX2, AddSaturate, 32, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX2, AlignRight, 32, 3, {INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 5037622eb7a10f..4ed29745c8d39f 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -220,6 +220,15 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, switch (sig->numArgs) { + case 1: + { + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, 
&argClass))); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + + assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); + return gtNewSimdHWIntrinsicNode(retType, op1, hwIntrinsic, baseType, simdSize); + } + case 2: { CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList); @@ -231,9 +240,7 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, if (SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)) { - GenTree* tmp = op2; - op2 = op1; - op1 = tmp; + std::swap(op1, op2); } return gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); @@ -260,6 +267,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, CORINFO_ARG_LIST_HANDLE argList = sig->args; var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass; GenTree* op1 = nullptr; GenTree* op2 = nullptr; @@ -268,6 +276,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, bool isInstanceMethod = SimdAsHWIntrinsicInfo::IsInstanceMethod(intrinsic); #if defined(TARGET_XARCH) + bool isVectorT256 = (SimdAsHWIntrinsicInfo::lookupClassId(intrinsic) == SimdAsHWIntrinsicClassId::VectorT256); + if ((baseType != TYP_FLOAT) && !compOpportunisticallyDependsOn(InstructionSet_SSE2)) { // Vector, for everything but float, requires at least SSE2 @@ -280,12 +290,113 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } // Vector, when 32-bytes, requires at least AVX2 - assert((classId != SimdAsHWIntrinsicClassId::VectorT256) || compIsaSupportedDebugOnly(InstructionSet_AVX2)); - - CORINFO_CLASS_HANDLE argClass; + assert(!isVectorT256 || compIsaSupportedDebugOnly(InstructionSet_AVX2)); +#endif switch (sig->numArgs) { + case 1: + { + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, argList, &argClass))); + op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); + + assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); + + switch (intrinsic) + 
{ +#if defined(TARGET_XARCH) + case NI_Vector2_Abs: + case NI_Vector3_Abs: + case NI_Vector4_Abs: + case NI_VectorT128_Abs: + case NI_VectorT256_Abs: + { + if (varTypeIsFloating(baseType)) + { + // Abs(vf) = vf & new SIMDVector(0x7fffffff); + // Abs(vd) = vd & new SIMDVector(0x7fffffffffffffff); + GenTree* bitMask = nullptr; + + if (baseType == TYP_FLOAT) + { + static_assert_no_msg(sizeof(float) == sizeof(int)); + int mask = 0x7fffffff; + bitMask = gtNewDconNode(*((float*)&mask), TYP_FLOAT); + } + else + { + assert(baseType == TYP_DOUBLE); + static_assert_no_msg(sizeof(double) == sizeof(__int64)); + + __int64 mask = 0x7fffffffffffffffLL; + bitMask = gtNewDconNode(*((double*)&mask), TYP_DOUBLE); + } + assert(bitMask != nullptr); + + bitMask = gtNewSIMDNode(retType, bitMask, SIMDIntrinsicInit, baseType, simdSize); + + intrinsic = isVectorT256 ? NI_VectorT256_op_BitwiseAnd : NI_VectorT128_op_BitwiseAnd; + intrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + + return gtNewSimdHWIntrinsicNode(retType, op1, bitMask, intrinsic, baseType, simdSize); + } + else if (varTypeIsUnsigned(baseType)) + { + return op1; + } + else if ((baseType != TYP_LONG) && compOpportunisticallyDependsOn(InstructionSet_SSSE3)) + { + return gtNewSimdHWIntrinsicNode(retType, op1, NI_SSSE3_Abs, baseType, simdSize); + } + else + { + GenTree* tmp; + NamedIntrinsic hwIntrinsic; + + GenTree* op1Dup1; + op1 = impCloneExpr(op1, &op1Dup1, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector absolute value")); + + GenTree* op1Dup2; + op1Dup1 = impCloneExpr(op1Dup1, &op1Dup2, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector absolute value")); + + // op1 = op1 < Zero + tmp = gtNewSIMDVectorZero(retType, baseType, simdSize); + hwIntrinsic = isVectorT256 ? 
NI_VectorT256_LessThan : NI_VectorT128_LessThan; + op1 = + impSimdAsHWIntrinsicRelOp(hwIntrinsic, clsHnd, retType, baseType, simdSize, op1, tmp); + + // tmp = Zero - op1Dup1 + tmp = gtNewSIMDVectorZero(retType, baseType, simdSize); + hwIntrinsic = isVectorT256 ? NI_AVX2_Subtract : NI_SSE2_Subtract; + tmp = gtNewSimdHWIntrinsicNode(retType, tmp, op1Dup1, hwIntrinsic, baseType, simdSize); + + // result = ConditionalSelect(op1, tmp, op1Dup2) + return impSimdAsHWIntrinsicCndSel(clsHnd, retType, baseType, simdSize, op1, tmp, op1Dup2); + } + break; + } +#elif defined(TARGET_ARM64) + case NI_VectorT128_Abs: + { + assert(varTypeIsUnsigned(baseType)); + return op1; + } +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + + default: + { + // Some platforms warn about unhandled switch cases + // We handle it more generally via the assert and nullptr return below. + break; + } + } + break; + } + case 2: { CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(argList); @@ -299,6 +410,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, switch (intrinsic) { +#if defined(TARGET_XARCH) case NI_Vector2_op_Division: case NI_Vector3_op_Division: { @@ -325,6 +437,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, return retNode; } + case NI_VectorT128_Equals: case NI_VectorT128_GreaterThan: case NI_VectorT128_GreaterThanOrEqual: case NI_VectorT128_LessThan: @@ -337,6 +450,108 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, return impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, baseType, simdSize, op1, op2); } + case NI_VectorT256_Max: + case NI_VectorT256_Min: + { + assert((baseType == TYP_LONG) || (baseType == TYP_ULONG)); + intrinsic = (intrinsic == NI_VectorT256_Max) ? 
NI_VectorT128_Max : NI_VectorT128_Min; + __fallthrough; + } + + case NI_VectorT128_Max: + case NI_VectorT128_Min: + { + if ((baseType == TYP_BYTE) || (baseType == TYP_USHORT)) + { + GenTree* constVal = nullptr; + + NamedIntrinsic opIntrinsic; + NamedIntrinsic hwIntrinsic; + + switch (baseType) + { + case TYP_BYTE: + { + constVal = gtNewIconNode(0x80808080, TYP_INT); + opIntrinsic = NI_VectorT128_op_Subtraction; + baseType = TYP_UBYTE; + break; + } + + case TYP_USHORT: + { + constVal = gtNewIconNode(0x80008000, TYP_INT); + opIntrinsic = NI_VectorT128_op_Addition; + baseType = TYP_SHORT; + break; + } + + default: + { + unreached(); + } + } + + GenTree* constVector = + gtNewSIMDNode(retType, constVal, nullptr, SIMDIntrinsicInit, TYP_INT, simdSize); + + GenTree* constVectorDup; + constVector = impCloneExpr(constVector, &constVectorDup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector min/max")); + + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(opIntrinsic, baseType); + + // op1 = op1 - constVector + // -or- + // op1 = op1 + constVector + op1 = gtNewSimdHWIntrinsicNode(retType, op1, constVector, hwIntrinsic, baseType, simdSize); + + // op2 = op2 - constVector + // -or- + // op2 = op2 + constVector + op2 = gtNewSimdHWIntrinsicNode(retType, op2, constVectorDup, hwIntrinsic, baseType, simdSize); + + // op1 = Max(op1, op2) + // -or- + // op1 = Min(op1, op2) + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + op1 = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + + // result = op1 + constVectorDup + // -or- + // result = op1 - constVectorDup + opIntrinsic = (opIntrinsic == NI_VectorT128_op_Subtraction) ? 
NI_VectorT128_op_Addition + : NI_VectorT128_op_Subtraction; + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(opIntrinsic, baseType); + return gtNewSimdHWIntrinsicNode(retType, op1, constVectorDup, hwIntrinsic, baseType, simdSize); + } + + GenTree* op1Dup; + op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector min/max")); + + GenTree* op2Dup; + op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector min/max")); + + if (intrinsic == NI_VectorT128_Max) + { + intrinsic = isVectorT256 ? NI_VectorT256_GreaterThan : NI_VectorT128_GreaterThan; + } + else + { + intrinsic = isVectorT256 ? NI_VectorT256_LessThan : NI_VectorT128_LessThan; + } + + // op1 = op1 > op2 + // -or- + // op1 = op1 < op2 + op1 = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, baseType, simdSize, op1, op2); + + // result = ConditionalSelect(op1, op1Dup, op2Dup) + return impSimdAsHWIntrinsicCndSel(clsHnd, retType, baseType, simdSize, op1, op1Dup, op2Dup); + } + case NI_VectorT128_op_Multiply: { assert(baseType == TYP_INT); @@ -388,6 +603,36 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, return gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); } +#elif defined(TARGET_ARM64) + case NI_VectorT128_Max: + case NI_VectorT128_Min: + { + assert((baseType == TYP_LONG) || (baseType == TYP_ULONG)); + + NamedIntrinsic hwIntrinsic; + + GenTree* op1Dup; + op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector min/max")); + + GenTree* op2Dup; + op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector min/max")); + + intrinsic = (intrinsic == NI_VectorT128_Max) ? 
NI_VectorT128_GreaterThan : NI_VectorT128_LessThan; + + // op1 = op1 > op2 + // -or- + // op1 = op1 < op2 + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + op1 = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + + // result = ConditionalSelect(op1, op1Dup, op2Dup) + return impSimdAsHWIntrinsicCndSel(clsHnd, retType, baseType, simdSize, op1, op1Dup, op2Dup); + } +#else +#error Unsupported platform +#endif // TARGET_XARCH default: { @@ -396,14 +641,78 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, break; } } + break; } } -#endif assert(!"Unexpected SimdAsHWIntrinsic"); return nullptr; } +GenTree* Compiler::impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd, + var_types retType, + var_types baseType, + unsigned simdSize, + GenTree* op1, + GenTree* op2, + GenTree* op3) +{ + assert(featureSIMD); + assert(retType != TYP_UNKNOWN); + assert(varTypeIsIntegral(baseType)); + assert(simdSize != 0); + assert(varTypeIsSIMD(getSIMDTypeForSize(simdSize))); + assert(op1 != nullptr); + assert(op2 != nullptr); + assert(op3 != nullptr); + +#if defined(TARGET_XARCH) + bool isVectorT256 = (simdSize == 32); + + // Vector for the rel-ops covered here requires at least SSE2 + assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); + + // Vector, when 32-bytes, requires at least AVX2 + assert(!isVectorT256 || compIsaSupportedDebugOnly(InstructionSet_AVX2)); + + if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + NamedIntrinsic hwIntrinsic = NI_SSE41_BlendVariable; + + if (isVectorT256) + { + hwIntrinsic = varTypeIsIntegral(baseType) ? 
NI_AVX2_BlendVariable : NI_AVX_BlendVariable; + } + + return gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, hwIntrinsic, baseType, simdSize); + } +#endif // TARGET_XARCH + + NamedIntrinsic hwIntrinsic; + + GenTree* op1Dup; + op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone for Vector conditional select")); + + // op2 = op2 & op1 + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseAnd, baseType); + op2 = gtNewSimdHWIntrinsicNode(retType, op2, op1, hwIntrinsic, baseType, simdSize); + + // op3 = op3 & ~op1Dup + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_AndNot, baseType); + + if (SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(hwIntrinsic)) + { + std::swap(op3, op1Dup); + } + + op3 = gtNewSimdHWIntrinsicNode(retType, op3, op1Dup, hwIntrinsic, baseType, simdSize); + + // result = op2 | op3 + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseOr, baseType); + return gtNewSimdHWIntrinsicNode(retType, op2, op3, hwIntrinsic, baseType, simdSize); +} + #if defined(TARGET_XARCH) GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, CORINFO_CLASS_HANDLE clsHnd, @@ -684,11 +993,11 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, op2Dup1 = impCloneExpr(op2Dup1, &op2Dup2, clsHnd, (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("Clone for Vector greater/less than comparison")); - NamedIntrinsic equHWIntrinsic = isVectorT256 ? NI_VectorT256_Equals : NI_VectorT128_Equals; + NamedIntrinsic ceqHWIntrinsic = isVectorT256 ? 
NI_VectorT256_Equals : NI_VectorT128_Equals; GenTree* t = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, TYP_INT, simdSize, op1, op2); GenTree* u = - impSimdAsHWIntrinsicRelOp(equHWIntrinsic, clsHnd, retType, TYP_INT, simdSize, op1Dup1, op2Dup1); + impSimdAsHWIntrinsicRelOp(ceqHWIntrinsic, clsHnd, retType, TYP_INT, simdSize, op1Dup1, op2Dup1); GenTree* v = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, TYP_UINT, simdSize, op1Dup2, op2Dup2); @@ -700,10 +1009,12 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, u = gtNewSimdHWIntrinsicNode(retType, u, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), hwIntrinsic, baseType, simdSize); - hwIntrinsic = isVectorT256 ? NI_VectorT256_op_BitwiseAnd : NI_VectorT128_op_BitwiseAnd; + intrinsic = isVectorT256 ? NI_VectorT256_op_BitwiseAnd : NI_VectorT128_op_BitwiseAnd; + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); op2 = gtNewSimdHWIntrinsicNode(retType, v, u, hwIntrinsic, baseType, simdSize); - hwIntrinsic = isVectorT256 ? NI_VectorT256_op_BitwiseOr : NI_VectorT128_op_BitwiseOr; + intrinsic = isVectorT256 ? 
NI_VectorT256_op_BitwiseOr : NI_VectorT128_op_BitwiseOr; + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); } assert(hwIntrinsic != NI_Illegal); diff --git a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h index 09c1d50aa8a1dc..38f3a219c2612c 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h @@ -26,11 +26,14 @@ // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector2 Intrinsics +SIMD_AS_HWINTRINSIC(Vector2, Abs, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThanOrEqual, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -41,11 +44,14 @@ SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Ille // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector3 Intrinsics +SIMD_AS_HWINTRINSIC(Vector3, Abs, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -56,11 +62,14 @@ SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Ille // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector4 Intrinsics +SIMD_AS_HWINTRINSIC(Vector4, Abs, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_AdvSimd_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Arm64_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -71,12 +80,15 @@ SIMD_AS_HWINTRINSIC(Vector4, 
op_Subtraction, 2, {NI_Ille // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector Intrinsics +SIMD_AS_HWINTRINSIC(VectorT128, Abs, 2, {NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Arm64_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_AdvSimd_Arm64_Abs}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, 
NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, GreaterThanOrEqual, 2, {NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual, NI_AdvSimd_CompareGreaterThanOrEqual, NI_AdvSimd_Arm64_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, LessThan, 2, {NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan, NI_AdvSimd_CompareLessThan, NI_AdvSimd_Arm64_CompareLessThan}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, LessThanOrEqual, 2, {NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual, NI_AdvSimd_CompareLessThanOrEqual, NI_AdvSimd_Arm64_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, Max, 2, {NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_AdvSimd_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_AdvSimd_Max, NI_AdvSimd_Arm64_Max}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, Min, 2, {NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_AdvSimd_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_AdvSimd_Min, NI_AdvSimd_Arm64_Min}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Addition, 2, {NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, 
NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Add, NI_AdvSimd_Arm64_Add}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And, NI_AdvSimd_And}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or, NI_AdvSimd_Or}, SimdAsHWIntrinsicFlag::None) diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h index 934ac7aecb8ea9..7bf789819eb648 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -26,11 +26,14 @@ // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector2 Intrinsics +SIMD_AS_HWINTRINSIC(Vector2, Abs, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) 
SIMD_AS_HWINTRINSIC(Vector2, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -41,11 +44,14 @@ SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Ille // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, 
TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector3 Intrinsics +SIMD_AS_HWINTRINSIC(Vector3, Abs, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_op_Division, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -56,11 +62,14 @@ SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Ille // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector4 Intrinsics +SIMD_AS_HWINTRINSIC(Vector4, Abs, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareEqual, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Division, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Divide, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Multiply, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Multiply, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -71,12 +80,15 @@ SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Ille // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, 
TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector Intrinsics +SIMD_AS_HWINTRINSIC(VectorT128, Abs, 2, {NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE_AndNot, NI_SSE2_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_VectorT128_Equals, NI_VectorT128_Equals, NI_SSE_CompareEqual, NI_SSE2_CompareEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_SSE_CompareGreaterThan, NI_SSE2_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, GreaterThanOrEqual, 2, {NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, 
NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_VectorT128_GreaterThanOrEqual, NI_SSE_CompareGreaterThanOrEqual, NI_SSE2_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, LessThan, 2, {NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_SSE2_CompareLessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_VectorT128_LessThan, NI_SSE_CompareLessThan, NI_SSE2_CompareLessThan}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, LessThanOrEqual, 2, {NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_VectorT128_LessThanOrEqual, NI_SSE_CompareLessThanOrEqual, NI_SSE2_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, Max, 2, {NI_VectorT128_Max, NI_SSE2_Max, NI_SSE2_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_VectorT128_Max, NI_SSE_Max, NI_SSE2_Max}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, Min, 2, {NI_VectorT128_Min, NI_SSE2_Min, NI_SSE2_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_VectorT128_Min, NI_SSE_Min, NI_SSE2_Min}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_Addition, 2, {NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE2_Add, NI_SSE_Add, NI_SSE2_Add}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseAnd, 2, {NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE2_And, NI_SSE_And, NI_SSE2_And}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, op_BitwiseOr, 2, {NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE2_Or, NI_SSE_Or, 
NI_SSE2_Or}, SimdAsHWIntrinsicFlag::None) @@ -90,12 +102,15 @@ SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_SSE2 // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector Intrinsics +SIMD_AS_HWINTRINSIC(VectorT256, Abs, 2, {NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, AndNot, 2, {NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX_AndNot, NI_AVX_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) SIMD_AS_HWINTRINSIC(VectorT256, Equals, 2, {NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX_CompareEqual, NI_AVX_CompareEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, GreaterThan, 2, {NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX_CompareGreaterThan, NI_AVX_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, 
GreaterThanOrEqual, 2, {NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_VectorT256_GreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual, NI_AVX_CompareGreaterThanOrEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, LessThan, 2, {NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX2_CompareLessThan, NI_VectorT256_LessThan, NI_AVX_CompareLessThan, NI_AVX_CompareLessThan}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, LessThanOrEqual, 2, {NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_VectorT256_LessThanOrEqual, NI_AVX_CompareLessThanOrEqual, NI_AVX_CompareLessThanOrEqual}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, Max, 2, {NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_AVX2_Max, NI_VectorT256_Max, NI_VectorT256_Max, NI_AVX_Max, NI_AVX_Max}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, Min, 2, {NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_AVX2_Min, NI_VectorT256_Min, NI_VectorT256_Min, NI_AVX_Min, NI_AVX_Min}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_Addition, 2, {NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX2_Add, NI_AVX_Add, NI_AVX_Add}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_BitwiseAnd, 2, {NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX2_And, NI_AVX_And, NI_AVX_And}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, op_BitwiseOr, 2, 
{NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX2_Or, NI_AVX_Or, NI_AVX_Or}, SimdAsHWIntrinsicFlag::None) From ed399c45761ca8a7fa67a1a261e62ae80cb3d388 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 25 Apr 2020 23:56:21 -0700 Subject: [PATCH 17/40] Minor fixups to the SSE2 codepath --- src/coreclr/src/jit/simdashwintrinsic.cpp | 39 ++++++++++------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 4ed29745c8d39f..c5c9e545f29679 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -267,7 +267,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, CORINFO_ARG_LIST_HANDLE argList = sig->args; var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass; + CORINFO_CLASS_HANDLE argClass; GenTree* op1 = nullptr; GenTree* op2 = nullptr; @@ -364,8 +364,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, // op1 = op1 < Zero tmp = gtNewSIMDVectorZero(retType, baseType, simdSize); hwIntrinsic = isVectorT256 ? 
NI_VectorT256_LessThan : NI_VectorT128_LessThan; - op1 = - impSimdAsHWIntrinsicRelOp(hwIntrinsic, clsHnd, retType, baseType, simdSize, op1, tmp); + op1 = impSimdAsHWIntrinsicRelOp(hwIntrinsic, clsHnd, retType, baseType, simdSize, op1, tmp); // tmp = Zero - op1Dup1 tmp = gtNewSIMDVectorZero(retType, baseType, simdSize); @@ -701,7 +700,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd, // op3 = op3 & ~op1Dup hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_AndNot, baseType); - if (SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(hwIntrinsic)) + if (SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(NI_VectorT128_AndNot)) { std::swap(op3, op1Dup); } @@ -774,15 +773,15 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, TYP_INT); assert(hwIntrinsic != intrinsic); - GenTree* t = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + GenTree* t = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, TYP_INT, simdSize); t = impCloneExpr(t, &op1, clsHnd, (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("Clone for Vector equality comparison")); - op2 = gtNewSimdHWIntrinsicNode(retType, t, gtNewIconNode(SHUFFLE_ZWXY, TYP_INT), hwIntrinsic, baseType, - simdSize); + op2 = gtNewSimdHWIntrinsicNode(retType, t, gtNewIconNode(SHUFFLE_ZWXY, TYP_INT), NI_SSE2_Shuffle, + TYP_INT, simdSize); - hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseAnd, TYP_INT); + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseAnd, baseType); assert(hwIntrinsic != NI_VectorT128_op_BitwiseAnd); } assert(hwIntrinsic != NI_Illegal); @@ -919,8 +918,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, } } - GenTree* constVector = gtNewSIMDNode(retType, constVal, nullptr, SIMDIntrinsicInit, - constVal->TypeGet(), simdSize); + GenTree* constVector = + gtNewSIMDNode(retType, constVal, 
nullptr, SIMDIntrinsicInit, constVal->TypeGet(), simdSize); GenTree* constVectorDup; constVector = impCloneExpr(constVector, &constVectorDup, clsHnd, (unsigned)CHECK_SPILL_ALL, @@ -993,28 +992,24 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, op2Dup1 = impCloneExpr(op2Dup1, &op2Dup2, clsHnd, (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("Clone for Vector greater/less than comparison")); - NamedIntrinsic ceqHWIntrinsic = isVectorT256 ? NI_VectorT256_Equals : NI_VectorT128_Equals; - GenTree* t = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, TYP_INT, simdSize, op1, op2); - GenTree* u = - impSimdAsHWIntrinsicRelOp(ceqHWIntrinsic, clsHnd, retType, TYP_INT, simdSize, op1Dup1, op2Dup1); + GenTree* u = impSimdAsHWIntrinsicRelOp(NI_VectorT128_Equals, clsHnd, retType, TYP_INT, simdSize, + op1Dup1, op2Dup1); GenTree* v = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, TYP_UINT, simdSize, op1Dup2, op2Dup2); - op1 = gtNewSimdHWIntrinsicNode(retType, t, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), hwIntrinsic, - baseType, simdSize); + op1 = gtNewSimdHWIntrinsicNode(retType, t, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle, + TYP_INT, simdSize); - v = gtNewSimdHWIntrinsicNode(retType, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), hwIntrinsic, baseType, + v = gtNewSimdHWIntrinsicNode(retType, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), NI_SSE2_Shuffle, TYP_INT, simdSize); - u = gtNewSimdHWIntrinsicNode(retType, u, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), hwIntrinsic, baseType, + u = gtNewSimdHWIntrinsicNode(retType, u, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle, TYP_INT, simdSize); - intrinsic = isVectorT256 ? 
NI_VectorT256_op_BitwiseAnd : NI_VectorT128_op_BitwiseAnd; - hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseAnd, baseType); op2 = gtNewSimdHWIntrinsicNode(retType, v, u, hwIntrinsic, baseType, simdSize); - intrinsic = isVectorT256 ? NI_VectorT256_op_BitwiseOr : NI_VectorT128_op_BitwiseOr; - hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseOr, baseType); } assert(hwIntrinsic != NI_Illegal); From 1e57f6e0339ca934cd9dfffbdf2f705d4b1f6304 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 26 Apr 2020 07:27:22 -0700 Subject: [PATCH 18/40] Applying formatting patch --- src/coreclr/src/jit/hwintrinsic.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/src/jit/hwintrinsic.h b/src/coreclr/src/jit/hwintrinsic.h index f3eae092d02ee8..6d400f64d971e2 100644 --- a/src/coreclr/src/jit/hwintrinsic.h +++ b/src/coreclr/src/jit/hwintrinsic.h @@ -415,7 +415,6 @@ struct HWIntrinsicInfo assert(id != NI_AVX_CompareNotGreaterThanOrEqual); return static_cast(FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling); - } case NI_SSE_CompareNotLessThanOrEqual: From aaa996208640d20375ba6f2e6630ea0921f773fc Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 26 Apr 2020 11:16:07 -0700 Subject: [PATCH 19/40] Fixing a check in lowering --- src/coreclr/src/jit/lower.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/lower.cpp b/src/coreclr/src/jit/lower.cpp index 4479a84bf8944d..03f0692cbb826a 100644 --- a/src/coreclr/src/jit/lower.cpp +++ b/src/coreclr/src/jit/lower.cpp @@ -1331,7 +1331,7 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) LclVarDsc* varDsc = &comp->lvaTable[varNum]; type = varDsc->lvType; } - else if (arg->OperIs(GT_SIMD, GT_HWINTRINSIC)) + else if (arg->OperIsSIMD()) { 
assert((arg->AsSIMD()->gtSIMDSize == 16) || (arg->AsSIMD()->gtSIMDSize == 12)); @@ -1340,6 +1340,15 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) type = TYP_SIMD12; } } + else if (arg->OperIsHWIntrinsic()) + { + assert((arg->AsHWIntrinsic()->gtSIMDSize == 16) || (arg->AsHWIntrinsic()->gtSIMDSize == 12)); + + if (arg->AsHWIntrinsic()->gtSIMDSize == 12) + { + type = TYP_SIMD12; + } + } } #elif defined(TARGET_AMD64) // TYP_SIMD8 parameters that are passed as longs From d22ecabd0795bc8b524ad8eeb6eb8f469a5cc85c Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 27 Apr 2020 16:10:18 -0700 Subject: [PATCH 20/40] Mark SimdAsHWIntrinsic nodes so we can lookup the correct handle --- src/coreclr/src/jit/compiler.h | 17 +++++ src/coreclr/src/jit/gentree.cpp | 15 +++- src/coreclr/src/jit/gentree.h | 5 +- src/coreclr/src/jit/simdashwintrinsic.cpp | 92 ++++++++++++----------- 4 files changed, 82 insertions(+), 47 deletions(-) diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index a7c04f956bed7d..96eb1f20d01225 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -2610,6 +2610,23 @@ class Compiler NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size); + + GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode( + var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size) + { + GenTreeHWIntrinsic* node = gtNewSimdHWIntrinsicNode(type, op1, hwIntrinsicID, baseType, size); + node->gtFlags |= GTF_SIMDASHW_OP; + return node; + } + + GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode( + var_types type, GenTree* op1, GenTree* op2, NamedIntrinsic hwIntrinsicID, var_types baseType, unsigned size) + { + GenTreeHWIntrinsic* node = gtNewSimdHWIntrinsicNode(type, op1, op2, hwIntrinsicID, baseType, size); + node->gtFlags |= GTF_SIMDASHW_OP; + return node; + } + GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID); 
GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index bd8e7e422ffe93..35b68545e1de85 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -17197,6 +17197,12 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree) if (varTypeIsSIMD(tree)) { structHnd = gtGetStructHandleForSIMD(tree->gtType, TYP_FLOAT); +#ifdef FEATURE_HW_INTRINSICS + if (structHnd == NO_CLASS_HANDLE) + { + structHnd = gtGetStructHandleForHWSIMD(tree->gtType, TYP_FLOAT); + } +#endif } #endif break; @@ -17263,7 +17269,14 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree) #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: - structHnd = gtGetStructHandleForHWSIMD(tree->gtType, tree->AsHWIntrinsic()->gtSIMDBaseType); + if ((tree->gtFlags & GTF_SIMDASHW_OP) != 0) + { + structHnd = gtGetStructHandleForSIMD(tree->gtType, tree->AsHWIntrinsic()->gtSIMDBaseType); + } + else + { + structHnd = gtGetStructHandleForHWSIMD(tree->gtType, tree->AsHWIntrinsic()->gtSIMDBaseType); + } break; #endif break; diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index 79275a3c135115..39d35abc4bd696 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -745,7 +745,7 @@ struct GenTree #define GTF_UNSIGNED 0x00008000 // With GT_CAST: the source operand is an unsigned type // With operators: the specified node is an unsigned operator - // + // #define GTF_LATE_ARG 0x00010000 // The specified node is evaluated to a temp in the arg list, and this temp is added to gtCallLateArgs. #define GTF_SPILL 0x00020000 // Needs to be spilled here @@ -913,6 +913,9 @@ struct GenTree #define GTF_SIMD12_OP 0x80000000 // GT_SIMD -- Indicates that the operands need to be handled as SIMD12 // even if they have been retyped as SIMD16. 
+#define GTF_SIMDASHW_OP 0x80000000 // GT_HWINTRINSIC -- Indicates that the structHandle should be gotten from gtGetStructHandleForSIMD + // rather than from gtGetStructHandleForHWSIMD. + //--------------------------------------------------------------------- // // GenTree flags stored in gtDebugFlags. diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index c5c9e545f29679..9c33561476d41c 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -226,7 +226,7 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, op1 = getArgForHWIntrinsic(argType, argClass, isInstanceMethod); assert(!SimdAsHWIntrinsicInfo::NeedsOperandsSwapped(intrinsic)); - return gtNewSimdHWIntrinsicNode(retType, op1, hwIntrinsic, baseType, simdSize); + return gtNewSimdAsHWIntrinsicNode(retType, op1, hwIntrinsic, baseType, simdSize); } case 2: @@ -243,7 +243,7 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, std::swap(op1, op2); } - return gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); } } @@ -338,7 +338,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, intrinsic = isVectorT256 ? 
NI_VectorT256_op_BitwiseAnd : NI_VectorT128_op_BitwiseAnd; intrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); - return gtNewSimdHWIntrinsicNode(retType, op1, bitMask, intrinsic, baseType, simdSize); + return gtNewSimdAsHWIntrinsicNode(retType, op1, bitMask, intrinsic, baseType, simdSize); } else if (varTypeIsUnsigned(baseType)) { @@ -346,7 +346,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, } else if ((baseType != TYP_LONG) && compOpportunisticallyDependsOn(InstructionSet_SSSE3)) { - return gtNewSimdHWIntrinsicNode(retType, op1, NI_SSSE3_Abs, baseType, simdSize); + return gtNewSimdAsHWIntrinsicNode(retType, op1, NI_SSSE3_Abs, baseType, simdSize); } else { @@ -369,7 +369,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, // tmp = Zero - op1Dup1 tmp = gtNewSIMDVectorZero(retType, baseType, simdSize); hwIntrinsic = isVectorT256 ? NI_AVX2_Subtract : NI_SSE2_Subtract; - tmp = gtNewSimdHWIntrinsicNode(retType, tmp, op1Dup1, hwIntrinsic, baseType, simdSize); + tmp = gtNewSimdAsHWIntrinsicNode(retType, tmp, op1Dup1, hwIntrinsic, baseType, simdSize); // result = ConditionalSelect(op1, tmp, op1Dup2) return impSimdAsHWIntrinsicCndSel(clsHnd, retType, baseType, simdSize, op1, tmp, op1Dup2); @@ -423,15 +423,15 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, assert((shiftCount > 0) && (shiftCount <= 16)); // retNode = Sse.Divide(op1, op2); - GenTree* retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, NI_SSE_Divide, baseType, simdSize); + GenTree* retNode = gtNewSimdAsHWIntrinsicNode(retType, op1, op2, NI_SSE_Divide, baseType, simdSize); // retNode = Sse.ShiftLeftLogical128BitLane(retNode.AsInt32(), shiftCount).AsSingle() - retNode = gtNewSimdHWIntrinsicNode(retType, retNode, gtNewIconNode(shiftCount, TYP_INT), - NI_SSE2_ShiftLeftLogical128BitLane, TYP_INT, simdSize); + retNode = gtNewSimdAsHWIntrinsicNode(retType, retNode, gtNewIconNode(shiftCount, TYP_INT), + 
NI_SSE2_ShiftLeftLogical128BitLane, TYP_INT, simdSize); // retNode = Sse.ShiftRightLogical128BitLane(retNode.AsInt32(), shiftCount).AsSingle() - retNode = gtNewSimdHWIntrinsicNode(retType, retNode, gtNewIconNode(shiftCount, TYP_INT), - NI_SSE2_ShiftRightLogical128BitLane, TYP_INT, simdSize); + retNode = gtNewSimdAsHWIntrinsicNode(retType, retNode, gtNewIconNode(shiftCount, TYP_INT), + NI_SSE2_ShiftRightLogical128BitLane, TYP_INT, simdSize); return retNode; } @@ -503,18 +503,18 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, // op1 = op1 - constVector // -or- // op1 = op1 + constVector - op1 = gtNewSimdHWIntrinsicNode(retType, op1, constVector, hwIntrinsic, baseType, simdSize); + op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, constVector, hwIntrinsic, baseType, simdSize); // op2 = op2 - constVector // -or- // op2 = op2 + constVector - op2 = gtNewSimdHWIntrinsicNode(retType, op2, constVectorDup, hwIntrinsic, baseType, simdSize); + op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, constVectorDup, hwIntrinsic, baseType, simdSize); // op1 = Max(op1, op2) // -or- // op1 = Min(op1, op2) hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); - op1 = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); // result = op1 + constVectorDup // -or- @@ -522,7 +522,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, opIntrinsic = (opIntrinsic == NI_VectorT128_op_Subtraction) ? 
NI_VectorT128_op_Addition : NI_VectorT128_op_Subtraction; hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(opIntrinsic, baseType); - return gtNewSimdHWIntrinsicNode(retType, op1, constVectorDup, hwIntrinsic, baseType, simdSize); + return gtNewSimdAsHWIntrinsicNode(retType, op1, constVectorDup, hwIntrinsic, baseType, + simdSize); } GenTree* op1Dup; @@ -574,33 +575,34 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, nullptr DEBUGARG("Clone for Vector multiply")); // op1 = Sse2.ShiftRightLogical128BitLane(op1, 4) - op1 = gtNewSimdHWIntrinsicNode(retType, op1, gtNewIconNode(4, TYP_INT), - NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize); + op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, gtNewIconNode(4, TYP_INT), + NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize); // op2 = Sse2.ShiftRightLogical128BitLane(op1, 4) - op2 = gtNewSimdHWIntrinsicNode(retType, op2, gtNewIconNode(4, TYP_INT), - NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize); + op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, gtNewIconNode(4, TYP_INT), + NI_SSE2_ShiftRightLogical128BitLane, baseType, simdSize); // op2 = Sse2.Multiply(op2.AsUInt64(), op1.AsUInt64()).AsInt32() - op2 = gtNewSimdHWIntrinsicNode(retType, op2, op1, NI_SSE2_Multiply, TYP_ULONG, simdSize); + op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, op1, NI_SSE2_Multiply, TYP_ULONG, simdSize); // op2 = Sse2.Shuffle(op2, (0, 0, 2, 0)) - op2 = gtNewSimdHWIntrinsicNode(retType, op2, gtNewIconNode(SHUFFLE_XXZX, TYP_INT), - NI_SSE2_Shuffle, baseType, simdSize); + op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, gtNewIconNode(SHUFFLE_XXZX, TYP_INT), + NI_SSE2_Shuffle, baseType, simdSize); // op1 = Sse2.Multiply(op1Dup.AsUInt64(), op2Dup.AsUInt64()).AsInt32() - op1 = gtNewSimdHWIntrinsicNode(retType, op1Dup, op2Dup, NI_SSE2_Multiply, TYP_ULONG, simdSize); + op1 = + gtNewSimdAsHWIntrinsicNode(retType, op1Dup, op2Dup, NI_SSE2_Multiply, TYP_ULONG, simdSize); // op1 = Sse2.Shuffle(op1, (0, 0, 2, 0)) - 
op1 = gtNewSimdHWIntrinsicNode(retType, op1, gtNewIconNode(SHUFFLE_XXZX, TYP_INT), - NI_SSE2_Shuffle, baseType, simdSize); + op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, gtNewIconNode(SHUFFLE_XXZX, TYP_INT), + NI_SSE2_Shuffle, baseType, simdSize); // result = Sse2.UnpackLow(op1, op2) hwIntrinsic = NI_SSE2_UnpackLow; } assert(hwIntrinsic != NI_Illegal); - return gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); } #elif defined(TARGET_ARM64) case NI_VectorT128_Max: @@ -624,7 +626,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, // -or- // op1 = op1 < op2 hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); - op1 = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); // result = ConditionalSelect(op1, op1Dup, op2Dup) return impSimdAsHWIntrinsicCndSel(clsHnd, retType, baseType, simdSize, op1, op1Dup, op2Dup); @@ -683,7 +685,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd, hwIntrinsic = varTypeIsIntegral(baseType) ? 
NI_AVX2_BlendVariable : NI_AVX_BlendVariable; } - return gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, hwIntrinsic, baseType, simdSize); + return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, op3, hwIntrinsic, baseType, simdSize); } #endif // TARGET_XARCH @@ -695,7 +697,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd, // op2 = op2 & op1 hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseAnd, baseType); - op2 = gtNewSimdHWIntrinsicNode(retType, op2, op1, hwIntrinsic, baseType, simdSize); + op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, op1, hwIntrinsic, baseType, simdSize); // op3 = op3 & ~op1Dup hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_AndNot, baseType); @@ -705,11 +707,11 @@ GenTree* Compiler::impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd, std::swap(op3, op1Dup); } - op3 = gtNewSimdHWIntrinsicNode(retType, op3, op1Dup, hwIntrinsic, baseType, simdSize); + op3 = gtNewSimdAsHWIntrinsicNode(retType, op3, op1Dup, hwIntrinsic, baseType, simdSize); // result = op2 | op3 hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseOr, baseType); - return gtNewSimdHWIntrinsicNode(retType, op2, op3, hwIntrinsic, baseType, simdSize); + return gtNewSimdAsHWIntrinsicNode(retType, op2, op3, hwIntrinsic, baseType, simdSize); } #if defined(TARGET_XARCH) @@ -773,20 +775,20 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, TYP_INT); assert(hwIntrinsic != intrinsic); - GenTree* t = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, TYP_INT, simdSize); + GenTree* t = gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, TYP_INT, simdSize); t = impCloneExpr(t, &op1, clsHnd, (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("Clone for Vector equality comparison")); - op2 = gtNewSimdHWIntrinsicNode(retType, t, gtNewIconNode(SHUFFLE_ZWXY, TYP_INT), NI_SSE2_Shuffle, - TYP_INT, 
simdSize); + op2 = gtNewSimdAsHWIntrinsicNode(retType, t, gtNewIconNode(SHUFFLE_ZWXY, TYP_INT), NI_SSE2_Shuffle, + TYP_INT, simdSize); hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseAnd, baseType); assert(hwIntrinsic != NI_VectorT128_op_BitwiseAnd); } assert(hwIntrinsic != NI_Illegal); - return gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); } case NI_VectorT128_GreaterThanOrEqual: @@ -854,7 +856,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, intrinsic = isVectorT256 ? NI_VectorT256_op_BitwiseOr : NI_VectorT128_op_BitwiseOr; NamedIntrinsic hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); - return gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); } case NI_VectorT128_GreaterThan: @@ -928,10 +930,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, NamedIntrinsic hwIntrinsic = isVectorT256 ? 
NI_AVX2_Subtract : NI_SSE2_Subtract; // op1 = op1 - constVector - op1 = gtNewSimdHWIntrinsicNode(retType, op1, constVector, hwIntrinsic, baseType, simdSize); + op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, constVector, hwIntrinsic, baseType, simdSize); // op2 = op2 - constVector - op2 = gtNewSimdHWIntrinsicNode(retType, op2, constVectorDup, hwIntrinsic, baseType, simdSize); + op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, constVectorDup, hwIntrinsic, baseType, simdSize); } // This should have been mutated by the above path @@ -998,22 +1000,22 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, GenTree* v = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, TYP_UINT, simdSize, op1Dup2, op2Dup2); - op1 = gtNewSimdHWIntrinsicNode(retType, t, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle, - TYP_INT, simdSize); + op1 = gtNewSimdAsHWIntrinsicNode(retType, t, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle, + TYP_INT, simdSize); - v = gtNewSimdHWIntrinsicNode(retType, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), NI_SSE2_Shuffle, TYP_INT, - simdSize); - u = gtNewSimdHWIntrinsicNode(retType, u, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle, TYP_INT, - simdSize); + v = gtNewSimdAsHWIntrinsicNode(retType, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), NI_SSE2_Shuffle, + TYP_INT, simdSize); + u = gtNewSimdAsHWIntrinsicNode(retType, u, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle, + TYP_INT, simdSize); hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseAnd, baseType); - op2 = gtNewSimdHWIntrinsicNode(retType, v, u, hwIntrinsic, baseType, simdSize); + op2 = gtNewSimdAsHWIntrinsicNode(retType, v, u, hwIntrinsic, baseType, simdSize); hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseOr, baseType); } assert(hwIntrinsic != NI_Illegal); - return gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); + return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, 
hwIntrinsic, baseType, simdSize); } default: From 5830101072445079e4e0121fc4b314ebd245cc9d Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 27 Apr 2020 16:33:28 -0700 Subject: [PATCH 21/40] Adding the 3 operand overload for gtNewSimdAsHWIntrinsicNode --- src/coreclr/src/jit/compiler.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index 96eb1f20d01225..e591737ce5b70f 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -2627,6 +2627,19 @@ class Compiler return node; } + GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode(var_types type, + GenTree* op1, + GenTree* op2, + GenTree* op3, + NamedIntrinsic hwIntrinsicID, + var_types baseType, + unsigned size) + { + GenTreeHWIntrinsic* node = gtNewSimdHWIntrinsicNode(type, op1, op2, op3, hwIntrinsicID, baseType, size); + node->gtFlags |= GTF_SIMDASHW_OP; + return node; + } + GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID); GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, From 4f02f57c5b66bbcfd928dc7f7b12fc4a63922394 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 27 Apr 2020 20:53:06 -0700 Subject: [PATCH 22/40] Fixing BuildHWIntrinsic to properly take RMW into account --- .../src/jit/hwintrinsiccodegenxarch.cpp | 13 +-- src/coreclr/src/jit/hwintrinsiclistxarch.h | 2 +- src/coreclr/src/jit/lsraxarch.cpp | 87 +++++++++---------- 3 files changed, 46 insertions(+), 56 deletions(-) diff --git a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp index e8d5ddc82886d6..4b3c9101261c7c 100644 --- a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp @@ -1942,16 +1942,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || 
!op1->isContained()); - if (op3->isContained() || op3->isUsedFromSpillTemp()) - { - // 213 form: op1 = (op2 * op1) + [op3] - - op1Reg = op1->GetRegNum(); - op2Reg = op2->GetRegNum(); - - isCommutative = !copiesUpperBits; - } - else if (op2->isContained() || op2->isUsedFromSpillTemp()) + if (op2->isContained() || op2->isUsedFromSpillTemp()) { // 132 form: op1 = (op1 * op3) + [op2] @@ -1971,7 +1962,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else { - // 213 form: op1 = (op2 * op1) + op3 + // 213 form: op1 = (op2 * op1) + [op3] op1Reg = op1->GetRegNum(); op2Reg = op2->GetRegNum(); diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index 32eac239096ba2..d14c5f1a81d9b0 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -496,7 +496,7 @@ HARDWARE_INTRINSIC(AVX2, ConvertToUInt32, HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int16, 32, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int32, 32, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int64, 32, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX2, GatherVector128, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) 
+HARDWARE_INTRINSIC(AVX2, GatherVector128, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) HARDWARE_INTRINSIC(AVX2, GatherVector256, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment) HARDWARE_INTRINSIC(AVX2, GatherMaskVector128, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) HARDWARE_INTRINSIC(AVX2, GatherMaskVector256, 32, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment) diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index 524de87f521a16..cba6cb88ffd44e 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -2458,8 +2458,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) assert(isRMW); // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0 - srcCount += BuildOperandUses(op1); - srcCount += BuildDelayFreeUses(op2); + tgtPrefUse = BuildUse(op1); + + srcCount += 1; + srcCount += op2->isContained() ? 
BuildOperandUses(op2) : BuildDelayFreeUses(op2); srcCount += BuildDelayFreeUses(op3, RBM_XMM0); buildUses = false; @@ -2493,7 +2495,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) assert(isRMW); // CRC32 may operate over "byte" but on x86 only RBM_BYTE_REGS can be used as byte registers. - srcCount += BuildOperandUses(op1); + tgtPrefUse = BuildUse(op1); + + srcCount += 1; srcCount += BuildDelayFreeUses(op2, varTypeIsByte(baseType) ? allByteRegs() : RBM_NONE); buildUses = false; @@ -2539,29 +2543,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - if (op3->isContained()) - { - // 213 form: op1 = (op2 * op1) + [op3] - - if (copiesUpperBits) - { - tgtPrefUse = BuildUse(op1); - - srcCount += 1; - srcCount += BuildDelayFreeUses(op2); - } - else - { - // op1 and op2 are commutative, so don't - // set either to be tgtPref or delayFree - - srcCount += BuildOperandUses(op1); - srcCount += BuildOperandUses(op2); - } - - srcCount += BuildOperandUses(op3); - } - else if (op2->isContained()) + if (op2->isContained()) { // 132 form: op1 = (op1 * op3) + [op2] @@ -2583,25 +2565,22 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } else { - // 213 form: op1 = (op2 * op1) + op3 + // 213 form: op1 = (op2 * op1) + [op3] + + tgtPrefUse = BuildUse(op1); + srcCount += 1; if (copiesUpperBits) { - tgtPrefUse = BuildUse(op1); - - srcCount += 1; srcCount += BuildDelayFreeUses(op2); } else { - // op1 and op2 are commutative, so don't - // set either to be tgtPref or delayFree - - srcCount += BuildOperandUses(op1); - srcCount += BuildOperandUses(op2); + tgtPrefUse2 = BuildUse(op2); + srcCount += 1; } - srcCount += BuildDelayFreeUses(op3); + srcCount += op3->isContained() ? 
BuildOperandUses(op3) : BuildDelayFreeUses(op3); } buildUses = false; @@ -2612,9 +2591,12 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) case NI_AVX2_GatherVector256: { assert(numArgs == 3); + assert(!isRMW); + // Any pair of the index, mask, or destination registers should be different srcCount += BuildOperandUses(op1); srcCount += BuildDelayFreeUses(op2); + srcCount += BuildDelayFreeUses(op3); // get a tmp register for mask that will be cleared by gather instructions buildInternalFloatRegisterDefForNode(intrinsicTree, allSIMDRegs()); @@ -2628,15 +2610,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) case NI_AVX2_GatherMaskVector256: { assert(numArgs == 5); + assert(!isRMW); + assert(intrinsicTree->gtGetOp1()->OperIsList()); + + GenTreeArgList* argList = intrinsicTree->gtGetOp1()->AsArgList()->Rest()->Rest()->Rest(); + GenTree* op4 = argList->Current(); + GenTree* op5 = argList->Rest()->Current(); + // Any pair of the index, mask, or destination registers should be different srcCount += BuildOperandUses(op1); - srcCount += BuildOperandUses(op2); + srcCount += BuildDelayFreeUses(op2); srcCount += BuildDelayFreeUses(op3); - - assert(intrinsicTree->gtGetOp1()->OperIsList()); - GenTreeArgList* argList = intrinsicTree->gtGetOp1()->AsArgList(); - GenTree* op4 = argList->Rest()->Rest()->Rest()->Current(); srcCount += BuildDelayFreeUses(op4); + srcCount += BuildDelayFreeUses(op5); // get a tmp register for mask that will be cleared by gather instructions buildInternalFloatRegisterDefForNode(intrinsicTree, allSIMDRegs()); @@ -2661,6 +2647,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { srcCount += BuildAddrUses(op1); } + else if (isRMW && !op1->isContained()) + { + tgtPrefUse = BuildUse(op1); + srcCount += 1; + } else { srcCount += BuildOperandUses(op1); @@ -2672,9 +2663,17 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { srcCount += BuildAddrUses(op2->gtGetOp1()); } - else 
if (isRMW) + else if (isRMW && !op2->isContained()) { - srcCount += BuildDelayFreeUses(op2); + if (HWIntrinsicInfo::IsCommutative(intrinsicId)) + { + tgtPrefUse2 = BuildUse(op2); + srcCount += 1; + } + else + { + srcCount += BuildDelayFreeUses(op2); + } } else { @@ -2683,7 +2682,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) if (op3 != nullptr) { - srcCount += (isRMW) ? BuildDelayFreeUses(op3) : BuildOperandUses(op3); + srcCount += isRMW ? BuildDelayFreeUses(op3) : BuildOperandUses(op3); } } } From 39f5086f4cf60f4342ce7d7b8aec34acb85c6c29 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 28 Apr 2020 12:22:52 -0700 Subject: [PATCH 23/40] Fixing the rationalize handling of GT_HWINTRINSIC to account for SIMD vs non-SIMD nodes --- src/coreclr/src/jit/rationalize.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/coreclr/src/jit/rationalize.cpp b/src/coreclr/src/jit/rationalize.cpp index 9f17c9adc0813e..07bdd29f834111 100644 --- a/src/coreclr/src/jit/rationalize.cpp +++ b/src/coreclr/src/jit/rationalize.cpp @@ -771,20 +771,24 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: { - noway_assert(comp->supportSIMDTypes()); + GenTreeHWIntrinsic* hwIntrinsicNode = node->AsHWIntrinsic(); + + if (!hwIntrinsicNode->isSIMD()) + { + break; + } - GenTreeHWIntrinsic* simdNode = node->AsHWIntrinsic(); - unsigned simdSize = simdNode->gtSIMDSize; + noway_assert(comp->supportSIMDTypes()); // TODO-1stClassStructs: This should be handled more generally for enregistered or promoted // structs that are passed or returned in a different register type than their enregistered // type(s). - if (simdNode->gtType == TYP_I_IMPL && simdNode->gtSIMDSize == TARGET_POINTER_SIZE) + if ((hwIntrinsicNode->gtType == TYP_I_IMPL) && (hwIntrinsicNode->gtSIMDSize == TARGET_POINTER_SIZE)) { // This happens when it is consumed by a GT_RET_EXPR. 
// It can only be a Vector2f or Vector2i. - assert(genTypeSize(simdNode->gtSIMDBaseType) == 4); - simdNode->gtType = TYP_SIMD8; + assert(genTypeSize(hwIntrinsicNode->gtSIMDBaseType) == 4); + hwIntrinsicNode->gtType = TYP_SIMD8; } break; } From fe969fda135303fdfe84bc0c9e57ef012fc00b4a Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 28 Apr 2020 12:24:08 -0700 Subject: [PATCH 24/40] Fixing the importer to not create SIMD nodes if featureSIMD is disabled --- src/coreclr/src/jit/importer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index e05b7397cbeb8e..de4697e4dd69bf 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -4143,7 +4143,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, case NI_System_MathF_FusedMultiplyAdd: { #ifdef TARGET_XARCH - if (compExactlyDependsOn(InstructionSet_FMA)) + if (compExactlyDependsOn(InstructionSet_FMA) && supportSIMDTypes()) { assert(varTypeIsFloating(callType)); From bf291c5a210c8d5f7ace054d710ecbf2cd968bf4 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 28 Apr 2020 13:08:31 -0700 Subject: [PATCH 25/40] Fixing the SSE4.2 implementation of CompareLessThan --- src/coreclr/src/jit/hwintrinsiclistxarch.h | 1 + src/coreclr/src/jit/lowerxarch.cpp | 3 +++ src/coreclr/src/jit/simdashwintrinsic.cpp | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index d14c5f1a81d9b0..64cd58d88ea5b3 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -384,6 +384,7 @@ HARDWARE_INTRINSIC(SSE41_X64, Insert, // SSE42 Intrinsics HARDWARE_INTRINSIC(SSE42, Crc32, 0, 2, {INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) 
HARDWARE_INTRINSIC(SSE42, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(SSE42, CompareLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags diff --git a/src/coreclr/src/jit/lowerxarch.cpp b/src/coreclr/src/jit/lowerxarch.cpp index f85c2248d089bb..ad0426bbb620da 100644 --- a/src/coreclr/src/jit/lowerxarch.cpp +++ b/src/coreclr/src/jit/lowerxarch.cpp @@ -933,6 +933,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { if (node->gtSIMDBaseType != TYP_DOUBLE) { + assert(varTypeIsIntegral(node->gtSIMDBaseType)); break; } @@ -960,12 +961,14 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } case NI_SSE2_CompareLessThan: + case NI_SSE42_CompareLessThan: case NI_AVX2_CompareLessThan: { if (node->gtSIMDBaseType == TYP_DOUBLE) { break; } + assert(varTypeIsIntegral(node->gtSIMDBaseType)); // this isn't actually supported in hardware so we need to swap the operands around std::swap(node->gtOp1, node->gtOp2); diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 9c33561476d41c..6fe81026b17a40 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -946,7 +946,8 @@ 
GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, } else if (compOpportunisticallyDependsOn(InstructionSet_SSE42)) { - hwIntrinsic = NI_SSE42_CompareGreaterThan; + hwIntrinsic = + (intrinsic == NI_VectorT128_GreaterThan) ? NI_SSE42_CompareGreaterThan : NI_SSE42_CompareLessThan; } else { From ef4a77ce39022ec716758097f87b29eba3e7981f Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 29 Apr 2020 12:33:09 -0700 Subject: [PATCH 26/40] Preserve the base type for subtraction/addition operations --- src/coreclr/src/jit/simdashwintrinsic.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 6fe81026b17a40..8f7b808d912597 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -462,7 +462,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { if ((baseType == TYP_BYTE) || (baseType == TYP_USHORT)) { - GenTree* constVal = nullptr; + GenTree* constVal = nullptr; + var_types opType = baseType; NamedIntrinsic opIntrinsic; NamedIntrinsic hwIntrinsic; @@ -498,17 +499,17 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, constVector = impCloneExpr(constVector, &constVectorDup, clsHnd, (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("Clone for Vector min/max")); - hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(opIntrinsic, baseType); + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(opIntrinsic, opType); // op1 = op1 - constVector // -or- // op1 = op1 + constVector - op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, constVector, hwIntrinsic, baseType, simdSize); + op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, constVector, hwIntrinsic, opType, simdSize); // op2 = op2 - constVector // -or- // op2 = op2 + constVector - op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, constVectorDup, hwIntrinsic, baseType, simdSize); + op2 = 
gtNewSimdAsHWIntrinsicNode(retType, op2, constVectorDup, hwIntrinsic, opType, simdSize); // op1 = Max(op1, op2) // -or- @@ -521,8 +522,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, // result = op1 - constVectorDup opIntrinsic = (opIntrinsic == NI_VectorT128_op_Subtraction) ? NI_VectorT128_op_Addition : NI_VectorT128_op_Subtraction; - hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(opIntrinsic, baseType); - return gtNewSimdAsHWIntrinsicNode(retType, op1, constVectorDup, hwIntrinsic, baseType, + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(opIntrinsic, opType); + return gtNewSimdAsHWIntrinsicNode(retType, op1, constVectorDup, hwIntrinsic, opType, simdSize); } @@ -882,7 +883,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, // We need to treat op1 and op2 as signed for comparison purpose after // the transformation. - GenTree* constVal = nullptr; + GenTree* constVal = nullptr; + var_types opType = baseType; switch (baseType) { @@ -930,10 +932,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, NamedIntrinsic hwIntrinsic = isVectorT256 ? 
NI_AVX2_Subtract : NI_SSE2_Subtract; // op1 = op1 - constVector - op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, constVector, hwIntrinsic, baseType, simdSize); + op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, constVector, hwIntrinsic, opType, simdSize); // op2 = op2 - constVector - op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, constVectorDup, hwIntrinsic, baseType, simdSize); + op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, constVectorDup, hwIntrinsic, opType, simdSize); } // This should have been mutated by the above path From e1b5acb5c477836c70979250b4e6c9d210b438ac Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 29 Apr 2020 14:32:10 -0700 Subject: [PATCH 27/40] Applying formatting patch --- src/coreclr/src/jit/simdashwintrinsic.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 8f7b808d912597..c9917888dbace5 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -523,8 +523,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, opIntrinsic = (opIntrinsic == NI_VectorT128_op_Subtraction) ? 
NI_VectorT128_op_Addition : NI_VectorT128_op_Subtraction; hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(opIntrinsic, opType); - return gtNewSimdAsHWIntrinsicNode(retType, op1, constVectorDup, hwIntrinsic, opType, - simdSize); + return gtNewSimdAsHWIntrinsicNode(retType, op1, constVectorDup, hwIntrinsic, opType, simdSize); } GenTree* op1Dup; From 1b837a638eff15f104926399f5f46a57566db4f1 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 29 Apr 2020 16:48:56 -0700 Subject: [PATCH 28/40] Responding to PR feedback --- src/coreclr/src/jit/lower.cpp | 16 ++++------------ src/coreclr/src/jit/lsraxarch.cpp | 9 ++++++--- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/src/coreclr/src/jit/lower.cpp b/src/coreclr/src/jit/lower.cpp index 03f0692cbb826a..2228170bb557ba 100644 --- a/src/coreclr/src/jit/lower.cpp +++ b/src/coreclr/src/jit/lower.cpp @@ -1331,20 +1331,12 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) LclVarDsc* varDsc = &comp->lvaTable[varNum]; type = varDsc->lvType; } - else if (arg->OperIsSIMD()) + else if (arg->OperIs(GT_SIMD, GT_HWINTRINSIC)) { - assert((arg->AsSIMD()->gtSIMDSize == 16) || (arg->AsSIMD()->gtSIMDSize == 12)); + GenTreeJitIntrinsic* jitIntrinsic = reinterpret_cast(arg); + assert((jitIntrinsic->gtSIMDSize == 12) || (jitIntrinsic->gtSIMDSize == 16)); - if (arg->AsSIMD()->gtSIMDSize == 12) - { - type = TYP_SIMD12; - } - } - else if (arg->OperIsHWIntrinsic()) - { - assert((arg->AsHWIntrinsic()->gtSIMDSize == 16) || (arg->AsHWIntrinsic()->gtSIMDSize == 12)); - - if (arg->AsHWIntrinsic()->gtSIMDSize == 12) + if (jitIntrinsic->gtSIMDSize == 12) { type = TYP_SIMD12; } diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index cba6cb88ffd44e..f784b554538716 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -2596,7 +2596,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) // Any pair of the index, mask, or destination 
registers should be different srcCount += BuildOperandUses(op1); srcCount += BuildDelayFreeUses(op2); - srcCount += BuildDelayFreeUses(op3); + + // op3 should always be contained + assert(op3->isContained()); // get a tmp register for mask that will be cleared by gather instructions buildInternalFloatRegisterDefForNode(intrinsicTree, allSIMDRegs()); @@ -2615,14 +2617,15 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) GenTreeArgList* argList = intrinsicTree->gtGetOp1()->AsArgList()->Rest()->Rest()->Rest(); GenTree* op4 = argList->Current(); - GenTree* op5 = argList->Rest()->Current(); // Any pair of the index, mask, or destination registers should be different srcCount += BuildOperandUses(op1); srcCount += BuildDelayFreeUses(op2); srcCount += BuildDelayFreeUses(op3); srcCount += BuildDelayFreeUses(op4); - srcCount += BuildDelayFreeUses(op5); + + // op5 should always be contained + assert(argList->Rest()->Current()->isContained()); // get a tmp register for mask that will be cleared by gather instructions buildInternalFloatRegisterDefForNode(intrinsicTree, allSIMDRegs()); From ac867ec9b326328a24f9685761e82845d274aa1d Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 29 Apr 2020 19:34:22 -0700 Subject: [PATCH 29/40] Fixing a copy/paste error under reinterpret cast --- src/coreclr/src/jit/lower.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/lower.cpp b/src/coreclr/src/jit/lower.cpp index 2228170bb557ba..f54b4574ef7d4c 100644 --- a/src/coreclr/src/jit/lower.cpp +++ b/src/coreclr/src/jit/lower.cpp @@ -1333,7 +1333,7 @@ void Lowering::LowerArg(GenTreeCall* call, GenTree** ppArg) } else if (arg->OperIs(GT_SIMD, GT_HWINTRINSIC)) { - GenTreeJitIntrinsic* jitIntrinsic = reinterpret_cast(arg); + GenTreeJitIntrinsic* jitIntrinsic = reinterpret_cast<GenTreeJitIntrinsic*>(arg); assert((jitIntrinsic->gtSIMDSize == 12) || (jitIntrinsic->gtSIMDSize == 16)); if (jitIntrinsic->gtSIMDSize == 12) From 
7482c498e295883bbb1354517d05ecaed3125fb3 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 1 May 2020 10:36:36 -0700 Subject: [PATCH 30/40] Fixing abs to expect 1 argument --- src/coreclr/src/jit/simdashwintrinsiclistarm64.h | 8 ++++---- src/coreclr/src/jit/simdashwintrinsiclistxarch.h | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h index 38f3a219c2612c..276270cf31f265 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h @@ -26,7 +26,7 @@ // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector2 Intrinsics -SIMD_AS_HWINTRINSIC(Vector2, Abs, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, GreaterThan, 2, 
{NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -44,7 +44,7 @@ SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Ille // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector3 Intrinsics -SIMD_AS_HWINTRINSIC(Vector3, Abs, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) 
SIMD_AS_HWINTRINSIC(Vector3, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -62,7 +62,7 @@ SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Ille // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector4 Intrinsics -SIMD_AS_HWINTRINSIC(Vector4, Abs, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_AdvSimd_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -80,7 +80,7 @@ SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Ille // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector Intrinsics -SIMD_AS_HWINTRINSIC(VectorT128, Abs, 2, {NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Arm64_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_AdvSimd_Arm64_Abs}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, Abs, 1, {NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_VectorT128_Abs, NI_AdvSimd_Arm64_Abs, NI_VectorT128_Abs, NI_AdvSimd_Abs, NI_AdvSimd_Arm64_Abs}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear, NI_AdvSimd_BitwiseClear}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_Arm64_CompareEqual, NI_AdvSimd_CompareEqual, 
NI_AdvSimd_Arm64_CompareEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan, NI_AdvSimd_CompareGreaterThan, NI_AdvSimd_Arm64_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h index 7bf789819eb648..f2efd040f59319 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -26,7 +26,7 @@ // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector2 Intrinsics -SIMD_AS_HWINTRINSIC(Vector2, Abs, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareEqual, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -44,7 +44,7 @@ SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Ille // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector3 Intrinsics -SIMD_AS_HWINTRINSIC(Vector3, Abs, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThan, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -62,7 +62,7 @@ SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Ille // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector4 Intrinsics -SIMD_AS_HWINTRINSIC(Vector4, Abs, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThanOrEqual, 
NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -80,7 +80,7 @@ SIMD_AS_HWINTRINSIC(Vector4, op_Subtraction, 2, {NI_Ille // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector Intrinsics -SIMD_AS_HWINTRINSIC(VectorT128, Abs, 2, {NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT128, Abs, 1, {NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs, NI_VectorT128_Abs}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, AndNot, 2, {NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE2_AndNot, NI_SSE_AndNot, NI_SSE2_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) SIMD_AS_HWINTRINSIC(VectorT128, Equals, 2, {NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_SSE2_CompareEqual, NI_VectorT128_Equals, NI_VectorT128_Equals, NI_SSE_CompareEqual, NI_SSE2_CompareEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT128, GreaterThan, 2, {NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, 
NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_SSE2_CompareGreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_VectorT128_GreaterThan, NI_SSE_CompareGreaterThan, NI_SSE2_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) @@ -102,7 +102,7 @@ SIMD_AS_HWINTRINSIC(VectorT128, op_Subtraction, 2, {NI_SSE2 // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector Intrinsics -SIMD_AS_HWINTRINSIC(VectorT256, Abs, 2, {NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs}, SimdAsHWIntrinsicFlag::None) +SIMD_AS_HWINTRINSIC(VectorT256, Abs, 1, {NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_AVX2_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs, NI_VectorT256_Abs}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, AndNot, 2, {NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX2_AndNot, NI_AVX_AndNot, NI_AVX_AndNot}, SimdAsHWIntrinsicFlag::NeedsOperandsSwapped) SIMD_AS_HWINTRINSIC(VectorT256, Equals, 2, {NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX2_CompareEqual, NI_AVX_CompareEqual, 
NI_AVX_CompareEqual}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(VectorT256, GreaterThan, 2, {NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX2_CompareGreaterThan, NI_VectorT256_GreaterThan, NI_AVX_CompareGreaterThan, NI_AVX_CompareGreaterThan}, SimdAsHWIntrinsicFlag::None) From c7ef80d711576009d16bf6568230d2cb7edc4ca3 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 1 May 2020 10:36:54 -0700 Subject: [PATCH 31/40] Adding method comment headers that were missing --- src/coreclr/src/jit/simdashwintrinsic.cpp | 58 +++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index c9917888dbace5..827b30c87197cf 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -143,6 +143,19 @@ SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(const char* classN return SimdAsHWIntrinsicClassId::Unknown; } +//------------------------------------------------------------------------ +// impSimdAsHWIntrinsic: Import a SIMD intrinsic as a GT_HWINTRINSIC node if possible +// +// Arguments: +// intrinsic -- id of the intrinsic function. +// clsHnd -- class handle containing the intrinsic function. +// method -- method handle of the intrinsic function. 
+// sig -- signature of the intrinsic call +// mustExpand -- true if the intrinsic must return a GenTree*; otherwise, false +// +// Return Value: +// The GT_HWINTRINSIC node, or nullptr if not a supported intrinsic +// GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_CLASS_HANDLE clsHnd, CORINFO_METHOD_HANDLE method, @@ -251,6 +264,21 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, return nullptr; } +//------------------------------------------------------------------------ +// impSimdAsHWIntrinsicSpecial: Import a SIMD intrinsic as a GT_HWINTRINSIC node if possible +// This method handles cases which cannot be table driven +// +// Arguments: +// intrinsic -- id of the intrinsic function. +// clsHnd -- class handle containing the intrinsic function. +// sig -- signature of the intrinsic call +// retType -- the return type of the intrinsic call +// baseType -- the base type of SIMD type of the intrinsic +// simdSize -- the size of the SIMD type of the intrinsic +// +// Return Value: +// The GT_HWINTRINSIC node, or nullptr if not a supported intrinsic +// GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, CORINFO_CLASS_HANDLE clsHnd, CORINFO_SIG_INFO* sig, @@ -650,6 +678,21 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, return nullptr; } +//------------------------------------------------------------------------ +// impSimdAsHWIntrinsicCndSel: Import a SIMD conditional select intrinsic +// +// Arguments: +// clsHnd -- class handle containing the intrinsic function. 
+// retType -- the return type of the intrinsic call +// baseType -- the base type of SIMD type of the intrinsic +// simdSize -- the size of the SIMD type of the intrinsic +// op1 -- the first operand of the intrinsic +// op2 -- the second operand of the intrinsic +// op3 -- the third operand of the intrinsic +// +// Return Value: +// The GT_HWINTRINSIC node representing the conditional select +// GenTree* Compiler::impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd, var_types retType, var_types baseType, @@ -715,6 +758,21 @@ GenTree* Compiler::impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd, } #if defined(TARGET_XARCH) +//------------------------------------------------------------------------ +// impSimdAsHWIntrinsicRelOp: Import a SIMD relational operator intrinsic +// +// Arguments: +// intrinsic -- id of the intrinsic function. +// clsHnd -- class handle containing the intrinsic function. +// retType -- the return type of the intrinsic call +// baseType -- the base type of SIMD type of the intrinsic +// simdSize -- the size of the SIMD type of the intrinsic +// op1 -- the first operand of the intrinsic +// op2 -- the second operand of the intrinsic +// +// Return Value: +// The GT_HWINTRINSIC node representing the relational operator +// GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, CORINFO_CLASS_HANDLE clsHnd, var_types retType, From e569d252ea061cdfa8c5bc9f7e7a80ff8da33e7d Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 1 May 2020 11:35:51 -0700 Subject: [PATCH 32/40] Removing unused table entries from SimdAsHWIntrinsic for Vector2/3/4 --- src/coreclr/src/jit/simdashwintrinsiclistarm64.h | 15 --------------- src/coreclr/src/jit/simdashwintrinsiclistxarch.h | 15 --------------- 2 files changed, 30 deletions(-) diff --git a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h index 276270cf31f265..cfd47939cf3dc6 100644 --- 
a/src/coreclr/src/jit/simdashwintrinsiclistarm64.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistarm64.h @@ -27,11 +27,6 @@ // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector2 Intrinsics SIMD_AS_HWINTRINSIC(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThanOrEqual, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -45,11 +40,6 @@ SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Ille // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector3 Intrinsics SIMD_AS_HWINTRINSIC(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -63,11 +53,6 @@ SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Ille // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector4 Intrinsics SIMD_AS_HWINTRINSIC(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Abs, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_AdvSimd_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) diff --git a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h index f2efd040f59319..8f2ac6264041cd 100644 --- a/src/coreclr/src/jit/simdashwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/simdashwintrinsiclistxarch.h @@ -27,11 +27,6 @@ // 
************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector2 Intrinsics SIMD_AS_HWINTRINSIC(Vector2, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector2_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector2, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, 
SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector2, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -45,11 +40,6 @@ SIMD_AS_HWINTRINSIC(Vector2, op_Subtraction, 2, {NI_Ille // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector3 Intrinsics SIMD_AS_HWINTRINSIC(Vector3, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector3_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, GreaterThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, 
NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector3, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector3, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) @@ -63,11 +53,6 @@ SIMD_AS_HWINTRINSIC(Vector3, op_Subtraction, 2, {NI_Ille // ************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************* // Vector4 Intrinsics SIMD_AS_HWINTRINSIC(Vector4, Abs, 1, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Vector4_Abs, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, Equals, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, GreaterThan, 2, 
{NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, GreaterThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareGreaterThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, LessThan, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThan, NI_Illegal}, SimdAsHWIntrinsicFlag::None) -SIMD_AS_HWINTRINSIC(Vector4, LessThanOrEqual, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_CompareLessThanOrEqual, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, Max, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Max, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, Min, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Min, NI_Illegal}, SimdAsHWIntrinsicFlag::None) SIMD_AS_HWINTRINSIC(Vector4, op_Addition, 2, {NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_Illegal, NI_SSE_Add, NI_Illegal}, SimdAsHWIntrinsicFlag::None) From 991a838295872d2d3791c31f5616221c17f065fe Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 1 May 2020 11:38:45 -0700 Subject: [PATCH 33/40] Ensure we catch intrinsics from the Vector static class --- src/coreclr/src/jit/simdashwintrinsic.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 827b30c87197cf..27ac92daa86db6 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -127,7 +127,7 @@ SimdAsHWIntrinsicClassId 
SimdAsHWIntrinsicInfo::lookupClassId(const char* classN { return SimdAsHWIntrinsicClassId::Vector4; } - if (strcmp(className, "Vector`1") == 0) + if ((strcmp(className, "Vector") == 0) || (strcmp(className, "Vector`1") == 0)) { #if defined(TARGET_XARCH) if (sizeOfVectorT == 32) From 017fe54fa5e90355a573d1cd4f47ac10c2c1994a Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 1 May 2020 12:47:08 -0700 Subject: [PATCH 34/40] Fixing SSSE3_Abs and AVX2_Abs to get the base type from the first argument --- src/coreclr/src/jit/hwintrinsiclistxarch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/src/jit/hwintrinsiclistxarch.h b/src/coreclr/src/jit/hwintrinsiclistxarch.h index 64cd58d88ea5b3..eb3b5d31cd2f71 100644 --- a/src/coreclr/src/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/src/jit/hwintrinsiclistxarch.h @@ -318,7 +318,7 @@ HARDWARE_INTRINSIC(SSE3, MoveLowAndDuplicate, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SSSE3 Intrinsics -HARDWARE_INTRINSIC(SSSE3, Abs, 16, 1, {INS_invalid, INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSSE3, Abs, 16, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(SSSE3, AlignRight, 16, 3, {INS_palignr, 
INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(SSSE3, HorizontalAdd, 16, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_invalid, INS_phaddd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSSE3, HorizontalAddSaturate, 16, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) @@ -476,7 +476,7 @@ HARDWARE_INTRINSIC(AVX, Xor, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX2 Intrinsics -HARDWARE_INTRINSIC(AVX2, Abs, 32, 1, {INS_invalid, INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(AVX2, Abs, 32, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2, Add, 32, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX2, AddSaturate, 32, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX2, AlignRight, 32, 3, {INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) From 06bec3efcb9eeb27bcec1c9ef20be3258d308530 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 1 May 2020 14:18:34 -0700 Subject: [PATCH 35/40] Ensure we adjust the class handle used for intrinsics from the Vector static class --- src/coreclr/src/jit/simdashwintrinsic.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 27ac92daa86db6..16584d929f1dd9 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -208,6 +208,13 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, compFloatingPointUsed = true; } + if ((clsHnd == m_simdHandleCache->SIMDVectorHandle) && (sig->numArgs != 0)) + { + // We need to fixup the clsHnd in the case we are an intrinsic on Vector + // The first argument will be the appropriate Vector handle to use + clsHnd = info.compCompHnd->getArgClass(sig, sig->args); + } + if (hwIntrinsic == intrinsic) { // The SIMD intrinsic requires special handling outside the normal code path From bd6e87a821d8ae1d8d3272bab99b91db67064f47 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 1 May 2020 15:36:41 -0700 Subject: [PATCH 36/40] Ensure we populate the handle cache for clsHnd even if it isn't used --- src/coreclr/src/jit/simdashwintrinsic.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 16584d929f1dd9..9487ebedc20394 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -175,6 +175,18 @@ GenTree* 
Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, var_types simdType = TYP_UNKNOWN; unsigned simdSize = 0; + // We want to resolve and populate the handle cache for this type even + // if it isn't the basis for anything carried on the node. + baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize); + assert(simdSize != 0); + + if ((clsHnd == m_simdHandleCache->SIMDVectorHandle) && (sig->numArgs != 0)) + { + // We need to fixup the clsHnd in the case we are an intrinsic on Vector + // The first argument will be the appropriate Vector handle to use + clsHnd = info.compCompHnd->getArgClass(sig, sig->args); + } + if (retType == TYP_STRUCT) { baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &simdSize); @@ -208,13 +220,6 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, compFloatingPointUsed = true; } - if ((clsHnd == m_simdHandleCache->SIMDVectorHandle) && (sig->numArgs != 0)) - { - // We need to fixup the clsHnd in the case we are an intrinsic on Vector - // The first argument will be the appropriate Vector handle to use - clsHnd = info.compCompHnd->getArgClass(sig, sig->args); - } - if (hwIntrinsic == intrinsic) { // The SIMD intrinsic requires special handling outside the normal code path From 341aac8c9725f508c9978c0128f73a3bcdb0663f Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 1 May 2020 17:01:37 -0700 Subject: [PATCH 37/40] Fix where we grab the base type from for the static Vector class --- src/coreclr/src/jit/simdashwintrinsic.cpp | 31 +++++++++++++++-------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 9487ebedc20394..c36e7d8c68ebee 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -180,25 +180,35 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize); assert(simdSize != 0); 
- if ((clsHnd == m_simdHandleCache->SIMDVectorHandle) && (sig->numArgs != 0)) - { - // We need to fixup the clsHnd in the case we are an intrinsic on Vector - // The first argument will be the appropriate Vector handle to use - clsHnd = info.compCompHnd->getArgClass(sig, sig->args); - } + CORINFO_CLASS_HANDLE argClass; if (retType == TYP_STRUCT) { baseType = getBaseTypeAndSizeOfSIMDType(sig->retTypeSigClass, &simdSize); - simdType = getSIMDTypeForSize(simdSize); - retType = simdType; + retType = getSIMDTypeForSize(simdSize); } else { - assert(!"Unexpected SimdAsHWIntrinsic"); - return nullptr; + argClass = info.compCompHnd->getArgClass(sig, sig->args); + baseType = getBaseTypeAndSizeOfSIMDType(argClass, &simdSize); } + if ((clsHnd == m_simdHandleCache->SIMDVectorHandle) && (sig->numArgs != 0)) + { + // We need to fixup the clsHnd in the case we are an intrinsic on Vector + // The first argument will be the appropriate Vector handle to use + clsHnd = info.compCompHnd->getArgClass(sig, sig->args); + + // We also need to adjust the baseType as some methods on Vector return + // a type different than the operation we need to perform. An example + // is LessThan or Equals which takes double but returns long. This is + // unlike the counterparts on Vector which take and return the same type. 
+ baseType = getBaseTypeAndSizeOfSIMDType(clsHnd, &simdSize); + } + + simdType = getSIMDTypeForSize(simdSize); + assert(varTypeIsSIMD(simdType)); + if (!varTypeIsArithmetic(baseType)) { // We only support intrinsics on the 10 primitive arithmetic types @@ -236,7 +246,6 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_ARG_LIST_HANDLE argList = sig->args; var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass; GenTree* op1 = nullptr; GenTree* op2 = nullptr; From b6494eee4f1e1ab7125aa2e6cbe5d09f7f893f5e Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 1 May 2020 20:37:55 -0700 Subject: [PATCH 38/40] Fixing ConditionalSelect and improving the messages used for impCloneExpr in SimdAsHWIntrinsic --- src/coreclr/src/jit/simdashwintrinsic.cpp | 68 +++++++++++------------ 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index c36e7d8c68ebee..9cf9e9d34c0b8b 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -404,11 +404,11 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, GenTree* op1Dup1; op1 = impCloneExpr(op1, &op1Dup1, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector absolute value")); + nullptr DEBUGARG("Clone op1 for Vector.Abs")); GenTree* op1Dup2; op1Dup1 = impCloneExpr(op1Dup1, &op1Dup2, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector absolute value")); + nullptr DEBUGARG("Clone op1 for Vector.Abs")); // op1 = op1 < Zero tmp = gtNewSIMDVectorZero(retType, baseType, simdSize); @@ -498,16 +498,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, return impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, baseType, simdSize, op1, op2); } - case NI_VectorT256_Max: - case NI_VectorT256_Min: - { - assert((baseType == TYP_LONG) || (baseType == TYP_ULONG)); - intrinsic = 
(intrinsic == NI_VectorT256_Max) ? NI_VectorT128_Max : NI_VectorT128_Min; - __fallthrough; - } - case NI_VectorT128_Max: case NI_VectorT128_Min: + case NI_VectorT256_Max: + case NI_VectorT256_Min: { if ((baseType == TYP_BYTE) || (baseType == TYP_USHORT)) { @@ -546,7 +540,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, GenTree* constVectorDup; constVector = impCloneExpr(constVector, &constVectorDup, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector min/max")); + nullptr DEBUGARG("Clone constVector for Vector.Max/Min")); hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(opIntrinsic, opType); @@ -577,13 +571,13 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, GenTree* op1Dup; op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector min/max")); + nullptr DEBUGARG("Clone op1 for Vector.Max/Min")); GenTree* op2Dup; op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector min/max")); + nullptr DEBUGARG("Clone op2 for Vector.Max/Min")); - if (intrinsic == NI_VectorT128_Max) + if ((intrinsic == NI_VectorT128_Max) || (intrinsic == NI_VectorT256_Max)) { intrinsic = isVectorT256 ? 
NI_VectorT256_GreaterThan : NI_VectorT128_GreaterThan; } @@ -616,12 +610,12 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, // op1Dup = op1 GenTree* op1Dup; op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector multiply")); + nullptr DEBUGARG("Clone op1 for Vector.Multiply")); // op2Dup = op2 GenTree* op2Dup; op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector multiply")); + nullptr DEBUGARG("Clone op2 for Vector.Multiply")); // op1 = Sse2.ShiftRightLogical128BitLane(op1, 4) op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, gtNewIconNode(4, TYP_INT), @@ -663,11 +657,11 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, GenTree* op1Dup; op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector min/max")); + nullptr DEBUGARG("Clone op1 for Vector.Max/Min")); GenTree* op2Dup; op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector min/max")); + nullptr DEBUGARG("Clone op2 for Vector.Max/Min")); intrinsic = (intrinsic == NI_VectorT128_Max) ? NI_VectorT128_GreaterThan : NI_VectorT128_LessThan; @@ -749,7 +743,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd, hwIntrinsic = varTypeIsIntegral(baseType) ? 
NI_AVX2_BlendVariable : NI_AVX_BlendVariable; } - return gtNewSimdAsHWIntrinsicNode(retType, op1, op2, op3, hwIntrinsic, baseType, simdSize); + return gtNewSimdAsHWIntrinsicNode(retType, op3, op2, op1, hwIntrinsic, baseType, simdSize); } #endif // TARGET_XARCH @@ -757,7 +751,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicCndSel(CORINFO_CLASS_HANDLE clsHnd, GenTree* op1Dup; op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector conditional select")); + nullptr DEBUGARG("Clone op1 for Vector.ConditionalSelect")); // op2 = op2 & op1 hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseAnd, baseType); @@ -843,9 +837,9 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, // There is no direct SSE2 support for comparing TYP_LONG vectors. // These have to be implemented in terms of TYP_INT vector comparison operations. // - // t = (op1 == op2) i.e. compare for equality as if op1 and op2 are Vector - // op1 = t - // op2 = Shuffle(t, (2, 3, 0, 1)) + // tmp = (op1 == op2) i.e. 
compare for equality as if op1 and op2 are Vector + // op1 = tmp + // op2 = Shuffle(tmp, (2, 3, 0, 1)) // result = BitwiseAnd(op1, op2) // // Shuffle is meant to swap the comparison results of low-32-bits and high 32-bits of @@ -854,12 +848,12 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, TYP_INT); assert(hwIntrinsic != intrinsic); - GenTree* t = gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, TYP_INT, simdSize); + GenTree* tmp = gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, TYP_INT, simdSize); - t = impCloneExpr(t, &op1, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector equality comparison")); + tmp = impCloneExpr(tmp, &op1, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone tmp for Vector.Equals")); - op2 = gtNewSimdAsHWIntrinsicNode(retType, t, gtNewIconNode(SHUFFLE_ZWXY, TYP_INT), NI_SSE2_Shuffle, + op2 = gtNewSimdAsHWIntrinsicNode(retType, tmp, gtNewIconNode(SHUFFLE_ZWXY, TYP_INT), NI_SSE2_Shuffle, TYP_INT, simdSize); hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(NI_VectorT128_op_BitwiseAnd, baseType); @@ -890,11 +884,13 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, GenTree* op1Dup; op1 = impCloneExpr(op1, &op1Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector equality comparison")); + nullptr DEBUGARG("Clone op1 for Vector.GreaterThanOrEqual/LessThanOrEqual")); GenTree* op2Dup; op2 = impCloneExpr(op2, &op2Dup, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector equality comparison")); + nullptr DEBUGARG("Clone op2 for Vector.GreaterThanOrEqual/LessThanOrEqual")); + + NamedIntrinsic eqIntrinsic = isVectorT256 ? 
NI_VectorT256_Equals : NI_VectorT128_Equals; switch (intrinsic) { @@ -928,9 +924,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, } } - op1 = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, baseType, simdSize, op1, op2); - - intrinsic = isVectorT256 ? NI_VectorT256_Equals : NI_VectorT128_Equals; + op1 = impSimdAsHWIntrinsicRelOp(eqIntrinsic, clsHnd, retType, baseType, simdSize, op1, op2); op2 = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, baseType, simdSize, op1Dup, op2Dup); intrinsic = isVectorT256 ? NI_VectorT256_op_BitwiseOr : NI_VectorT128_op_BitwiseOr; @@ -1005,7 +999,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, GenTree* constVectorDup; constVector = impCloneExpr(constVector, &constVectorDup, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector unsigned greater/less comparison")); + nullptr DEBUGARG("Clone constVector for Vector.GreaterThan/LessThan")); NamedIntrinsic hwIntrinsic = isVectorT256 ? 
NI_AVX2_Subtract : NI_SSE2_Subtract; @@ -1061,19 +1055,19 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, GenTree* op1Dup1; op1 = impCloneExpr(op1, &op1Dup1, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector greater/less than comparison")); + nullptr DEBUGARG("Clone op1 for Vector.GreaterThan/LessThan")); GenTree* op1Dup2; op1Dup1 = impCloneExpr(op1Dup1, &op1Dup2, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector greater/less than comparison")); + nullptr DEBUGARG("Clone op1 for Vector.GreaterThan/LessThan")); GenTree* op2Dup1; op2 = impCloneExpr(op2, &op2Dup1, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector greater/less than comparison")); + nullptr DEBUGARG("Clone op2 for Vector.GreaterThan/LessThan")); GenTree* op2Dup2; op2Dup1 = impCloneExpr(op2Dup1, &op2Dup2, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone for Vector greater/less than comparison")); + nullptr DEBUGARG("Clone op2 Vector.GreaterThan/LessThan")); GenTree* t = impSimdAsHWIntrinsicRelOp(intrinsic, clsHnd, retType, TYP_INT, simdSize, op1, op2); GenTree* u = impSimdAsHWIntrinsicRelOp(NI_VectorT128_Equals, clsHnd, retType, TYP_INT, simdSize, From 470f627649062a0f643bbd318898e116ab129eb8 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 2 May 2020 09:01:17 -0700 Subject: [PATCH 39/40] Ensure we clone the constVectorDup before using it --- src/coreclr/src/jit/simdashwintrinsic.cpp | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 9cf9e9d34c0b8b..4f2e2d7769f361 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -538,10 +538,14 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, GenTree* constVector = gtNewSIMDNode(retType, constVal, nullptr, SIMDIntrinsicInit, TYP_INT, simdSize); - 
GenTree* constVectorDup; - constVector = impCloneExpr(constVector, &constVectorDup, clsHnd, (unsigned)CHECK_SPILL_ALL, + GenTree* constVectorDup1; + constVector = impCloneExpr(constVector, &constVectorDup1, clsHnd, (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("Clone constVector for Vector.Max/Min")); + GenTree* constVectorDup2; + constVectorDup1 = impCloneExpr(constVectorDup1, &constVectorDup2, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone constVector for Vector.Max/Min")); + hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(opIntrinsic, opType); // op1 = op1 - constVector @@ -549,10 +553,10 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, // op1 = op1 + constVector op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, constVector, hwIntrinsic, opType, simdSize); - // op2 = op2 - constVector + // op2 = op2 - constVectorDup1 // -or- - // op2 = op2 + constVector - op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, constVectorDup, hwIntrinsic, opType, simdSize); + // op2 = op2 + constVectorDup1 + op2 = gtNewSimdAsHWIntrinsicNode(retType, op2, constVectorDup1, hwIntrinsic, opType, simdSize); // op1 = Max(op1, op2) // -or- @@ -560,13 +564,13 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); op1 = gtNewSimdAsHWIntrinsicNode(retType, op1, op2, hwIntrinsic, baseType, simdSize); - // result = op1 + constVectorDup + // result = op1 + constVectorDup2 // -or- - // result = op1 - constVectorDup + // result = op1 - constVectorDup2 opIntrinsic = (opIntrinsic == NI_VectorT128_op_Subtraction) ? 
NI_VectorT128_op_Addition : NI_VectorT128_op_Subtraction; hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(opIntrinsic, opType); - return gtNewSimdAsHWIntrinsicNode(retType, op1, constVectorDup, hwIntrinsic, opType, simdSize); + return gtNewSimdAsHWIntrinsicNode(retType, op1, constVectorDup2, hwIntrinsic, opType, simdSize); } GenTree* op1Dup; @@ -823,7 +827,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicRelOp(NamedIntrinsic intrinsic, NamedIntrinsic hwIntrinsic = NI_Illegal; - if (isVectorT256 || (baseType != TYP_LONG)) + if (isVectorT256 || ((baseType != TYP_LONG) && (baseType != TYP_ULONG))) { hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(intrinsic, baseType); assert(hwIntrinsic != intrinsic); From 03840fa11b3823ba6ad80e81c8b4278ca9daa0dc Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sat, 2 May 2020 17:03:19 -0700 Subject: [PATCH 40/40] Applying formatting patch --- src/coreclr/src/jit/simdashwintrinsic.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/src/jit/simdashwintrinsic.cpp b/src/coreclr/src/jit/simdashwintrinsic.cpp index 4f2e2d7769f361..1463ed30ca4072 100644 --- a/src/coreclr/src/jit/simdashwintrinsic.cpp +++ b/src/coreclr/src/jit/simdashwintrinsic.cpp @@ -543,8 +543,9 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, nullptr DEBUGARG("Clone constVector for Vector.Max/Min")); GenTree* constVectorDup2; - constVectorDup1 = impCloneExpr(constVectorDup1, &constVectorDup2, clsHnd, (unsigned)CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone constVector for Vector.Max/Min")); + constVectorDup1 = + impCloneExpr(constVectorDup1, &constVectorDup2, clsHnd, (unsigned)CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone constVector for Vector.Max/Min")); hwIntrinsic = SimdAsHWIntrinsicInfo::lookupHWIntrinsic(opIntrinsic, opType);