From 3376c1f9cacd87860df3282a09b1ffc324f79c59 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 16 Apr 2026 19:03:18 -0700 Subject: [PATCH] Revert "Accelerate `Half` with FP16 ISA (#122649)" This reverts commit f7693e144b086d38b65f2e950c9e68c7f3300af4. --- src/coreclr/jit/abi.cpp | 2 - src/coreclr/jit/codegencommon.cpp | 2 +- src/coreclr/jit/codegenxarch.cpp | 6 +- src/coreclr/jit/compiler.cpp | 32 +- src/coreclr/jit/compiler.h | 11 +- src/coreclr/jit/emit.h | 14 +- src/coreclr/jit/emitxarch.cpp | 119 +- src/coreclr/jit/float16.h | 19 - src/coreclr/jit/gentree.cpp | 28 +- src/coreclr/jit/gentree.h | 18 +- src/coreclr/jit/hwintrinsic.cpp | 6 +- src/coreclr/jit/hwintrinsic.h | 14 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 8 +- src/coreclr/jit/hwintrinsiclistxarch.h | 2230 ++++++++--------- src/coreclr/jit/importer.cpp | 29 +- src/coreclr/jit/importercalls.cpp | 590 +---- src/coreclr/jit/instr.cpp | 18 +- src/coreclr/jit/instrsxarch.h | 65 +- src/coreclr/jit/lclvars.cpp | 2 +- src/coreclr/jit/lower.cpp | 4 +- src/coreclr/jit/lowerxarch.cpp | 43 +- src/coreclr/jit/lsrabuild.cpp | 1 - src/coreclr/jit/lsraxarch.cpp | 1 - src/coreclr/jit/morph.cpp | 4 +- src/coreclr/jit/namedintrinsiclist.h | 37 +- src/coreclr/jit/regalloc.cpp | 1 - src/coreclr/jit/registeropswasm.cpp | 2 - src/coreclr/jit/scopeinfo.cpp | 6 +- src/coreclr/jit/simd.cpp | 36 +- src/coreclr/jit/simd.h | 81 +- src/coreclr/jit/typelist.h | 2 - src/coreclr/jit/utils.cpp | 115 - src/coreclr/jit/utils.h | 7 - src/coreclr/jit/valuenum.cpp | 13 +- src/coreclr/jit/valuenum.h | 23 - src/coreclr/jit/valuenumfuncs.h | 4 +- src/coreclr/jit/vartype.h | 13 +- src/coreclr/vm/amd64/profiler.cpp | 22 +- src/coreclr/vm/callhelpers.cpp | 15 +- src/coreclr/vm/callingconvention.h | 16 +- src/coreclr/vm/class.cpp | 33 - src/coreclr/vm/invokeutil.cpp | 11 - src/coreclr/vm/methodtable.h | 6 - src/coreclr/vm/reflectioninvocation.cpp | 10 +- src/coreclr/vm/typehandle.cpp | 9 - src/coreclr/vm/typehandle.h | 4 - 
.../System.Private.CoreLib/src/System/Half.cs | 86 +- 47 files changed, 1234 insertions(+), 2584 deletions(-) delete mode 100644 src/coreclr/jit/float16.h diff --git a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp index db9dd4ed62d72a..b26992c9aecc8b 100644 --- a/src/coreclr/jit/abi.cpp +++ b/src/coreclr/jit/abi.cpp @@ -118,8 +118,6 @@ var_types ABIPassingSegment::GetRegisterType() const { switch (Size) { - case 2: - return TYP_HALF; case 4: return TYP_FLOAT; case 8: diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 8cb24d54903b7f..d01f10408d9bfe 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -7409,7 +7409,7 @@ bool CodeGen::isStructReturn(GenTree* treeNode) } #if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - assert(!varTypeIsStruct(treeNode) || treeNode->TypeGet() == TYP_HALF); + assert(!varTypeIsStruct(treeNode)); return false; #else return varTypeIsStruct(treeNode) && (m_compiler->info.compRetNativeType == TYP_STRUCT); diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 19c789e444bc43..4afff4ae217058 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -6045,7 +6045,7 @@ void CodeGen::genCall(GenTreeCall* call) } else #endif // TARGET_X86 - if (varTypeIsFloating(returnType) || returnType == TYP_HALF) + if (varTypeIsFloating(returnType)) { returnReg = REG_FLOATRET; } @@ -6140,7 +6140,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA } else { - assert(!varTypeIsStruct(call) || call->TypeIs(TYP_HALF)); + assert(!varTypeIsStruct(call)); if (call->TypeIs(TYP_REF)) { @@ -8299,7 +8299,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk) } #endif // defined(TARGET_X86) && defined(FEATURE_SIMD) - if (varTypeIsSIMD(targetType) || targetType == TYP_HALF) + if (varTypeIsSIMD(targetType)) { regNumber srcReg = genConsumeReg(source); assert((srcReg != REG_NA) && 
(genIsValidFloatReg(srcReg))); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index f16065d09c60cf..30c911b0ac18b3 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -577,36 +577,6 @@ bool Compiler::isNativePrimitiveStructType(CORINFO_CLASS_HANDLE clsHnd) return strcmp(typeName, "CLong") == 0 || strcmp(typeName, "CULong") == 0 || strcmp(typeName, "NFloat") == 0; } -bool Compiler::isNativeHalfStructType(CORINFO_CLASS_HANDLE clsHnd) -{ -#if defined(TARGET_XARCH) - if (!isIntrinsicType(clsHnd)) - { - return false; - } - const char* namespaceName = nullptr; - const char* typeName = getClassNameFromMetadata(clsHnd, &namespaceName); - - if (strcmp(typeName, "Half") != 0) - { - return false; - } - - if (strcmp(namespaceName, "System") != 0) - { - return false; - } - - // We do an exact check since this impacts ABI and we want - // to throw out any R2R functions that mismatch here, this - // is similar to how we have to handle Vector - - return compExactlyDependsOn(InstructionSet_AVX10v1); -#else - return false; -#endif -} - //----------------------------------------------------------------------------- // getPrimitiveTypeForStruct: // Get the "primitive" type that is used for a struct @@ -681,7 +651,7 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS break; case 2: - useType = isNativeHalfStructType(clsHnd) ? 
TYP_HALF : TYP_USHORT; + useType = TYP_USHORT; break; #if !defined(TARGET_XARCH) || defined(UNIX_AMD64_ABI) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index cf80bf00b94a35..10e36182e61692 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4922,11 +4922,6 @@ class Compiler NamedIntrinsic lookupPrimitiveFloatNamedIntrinsic(CORINFO_METHOD_HANDLE method, const char* methodName); NamedIntrinsic lookupPrimitiveIntNamedIntrinsic(CORINFO_METHOD_HANDLE method, const char* methodName); - - NamedIntrinsic lookupHalfIntrinsic(NamedIntrinsic ni); - NamedIntrinsic lookupHalfConversionIntrinsic(var_types fromType, var_types toType); - int lookupHalfRoundingMode(NamedIntrinsic ni); - GenTree* impUnsupportedNamedIntrinsic(unsigned helper, CORINFO_METHOD_HANDLE method, CORINFO_SIG_INFO* sig, @@ -6138,7 +6133,6 @@ class Compiler // Returns true if the provided type should be treated as a primitive type // for the unmanaged calling conventions. bool isNativePrimitiveStructType(CORINFO_CLASS_HANDLE clsHnd); - bool isNativeHalfStructType(CORINFO_CLASS_HANDLE clsHnd); enum structPassingKind { @@ -10212,11 +10206,8 @@ class Compiler // Use to determine if a struct *might* be a SIMD type. As this function only takes a size, many // structs will fit the criteria. 
- bool structSizeMightRepresentAcceleratedType(size_t structSize) + bool structSizeMightRepresentSIMDType(size_t structSize) { - if (structSize == 2) - return true; - #ifdef FEATURE_SIMD return (structSize >= getMinVectorByteLength()) && (structSize <= getMaxVectorByteLength()); #else diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 88597074e85c97..3327e4bb2cdf41 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -2039,20 +2039,18 @@ class emitter #define PERFSCORE_THROUGHPUT_ZERO 0.0f // Only used for pseudo-instructions that don't generate code -#define PERFSCORE_THROUGHPUT_9X (1.0f / 9.0f) -#define PERFSCORE_THROUGHPUT_6X (1.0f / 6.0f) // Hextuple issue -#define PERFSCORE_THROUGHPUT_5X 0.20f // Pentuple issue -#define PERFSCORE_THROUGHPUT_4X 0.25f // Quad issue -#define PERFSCORE_THROUGHPUT_3X (1.0f / 3.0f) // Three issue -#define PERFSCORE_THROUGHPUT_2X 0.5f // Dual issue -#define PERFSCORE_THROUGHPUT_1P5X 0.67f // Dual issue +#define PERFSCORE_THROUGHPUT_9X (1.0f / 9.0f) +#define PERFSCORE_THROUGHPUT_6X (1.0f / 6.0f) // Hextuple issue +#define PERFSCORE_THROUGHPUT_5X 0.20f // Pentuple issue +#define PERFSCORE_THROUGHPUT_4X 0.25f // Quad issue +#define PERFSCORE_THROUGHPUT_3X (1.0f / 3.0f) // Three issue +#define PERFSCORE_THROUGHPUT_2X 0.5f // Dual issue #define PERFSCORE_THROUGHPUT_1C 1.0f // Single Issue #define PERFSCORE_THROUGHPUT_2C 2.0f // slower - 2 cycles #define PERFSCORE_THROUGHPUT_3C 3.0f // slower - 3 cycles #define PERFSCORE_THROUGHPUT_4C 4.0f // slower - 4 cycles -#define PERFSCORE_THROUGHPUT_4P5C 4.5f // slower - 4.5 cycles #define PERFSCORE_THROUGHPUT_5C 5.0f // slower - 5 cycles #define PERFSCORE_THROUGHPUT_6C 6.0f // slower - 6 cycles #define PERFSCORE_THROUGHPUT_7C 7.0f // slower - 7 cycles diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 17e637f2eaf119..ff65e2d883a25f 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -129,8 +129,7 @@ bool 
emitter::Is3OpRmwInstruction(instruction ins) return ((ins >= FIRST_FMA_INSTRUCTION) && (ins <= LAST_FMA_INSTRUCTION)) || (IsAVXVNNIFamilyInstruction(ins)) || ((ins >= FIRST_AVX512BMM_INSTRUCTION) && (ins <= LAST_AVX512BMM_INSTRUCTION)) || - ((ins >= FIRST_AVXIFMA_INSTRUCTION) && (ins <= LAST_AVXIFMA_INSTRUCTION)) || - ((ins >= FIRST_AVX10V1_FMA_INSTR) && (ins <= LAST_AVX10V1_FMA_INSTR)); + ((ins >= FIRST_AVXIFMA_INSTRUCTION) && (ins <= LAST_AVXIFMA_INSTRUCTION)); } } } @@ -3077,9 +3076,9 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co } // Now the byte in the 22 position should be either of the below: // 1. An escape byte 0F (For isa before AVX10.2) - // 2. A map number from 0 to 7 (For AVX10.1 and above) + // 2. A map number from 0 to 7 (For AVX10.2 and above) leadingBytes = check; - assert((leadingBytes == 0x0F) || ((m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v1) || + assert((leadingBytes == 0x0F) || ((m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) || (m_compiler->compIsaSupportedDebugOnly(InstructionSet_APX))) && (leadingBytes >= 0x00) && (leadingBytes <= 0x07))); @@ -3162,15 +3161,14 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co case 0x05: { - assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v1)); + assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v2)); evexPrefix |= (0x05 << 16); break; } case 0x06: { - assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512BMM) || - m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v1)); + assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512BMM)); evexPrefix |= (0x6 << 16); break; } @@ -5398,9 +5396,10 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code) assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64 || (attrSize == EA_16BYTE) || (attrSize == EA_32BYTE) || (attrSize == EA_64BYTE) // only for x64 - || (ins == 
INS_movzx) || (ins == INS_movsx) || (ins == INS_vmovsh) || (ins == INS_cmpxchg) || + || (ins == INS_movzx) || (ins == INS_movsx) || + (ins == INS_cmpxchg) // kmov instructions reach this path with EA_8BYTE size, even on x86 - IsKMOVInstruction(ins) + || IsKMOVInstruction(ins) // The prefetch instructions are always 3 bytes and have part of their modr/m byte hardcoded || isPrefetch(ins)); @@ -7433,7 +7432,6 @@ bool emitter::IsMovInstruction(instruction ins) case INS_kmovw_gpr: case INS_kmovd_gpr: case INS_kmovq_gpr: - case INS_vmovsh: { return true; } @@ -7632,13 +7630,6 @@ bool emitter::HasSideEffect(instruction ins, emitAttr size) break; } - case INS_vmovsh: - { - // Clears the upper bits - hasSideEffect = true; - break; - } - default: { unreached(); @@ -7912,12 +7903,6 @@ bool emitter::emitIns_Mov( break; } - case INS_vmovsh: - { - assert(isFloatReg(dstReg) && isFloatReg(srcReg)); - break; - } - default: { unreached(); @@ -11820,10 +11805,6 @@ const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName) con case EA_2BYTE: { - if (IsXMMReg(reg)) - { - return emitXMMregName(reg); - } #if defined(TARGET_AMD64) if (reg > REG_RDI) { @@ -14549,7 +14530,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) // Is this a 'big' opcode? else if (code & 0xFF000000) { - if (size == EA_2BYTE && !IsSimdInstruction(ins)) + if (size == EA_2BYTE) { assert(ins == INS_movbe); @@ -15417,7 +15398,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) // Is this a 'big' opcode? 
else if (code & 0xFF000000) { - if (size == EA_2BYTE && !IsSimdInstruction(ins)) + if (size == EA_2BYTE) { assert(ins == INS_movbe); @@ -20973,7 +20954,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_movss: case INS_movsd_simd: case INS_movddup: - case INS_vmovsh: { if (memAccessKind == PERFSCORE_MEMORY_NONE) { @@ -21414,85 +21394,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; } - case INS_vaddsh: - case INS_vsubsh: - case INS_vmulsh: - case INS_vfmadd213sh: - case INS_vmaxsh: - case INS_vminsh: - case INS_vcvtsh2ss: - { - result.insLatency = PERFSCORE_LATENCY_4C; - result.insThroughput = PERFSCORE_THROUGHPUT_2X; - break; - } - - case INS_vdivsh: - { - result.insLatency = PERFSCORE_LATENCY_14C; - result.insThroughput = PERFSCORE_THROUGHPUT_4C; - break; - } - - case INS_vsqrtsh: - { - result.insLatency = PERFSCORE_LATENCY_14C; - result.insThroughput = PERFSCORE_THROUGHPUT_4P5C; - break; - } - - case INS_vrsqrtsh: - case INS_vcomish: - case INS_vucomish: - case INS_vrcpsh: - { - result.insLatency = PERFSCORE_LATENCY_4C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; - } - - case INS_vrndscalesh: - { - result.insLatency = PERFSCORE_LATENCY_8C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; - } - - case INS_vcvtss2sh: - { - result.insLatency = PERFSCORE_LATENCY_6C; - result.insThroughput = PERFSCORE_THROUGHPUT_1P5X; - break; - } - - case INS_vcvtsd2sh: - { - result.insLatency = PERFSCORE_LATENCY_7C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; - } - - case INS_vcvtsh2sd: - { - result.insLatency = PERFSCORE_LATENCY_10C; - result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; - } - - case INS_vcvtsi2sh32: - case INS_vcvtsi2sh64: - case INS_vcvtsh2si32: - case INS_vcvtsh2si64: - case INS_vcvtusi2sh32: - case INS_vcvtusi2sh64: - case INS_vcvtsh2usi32: - case INS_vcvtsh2usi64: - { - result.insLatency = PERFSCORE_LATENCY_7C; - 
result.insThroughput = PERFSCORE_THROUGHPUT_1C; - break; - } - default: { assert((unsigned)ins < ArrLen(insThroughputInfos)); diff --git a/src/coreclr/jit/float16.h b/src/coreclr/jit/float16.h deleted file mode 100644 index ed08e940a02479..00000000000000 --- a/src/coreclr/jit/float16.h +++ /dev/null @@ -1,19 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -/***************************************************************************** - Both simd.cpp, gentree.cpp, and utils.cpp need a definition of float16_t - but do not share a common header. - - Defining here so as to not create accidental implicit include dependencies. - This definition can be removed once .NET moves to C++23 support. -******************************************************************************/ - -#ifndef _FLOAT16_H_ -#define _FLOAT16_H_ - -#include - -typedef uint16_t float16_t; - -#endif // _FLOAT16_H_ diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index cf1cfc9a9b0a07..dd585dbed86c78 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -8207,7 +8207,6 @@ GenTree* Compiler::gtNewZeroConNode(var_types type) return gtNewLconNode(0); } - case TYP_HALF: case TYP_FLOAT: case TYP_DOUBLE: { @@ -23060,17 +23059,6 @@ GenTree* Compiler::gtNewSimdCreateScalarUnsafeNode(var_types type, break; } - case TYP_HALF: - { - float16_t cnsVal = FloatingPointUtils::convertDoubleToFloat16(op1->AsDblCon()->DconValue()); - - for (unsigned i = 0; i < (simdSize / 2); i++) - { - vecCon->gtSimdVal.f16[i] = cnsVal; - } - break; - } - case TYP_FLOAT: { float cnsVal = static_cast(op1->AsDblCon()->DconValue()); @@ -27248,12 +27236,12 @@ GenTree* Compiler::gtNewSimdTernaryLogicNode( // GenTree* Compiler::gtNewSimdToScalarNode(var_types type, GenTree* op1, var_types simdBaseType, unsigned simdSize) { - assert(varTypeIsArithmetic(type) || TypeGet(type) == TYP_HALF); + 
assert(varTypeIsArithmetic(type)); assert(op1 != nullptr); assert(varTypeIsSIMD(op1)); - assert(varTypeIsArithmetic(simdBaseType) || TypeGet(simdBaseType) == TYP_HALF); + assert(varTypeIsArithmetic(simdBaseType)); NamedIntrinsic intrinsic = NI_Illegal; @@ -28629,7 +28617,6 @@ bool GenTreeHWIntrinsic::OperIsEmbRoundingEnabled() const case NI_AVX512_FusedMultiplySubtractNegated: case NI_AVX512_FusedMultiplySubtractNegatedScalar: case NI_AVX512_FusedMultiplySubtractScalar: - case NI_AVX10v1_FusedMultiplyAddScalar: { return numArgs == 4; } @@ -28647,13 +28634,6 @@ bool GenTreeHWIntrinsic::OperIsEmbRoundingEnabled() const case NI_AVX512_X64_ConvertScalarToVector128Double: case NI_AVX512_X64_ConvertScalarToVector128Single: #endif // TARGET_AMD64 - case NI_AVX10v1_AddScalar: - case NI_AVX10v1_DivideScalar: - case NI_AVX10v1_MultiplyScalar: - case NI_AVX10v1_SubtractScalar: - case NI_AVX10v1_ConvertScalarToVector128Half: - case NI_AVX10v1_ConvertScalarToVector128Single: - case NI_AVX10v1_ConvertScalarToVector128Double: { return numArgs == 3; } @@ -28670,13 +28650,9 @@ bool GenTreeHWIntrinsic::OperIsEmbRoundingEnabled() const case NI_AVX512_ConvertToVector512UInt32: case NI_AVX512_ConvertToVector512UInt64: case NI_AVX512_Sqrt: - case NI_AVX10v1_ConvertToInt32: - case NI_AVX10v1_ConvertToUInt32: #if defined(TARGET_AMD64) case NI_AVX512_X64_ConvertToInt64: case NI_AVX512_X64_ConvertToUInt64: - case NI_AVX10v1_ConvertToInt64: - case NI_AVX10v1_ConvertToUInt64: #endif // TARGET_AMD64 case NI_AVX10v2_ConvertToSByteWithSaturationAndZeroExtendToInt32: case NI_AVX10v2_ConvertToByteWithSaturationAndZeroExtendToInt32: diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 551040589e9952..67e4b5516684cc 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -3608,7 +3608,7 @@ struct GenTreeDblCon : public GenTree GenTreeDblCon(double val, var_types type = TYP_DOUBLE) : GenTree(GT_CNS_DBL, type) { - assert(varTypeIsFloating(type) || type == 
TYP_HALF); + assert(varTypeIsFloating(type)); SetDconValue(val); } #if DEBUGGABLE_GENTREE @@ -7014,22 +7014,6 @@ struct GenTreeVecCon : public GenTree break; } - case TYP_HALF: - { - if (arg->IsCnsFltOrDbl()) - { - simdVal.f16[argIdx] = FloatingPointUtils::convertDoubleToFloat16(arg->AsDblCon()->DconValue()); - return true; - } - else - { - // We expect the constant to have been already zeroed - // We check against the i16, rather than f16, to account for -0.0 - assert(simdVal.i16[argIdx] == 0); - } - break; - } - case TYP_FLOAT: { if (arg->IsCnsFltOrDbl()) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 2037f73ec21ea3..a5dc1001d3d92c 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -9,12 +9,12 @@ static const HWIntrinsicInfo hwIntrinsicInfoArray[] = { // clang-format off #if defined(TARGET_XARCH) -#define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, category, flag) \ +#define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ { \ /* name */ #name, \ /* flags */ static_cast(flag), \ /* id */ NI_##isa##_##name, \ - /* ins */ t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, \ + /* ins */ t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, \ /* isa */ InstructionSet_##isa, \ /* simdSize */ size, \ /* numArgs */ numarg, \ @@ -70,7 +70,7 @@ const HWIntrinsicInfo& HWIntrinsicInfo::lookup(NamedIntrinsic id) // The instruction for id and type instruction HWIntrinsicInfo::lookupIns(NamedIntrinsic id, var_types type, Compiler* comp) { - if ((type < TYP_BYTE) || (type > TYP_HALF)) + if ((type < TYP_BYTE) || (type > TYP_DOUBLE)) { assert(!"Unexpected type"); return INS_invalid; diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 63b230cffeba67..5267d426237a79 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -511,15 +511,13 @@ struct TernaryLogicInfo struct 
HWIntrinsicInfo { + // 32-bit: 36-bytes (34+2 trailing padding) + // 64-bit: 40-bytes (38+2 trailing padding) - const char* name; // 4 or 8-bytes - HWIntrinsicFlag flags; // 4-bytes - NamedIntrinsic id; // 2-bytes -#if defined(TARGET_XARCH) - uint16_t ins[11]; // 11 * 2-bytes -#else - uint16_t ins[10]; // 10 * 2-bytes -#endif + const char* name; // 4 or 8-bytes + HWIntrinsicFlag flags; // 4-bytes + NamedIntrinsic id; // 2-bytes + uint16_t ins[10]; // 10 * 2-bytes uint8_t isa; // 1-byte int8_t simdSize; // 1-byte int8_t numArgs; // 1-byte diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 04c1668510c32c..0c115532c755ea 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -1017,7 +1017,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case InstructionSet_AVX10v2_X64: case InstructionSet_AVXVNNIINT: case InstructionSet_AVXVNNIINT_V512: - case InstructionSet_AVX10v1: { genAvxFamilyIntrinsic(node, instOptions); break; @@ -1811,7 +1810,6 @@ void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic* case NI_AVX512_FusedMultiplySubtractNegated: case NI_AVX512_FusedMultiplySubtractNegatedScalar: case NI_AVX512_FusedMultiplySubtractScalar: - case NI_AVX10v1_FusedMultiplyAddScalar: { // For FMA intrinsics, since it is not possible to get any contained operand in this case: embedded rounding // is limited in register-to-register form, and the control byte is dynamic, we don't need to do any swap. @@ -1855,7 +1853,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) regNumber targetReg = node->GetRegNum(); var_types baseType = node->GetSimdBaseType(); - assert((baseType >= TYP_BYTE) && (baseType <= TYP_HALF)); + assert((baseType >= TYP_BYTE) && (baseType <= TYP_DOUBLE)); GenTree* op1 = (node->GetOperandCount() >= 1) ? node->Op(1) : nullptr; GenTree* op2 = (node->GetOperandCount() >= 2) ? 
node->Op(2) : nullptr; @@ -1929,7 +1927,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) } else { - assert(varTypeIsFloating(baseType) || TypeGet(baseType) == TYP_HALF); + assert(varTypeIsFloating(baseType)); attr = emitTypeSize(baseType); @@ -2223,7 +2221,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) } else { - assert(varTypeIsFloating(baseType) || TypeGet(baseType) == TYP_HALF); + assert(varTypeIsFloating(baseType)); assert(instOptions == INS_OPTS_NONE); // Just use movaps for reg->reg moves as it has zero-latency on modern CPUs diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index eb3c89784fdfe1..9cccc717fd1351 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -17,1296 +17,1256 @@ 4) SIMD intrinsics have a non-zero `SIMD size` field based-on that operate over `Vector128`(16) or `Vector256`(32) 5) Scalar intrinsics that operate over general purpose registers (e.g., Sse41.Crc32) have `SIMD size` with 0 6) Each intrinsic has a `NumArg` for number of parameters, and some intrinsics that are overloaded on multiple parameter numbers have this field with -1 - 7) Each intrinsic has 11 `instructions` fields that list the instructions should be generated based-on the base type + 7) Each intrinsic has 10 `instructions` fields that list the instructions should be generated based-on the base type 8) Each intrinsic has one category with type of `enum HWIntrinsicCategory`, please see the definition of HWIntrinsicCategory for details 9) Each intrinsic has one or more flags with type of `enum HWIntrinsicFlag` */ // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for Vector128 #define FIRST_NI_Vector128 NI_Vector128_Abs -HARDWARE_INTRINSIC(Vector128, Abs, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AddSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, AndNot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) 
-HARDWARE_INTRINSIC(Vector128, As, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsByte, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsDouble, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsInt16, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsNInt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsNUInt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsSByte, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsSingle, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsUInt16, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsVector, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, AsVector128, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, 
AsVector128Unsafe, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics|HW_Flag_NoContainment) -HARDWARE_INTRINSIC(Vector128, AsVector2, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsd_simd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ConditionalSelect, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, ConvertToDouble, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt32, 16, 1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt32Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt64Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToSingle, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt32Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt64Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Vector128, CreateSequence, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, Equals, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, EqualsAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ExtractMostSignificantBits, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Floor, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, FusedMultiplyAdd, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, GetElement, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_extractps, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThanAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThanAny, 16, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqualAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqualAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, IsEvenInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsFinite, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) 
-HARDWARE_INTRINSIC(Vector128, IsNaN, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsNegative, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsNegativeInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsNormal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsOddInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsPositive, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsPositiveInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsSubnormal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, IsZero, 16, 1, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, LessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LessThanAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LessThanAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LessThanOrEqualAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LessThanOrEqualAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, LoadAligned, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, LoadAlignedNonTemporal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, LoadUnsafe, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Max, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MaxMagnitude, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MaxMagnitudeNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MaxNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MaxNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Min, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MinMagnitude, 16, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MinMagnitudeNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MinNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MinNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, NarrowWithSaturation, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, Round, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, ShuffleNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, ShuffleNativeFallback, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, StoreUnsafe, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, SubtractSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, Sum, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Vector128, ToVector256, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movups, INS_movupd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(Vector128, ToVector256Unsafe, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movups, INS_movupd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(Vector128, ToVector512, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(Vector128, Truncate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, WidenLower, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, WidenUpper, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, WithElement, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_E, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Epsilon, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Indices, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_NaN, 16, 0, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_NegativeInfinity, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_NegativeOne, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_NegativeZero, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_One, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Pi, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_PositiveInfinity, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Tau, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Addition, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_BitwiseAnd, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_BitwiseOr, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Division, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialSideEffect_Other|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_RightShift, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_Subtraction, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_UnaryNegation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_UnaryPlus, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector128, op_UnsignedRightShift, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Abs, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, AddSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, AndNot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, As, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsByte, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsDouble, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsInt16, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsNInt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsNUInt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsSByte, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsSingle, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsUInt16, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsVector, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsVector128, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsVector128Unsafe, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics|HW_Flag_NoContainment) +HARDWARE_INTRINSIC(Vector128, AsVector2, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsd_simd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ConditionalSelect, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, 
ConvertToDouble, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToInt32Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToInt64Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToSingle, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToUInt32Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToUInt64Native, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector128, CreateSequence, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, Equals, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, EqualsAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ExtractMostSignificantBits, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, Floor, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, FusedMultiplyAdd, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, GetElement, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_extractps, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, GreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, GreaterThanAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, GreaterThanAny, 16, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqualAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqualAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, IsEvenInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, IsFinite, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, IsInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, IsInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, IsNaN, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, IsNegative, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, IsNegativeInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, IsNormal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, IsOddInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, IsPositive, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, IsPositiveInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, IsSubnormal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, IsZero, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, LessThan, 16, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, LessThanAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, LessThanAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, LessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, LessThanOrEqualAll, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, LessThanOrEqualAny, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, LoadAligned, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, LoadAlignedNonTemporal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, LoadUnsafe, 16, -1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Max, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, MaxMagnitude, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, MaxMagnitudeNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, MaxNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, MaxNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Min, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, MinMagnitude, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, MinMagnitudeNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, 
MinNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, MinNumber, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, NarrowWithSaturation, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, Round, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector128, ShuffleNative, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector128, ShuffleNativeFallback, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, StoreUnsafe, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, SubtractSaturate, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Sum, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, {INS_movd32, INS_movd32, 
INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(Vector128, ToVector256, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(Vector128, ToVector256Unsafe, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(Vector128, ToVector512, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(Vector128, Truncate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, WidenLower, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, WidenUpper, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, WithElement, 16, 3, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_E, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_Epsilon, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_Indices, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_NaN, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_NegativeInfinity, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_NegativeOne, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_NegativeZero, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_One, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_Pi, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_PositiveInfinity, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_Tau, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_Addition, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_BitwiseAnd, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_BitwiseOr, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_Division, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_Helper, HW_Flag_SpecialSideEffect_Other|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_RightShift, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_Subtraction, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_UnaryNegation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_UnaryPlus, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_UnsignedRightShift, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) #define LAST_NI_Vector128 NI_Vector128_op_UnsignedRightShift // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for Vector256 #define FIRST_NI_Vector256 NI_Vector256_Abs -HARDWARE_INTRINSIC(Vector256, Abs, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, AddSaturate, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, AndNot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, As, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsByte, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsDouble, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsInt16, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsNInt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsNUInt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsSByte, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsSingle, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsUInt16, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsUInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsVector, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, Ceiling, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConditionalSelect, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConvertToDouble, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConvertToInt32Native, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConvertToInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToInt64Native, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToSingle, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToUInt32Native, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToUInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToUInt64Native, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, Create, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, CreateScalar, 32, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, CreateSequence, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, Dot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, Equals, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, EqualsAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ExtractMostSignificantBits, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, Floor, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, FusedMultiplyAdd, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, GetElement, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, GetLower, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movups, INS_movupd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(Vector256, GetUpper, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, GreaterThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, GreaterThanAll, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, GreaterThanAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, GreaterThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, GreaterThanOrEqualAll, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, GreaterThanOrEqualAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, IsEvenInteger, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsFinite, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsInteger, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsNaN, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsNegative, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsNegativeInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) 
-HARDWARE_INTRINSIC(Vector256, IsNormal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsOddInteger, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsPositive, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsPositiveInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsSubnormal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, IsZero, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, LessThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, LessThanAll, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, 
LessThanAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, LessThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, LessThanOrEqualAll, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, LessThanOrEqualAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, LoadAligned, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, LoadAlignedNonTemporal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, LoadUnsafe, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, Max, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MaxMagnitude, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MaxMagnitudeNumber, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MaxNative, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MaxNumber, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, Min, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MinMagnitude, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MinMagnitudeNumber, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MinNative, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MinNumber, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, MultiplyAddEstimate, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, Narrow, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, NarrowWithSaturation, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, Round, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, ShiftLeft, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector256, ShuffleNative, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector256, ShuffleNativeFallback, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector256, Sqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, StoreAligned, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, StoreAlignedNonTemporal, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, StoreUnsafe, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, SubtractSaturate, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, Sum, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ToScalar, 32, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ToVector512, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(Vector256, ToVector512Unsafe, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(Vector256, Truncate, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, WidenLower, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, WidenUpper, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, WithElement, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, WithLower, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, WithUpper, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, get_AllBitsSet, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_E, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_Epsilon, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_Indices, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_NaN, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_NegativeInfinity, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_NegativeOne, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_NegativeZero, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_One, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_Pi, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_PositiveInfinity, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_Tau, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, get_Zero, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_Addition, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_BitwiseAnd, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_BitwiseOr, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_Division, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialSideEffect_Other|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector256, op_Equality, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector256, op_ExclusiveOr, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_Inequality, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector256, op_LeftShift, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, 
op_Multiply, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_OnesComplement, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_RightShift, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_Subtraction, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_UnaryNegation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector256, op_UnaryPlus, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_UnsignedRightShift, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, Abs, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, AddSaturate, 32, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, AndNot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, As, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsByte, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsDouble, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsInt16, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsNInt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsNUInt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsSByte, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsSingle, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsUInt16, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsUInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsVector, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, Ceiling, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, ConditionalSelect, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, ConvertToDouble, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, ConvertToInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, ConvertToInt32Native, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) 
+HARDWARE_INTRINSIC(Vector256, ConvertToInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, ConvertToInt64Native, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, ConvertToSingle, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, ConvertToUInt32Native, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, ConvertToUInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, ConvertToUInt64Native, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, Create, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, CreateScalar, 32, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, CreateSequence, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, Dot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, Equals, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, EqualsAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, ExtractMostSignificantBits, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) 
+HARDWARE_INTRINSIC(Vector256, Floor, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, FusedMultiplyAdd, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, GetElement, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, GetLower, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movdqu32, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(Vector256, GetUpper, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, GreaterThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, GreaterThanAll, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, GreaterThanAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, GreaterThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, GreaterThanOrEqualAll, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, GreaterThanOrEqualAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, IsEvenInteger, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, IsFinite, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, IsInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, IsInteger, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, IsNaN, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) 
+HARDWARE_INTRINSIC(Vector256, IsNegative, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, IsNegativeInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, IsNormal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, IsOddInteger, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, IsPositive, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, IsPositiveInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, IsSubnormal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, IsZero, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, LessThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, LessThanAll, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, LessThanAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, LessThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, LessThanOrEqualAll, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, LessThanOrEqualAny, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, LoadAligned, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, LoadAlignedNonTemporal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, LoadUnsafe, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, Max, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, MaxMagnitude, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, MaxMagnitudeNumber, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, MaxNative, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, MaxNumber, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, Min, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, MinMagnitude, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, MinMagnitudeNumber, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, MinNative, 32, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, MinNumber, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, MultiplyAddEstimate, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, Narrow, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, NarrowWithSaturation, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, Round, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, ShiftLeft, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector256, ShuffleNative, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector256, ShuffleNativeFallback, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector256, Sqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, StoreAligned, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, StoreAlignedNonTemporal, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, StoreUnsafe, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, SubtractSaturate, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, Sum, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, ToScalar, 32, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, ToVector512, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(Vector256, ToVector512Unsafe, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(Vector256, Truncate, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, WidenLower, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, WidenUpper, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, WithElement, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) 
+HARDWARE_INTRINSIC(Vector256, WithLower, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, WithUpper, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, get_AllBitsSet, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, get_E, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, get_Epsilon, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, get_Indices, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, get_NaN, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, get_NegativeInfinity, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, get_NegativeOne, 32, 0, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, get_NegativeZero, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, get_One, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, get_Pi, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, get_PositiveInfinity, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, get_Tau, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, get_Zero, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, op_Addition, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, op_BitwiseAnd, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, 
op_BitwiseOr, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, op_Division, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialSideEffect_Other|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Vector256, op_Equality, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector256, op_ExclusiveOr, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, op_Inequality, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector256, op_LeftShift, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, op_Multiply, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, op_OnesComplement, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, op_RightShift, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, op_Subtraction, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, op_UnaryNegation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, op_UnaryPlus, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, op_UnsignedRightShift, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) #define LAST_NI_Vector256 NI_Vector256_op_UnsignedRightShift // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, 
TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for Vector512 #define FIRST_NI_Vector512 NI_Vector512_Abs -HARDWARE_INTRINSIC(Vector512, Abs, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, AddSaturate, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, AndNot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, As, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsByte, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsDouble, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsInt64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsNInt, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsNUInt, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsSByte, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsSingle, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsUInt16, 64, 1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsUInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsUInt64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsVector, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, AsVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, Ceiling, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, ConditionalSelect, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, ConvertToDouble, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToInt32Native, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToInt64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToInt64Native, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToSingle, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToUInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToUInt32Native, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) 
-HARDWARE_INTRINSIC(Vector512, ConvertToUInt64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToUInt64Native, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, Create, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, CreateScalar, 64, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector512, CreateScalarUnsafe, 64, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector512, CreateSequence, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, Dot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, Equals, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, EqualsAny, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ExtractMostSignificantBits, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, Floor, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, FusedMultiplyAdd, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, GetElement, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, GetLower, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(Vector512, GetLower128, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_BaseTypeFromFirstArg) 
-HARDWARE_INTRINSIC(Vector512, GetUpper, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, GreaterThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, GreaterThanAll, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, GreaterThanAny, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, GreaterThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, GreaterThanOrEqualAll, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, GreaterThanOrEqualAny, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, IsEvenInteger, 64, 1, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsFinite, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsInfinity, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsInteger, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsNaN, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsNegative, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsNegativeInfinity, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsNormal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsOddInteger, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsPositive, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsPositiveInfinity, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsSubnormal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, IsZero, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, LessThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, LessThanAll, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, LessThanAny, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, LessThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, LessThanOrEqualAll, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, LessThanOrEqualAny, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, LoadAligned, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, LoadAlignedNonTemporal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, LoadUnsafe, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, Max, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MaxMagnitude, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MaxMagnitudeNumber, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MaxNative, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MaxNumber, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, Min, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MinMagnitude, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MinMagnitudeNumber, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MinNative, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MinNumber, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, MultiplyAddEstimate, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, Narrow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, NarrowWithSaturation, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, Round, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, ShiftLeft, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, Shuffle, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector512, ShuffleNative, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector512, ShuffleNativeFallback, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector512, Sqrt, 64, 1, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, StoreUnsafe, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, SubtractSaturate, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, Sum, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ToScalar, 64, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, Truncate, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) 
-HARDWARE_INTRINSIC(Vector512, WidenLower, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, WidenUpper, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, WithElement, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, WithLower, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, WithUpper, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, get_AllBitsSet, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_E, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_Epsilon, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_Indices, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_NaN, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_NegativeInfinity, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_NegativeOne, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_NegativeZero, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_One, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_Pi, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_PositiveInfinity, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) 
-HARDWARE_INTRINSIC(Vector512, get_Tau, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, get_Zero, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Addition, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_BitwiseAnd, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_BitwiseOr, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Division, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Equality, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector512, op_ExclusiveOr, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) 
-HARDWARE_INTRINSIC(Vector512, op_Inequality, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector512, op_LeftShift, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Multiply, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_OnesComplement, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_RightShift, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_Subtraction, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_UnaryNegation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_UnaryPlus, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(Vector512, op_UnsignedRightShift, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Abs, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, AddSaturate, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, AndNot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, As, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsByte, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsDouble, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsInt32, 64, 1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsInt64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsNInt, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsNUInt, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsSByte, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsSingle, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsUInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsUInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsUInt64, 64, 1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsVector, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, Ceiling, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, ConditionalSelect, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, ConvertToDouble, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToInt32Native, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, 
ConvertToInt64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToInt64Native, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToSingle, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToUInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToUInt32Native, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToUInt64, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToUInt64Native, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, Create, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, 
HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, CreateScalar, 64, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector512, CreateScalarUnsafe, 64, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector512, CreateSequence, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Dot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, Equals, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, EqualsAny, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ExtractMostSignificantBits, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, Floor, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, FusedMultiplyAdd, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, GetElement, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, GetLower, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(Vector512, GetLower128, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, GetUpper, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, GreaterThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, GreaterThanAll, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, GreaterThanAny, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, GreaterThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, GreaterThanOrEqualAll, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, GreaterThanOrEqualAny, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, IsEvenInteger, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, IsFinite, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, IsInfinity, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, IsInteger, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, IsNaN, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, IsNegative, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, IsNegativeInfinity, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, IsNormal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, IsOddInteger, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, IsPositive, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, IsPositiveInfinity, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, IsSubnormal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, IsZero, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, LessThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, LessThanAll, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, LessThanAny, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, LessThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, LessThanOrEqualAll, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, LessThanOrEqualAny, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, LoadAligned, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, LoadAlignedNonTemporal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, LoadUnsafe, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Max, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, MaxMagnitude, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, MaxMagnitudeNumber, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, MaxNative, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, MaxNumber, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Min, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, MinMagnitude, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, MinMagnitudeNumber, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, MinNative, 64, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, MinNumber, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, MultiplyAddEstimate, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Narrow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, NarrowWithSaturation, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, Round, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, ShiftLeft, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Shuffle, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector512, ShuffleNative, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector512, ShuffleNativeFallback, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector512, Sqrt, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, StoreUnsafe, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, SubtractSaturate, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Sum, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ToScalar, 64, 1, {INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, INS_movd32, 
INS_movd64, INS_movd64, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, Truncate, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, WidenLower, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, WidenUpper, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, WithElement, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, WithLower, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, WithUpper, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, get_AllBitsSet, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_E, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_Epsilon, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_Indices, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_NaN, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_NegativeInfinity, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_NegativeOne, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_NegativeZero, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_One, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_Pi, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_PositiveInfinity, 64, 0, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_Tau, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_Zero, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_Addition, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_BitwiseAnd, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_BitwiseOr, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_Division, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_Equality, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector512, op_ExclusiveOr, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_Inequality, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(Vector512, op_LeftShift, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_Multiply, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_OnesComplement, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_RightShift, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_Subtraction, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_UnaryNegation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_UnaryPlus, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_UnsignedRightShift, 
64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) #define LAST_NI_Vector512 NI_Vector512_op_UnsignedRightShift // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for X86Base, SSE, SSE2, SSE3, SSSE3, SSE41, SSE42, POPCNT #define FIRST_NI_X86Base NI_X86Base_Abs -HARDWARE_INTRINSIC(X86Base, Abs, 16, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, Add, 16, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(X86Base, AddSaturate, 16, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(X86Base, AddScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, AddSubtract, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, AlignRight, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, And, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_pandd, INS_pandd, INS_andps, INS_andpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, AndNot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_pandnd, INS_pandnd, INS_andnps, INS_andnpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, Average, 16, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(X86Base, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, 
HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, Blend, 16, 3, {INS_invalid, INS_invalid, INS_pblendw, INS_pblendw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, BlendVariable, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_blendvps, INS_blendvpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, CeilingScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, CompareEqual, 16, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqq, INS_pcmpeqq, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareGreaterThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, 
CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, CompareScalarEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_cmpss, INS_cmpsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_comiss, INS_comisd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedEqual, 16, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, CompareUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2sd, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128UInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si32, INS_cvtsd2si32, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, ConvertToInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si32, INS_cvttsd2si32, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int16, 16, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int32, 16, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_cvttpd2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int64, 16, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(X86Base, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) 
-HARDWARE_INTRINSIC(X86Base, Crc32, 0, 2, {INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(X86Base, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_idiv, INS_div, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(X86Base, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, DivideScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, DotProduct, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_dppd, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, Extract, 16, 2, {INS_pextrb, INS_pextrb, INS_pextrw, INS_pextrw, INS_pextrd, INS_pextrd, INS_invalid, INS_invalid, INS_extractps, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, Floor, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, FloorScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_roundss, INS_roundsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, HorizontalAdd, 16, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_phaddw, INS_phaddd, INS_phaddd, INS_invalid, INS_invalid, INS_haddps, INS_haddpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, HorizontalAddSaturate, 16, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, HorizontalSubtract, 16, 2, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, HorizontalSubtractSaturate, 16, 2, {INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, Insert, 16, 3, {INS_pinsrb, INS_pinsrb, INS_pinsrw, INS_pinsrw, INS_pinsrd, INS_pinsrd, INS_invalid, INS_invalid, INS_insertps, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(X86Base, LoadAlignedVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, LoadAlignedVector128NonTemporal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, LoadAndDuplicateToVector128, 16, 1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, LoadDquVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, LoadFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) -HARDWARE_INTRINSIC(X86Base, LoadHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_movhpd, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, LoadLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_movlpd, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, LoadScalarVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_movd32, INS_movq, INS_movq, INS_movss, INS_movsd_simd, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, LoadVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(X86Base, MaskMove, 16, 3, {INS_maskmovdqu, INS_maskmovdqu, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, 
HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(X86Base, Max, 16, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_maxps, INS_maxpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(X86Base, MaxScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxss, INS_maxsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, MemoryFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) -HARDWARE_INTRINSIC(X86Base, Min, 16, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_minps, INS_minpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(X86Base, MinHorizontal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_phminposuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, MinScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minss, INS_minsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, MoveAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, MoveHighAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid, 
INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, MoveHighToLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhlps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(X86Base, MoveLowAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, MoveLowToHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlhps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(X86Base, MoveMask, 16, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_movss, INS_movsd_simd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(X86Base, MultipleSumAbsoluteDifferences, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_mpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(X86Base, MultiplyAddAdjacent, 16, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(X86Base, MultiplyHigh, 16, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(X86Base, MultiplyHighRoundScale, 16, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, MultiplyLow, 16, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(X86Base, MultiplyScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, Or, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_pord, INS_pord, INS_orps, INS_orpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, PackSignedSaturate, 16, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, PackUnsignedSaturate, 16, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, Pause, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(X86Base, PopCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(X86Base, Prefetch0, 0, 1, {INS_invalid, INS_prefetcht0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(X86Base, Prefetch1, 0, 1, {INS_invalid, INS_prefetcht1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(X86Base, Prefetch2, 0, 1, {INS_invalid, INS_prefetcht2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(X86Base, PrefetchNonTemporal, 0, 1, {INS_invalid, INS_prefetchnta, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(X86Base, Reciprocal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, ReciprocalScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpss, 
INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, ReciprocalSqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, ReciprocalSqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtss, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, RoundCurrentDirection, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, RoundCurrentDirectionScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, RoundToNearestInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, RoundToNearestIntegerScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, RoundToNegativeInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, RoundToNegativeInfinityScalar, 16, -1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, RoundToPositiveInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, RoundToPositiveInfinityScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, RoundToZero, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, RoundToZeroScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, ShiftLeftLogical, 16, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(X86Base, ShiftLeftLogical128BitLane, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, ShiftRightArithmetic, 16, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) 
-HARDWARE_INTRINSIC(X86Base, ShiftRightLogical, 16, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(X86Base, ShiftRightLogical128BitLane, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, Shuffle, 16, -1, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(X86Base, ShuffleHigh, 16, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(X86Base, ShuffleLow, 16, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(X86Base, Sign, 16, 2, {INS_psignb, INS_invalid, INS_psignw, INS_invalid, INS_psignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, Store, 16, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(X86Base, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, StoreFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) -HARDWARE_INTRINSIC(X86Base, StoreHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_movhpd, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(X86Base, StoreLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_movlpd, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(X86Base, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti32, INS_movnti32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) 
-HARDWARE_INTRINSIC(X86Base, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_movd32, INS_movq, INS_movq, INS_movss, INS_movsd_simd, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(X86Base, Subtract, 16, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, SubtractSaturate, 16, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, SubtractScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(X86Base, SumAbsoluteDifferences, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, TestC, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, TestNotZAndNotC, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, TestZ, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(X86Base, UnpackHigh, 16, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, UnpackLow, 16, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(X86Base, Xor, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_pxord, INS_pxord, INS_xorps, INS_xorpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, Abs, 16, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, Add, 16, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, AddSaturate, 16, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, AddScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, AddSubtract, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) 
+HARDWARE_INTRINSIC(X86Base, AlignRight, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, And, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_pandd, INS_pandd, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, AndNot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_pandnd, INS_pandnd, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, Average, 16, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, Blend, 16, 3, {INS_invalid, INS_invalid, INS_pblendw, INS_pblendw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, BlendVariable, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_blendvps, INS_blendvpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_SpecialImport) 
+HARDWARE_INTRINSIC(X86Base, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CeilingScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, CompareEqual, 16, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqq, INS_pcmpeqq, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareGreaterThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, CompareScalarEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarGreaterThan, 16, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128UInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si32, INS_cvtsd2si32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) 
+HARDWARE_INTRINSIC(X86Base, ConvertToInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si32, INS_cvttsd2si32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int16, 16, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int32, 16, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int64, 16, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(X86Base, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, Crc32, 0, 2, {INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(X86Base, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_idiv, INS_div, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(X86Base, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, DivideScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, DotProduct, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_dppd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, Extract, 16, 2, {INS_pextrb, INS_pextrb, INS_pextrw, INS_pextrw, INS_pextrd, INS_pextrd, INS_invalid, INS_invalid, INS_extractps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, Floor, 16, 1, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, FloorScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, HorizontalAdd, 16, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_phaddw, INS_phaddd, INS_phaddd, INS_invalid, INS_invalid, INS_haddps, INS_haddpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, HorizontalAddSaturate, 16, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, HorizontalSubtract, 16, 2, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, HorizontalSubtractSaturate, 16, 2, {INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, Insert, 16, 3, {INS_pinsrb, INS_pinsrb, INS_pinsrw, INS_pinsrw, INS_pinsrd, INS_pinsrd, INS_invalid, INS_invalid, INS_insertps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(X86Base, LoadAlignedVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, LoadAlignedVector128NonTemporal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, 
INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, LoadAndDuplicateToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, LoadDquVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, LoadFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) +HARDWARE_INTRINSIC(X86Base, LoadHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_movhpd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, LoadLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_movlpd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, LoadScalarVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_movd32, INS_movq, INS_movq, INS_movss, INS_movsd_simd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, LoadVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(X86Base, MaskMove, 16, 3, {INS_maskmovdqu, INS_maskmovdqu, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, Max, 16, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(X86Base, MaxScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxss, INS_maxsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, MemoryFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) +HARDWARE_INTRINSIC(X86Base, Min, 16, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(X86Base, MinHorizontal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_phminposuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, MinScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minss, INS_minsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, MoveAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, MoveHighAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, MoveHighToLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhlps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(X86Base, MoveLowAndDuplicate, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, MoveLowToHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(X86Base, MoveMask, 16, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(X86Base, MultipleSumAbsoluteDifferences, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_mpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, MultiplyAddAdjacent, 16, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) 
+HARDWARE_INTRINSIC(X86Base, MultiplyHigh, 16, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, MultiplyHighRoundScale, 16, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, MultiplyLow, 16, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, MultiplyScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, Or, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_pord, INS_pord, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, PackSignedSaturate, 16, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, PackUnsignedSaturate, 16, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, Pause, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(X86Base, PopCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base, Prefetch0, 0, 1, {INS_invalid, INS_prefetcht0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(X86Base, Prefetch1, 0, 1, {INS_invalid, INS_prefetcht1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(X86Base, Prefetch2, 0, 1, {INS_invalid, INS_prefetcht2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(X86Base, PrefetchNonTemporal, 0, 1, {INS_invalid, INS_prefetchnta, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(X86Base, Reciprocal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, ReciprocalScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, ReciprocalSqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, ReciprocalSqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, RoundCurrentDirection, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, RoundCurrentDirectionScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, RoundToNearestInteger, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, RoundToNearestIntegerScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, RoundToNegativeInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, RoundToNegativeInfinityScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, RoundToPositiveInfinity, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, 
HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, RoundToPositiveInfinityScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, RoundToZero, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, RoundToZeroScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, ShiftLeftLogical, 16, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShiftLeftLogical128BitLane, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, ShiftRightArithmetic, 16, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShiftRightLogical, 16, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShiftRightLogical128BitLane, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, HW_Category_IMM, 
HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, Shuffle, 16, -1, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShuffleHigh, 16, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShuffleLow, 16, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, Sign, 16, 2, {INS_psignb, INS_invalid, INS_psignw, INS_invalid, INS_psignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, Store, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, StoreAlignedNonTemporal, 16, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, StoreFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) +HARDWARE_INTRINSIC(X86Base, StoreHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_movhpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, StoreLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti32, INS_movnti32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_movd32, INS_movq, INS_movq, INS_movss, INS_movsd_simd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, Subtract, 16, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, SubtractSaturate, 16, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) 
+HARDWARE_INTRINSIC(X86Base, SubtractScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, SumAbsoluteDifferences, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, TestC, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, TestNotZAndNotC, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, TestZ, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, UnpackHigh, 16, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, UnpackLow, 16, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, Xor, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_pxord, INS_pxord, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, 
HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) #define LAST_NI_X86Base NI_X86Base_Xor // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // 64-bit only Intrinsics for X86Base, SSE, SSE2, SSE3, SSSE3, SSE41, SSE42, POPCNT #define FIRST_NI_X86Base_X64 NI_X86Base_X64_BitScanForward -HARDWARE_INTRINSIC(X86Base_X64, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base_X64, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, 
HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(X86Base_X64, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_cvtss2si64, INS_cvtsd2si64, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(X86Base_X64, ConvertToInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si64, INS_cvttsd2si64, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(X86Base_X64, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(X86Base_X64, Crc32, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_crc32, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(X86Base_X64, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(X86Base_X64, Extract, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pextrq, INS_pextrq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base_X64, Insert, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pinsrq, INS_pinsrq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(X86Base_X64, PopCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(X86Base_X64, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti64, INS_movnti64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base_X64, BitScanForward, 
0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base_X64, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_cvtss2si64, INS_cvtsd2si64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertToInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si64, INS_cvttsd2si64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, Crc32, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_crc32, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(X86Base_X64, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(X86Base_X64, Extract, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pextrq, INS_pextrq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base_X64, Insert, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pinsrq, INS_pinsrq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(X86Base_X64, PopCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_movnti64, INS_movnti64, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) #define LAST_NI_X86Base_X64 NI_X86Base_X64_StoreNonTemporal // Intrinsics for AVX #define FIRST_NI_AVX NI_AVX_Add -HARDWARE_INTRINSIC(AVX, Add, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_addpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX, AddSubtract, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, And, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX, AndNot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, Blend, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, BlendVariable, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vblendvps, INS_vblendvpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, BroadcastScalarToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastss, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) 
-HARDWARE_INTRINSIC(AVX, BroadcastScalarToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastss, INS_vbroadcastsd, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, BroadcastVector128ToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastf32x4, INS_vbroadcastf32x4, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Ceiling, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Compare, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareGreaterThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareGreaterThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareLessThan, 32, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareLessThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareNotEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareNotGreaterThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareNotGreaterThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareNotLessThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareNotLessThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareOrdered, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd, INS_invalid}, HW_Category_IMM, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, CompareUnordered, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, ConvertToVector128Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX, 
ConvertToVector256Int32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX, ConvertToVector256Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX, Divide, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, DotProduct, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, DuplicateEvenIndexed, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, DuplicateOddIndexed, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, ExtractVector128, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) 
-HARDWARE_INTRINSIC(AVX, Floor, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, HorizontalAdd, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_haddps, INS_haddpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, HorizontalSubtract, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, InsertVector128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX, LoadAlignedVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX, LoadDquVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX, LoadVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX, MaskLoad, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd, INS_invalid}, HW_Category_MemoryLoad, 
HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, MaskStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, Max, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_maxpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(AVX, Min, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_minpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(AVX, MoveMask, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, Multiply, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_mulpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX, Or, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX, Permute, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_vpermilpd, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX, Permute2x128, 32, 3, {INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_invalid}, HW_Category_IMM, 
HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, PermuteVar, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_vpermilpdvar, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Reciprocal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, ReciprocalSqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, RoundCurrentDirection, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, RoundToNearestInteger, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, RoundToNegativeInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, RoundToPositiveInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, RoundToZero, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Shuffle, 32, 3, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_shufpd, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX, Sqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Store, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX, StoreAligned, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX, StoreAlignedNonTemporal, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX, Subtract, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_subpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, TestC, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX, TestNotZAndNotC, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX, TestZ, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX, UnpackHigh, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_unpckhpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, UnpackLow, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_unpcklpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Xor, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AVX, Add, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX, AddSubtract, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, And, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX, AndNot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, Blend, 32, 3, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, BlendVariable, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vblendvps, INS_vblendvpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, BroadcastScalarToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastss, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, BroadcastScalarToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, BroadcastVector128ToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastf32x4, INS_vbroadcastf32x4}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, Ceiling, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, Compare, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, 
CompareGreaterThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareGreaterThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareLessThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareLessThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareNotEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareNotGreaterThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareNotGreaterThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) 
+HARDWARE_INTRINSIC(AVX, CompareNotLessThan, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareNotLessThanOrEqual, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareOrdered, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_IMM, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, CompareUnordered, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, ConvertToVector128Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX, Divide, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, DotProduct, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, DuplicateEvenIndexed, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, DuplicateOddIndexed, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) 
+HARDWARE_INTRINSIC(AVX, ExtractVector128, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4, INS_vextractf32x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, Floor, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, HorizontalAdd, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_haddps, INS_haddpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, HorizontalSubtract, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, InsertVector128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4, INS_vinsertf32x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, LoadAlignedVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, LoadDquVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, LoadVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX, MaskLoad, -1, 
2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd}, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, MaskStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, Max, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX, Min, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX, MoveMask, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, Multiply, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX, Or, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX, Permute, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_vpermilpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX, Permute2x128, 32, 3, {INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, 
INS_vperm2f128, INS_vperm2f128}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, PermuteVar, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_vpermilpdvar}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, Reciprocal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, ReciprocalSqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, RoundCurrentDirection, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, RoundToNearestInteger, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, RoundToNegativeInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, RoundToPositiveInfinity, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, RoundToZero, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, Shuffle, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX, Sqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, Store, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX, StoreAligned, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, StoreAlignedNonTemporal, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, Subtract, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, TestC, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, TestNotZAndNotC, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, TestZ, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, 
INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX, UnpackHigh, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, UnpackLow, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, Xor, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) #define LAST_NI_AVX NI_AVX_Xor // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE #define FIRST_NI_AVX2 NI_AVX2_Abs -HARDWARE_INTRINSIC(AVX2, Abs, 32, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX2, Add, 32, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, AddSaturate, 32, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, AlignRight, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, And, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_pandd, INS_pandd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, AndNot, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_InvalidNodeId|HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(AVX2, Average, 
32, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, BitFieldExtract, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bextr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Blend, -1, 3, {INS_invalid, INS_invalid, INS_pblendw, INS_pblendw, INS_vpblendd, INS_vpblendd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, BlendVariable, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX2, BroadcastScalarToVector128, 16, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_movddup, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(AVX2, BroadcastScalarToVector256, 32, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(AVX2, BroadcastVector128ToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, 
CompareEqual, 32, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX2, CompareGreaterThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX2, CompareLessThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX2, ConvertToInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX2, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int16, 32, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int32, 32, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int64, 32, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(AVX2, ExtractLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, ExtractVector128, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti32x4, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, GatherMaskVector128, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, GatherMaskVector256, 32, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, GatherVector128, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, 
GatherVector256, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, GetMaskUpToLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsmsk, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(AVX2, HorizontalAdd, 32, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_phaddw, INS_phaddd, INS_phaddd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, HorizontalAddSaturate, 32, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, HorizontalSubtract, 32, 2, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, HorizontalSubtractSaturate, 32, 2, {INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, InsertVector128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti32x4, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, LeadingZeroCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_lzcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX2, LoadAlignedVector256NonTemporal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, MaskLoad, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaskmovd, INS_vpmaskmovd, INS_vpmaskmovq, INS_vpmaskmovq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, MaskStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaskmovd, INS_vpmaskmovd, INS_vpmaskmovq, INS_vpmaskmovq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Max, 32, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, Min, 32, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, MoveMask, 32, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, MultipleSumAbsoluteDifferences, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_mpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Multiply, 32, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, MultiplyAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX2, MultiplyAddAdjacent, 32, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, MultiplyAddNegated, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX2, MultiplyAddNegatedScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX2, MultiplyAddScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX2, MultiplyAddSubtract, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX2, MultiplyHigh, 32, 2, 
{INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, MultiplyHighRoundScale, 32, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, MultiplyLow, 32, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, MultiplyNoFlags, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics|HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(AVX2, MultiplySubtract, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX2, MultiplySubtractAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX2, MultiplySubtractNegated, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX2, MultiplySubtractNegatedScalar, 16, 3, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX2, MultiplySubtractScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX2, Or, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_pord, INS_pord, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, PackSignedSaturate, 32, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, PackUnsignedSaturate, 32, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, ParallelBitDeposit, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pdep, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, ParallelBitExtract, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pext, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Permute2x128, 32, 3, {INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, 
INS_vperm2i128, INS_vperm2i128, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Permute4x64, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, PermuteVar8x32, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX2, ResetLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(AVX2, ShiftLeftLogical, 32, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ShiftLeftLogical128BitLane, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, ShiftLeftLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, ShiftRightArithmetic, 32, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_vpsravd, INS_vpsravd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, ShiftRightLogical, 32, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ShiftRightLogical128BitLane, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, ShiftRightLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, Shuffle, 32, 2, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_MaybeIMM) -HARDWARE_INTRINSIC(AVX2, ShuffleHigh, 32, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ShuffleLow, 32, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, Sign, 32, 2, {INS_psignb, INS_invalid, INS_psignw, INS_invalid, INS_psignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Subtract, 32, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, SubtractSaturate, 32, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, SumAbsoluteDifferences, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, TrailingZeroCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_tzcnt, INS_tzcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX2, UnpackHigh, 32, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, UnpackLow, 32, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, Xor, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_pxord, INS_pxord, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, ZeroHighBits, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bzhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, Abs, 32, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX2, Add, 32, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, AddSaturate, 32, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, AlignRight, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, And, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_pandd, INS_pandd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, AndNot, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_InvalidNodeId|HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(AVX2, Average, 32, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, BitFieldExtract, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bextr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, Blend, -1, 3, {INS_invalid, INS_invalid, INS_pblendw, INS_pblendw, INS_vpblendd, INS_vpblendd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) 
+HARDWARE_INTRINSIC(AVX2, BlendVariable, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX2, BroadcastScalarToVector128, 16, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_movddup}, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(AVX2, BroadcastScalarToVector256, 32, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(AVX2, BroadcastVector128ToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, CompareEqual, 32, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX2, CompareGreaterThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX2, CompareLessThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX2, ConvertToInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX2, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int16, 32, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int32, 32, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int64, 32, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(AVX2, ExtractLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, ExtractVector128, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti32x4, INS_invalid, INS_invalid}, HW_Category_IMM, 
HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, GatherMaskVector128, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, GatherMaskVector256, 32, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, GatherVector128, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, GatherVector256, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, GetMaskUpToLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsmsk, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(AVX2, HorizontalAdd, 32, 2, {INS_invalid, INS_invalid, INS_phaddw, INS_phaddw, INS_phaddd, INS_phaddd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, HorizontalAddSaturate, 32, 2, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, HorizontalSubtract, 32, 2, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, HorizontalSubtractSaturate, 32, 2, {INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, InsertVector128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti32x4, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, LeadingZeroCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_lzcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX2, LoadAlignedVector256NonTemporal, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, MaskLoad, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaskmovd, INS_vpmaskmovd, INS_vpmaskmovq, INS_vpmaskmovq, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, MaskStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaskmovd, INS_vpmaskmovd, INS_vpmaskmovq, INS_vpmaskmovq, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, Max, 32, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, Min, 32, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, MoveMask, 32, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, MultipleSumAbsoluteDifferences, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_mpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, Multiply, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, MultiplyAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX2, MultiplyAddAdjacent, 32, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, MultiplyAddNegated, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX2, MultiplyAddNegatedScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_vfnmadd213ss, INS_vfnmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX2, MultiplyAddScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX2, MultiplyAddSubtract, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX2, MultiplyHigh, 32, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, MultiplyHighRoundScale, 32, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, MultiplyLow, 32, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, MultiplyNoFlags, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics|HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX2, MultiplySubtract, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, HW_Category_SimpleSIMD, 
HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX2, MultiplySubtractAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX2, MultiplySubtractNegated, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX2, MultiplySubtractNegatedScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX2, MultiplySubtractScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX2, Or, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_pord, INS_pord, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, PackSignedSaturate, 32, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, PackUnsignedSaturate, 32, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, 
ParallelBitDeposit, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pdep, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, ParallelBitExtract, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pext, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, Permute2x128, 32, 3, {INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, Permute4x64, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, PermuteVar8x32, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX2, ResetLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(AVX2, ShiftLeftLogical, 32, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ShiftLeftLogical128BitLane, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, ShiftLeftLogicalVariable, -1, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, ShiftRightArithmetic, 32, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravd, INS_vpsravd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, ShiftRightLogical, 32, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ShiftRightLogical128BitLane, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, ShiftRightLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, Shuffle, 32, 2, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_MaybeIMM) +HARDWARE_INTRINSIC(AVX2, ShuffleHigh, 32, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ShuffleLow, 32, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, Sign, 32, 2, {INS_psignb, INS_invalid, INS_psignw, INS_invalid, INS_psignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, Subtract, 32, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, SubtractSaturate, 32, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, SumAbsoluteDifferences, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, TrailingZeroCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_tzcnt, INS_tzcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX2, UnpackHigh, 32, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, UnpackLow, 32, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX2, Xor, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_pxord, INS_pxord, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, ZeroHighBits, 0, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_bzhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) #define LAST_NI_AVX2 NI_AVX2_ZeroHighBits // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // 64-bit only Intrinsics for AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE #define FIRST_NI_AVX2_X64 NI_AVX2_X64_AndNot -HARDWARE_INTRINSIC(AVX2_X64, AndNot, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_SpecialImport|HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2_X64, BitFieldExtract, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_bextr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2_X64, ExtractLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsi, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2_X64, GetMaskUpToLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsmsk, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(AVX2_X64, LeadingZeroCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_lzcnt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX2_X64, MultiplyNoFlags, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics|HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(AVX2_X64, ParallelBitDeposit, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pdep, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2_X64, ParallelBitExtract, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pext, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2_X64, ResetLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_blsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(AVX2_X64, TrailingZeroCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_tzcnt, INS_tzcnt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX2_X64, ZeroHighBits, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bzhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2_X64, AndNot, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andn, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_SpecialImport|HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2_X64, BitFieldExtract, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bextr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2_X64, ExtractLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsi, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2_X64, GetMaskUpToLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsmsk, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(AVX2_X64, LeadingZeroCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_lzcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, 
HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX2_X64, MultiplyNoFlags, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics|HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX2_X64, ParallelBitDeposit, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pdep, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2_X64, ParallelBitExtract, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pext, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2_X64, ResetLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) +HARDWARE_INTRINSIC(AVX2_X64, TrailingZeroCount, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_tzcnt, INS_tzcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX2_X64, ZeroHighBits, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bzhi, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) #define LAST_NI_AVX2_X64 NI_AVX2_X64_ZeroHighBits // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL #define FIRST_NI_AVX512 NI_AVX512_Abs -HARDWARE_INTRINSIC(AVX512, Abs, -1, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, Add, 64, -1, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, AddSaturate, 64, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512, AddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, AlignRight, 64, 3, {INS_palignr, INS_palignr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, AlignRight32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, AlignRight64, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, And, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_vpandq, INS_vpandq, INS_andps, INS_andpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, AndNot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_vpandnq, INS_vpandnq, INS_andnps, INS_andnpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, Average, 64, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512, BlendVariable, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) 
-HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, BroadcastScalarToVector512, 64, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, BroadcastVector128ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_vbroadcasti64x2, INS_vbroadcasti64x2, INS_vbroadcastf32x4, INS_vbroadcastf64x2, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, BroadcastVector256ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x8, INS_vbroadcasti32x8, INS_vbroadcasti64x4, INS_vbroadcasti64x4, INS_vbroadcastf32x8, INS_vbroadcastf64x4, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, Classify, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_InvalidNodeId|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, 
ClassifyScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_InvalidNodeId|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, Compare, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareGreaterThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareGreaterThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareLessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareLessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareNotEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThan, -1, 2, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareNotLessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareNotLessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareOrdered, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompareUnordered, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, Compress, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, CompressStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512, 
ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_vcvtusi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_vcvtusi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si32, INS_cvtsd2si32, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToUInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi32, INS_vcvtsd2usi32, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToUInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi32, INS_vcvttsd2usi32, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Byte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128ByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_vpmovsqw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_vcvttps2qq, INS_vcvttpd2qq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128SByte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128SByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_vpmovsdb, INS_invalid, INS_vpmovsqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128Single, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_vpmovusqw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_vpmovusqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32WithTruncation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Byte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256ByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_cvtpd2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256SByte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256SByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256Single, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_cvtpd2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32WithTruncation, -1, 1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512Double, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_cvtps2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int32, 64, -1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int64, 64, -1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, 
INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512Single, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt32, 64, -1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt64, 64, -1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt64WithTruncation, 64, 1, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512, DetectConflicts, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, Divide, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, DivideScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, DuplicateEvenIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, DuplicateOddIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, Expand, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, ExpandLoad, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, ExtractVector128, 64, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti64x2, INS_vextracti64x2, INS_vextractf32x4, INS_vextractf64x2, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, ExtractVector256, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x8, INS_vextracti32x8, INS_vextracti64x4, INS_vextracti64x4, INS_vextractf32x8, INS_vextractf64x4, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, Fixup, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd, INS_invalid}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, FixupScalar, 16, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmss, INS_vfixupimmsd, INS_invalid}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, FusedMultiplyAdd, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddNegated, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd, INS_invalid}, HW_Category_SIMDScalar, 
HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddSubtract, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtract, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractAdd, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractNegated, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, GetExponent, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpps, INS_vgetexppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, GetExponentScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpss, INS_vgetexpsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, GetMantissa, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd, INS_invalid}, HW_Category_IMM, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, GetMantissaScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantss, INS_vgetmantsd, INS_invalid}, HW_Category_IMM, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, InsertVector128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti64x2, INS_vinserti64x2, INS_vinsertf32x4, INS_vinsertf64x2, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, InsertVector256, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x8, INS_vinserti32x8, INS_vinserti64x4, 
INS_vinserti64x4, INS_vinsertf32x8, INS_vinsertf64x4, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, LeadingZeroCount, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, LoadAlignedVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, LoadAlignedVector512NonTemporal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, LoadVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, MaskLoad, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, MaskLoadAligned, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512, MaskStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512, MaskStoreAligned, -1, 3, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512, Max, -1, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_vpmaxsq, INS_vpmaxuq, INS_maxps, INS_maxpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(AVX512, Min, -1, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_vpminsq, INS_vpminuq, INS_minps, INS_minpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(AVX512, MoveMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoContainment|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, Multiply, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, MultiplyAddAdjacent, 64, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, MultiplyHigh, 64, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512, MultiplyHighRoundScale, 64, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, MultiplyLow, -1, 2, {INS_invalid, 
INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512, MultiplyScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, Or, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_vporq, INS_vporq, INS_orps, INS_orpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, PackSignedSaturate, 64, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, PackUnsignedSaturate, 64, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, Permute2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpd, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, Permute4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, Permute4x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, PermuteVar16x16, 32, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512, PermuteVar16x16x2, 32, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, PermuteVar16x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512, PermuteVar16x32x2, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, PermuteVar2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpdvar, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, PermuteVar2x64x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, PermuteVar32x16, 64, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512, PermuteVar32x16x2, 64, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, PermuteVar4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, PermuteVar4x32x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, PermuteVar4x64, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512, PermuteVar4x64x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, PermuteVar8x16 , 16, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512, PermuteVar8x16x2, 16, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, PermuteVar8x32x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, 
PermuteVar8x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512, PermuteVar8x64x2, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512, Range, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd, INS_invalid}, HW_Category_IMM, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, RangeScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangess, INS_vrangesd, INS_invalid}, HW_Category_IMM, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, Reciprocal14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ps, INS_vrcp14pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, Reciprocal14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ss, INS_vrcp14sd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, ReciprocalSqrt14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ps, INS_vrsqrt14pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, ReciprocalSqrt14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ss, INS_vrsqrt14sd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, Reduce, 
-1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ReduceScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreducess, INS_vreducesd, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512, RotateLeft, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprold, INS_vprold, INS_vprolq, INS_vprolq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, RotateLeftVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprolvd, INS_vprolvd, INS_vprolvq, INS_vprolvq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, RotateRight, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprord, INS_vprord, INS_vprorq, INS_vprorq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, RotateRightVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, RoundScale, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, RoundScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaless, INS_vrndscalesd, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) 
-HARDWARE_INTRINSIC(AVX512, Scale, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefss, INS_vscalefsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, ShiftLeftLogical, 64, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ShiftLeftLogical128BitLane, 64, 2, {INS_pslldq, INS_pslldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ShiftLeftLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, ShiftRightArithmetic, -1, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsravw, INS_invalid, INS_vpsravd, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, ShiftRightLogical, 64, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ShiftRightLogical128BitLane, 64, 2, {INS_psrldq, INS_psrldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ShiftRightLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsrlvw, INS_vpsrlvw, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, Shuffle, 64, -1, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, Shuffle2x128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, Shuffle4x128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ShuffleHigh, 64, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ShuffleLow, 64, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, Sqrt, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, Store, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, Subtract, 64, -1, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, SubtractSaturate, 64, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, SubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512, SumAbsoluteDifferences, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, SumAbsoluteDifferencesInBlock32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, TernaryLogic, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq, INS_invalid}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512, UnpackHigh, 64, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, UnpackLow, 64, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, Xor, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_vpxorq, INS_vpxorq, INS_xorps, INS_xorpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, Abs, -1, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, Add, 64, -1, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, AddSaturate, 64, 2, {INS_paddsb, INS_paddusb, 
INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, AddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, AlignRight, 64, 3, {INS_palignr, INS_palignr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, AlignRight32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, AlignRight64, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, And, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_vpandq, INS_vpandq, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, AndNot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_vpandnq, INS_vpandnq, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, Average, 64, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, BlendVariable, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) 
+HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastScalarToVector512, 64, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_SIMDScalar, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastVector128ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_vbroadcasti64x2, INS_vbroadcasti64x2, INS_vbroadcastf32x4, INS_vbroadcastf64x2}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastVector256ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x8, INS_vbroadcasti32x8, INS_vbroadcasti64x4, INS_vbroadcasti64x4, INS_vbroadcastf32x8, INS_vbroadcastf64x4}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Classify, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_InvalidNodeId|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ClassifyScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_InvalidNodeId|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Compare, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareGreaterThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareGreaterThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareLessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareLessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) 
+HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotLessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotLessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareOrdered, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareUnordered, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, Compress, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompressStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_vcvtusi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, 
ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_vcvtusi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si32, INS_cvtsd2si32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToUInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi32, INS_vcvtsd2usi32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToUInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi32, INS_vcvttsd2usi32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Byte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128ByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) 
+HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_vpmovsqw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128SByte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128SByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, 
INS_vpmovswb, INS_invalid, INS_vpmovsdb, INS_invalid, INS_vpmovsqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Single, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_vpmovusqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32WithTruncation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Byte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256ByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256SByte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256SByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Single, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) 
+HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32WithTruncation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Double, 64, 
-1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int32, 64, -1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int64, 64, -1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Single, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt32, 64, -1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt64, 64, -1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, DetectConflicts, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Divide, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, DivideScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, DuplicateEvenIndexed, 64, 1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, DuplicateOddIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Expand, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, ExpandLoad, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, ExtractVector128, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti64x2, INS_vextracti64x2, INS_vextractf32x4, INS_vextractf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, ExtractVector256, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x8, INS_vextracti32x8, INS_vextracti64x4, INS_vextracti64x4, INS_vextractf32x8, INS_vextractf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, Fixup, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, FixupScalar, 16, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmss, INS_vfixupimmsd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAdd, 64, 
-1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddNegated, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddSubtract, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtract, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractAdd, 64, -1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractNegated, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, GetExponent, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpps, INS_vgetexppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, GetExponentScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpss, INS_vgetexpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, GetMantissa, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd}, HW_Category_IMM, HW_Flag_NoFlag) 
+HARDWARE_INTRINSIC(AVX512, GetMantissaScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantss, INS_vgetmantsd}, HW_Category_IMM, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, InsertVector128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti64x2, INS_vinserti64x2, INS_vinsertf32x4, INS_vinsertf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, InsertVector256, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x8, INS_vinserti32x8, INS_vinserti64x4, INS_vinserti64x4, INS_vinsertf32x8, INS_vinsertf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, LeadingZeroCount, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, LoadAlignedVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, LoadAlignedVector512NonTemporal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, LoadVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, MaskLoad, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId) 
+HARDWARE_INTRINSIC(AVX512, MaskLoadAligned, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, MaskStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512, MaskStoreAligned, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512, Max, -1, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_vpmaxsq, INS_vpmaxuq, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX512, Min, -1, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_vpminsq, INS_vpminuq, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX512, MoveMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoContainment|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, Multiply, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, MultiplyAddAdjacent, 64, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, MultiplyHigh, 
64, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, MultiplyHighRoundScale, 64, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, MultiplyLow, -1, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, MultiplyScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, Or, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_vporq, INS_vporq, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, PackSignedSaturate, 64, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, PackUnsignedSaturate, 64, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Permute2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Permute4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_invalid}, 
HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Permute4x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, PermuteVar16x16, 32, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar16x16x2, 32, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar16x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar16x32x2, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpdvar}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, PermuteVar2x64x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar32x16, 64, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar32x16x2, 64, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, PermuteVar4x32x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar4x64, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar4x64x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x16 , 16, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x16x2, 16, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x32x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, 
INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x64x2, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, Range, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd}, HW_Category_IMM, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, RangeScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangess, INS_vrangesd}, HW_Category_IMM, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, Reciprocal14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ps, INS_vrcp14pd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Reciprocal14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ss, INS_vrcp14sd}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, ReciprocalSqrt14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ps, INS_vrsqrt14pd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ReciprocalSqrt14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ss, INS_vrsqrt14sd}, HW_Category_SimpleSIMD, 
HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, Reduce, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ReduceScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreducess, INS_vreducesd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, RotateLeft, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprold, INS_vprold, INS_vprolq, INS_vprolq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, RotateLeftVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprolvd, INS_vprolvd, INS_vprolvq, INS_vprolvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, RotateRight, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprord, INS_vprord, INS_vprorq, INS_vprorq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, RotateRightVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, RoundScale, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, RoundScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaless, INS_vrndscalesd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, Scale, -1, -1, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefss, INS_vscalefsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ShiftLeftLogical, 64, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftLeftLogical128BitLane, 64, 2, {INS_pslldq, INS_pslldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftLeftLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ShiftRightArithmetic, -1, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsravw, INS_invalid, INS_vpsravd, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ShiftRightLogical, 64, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftRightLogical128BitLane, 64, 2, {INS_psrldq, INS_psrldq, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftRightLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsrlvw, INS_vpsrlvw, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Shuffle, 64, -1, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Shuffle2x128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Shuffle4x128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShuffleHigh, 64, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShuffleLow, 64, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Sqrt, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) 
+HARDWARE_INTRINSIC(AVX512, Store, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, Subtract, 64, -1, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, SubtractSaturate, 64, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, SubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, SumAbsoluteDifferences, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, SumAbsoluteDifferencesInBlock32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, 
TernaryLogic, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, UnpackHigh, 64, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, UnpackLow, 64, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Xor, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_vpxorq, INS_vpxorq, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) #define LAST_NI_AVX512 NI_AVX512_Xor // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // 64-bit only Intrinsics for AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL #define FIRST_NI_AVX512_X64 NI_AVX512_X64_ConvertScalarToVector128Double -HARDWARE_INTRINSIC(AVX512_X64, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_vcvtusi2sd64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512_X64, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_vcvtusi2ss64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512_X64, ConvertToInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si64, INS_cvtsd2si64, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512_X64, ConvertToUInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi64, INS_vcvtsd2usi64, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512_X64, ConvertToUInt64WithTruncation, 16, 1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi64, INS_vcvttsd2usi64, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512_X64, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_vcvtusi2sd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512_X64, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_vcvtusi2ss64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512_X64, ConvertToInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si64, INS_cvtsd2si64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512_X64, ConvertToUInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi64, INS_vcvtsd2usi64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512_X64, ConvertToUInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi64, INS_vcvttsd2usi64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) #define LAST_NI_AVX512_X64 NI_AVX512_X64_ConvertToUInt64WithTruncation // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for AVX512-IFMA, AVX512-VBMI #define FIRST_NI_AVX512v2 NI_AVX512v2_MultiShift -HARDWARE_INTRINSIC(AVX512v2, MultiShift, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512v2, PermuteVar16x8, 16, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512v2, PermuteVar16x8x2, 16, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512v2, PermuteVar32x8, 32, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512v2, PermuteVar32x8x2, 32, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512v2, PermuteVar64x8, 64, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512v2, PermuteVar64x8x2, 64, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512v2, MultiShift, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512v2, PermuteVar16x8, 16, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512v2, PermuteVar16x8x2, 16, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512v2, PermuteVar32x8, 32, 2, 
{INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512v2, PermuteVar32x8x2, 32, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512v2, PermuteVar64x8, 64, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512v2, PermuteVar64x8x2, 64, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) #define LAST_NI_AVX512v2 NI_AVX512v2_PermuteVar64x8x2 // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for AVX512-BITALG, AVX512-VBMI2, AVX512-VPOPCNTDQ #define FIRST_NI_AVX512v3 NI_AVX512v3_Compress -HARDWARE_INTRINSIC(AVX512v3, Compress, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512v3, CompressStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512v3, Expand, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512v3, ExpandLoad, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512v3, Compress, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512v3, CompressStore, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, 
HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512v3, Expand, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512v3, ExpandLoad, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_InvalidNodeId) #define LAST_NI_AVX512v3 NI_AVX512v3_ExpandLoad -#define FIRST_NI_AVX10v1 NI_AVX10v1_AddScalar -HARDWARE_INTRINSIC(AVX10v1, AddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vaddsh}, HW_Category_SIMDScalar, HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, CompareScalarOrderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcomish}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, CompareScalarOrderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcomish}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, CompareScalarOrderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcomish}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, CompareScalarOrderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcomish}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, CompareScalarOrderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcomish}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, CompareScalarOrderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcomish}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, ConvertScalarToVector128Half, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtsi2sh32, INS_vcvtusi2sh32, INS_vcvtsi2sh64, INS_vcvtusi2sh64, INS_vcvtss2sh, INS_vcvtsd2sh, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtsh2ss}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtsh2sd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtsh2si32}, HW_Category_SIMDScalar, HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToUInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtsh2usi32}, HW_Category_SIMDScalar, HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_vcvtsh2si64}, HW_Category_SIMDScalar, HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToUInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtsh2usi64}, HW_Category_SIMDScalar, HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, CompareScalarUnorderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vucomish}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, CompareScalarUnorderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vucomish}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, CompareScalarUnorderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vucomish}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, CompareScalarUnorderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vucomish}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, CompareScalarUnorderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vucomish}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, CompareScalarUnorderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vucomish}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, DivideScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vdivsh}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) - -// LAST: come back to fix the overloaded intrinsics - -HARDWARE_INTRINSIC(AVX10v1, FusedMultiplyAddScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213sh}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, MaxScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaxsh}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, MinScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminsh}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, MultiplyScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmulsh}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, ReciprocalScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcpsh}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, ReciprocalSqrtScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrtsh}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, RoundScaleScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscalesh}, HW_Category_IMM, 
HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, SqrtScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vsqrtsh}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, SubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vsubsh}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -#define LAST_NI_AVX10v1 NI_AVX10v1_SubtractScalar - - // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for AVX10v2 #define FIRST_NI_AVX10v2 NI_AVX10v2_ConvertToByteWithSaturationAndZeroExtendToInt32 -HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithSaturationAndZeroExtendToInt32, -1, 
-1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2iubs, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2iubs, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, ConvertToInt32WithTruncatedSaturation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2sis32, INS_vcvttsd2sis32, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ibs, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2ibs, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, ConvertToUInt32WithTruncatedSaturation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usis32, INS_vcvttsd2usis32, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt32WithTruncatedSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2dqs, INS_vcvttpd2dqs, 
INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt64WithTruncatedSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qqs, INS_vcvttpd2qqs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt32WithTruncatedSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udqs, INS_vcvttpd2udqs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt64WithTruncatedSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqqs, INS_vcvttpd2uqqs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, MinMax, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxps, INS_vminmaxpd, INS_invalid}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, MinMaxScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxss, INS_vminmaxsd, INS_invalid}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_vmovd_simd, INS_vmovd_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(AVX10v2, MultipleSumAbsoluteDifferences, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vmpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v2, StoreScalar, 16, 2, {INS_invalid, 
INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, ConvertToInt32WithTruncatedSaturation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2sis32, INS_vcvttsd2sis32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, ConvertToUInt32WithTruncatedSaturation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usis32, INS_vcvttsd2usis32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) 
+HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt32WithTruncatedSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2dqs, INS_vcvttpd2dqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt64WithTruncatedSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qqs, INS_vcvttpd2qqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt32WithTruncatedSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udqs, INS_vcvttpd2udqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt64WithTruncatedSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqqs, INS_vcvttpd2uqqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, MinMax, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxps, INS_vminmaxpd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, MinMaxScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxss, INS_vminmaxsd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_vmovd_simd, INS_vmovd_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX10v2, MultipleSumAbsoluteDifferences, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vmpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX10v2, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) #define LAST_NI_AVX10v2 NI_AVX10v2_StoreScalar #define FIRST_NI_AVX512BMM NI_AVX512BMM_BitMultiplyMatrix16x16WithOrReduction -HARDWARE_INTRINSIC(AVX512BMM, BitMultiplyMatrix16x16WithOrReduction, -1, -1, {INS_invalid, INS_invalid, INS_vbmacor16x16x16, INS_vbmacor16x16x16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512BMM, BitMultiplyMatrix16x16WithXorReduction, -1, -1, {INS_invalid, INS_invalid, INS_vbmacxor16x16x16, INS_vbmacxor16x16x16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512BMM, ReverseBits, -1, -1, {INS_invalid, INS_vbitrev, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512BMM, BitMultiplyMatrix16x16WithOrReduction, -1, -1, {INS_invalid, INS_invalid, INS_vbmacor16x16x16, INS_vbmacor16x16x16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512BMM, BitMultiplyMatrix16x16WithXorReduction, -1, -1, {INS_invalid, INS_invalid, INS_vbmacxor16x16x16, INS_vbmacxor16x16x16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512BMM, ReverseBits, -1, -1, {INS_invalid, 
INS_vbitrev, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) #define LAST_NI_AVX512BMM NI_AVX512BMM_ReverseBits // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // 64-bit only intrinsics for AVX10v2 #define FIRST_NI_AVX10v2_X64 NI_AVX10v2_X64_ConvertToInt64WithTruncatedSaturation -HARDWARE_INTRINSIC(AVX10v2_X64, ConvertToInt64WithTruncatedSaturation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2sis64, INS_vcvttsd2sis64, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v2_X64, ConvertToUInt64WithTruncatedSaturation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usis64, INS_vcvttsd2usis64, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX10v2_X64, ConvertToInt64WithTruncatedSaturation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2sis64, INS_vcvttsd2sis64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX10v2_X64, ConvertToUInt64WithTruncatedSaturation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usis64, INS_vcvttsd2usis64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) #define LAST_NI_AVX10v2_X64 NI_AVX10v2_X64_ConvertToUInt64WithTruncatedSaturation // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for AVXVNNI #define FIRST_NI_AVXVNNI NI_AVXVNNI_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_vpdpbusd, INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_vpdpbusd, INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) #define LAST_NI_AVXVNNI NI_AVXVNNI_MultiplyWideningAndAddSaturate // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVXVNNIINT Intrinsics #define FIRST_NI_AVXVNNIINT NI_AVXVNNIINT_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVXVNNIINT, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) #define LAST_NI_AVXVNNIINT NI_AVXVNNIINT_MultiplyWideningAndAddSaturate // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVXVNNIINT_V512 Intrinsics #define FIRST_NI_AVXVNNIINT_V512 NI_AVXVNNIINT_V512_MultiplyWideningAndAdd -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVXVNNIINT_V512, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) #define LAST_NI_AVXVNNIINT_V512 NI_AVXVNNIINT_V512_MultiplyWideningAndAddSaturate // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} 
// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for AES, PCLMULQDQ #define FIRST_NI_AES NI_AES_CarrylessMultiply -HARDWARE_INTRINSIC(AES, CarrylessMultiply, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AES, Decrypt, 16, 2, {INS_invalid, INS_aesdec, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AES, DecryptLast, 16, 2, {INS_invalid, INS_aesdeclast, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AES, Encrypt, 16, 2, {INS_invalid, INS_aesenc, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AES, EncryptLast, 16, 2, {INS_invalid, INS_aesenclast, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AES, InverseMixColumns, 16, 1, {INS_invalid, INS_aesimc, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AES, KeygenAssist, 16, 2, 
{INS_invalid, INS_aeskeygenassist, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AES, CarrylessMultiply, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AES, Decrypt, 16, 2, {INS_invalid, INS_aesdec, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AES, DecryptLast, 16, 2, {INS_invalid, INS_aesdeclast, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AES, Encrypt, 16, 2, {INS_invalid, INS_aesenc, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AES, EncryptLast, 16, 2, {INS_invalid, INS_aesenclast, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AES, InverseMixColumns, 16, 1, {INS_invalid, INS_aesimc, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AES, KeygenAssist, 16, 2, {INS_invalid, INS_aeskeygenassist, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) #define LAST_NI_AES NI_AES_KeygenAssist // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for VAES, VPCLMULQDQ #define FIRST_NI_AES_V256 NI_AES_V256_CarrylessMultiply -HARDWARE_INTRINSIC(AES_V256, CarrylessMultiply, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AES_V256, CarrylessMultiply, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) #define LAST_NI_AES_V256 NI_AES_V256_CarrylessMultiply // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX Intrinsics for VAES, VPCLMULQDQ #define FIRST_NI_AES_V512 NI_AES_V512_CarrylessMultiply -HARDWARE_INTRINSIC(AES_V512, CarrylessMultiply, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AES_V512, CarrylessMultiply, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) #define LAST_NI_AES_V512 NI_AES_V512_CarrylessMultiply // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512 Intrinsics for X86Serialize #define FIRST_NI_X86Serialize NI_X86Serialize_Serialize -HARDWARE_INTRINSIC(X86Serialize, Serialize, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) +HARDWARE_INTRINSIC(X86Serialize, Serialize, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) #define LAST_NI_X86Serialize NI_X86Serialize_Serialize // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Intrinsics for GFNI #define FIRST_NI_GFNI NI_GFNI_GaloisFieldAffineTransform -HARDWARE_INTRINSIC(GFNI, GaloisFieldAffineTransform, 16, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(GFNI, GaloisFieldAffineTransformInverse, 16, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(GFNI, GaloisFieldMultiply, 16, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(GFNI, GaloisFieldAffineTransform, 16, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(GFNI, GaloisFieldAffineTransformInverse, 16, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(GFNI, GaloisFieldMultiply, 16, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) #define LAST_NI_GFNI NI_GFNI_GaloisFieldMultiply // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX Intrinsics for GFNI #define FIRST_NI_GFNI_V256 NI_GFNI_V256_GaloisFieldAffineTransform -HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldAffineTransform, 32, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldAffineTransformInverse, 32, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldMultiply, 32, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldAffineTransform, 32, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldAffineTransformInverse, 32, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(GFNI_V256, GaloisFieldMultiply, 32, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) #define LAST_NI_GFNI_V256 NI_GFNI_V256_GaloisFieldMultiply // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512 Intrinsics for GFNI #define FIRST_NI_GFNI_V512 NI_GFNI_V512_GaloisFieldAffineTransform -HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldAffineTransform, 64, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldAffineTransformInverse, 64, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(GFNI_V512, 
GaloisFieldMultiply, 64, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldAffineTransform, 64, 3, {INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldAffineTransformInverse, 64, 3, {INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_gf2p8affineinvqb, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldMultiply, 64, 2, {INS_invalid, INS_gf2p8mulb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) #define LAST_NI_GFNI_V512 NI_GFNI_V512_GaloisFieldMultiply // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_HALF} +// ISA Function name SIMD size NumArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Special intrinsics that are generated during lowering -HARDWARE_INTRINSIC(X86Base, COMIS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, PTEST, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(X86Base, UCOMIS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(AVX, PTEST, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, AndNotVector, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_pandnd, INS_pandnd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX2, AndNotScalar, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX512, KORTEST, 0, -1, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(AVX512, KTEST, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(AVX512, PTESTM, 0, 2, {INS_vptestmb, INS_vptestmb, INS_vptestmw, INS_vptestmw, INS_vptestmd, INS_vptestmd, INS_vptestmq, INS_vptestmq, INS_vptestmd, INS_vptestmq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512, PTESTNM, 0, 2, {INS_vptestnmb, INS_vptestnmb, INS_vptestnmw, INS_vptestnmw, INS_vptestnmd, INS_vptestnmd, INS_vptestnmq, INS_vptestnmq, INS_vptestnmd, INS_vptestnmq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) - -HARDWARE_INTRINSIC(AVX512, AddMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, AndMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, AndNotMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, BlendVariableMask, -1, 3, {INS_vpblendmb, INS_vpblendmb, INS_vpblendmw, INS_vpblendmw, INS_vpblendmd, INS_vpblendmd, INS_vpblendmq, INS_vpblendmq, INS_vblendmps, 
INS_vblendmpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, ClassifyMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfpclassps, INS_vfpclasspd, INS_invalid}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, ClassifyScalarMask, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfpclassss, INS_vfpclasssd, INS_invalid}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512, CompareMask, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd, INS_invalid}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareEqualMask, -1, 2, {INS_vpcmpeqb, INS_vpcmpeqb, INS_vpcmpeqw, INS_vpcmpeqw, INS_vpcmpeqd, INS_vpcmpeqd, INS_vpcmpeqq, INS_vpcmpeqq, INS_vcmpps, INS_vcmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AVX512, CompareGreaterThanMask, -1, 2, {INS_vpcmpgtb, INS_vpcmpub, INS_vpcmpgtw, INS_vpcmpuw, INS_vpcmpgtd, INS_vpcmpud, INS_vpcmpgtq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareGreaterThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareLessThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareLessThanOrEqualMask, -1, 2, 
{INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareNotEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareNotLessThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareNotLessThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareOrderedMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompareUnorderedMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, CompressMask, -1, 3, {INS_vpcompressb, INS_vpcompressb, INS_vpcompressw, INS_vpcompressw, INS_vpcompressd, INS_vpcompressd, INS_vpcompressq, INS_vpcompressq, INS_vcompressps, INS_vcompresspd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512, CompressStoreMask, -1, 3, {INS_vpcompressb, INS_vpcompressb, INS_vpcompressw, INS_vpcompressw, INS_vpcompressd, INS_vpcompressd, INS_vpcompressq, INS_vpcompressq, INS_vcompressps, INS_vcompresspd, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, ConvertMaskToVector, -1, 1, {INS_vpmovm2b, INS_vpmovm2b, INS_vpmovm2w, INS_vpmovm2w, INS_vpmovm2d, INS_vpmovm2d, INS_vpmovm2q, INS_vpmovm2q, INS_vpmovm2d, INS_vpmovm2q, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, ConvertVectorToMask, -1, 1, {INS_vpmovb2m, INS_vpmovb2m, INS_vpmovw2m, INS_vpmovw2m, INS_vpmovd2m, INS_vpmovd2m, INS_vpmovq2m, INS_vpmovq2m, INS_vpmovd2m, INS_vpmovq2m, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512, ExpandLoadMask, -1, 3, {INS_vpexpandb, INS_vpexpandb, INS_vpexpandw, INS_vpexpandw, INS_vpexpandd, INS_vpexpandd, INS_vpexpandq, INS_vpexpandq, INS_vexpandps, INS_vexpandpd, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, ExpandMask, -1, 3, {INS_vpexpandb, INS_vpexpandb, INS_vpexpandw, INS_vpexpandw, INS_vpexpandd, INS_vpexpandd, INS_vpexpandq, INS_vpexpandq, INS_vexpandps, INS_vexpandpd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, MaskLoadMask, -1, 3, {INS_vmovdqu8, INS_vmovdqu8, INS_vmovdqu16, INS_vmovdqu16, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, 
MaskLoadAlignedMask, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512, MaskStoreMask, -1, 3, {INS_vmovdqu8, INS_vmovdqu8, INS_vmovdqu16, INS_vmovdqu16, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512, MaskStoreAlignedMask, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512, NotMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, op_EqualityMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, op_InequalityMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, OrMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ShiftLeftMask, -1, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, ShiftRightMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, XorMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512, XnorMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base, COMIS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, PTEST, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, UCOMIS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(AVX, PTEST, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX2, AndNotVector, 32, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_pandnd, INS_pandnd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX2, AndNotScalar, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andn, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX512, KORTEST, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX512, KTEST, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX512, PTESTM, 0, 2, {INS_vptestmb, INS_vptestmb, INS_vptestmw, INS_vptestmw, INS_vptestmd, INS_vptestmd, INS_vptestmq, INS_vptestmq, INS_vptestmd, INS_vptestmq}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, PTESTNM, 0, 2, {INS_vptestnmb, INS_vptestnmb, INS_vptestnmw, INS_vptestnmw, INS_vptestnmd, INS_vptestnmd, INS_vptestnmq, INS_vptestnmq, INS_vptestnmd, INS_vptestnmq}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX10v1, VCOMISH, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcomish}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(AVX10v1, VUCOMISH, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vucomish}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(AVX512, AddMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, AndMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, AndNotMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, BlendVariableMask, -1, 3, {INS_vpblendmb, INS_vpblendmb, INS_vpblendmw, INS_vpblendmw, INS_vpblendmd, INS_vpblendmd, INS_vpblendmq, INS_vpblendmq, INS_vblendmps, INS_vblendmpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ClassifyMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfpclassps, INS_vfpclasspd}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ClassifyScalarMask, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfpclassss, INS_vfpclasssd}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, CompareMask, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareEqualMask, -1, 2, {INS_vpcmpeqb, INS_vpcmpeqb, INS_vpcmpeqw, INS_vpcmpeqw, INS_vpcmpeqd, INS_vpcmpeqd, INS_vpcmpeqq, INS_vpcmpeqq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) 
+HARDWARE_INTRINSIC(AVX512, CompareGreaterThanMask, -1, 2, {INS_vpcmpgtb, INS_vpcmpub, INS_vpcmpgtw, INS_vpcmpuw, INS_vpcmpgtd, INS_vpcmpud, INS_vpcmpgtq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareGreaterThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareLessThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareLessThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareNotEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareNotLessThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, 
HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareNotLessThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareOrderedMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareUnorderedMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompressMask, -1, 3, {INS_vpcompressb, INS_vpcompressb, INS_vpcompressw, INS_vpcompressw, INS_vpcompressd, INS_vpcompressd, INS_vpcompressq, INS_vpcompressq, INS_vcompressps, INS_vcompresspd}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512, CompressStoreMask, -1, 3, {INS_vpcompressb, INS_vpcompressb, INS_vpcompressw, INS_vpcompressw, INS_vpcompressd, INS_vpcompressd, INS_vpcompressq, INS_vpcompressq, INS_vcompressps, INS_vcompresspd}, HW_Category_MemoryStore, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ConvertMaskToVector, -1, 1, {INS_vpmovm2b, INS_vpmovm2b, INS_vpmovm2w, INS_vpmovm2w, INS_vpmovm2d, INS_vpmovm2d, INS_vpmovm2q, INS_vpmovm2q, INS_vpmovm2d, INS_vpmovm2q}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, ConvertVectorToMask, -1, 1, {INS_vpmovb2m, INS_vpmovb2m, INS_vpmovw2m, INS_vpmovw2m, INS_vpmovd2m, INS_vpmovd2m, INS_vpmovq2m, INS_vpmovq2m, INS_vpmovd2m, INS_vpmovq2m}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, ExpandLoadMask, -1, 3, {INS_vpexpandb, INS_vpexpandb, INS_vpexpandw, INS_vpexpandw, INS_vpexpandd, INS_vpexpandd, 
INS_vpexpandq, INS_vpexpandq, INS_vexpandps, INS_vexpandpd}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ExpandMask, -1, 3, {INS_vpexpandb, INS_vpexpandb, INS_vpexpandw, INS_vpexpandw, INS_vpexpandd, INS_vpexpandd, INS_vpexpandq, INS_vpexpandq, INS_vexpandps, INS_vexpandpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, MaskLoadMask, -1, 3, {INS_vmovdqu8, INS_vmovdqu8, INS_vmovdqu16, INS_vmovdqu16, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, MaskLoadAlignedMask, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, MaskStoreMask, -1, 3, {INS_vmovdqu8, INS_vmovdqu8, INS_vmovdqu16, INS_vmovdqu16, INS_movdqu32, INS_movdqu32, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512, MaskStoreAlignedMask, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512, NotMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, op_EqualityMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, op_InequalityMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, OrMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ShiftLeftMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ShiftRightMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, XorMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, XnorMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) #endif // FEATURE_HW_INTRINSIC #undef HARDWARE_INTRINSIC diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index e4c836f1e29268..bf542265421835 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -1173,7 +1173,7 @@ GenTree* Compiler::impGetNodeAddr(GenTree* val, // Normalizing the type involves examining the struct type to determine if it should // be modified to one that is handled specially by the JIT, possibly being a candidate // for 
full enregistration, e.g. TYP_SIMD16. If the size of the struct is already known -// call structSizeMightRepresentAcceleratedType to determine if this api needs to be called. +// call structSizeMightRepresentSIMDType to determine if this api needs to be called. // var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd, var_types* pSimdBaseJitType) { @@ -1185,39 +1185,22 @@ var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd, var_types* const DWORD structFlags = info.compCompHnd->getClassAttribs(structHnd); // Don't bother if the struct contains GC references of byrefs, it can't be a SIMD type. - if ((structFlags & (CORINFO_FLG_CONTAINS_GC_PTR | CORINFO_FLG_BYREF_LIKE)) == 0 && - (structFlags & CORINFO_FLG_INTRINSIC_TYPE) != 0) + if ((structFlags & (CORINFO_FLG_CONTAINS_GC_PTR | CORINFO_FLG_BYREF_LIKE)) == 0) { unsigned originalSize = info.compCompHnd->getClassSize(structHnd); - if (structSizeMightRepresentAcceleratedType(originalSize)) + if (structSizeMightRepresentSIMDType(originalSize)) { unsigned int sizeBytes; var_types simdBaseType = getBaseTypeAndSizeOfSIMDType(structHnd, &sizeBytes); - if (simdBaseType != TYP_UNDEF) { assert(sizeBytes == originalSize || sizeBytes == SIZE_UNKNOWN); - if (sizeBytes < getMinVectorByteLength()) + structType = getSIMDTypeForSize(sizeBytes); + if (pSimdBaseJitType != nullptr) { - // The struct itself is accelerated, in this case, it is `Half`. - // If future types are added, this assert will need to be removed. - assert(sizeBytes == 2); - structType = simdBaseType; - if (pSimdBaseJitType != nullptr) - { - *pSimdBaseJitType = TYP_UNDEF; - } + *pSimdBaseJitType = simdBaseType; } - else - { - structType = getSIMDTypeForSize(sizeBytes); - if (pSimdBaseJitType != nullptr) - { - *pSimdBaseJitType = simdBaseType; - } - } - // Also indicate that we use floating point registers. 
compFloatingPointUsed = true; } diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index e9baa2ff8f9bc6..ee5a882cd9bcf6 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -1888,7 +1888,7 @@ GenTree* Compiler::impFixupCallStructReturn(GenTreeCall* call, CORINFO_CLASS_HAN var_types simdReturnType = impNormStructType(call->gtRetClsHnd); if (simdReturnType != call->TypeGet()) { - assert(varTypeIsStructPrimitive(simdReturnType)); + assert(varTypeIsSIMD(simdReturnType)); JITDUMP("changing the type of a call [%06u] from %s to %s\n", dspTreeID(call), varTypeName(call->TypeGet()), varTypeName(simdReturnType)); call->ChangeType(simdReturnType); @@ -4421,284 +4421,6 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, break; } - - case NI_System_Half_FusedMultiplyAdd: - { -#if defined(TARGET_XARCH) - if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - // We are constructing a chain of intrinsics similar to: - // return FMA.MultiplyAddScalar( - // Vector128.CreateScalarUnsafe(x), - // Vector128.CreateScalarUnsafe(y), - // Vector128.CreateScalarUnsafe(z) - // ).ToScalar(); - - GenTree* op3 = impPopStack().val; - GenTree* op2 = impPopStack().val; - GenTree* op1 = impPopStack().val; - assert(op1->TypeGet() == TYP_HALF && op2->TypeGet() == TYP_HALF && op3->TypeGet() == TYP_HALF); - - op3 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op3, TYP_HALF, 16); - op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, TYP_HALF, 16); - op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, TYP_HALF, 16); - - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_AVX10v1_FusedMultiplyAddScalar, - TYP_HALF, 16); - - retNode = gtNewSimdToScalarNode(TYP_HALF, retNode, TYP_HALF, 16); - } -#endif - - break; - } - - case NI_System_Half_op_Addition: - case NI_System_Half_op_Subtraction: - case NI_System_Half_op_Multiply: - case NI_System_Half_op_Division: - case NI_System_Half_Max: - case 
NI_System_Half_Min: - { -#ifdef TARGET_XARCH - if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - GenTree* op2 = impPopStack().val; - GenTree* op1 = impPopStack().val; - assert(op1->TypeGet() == TYP_HALF); - assert(op2->TypeGet() == TYP_HALF); - - NamedIntrinsic opId = lookupHalfIntrinsic(ni); - assert(opId != NI_Illegal); - - op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, TYP_HALF, 16); - op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, TYP_HALF, 16); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, opId, TYP_HALF, 16); - retNode = gtNewSimdToScalarNode(TYP_HALF, retNode, TYP_HALF, 16); - } -#endif - break; - } - - case NI_System_Half_Sqrt: - case NI_System_Half_ReciprocalEstimate: - case NI_System_Half_ReciprocalSqrtEstimate: - { -#ifdef TARGET_XARCH - if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - GenTree* op1 = impPopStack().val; - assert(op1->TypeGet() == TYP_HALF); - - NamedIntrinsic opId = lookupHalfIntrinsic(ni); - assert(opId != NI_Illegal); - - GenTree* op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, gtNewDconNodeF(0.0f), TYP_FLOAT, 16); - op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, TYP_HALF, 16); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, opId, TYP_HALF, 16); - retNode = gtNewSimdToScalarNode(TYP_HALF, retNode, TYP_HALF, 16); - } -#endif - break; - } - - case NI_System_Half_Round: - case NI_System_Half_Ceiling: - case NI_System_Half_Floor: - case NI_System_Half_Truncate: - { -#ifdef TARGET_XARCH - if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - if (sig->numArgs == 1) - { - // todo-xarch-half: We only optimize `Round(Half)` for now, and leave the other `Round` overload - // signatures for later. 
- GenTree* op1 = impPopStack().val; - assert(op1->TypeGet() == TYP_HALF); - - GenTree* op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, gtNewDconNodeF(0.0f), TYP_FLOAT, 16); - op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, TYP_HALF, 16); - - int halfRoundingMode = lookupHalfRoundingMode(ni); - - retNode = - gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, gtNewIconNode(halfRoundingMode, TYP_INT), - NI_AVX10v1_RoundScaleScalar, TYP_HALF, 16); - retNode = gtNewSimdToScalarNode(TYP_HALF, retNode, TYP_HALF, 16); - } - } -#endif - break; - } - - case NI_System_Half_op_Increment: - case NI_System_Half_op_Decrement: - { -#ifdef TARGET_XARCH - if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - GenTree* op1 = impPopStack().val; - assert(op1->TypeGet() == TYP_HALF); - - NamedIntrinsic opId = lookupHalfIntrinsic(ni); - assert(opId != NI_Illegal); - - GenTree* onevec = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, gtNewDconNodeF(1.0f), TYP_FLOAT, 16); - GenTree* op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, gtNewDconNodeF(0.0f), TYP_FLOAT, 16); - onevec = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, onevec, NI_AVX10v1_ConvertScalarToVector128Half, - TYP_FLOAT, 16); - - op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, TYP_HALF, 16); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, onevec, opId, TYP_HALF, 16); - retNode = gtNewSimdToScalarNode(TYP_HALF, retNode, TYP_HALF, 16); - } -#endif - break; - } - - case NI_System_Half_op_Explicit: - { -#ifdef TARGET_XARCH - if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - GenTree* op1 = impPopStack().val; - - CORINFO_ARG_LIST_HANDLE args = sig->args; - CORINFO_CLASS_HANDLE op1ClsHnd; - CorInfoType baseJitType = strip(info.compCompHnd->getArgType(sig, args, &op1ClsHnd)); - var_types fromType = JitType2PreciseVarType(baseJitType); - if (fromType == TYP_STRUCT) - { - fromType = impNormStructType(op1ClsHnd); - } - - var_types toType = JitType2PreciseVarType(sig->retType); - if (toType == 
TYP_STRUCT) - { - toType = impNormStructType(sig->retTypeClass); - } - - NamedIntrinsic opId = lookupHalfConversionIntrinsic(fromType, toType); - if (opId == NI_Illegal) - { - break; - } - - if (toType == TYP_HALF) - { - GenTree* zeroVec = - gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, gtNewDconNodeF(0.0f), TYP_HALF, 16); - if (varTypeIsFloating(fromType)) - { - op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, fromType, 16); - } - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, zeroVec, op1, opId, fromType, 16); - retNode = gtNewSimdToScalarNode(TYP_HALF, retNode, TYP_HALF, 16); - } - else - { - if (varTypeIsFloating(toType)) - { - GenTree* zeroVec = - gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, gtNewDconNodeF(0.0f), fromType, 16); - op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, fromType, 16); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, zeroVec, op1, opId, fromType, 16); - retNode = gtNewSimdToScalarNode(toType, retNode, toType, 16); - } - else - { - op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, fromType, 16); - retNode = gtNewSimdHWIntrinsicNode(JITtype2varType(sig->retType), op1, opId, fromType, 16); - } - } - - break; - } -#endif - break; - } - - case NI_System_Half_op_GreaterThan: - case NI_System_Half_op_GreaterThanOrEqual: - case NI_System_Half_op_LessThan: - case NI_System_Half_op_LessThanOrEqual: - case NI_System_Half_op_Equality: - case NI_System_Half_op_Inequality: - { -#ifdef TARGET_XARCH - if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - NamedIntrinsic id = lookupHalfIntrinsic(ni); - assert(id != NI_Illegal); - - GenTree* op2 = impPopStack().val; - GenTree* op1 = impPopStack().val; - - assert(op1->TypeGet() == TYP_HALF); - assert(op2->TypeGet() == TYP_HALF); - - op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, TYP_HALF, 16); - op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, TYP_HALF, 16); - - retNode = gtNewSimdHWIntrinsicNode(TYP_INT, op1, op2, id, TYP_HALF, 16); - break; - } -#endif - break; - } 
- - case NI_System_Half_get_MaxValue: - case NI_System_Half_get_MinValue: - case NI_System_Half_get_Epsilon: - case NI_System_Half_get_NaN: - case NI_System_Half_get_PositiveInfinity: - case NI_System_Half_get_NegativeInfinity: - case NI_System_Half_get_One: - case NI_System_Half_get_Zero: - { -#ifdef TARGET_XARCH - if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - uint16_t halfBits = 0; - - switch (ni) - { - case NI_System_Half_get_MaxValue: - halfBits = 0x7BFF; // 65504 - break; - case NI_System_Half_get_MinValue: - halfBits = 0xFBFF; // -65504 - break; - case NI_System_Half_get_Epsilon: - halfBits = 0x0001; // ~5.9604645e-08 (smallest positive subnormal) - break; - case NI_System_Half_get_NaN: - halfBits = 0xFE00; // Negative NaN - break; - case NI_System_Half_get_PositiveInfinity: - halfBits = 0x7C00; // +Infinity - break; - case NI_System_Half_get_NegativeInfinity: - halfBits = 0xFC00; // -Infinity - break; - case NI_System_Half_get_One: - halfBits = 0x3C00; // 1.0 - break; - case NI_System_Half_get_Zero: - halfBits = 0x0000; // 0.0 - break; - default: - noway_assert(!"Unknown Half static property"); - } - - retNode = gtNewSimdCreateScalarNode(TYP_SIMD16, gtNewIconNode(halfBits, TYP_INT), TYP_USHORT, 16); - retNode = gtNewSimdToScalarNode(TYP_HALF, retNode, TYP_HALF, 16); - } -#endif - break; - } - #endif // FEATURE_HW_INTRINSICS case NI_System_Math_Abs: @@ -6779,14 +6501,6 @@ void Compiler::impPopCallArgs(CORINFO_SIG_INFO* sig, GenTreeCall* call) var_types jitSigType = JITtype2varType(params[i - 1].CorType); CORINFO_CLASS_HANDLE classHnd = params[i - 1].ClassHandle; -#if defined(TARGET_XARCH) - if (jitSigType == TYP_STRUCT) - { - var_types normSigType = impNormStructType(classHnd); - jitSigType = (normSigType == TYP_HALF) ? 
TYP_HALF : jitSigType; - } -#endif - if (!impCheckImplicitArgumentCoercion(jitSigType, argNode->TypeGet())) { BADCODE("the call argument has a type that can't be implicitly converted to the signature type"); @@ -7790,15 +7504,7 @@ bool Compiler::isCompatibleMethodGDV(GenTreeCall* call, CORINFO_METHOD_HANDLE gd CORINFO_CLASS_HANDLE classHnd = NO_CLASS_HANDLE; CorInfoType corType = strip(info.compCompHnd->getArgType(&sig, sigParam, &classHnd)); - - var_types sigType = JITtype2varType(corType); -#if defined(TARGET_XARCH) - if (sigType == TYP_STRUCT) - { - var_types normSigType = impNormStructType(classHnd); - sigType = (normSigType == TYP_HALF) ? TYP_HALF : sigType; - } -#endif + var_types sigType = JITtype2varType(corType); if (!impCheckImplicitArgumentCoercion(sigType, arg.GetNode()->TypeGet())) { @@ -10736,141 +10442,6 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) break; } - case 'H': - { - if (strcmp(className, "Half") == 0) - { - if (strcmp(methodName, "op_Addition") == 0) - { - result = NI_System_Half_op_Addition; - } - else if (strcmp(methodName, "op_Decrement") == 0) - { - result = NI_System_Half_op_Decrement; - } - else if (strcmp(methodName, "op_Division") == 0) - { - result = NI_System_Half_op_Division; - } - else if (strcmp(methodName, "op_Equality") == 0) - { - result = NI_System_Half_op_Equality; - } - else if (strcmp(methodName, "op_Explicit") == 0) - { - result = NI_System_Half_op_Explicit; - } - else if (strcmp(methodName, "op_GreaterThan") == 0) - { - result = NI_System_Half_op_GreaterThan; - } - else if (strcmp(methodName, "op_GreaterThanOrEqual") == 0) - { - result = NI_System_Half_op_GreaterThanOrEqual; - } - else if (strcmp(methodName, "op_Inequality") == 0) - { - result = NI_System_Half_op_Inequality; - } - else if (strcmp(methodName, "op_Increment") == 0) - { - result = NI_System_Half_op_Increment; - } - else if (strcmp(methodName, "op_LessThan") == 0) - { - result = NI_System_Half_op_LessThan; - } - else if 
(strcmp(methodName, "op_LessThanOrEqual") == 0) - { - result = NI_System_Half_op_LessThanOrEqual; - } - else if (strcmp(methodName, "Max") == 0) - { - result = NI_System_Half_Max; - } - else if (strcmp(methodName, "Min") == 0) - { - result = NI_System_Half_Min; - } - else if (strcmp(methodName, "Round") == 0) - { - result = NI_System_Half_Round; - } - else if (strcmp(methodName, "op_Multiply") == 0) - { - result = NI_System_Half_op_Multiply; - } - else if (strcmp(methodName, "ReciprocalEstimate") == 0) - { - result = NI_System_Half_ReciprocalEstimate; - } - else if (strcmp(methodName, "ReciprocalSqrtEstimate") == 0) - { - result = NI_System_Half_ReciprocalSqrtEstimate; - } - else if (strcmp(methodName, "FusedMultiplyAdd") == 0) - { - result = NI_System_Half_FusedMultiplyAdd; - } - else if (strcmp(methodName, "op_Subtraction") == 0) - { - result = NI_System_Half_op_Subtraction; - } - else if (strcmp(methodName, "Sqrt") == 0) - { - result = NI_System_Half_Sqrt; - } - else if (strcmp(methodName, "get_MinValue") == 0) - { - result = NI_System_Half_get_MinValue; - } - else if (strcmp(methodName, "get_MaxValue") == 0) - { - result = NI_System_Half_get_MaxValue; - } - else if (strcmp(methodName, "get_Epsilon") == 0) - { - result = NI_System_Half_get_Epsilon; - } - else if (strcmp(methodName, "get_NaN") == 0) - { - result = NI_System_Half_get_NaN; - } - else if (strcmp(methodName, "get_PositiveInfinity") == 0) - { - result = NI_System_Half_get_PositiveInfinity; - } - else if (strcmp(methodName, "get_NegativeInfinity") == 0) - { - result = NI_System_Half_get_NegativeInfinity; - } - else if (strcmp(methodName, "get_One") == 0) - { - result = NI_System_Half_get_One; - } - else if (strcmp(methodName, "get_Zero") == 0) - { - result = NI_System_Half_get_Zero; - } - - else if (strcmp(methodName, "Ceiling") == 0) - { - result = NI_System_Half_Ceiling; - } - else if (strcmp(methodName, "Floor") == 0) - { - result = NI_System_Half_Floor; - } - else if (strcmp(methodName, "Truncate") 
== 0) - { - result = NI_System_Half_Truncate; - } - - break; - } - break; - } - case 'G': { if (strcmp(className, "GC") == 0) @@ -12438,160 +12009,3 @@ GenTree* Compiler::impKeepAliveIntrinsic(GenTree* objToKeepAlive) return gtNewKeepAliveNode(objToKeepAlive); } - -NamedIntrinsic Compiler::lookupHalfIntrinsic(NamedIntrinsic ni) -{ -#ifdef FEATURE_HW_INTRINSICS -#if defined(TARGET_XARCH) - assert(compOpportunisticallyDependsOn(InstructionSet_AVX10v1)); - - switch (ni) - { - case NI_System_Half_op_Addition: - return NI_AVX10v1_AddScalar; - case NI_System_Half_op_Subtraction: - return NI_AVX10v1_SubtractScalar; - case NI_System_Half_Sqrt: - return NI_AVX10v1_SqrtScalar; - - case NI_System_Half_Max: - return NI_AVX10v1_MaxScalar; - - case NI_System_Half_Min: - return NI_AVX10v1_MinScalar; - - case NI_System_Half_ReciprocalEstimate: - return NI_AVX10v1_ReciprocalScalar; - - case NI_System_Half_ReciprocalSqrtEstimate: - return NI_AVX10v1_ReciprocalSqrtScalar; - - case NI_System_Half_FusedMultiplyAdd: - return NI_AVX10v1_FusedMultiplyAddScalar; - - case NI_System_Half_op_Multiply: - return NI_AVX10v1_MultiplyScalar; - case NI_System_Half_op_Division: - return NI_AVX10v1_DivideScalar; - case NI_System_Half_op_GreaterThan: - return NI_AVX10v1_CompareScalarOrderedGreaterThan; - case NI_System_Half_op_GreaterThanOrEqual: - return NI_AVX10v1_CompareScalarOrderedGreaterThanOrEqual; - case NI_System_Half_op_LessThan: - return NI_AVX10v1_CompareScalarOrderedLessThan; - case NI_System_Half_op_LessThanOrEqual: - return NI_AVX10v1_CompareScalarOrderedLessThanOrEqual; - case NI_System_Half_op_Equality: - return NI_AVX10v1_CompareScalarOrderedEqual; - case NI_System_Half_op_Inequality: - return NI_AVX10v1_CompareScalarOrderedNotEqual; - - case NI_System_Half_Round: - case NI_System_Half_Ceiling: - case NI_System_Half_Floor: - case NI_System_Half_Truncate: - return NI_AVX10v1_RoundScaleScalar; - - default: - break; - } -#endif -#endif - return NI_Illegal; -} - -int 
Compiler::lookupHalfRoundingMode(NamedIntrinsic ni) -{ -#ifdef FEATURE_HW_INTRINSICS -#if defined(TARGET_XARCH) - switch (ni) - { - case NI_System_Half_Round: - return 0; // Round to nearest - case NI_System_Half_Ceiling: - return 1; // Round towards +infinity - case NI_System_Half_Floor: - return 2; // Round towards -infinity - case NI_System_Half_Truncate: - return 3; // Round towards zero - default: - noway_assert(!"Should have one of the above Half intrinsics"); - return -1; - } -#endif -#endif - return -1; -} - -NamedIntrinsic Compiler::lookupHalfConversionIntrinsic(var_types fromType, var_types toType) -{ -#ifdef FEATURE_HW_INTRINSICS -#if defined(TARGET_XARCH) - assert(compOpportunisticallyDependsOn(InstructionSet_AVX10v1)); - - switch (toType) - { - case TYP_HALF: - switch (fromType) - { - case TYP_FLOAT: - case TYP_DOUBLE: - case TYP_LONG: - case TYP_ULONG: - case TYP_INT: - case TYP_UINT: - return NI_AVX10v1_ConvertScalarToVector128Half; - default: - return NI_Illegal; - } - break; - case TYP_FLOAT: - if (fromType == TYP_HALF) - { - return NI_AVX10v1_ConvertScalarToVector128Single; - } - break; - - case TYP_DOUBLE: - if (fromType == TYP_HALF) - { - return NI_AVX10v1_ConvertScalarToVector128Double; - } - break; - - case TYP_INT: - if (fromType == TYP_HALF) - { - return NI_AVX10v1_ConvertToInt32; - } - break; - - case TYP_LONG: - if (fromType == TYP_HALF) - { - return NI_AVX10v1_ConvertToInt64; - } - break; - - case TYP_UINT: - if (fromType == TYP_HALF) - { - return NI_AVX10v1_ConvertToUInt32; - } - break; - - case TYP_ULONG: - if (fromType == TYP_HALF) - { - return NI_AVX10v1_ConvertToUInt64; - } - break; - - default: - break; - } - -#endif -#endif - return NI_Illegal; -} diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index bbb7f41e1a3c9c..8e7451a9bc1a86 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -2220,12 +2220,7 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* #if 
defined(TARGET_XARCH) unsigned srcSize = genTypeSize(srcType); - if (srcSize == 2) - { - assert(srcType == TYP_HALF); - return INS_vmovsh; - } - else if (srcSize == 4) + if (srcSize == 4) { return INS_movss; } @@ -2313,10 +2308,6 @@ instruction CodeGen::ins_Copy(var_types dstType) assert(varTypeUsesFloatReg(dstType)); #if defined(TARGET_XARCH) - if (dstType == TYP_HALF) - { - return INS_vmovsh; - } return INS_movaps; #elif defined(TARGET_ARM64) if (varTypeIsSIMD(dstType)) @@ -2586,12 +2577,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false #if defined(TARGET_XARCH) unsigned dstSize = genTypeSize(dstType); - if (dstSize == 2) - { - assert(dstType == TYP_HALF); - return INS_vmovsh; - } - else if (dstSize == 4) + if (dstSize == 4) { return INS_movss; } diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 4c594d65ced1bf..ce5f1f63320b67 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -477,13 +477,13 @@ INST3(aeskeygenassist, "vaeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, INST3(pclmulqdq, "vpclmulqdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), 7C, 1C, INS_TT_FULL_MEM, KMask_Base1 | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_FLAGS_HasPseudoName) // Perform a carry-less multiplication of two quadwords // Instructions for SHA -INST3(sha1msg1, "sha1msg1", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xC9), ILLEGAL, ILLEGAL, INS_TT_FULL, REX_WIG) // Perform an Intermediate Calculation for the Next Four SHA1 Message Dwords -INST3(sha1msg2, "sha1msg2", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCA), ILLEGAL, ILLEGAL, INS_TT_FULL, REX_WIG) // Perform a Final Calculation for the Next Four SHA1 Message Dwords -INST3(sha1nexte, "sha1nexte", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xC8), ILLEGAL, ILLEGAL, INS_TT_FULL, REX_WIG) // Calculate SHA1 State Variable E After Four Rounds -INST3(sha1rnds4, "sha1rnds4", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0xCC), ILLEGAL, ILLEGAL, INS_TT_FULL, 
REX_WIG) // Perform Four Rounds of SHA1 Operation -INST3(sha256msg1, "sha256msg1", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCC), ILLEGAL, ILLEGAL, INS_TT_FULL, REX_WIG) // Perform an Intermediate Calculation for the Next Four SHA256 Message Dwords -INST3(sha256msg2, "sha256msg2", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCD), ILLEGAL, ILLEGAL, INS_TT_FULL, REX_WIG) // Perform a Final Calculation for the Next Four SHA256 Message Dwords -INST3(sha256rnds2, "sha256rnds2", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCB), ILLEGAL, ILLEGAL, INS_TT_FULL, REX_WIG) // Perform Two Rounds of SHA256 Operation +INST3(sha1msg1, "sha1msg1", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xC9), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG) // Perform an Intermediate Calculation for the Next Four SHA1 Message Dwords +INST3(sha1msg2, "sha1msg2", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCA), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG) // Perform a Final Calculation for the Next Four SHA1 Message Dwords +INST3(sha1nexte, "sha1nexte", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xC8), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG) // Calculate SHA1 State Variable E After Four Rounds +INST3(sha1rnds4, "sha1rnds4", IUM_RW, BAD_CODE, BAD_CODE, SSE3A(0xCC), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG) // Perform Four Rounds of SHA1 Operation +INST3(sha256msg1, "sha256msg1", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCC), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG) // Perform an Intermediate Calculation for the Next Four SHA256 Message Dwords +INST3(sha256msg2, "sha256msg2", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCD), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG) // Perform a Final Calculation for the Next Four SHA256 Message Dwords +INST3(sha256rnds2, "sha256rnds2", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xCB), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, REX_WIG) // Perform Two Rounds of SHA256 Operation // Instructions for GFNI INST3(gf2p8affineinvqb, "vgf2p8affineinvqb",IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xCF), 5C, 2X, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_WX 
| Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Galois Field Affine Transformation Inverse @@ -986,7 +986,7 @@ INST3(vpshufbitqmb, "vpshufbitqmb", IUM_WR, BAD_CODE, BAD_ // Instructions for AVX512-BF16, AVX512-FP16 INST3(vaddph, "vaddph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x58), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Add Packed FP16 Values -INST3(vaddsh, "vaddsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x58), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Add Scalar FP16 Values +INST3(vaddsh, "vaddsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x58), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Add Scalar FP16 Values INST3(vcmpph, "vcmpph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0xC2), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Compare Packed FP16 Values INST3(vcmpsh, "vcmpsh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0xF3, 0xC2), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Compare Scalar FP16 Values INST3(vcomish, "vcomish", IUM_RD, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x2F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Compare Scalar Ordered FP16 Values and Set EFLAGS @@ -1004,16 +1004,16 @@ INST3(vcvtph2uw, "vcvtph2uw", IUM_WR, BAD_CODE, BAD_ INST3(vcvtph2w, "vcvtph2w", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x7D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Packed FP16 Values to Packed Signed WORD Integers INST3(vcvtps2phx, "vcvtps2phx", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x1D), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values INST3(vcvtqq2ph, 
"vcvtqq2ph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5B), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Convert Packed Signed QWORD Integers to Packed FP16 Values -INST3(vcvtsd2sh, "vcvtsd2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x5A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Convert Scalar Double Precision FP Value to Scalar FP16 Value -INST3(vcvtsh2sd, "vcvtsh2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Convert Scalar FP16 Value to Scalar Double Precision FP Value +INST3(vcvtsd2sh, "vcvtsd2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x5A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar Double Precision FP Value to Scalar FP16 Value +INST3(vcvtsh2sd, "vcvtsh2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Double Precision FP Value INST3(vcvtsh2si32, "vcvtsh2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Signed DWORD Integer INST3(vcvtsh2si64, "vcvtsh2si", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Signed QWORD Integer -INST3(vcvtsh2ss, "vcvtsh2ss", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x06, 0x13), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Convert Scalar FP16 Value to Scalar Single Precision FP Value 
+INST3(vcvtsh2ss, "vcvtsh2ss", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x06, 0x13), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Single Precision FP Value INST3(vcvtsh2usi32, "vcvtsh2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x79), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Unsigned DWORD Integer INST3(vcvtsh2usi64, "vcvtsh2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x79), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar FP16 Value to Scalar Unsigned QWORD Integer -INST3(vcvtsi2sh32, "vcvtsi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Convert Scalar Signed DWORD Integer to Scalar FP16 Value -INST3(vcvtsi2sh64, "vcvtsi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Convert Scalar Signed QWORD Integer to Scalar FP16 Value -INST3(vcvtss2sh, "vcvtss2sh", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x1D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Convert Scalar Single Precision FP Value to Scalar FP16 Value +INST3(vcvtsi2sh32, "vcvtsi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar Signed DWORD Integer to Scalar FP16 Value +INST3(vcvtsi2sh64, "vcvtsi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x2A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar Signed QWORD Integer to 
Scalar FP16 Value +INST3(vcvtss2sh, "vcvtss2sh", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x1D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar Single Precision FP Value to Scalar FP16 Value INST3(vcvttph2dq, "vcvttph2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5B), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Signed DWORD Integers INST3(vcvttph2qq, "vcvttph2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x05, 0x7A), ILLEGAL, ILLEGAL, INS_TT_QUARTER_MEM, Input_16Bit | KMask_Base2 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Signed QWORD Integers INST3(vcvttph2udq, "vcvttph2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x78), ILLEGAL, ILLEGAL, INS_TT_HALF_MEM, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert with Truncation Packed FP16 Values to Packed Unsigned DWORD Integers @@ -1026,24 +1026,21 @@ INST3(vcvttsh2usi32, "vcvttsh2usi", IUM_WR, BAD_CODE, BAD_ INST3(vcvttsh2usi64, "vcvttsh2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x78), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert with Truncation Scalar FP16 Value to Scalar Unsigned QWORD Integer INST3(vcvtudq2ph, "vcvtudq2ph", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x7A), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values INST3(vcvtuqq2ph, "vcvtuqq2ph", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x7A), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values -INST3(vcvtusi2sh32, "vcvtusi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x7B), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | 
INS_Flags_IsDstSrcSrcAVXInstruction) // Convert Scalar Unsigned DWORD Integer to Scalar FP16 Value -INST3(vcvtusi2sh64, "vcvtusi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x7B), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Convert Scalar Unsigned QWORD Integer to Scalar FP16 Value +INST3(vcvtusi2sh32, "vcvtusi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x7B), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Convert Scalar Unsigned DWORD Integer to Scalar FP16 Value +INST3(vcvtusi2sh64, "vcvtusi2sh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x7B), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_64Bit | KMask_Base1 | REX_W1 | Encoding_EVEX) // Convert Scalar Unsigned QWORD Integer to Scalar FP16 Value INST3(vcvtuw2ph, "vcvtuw2ph", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x05, 0x7D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values INST3(vcvtw2ph, "vcvtw2ph", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x7D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Packed Single Precision FP Values to Packed FP16 Values INST3(vdivph, "vdivph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5E), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Divide Packed FP16 Values -INST3(vdivsh, "vdivsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Divide Scalar FP16 Values +INST3(vdivsh, "vdivsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Divide Scalar FP16 Values INST3(vdpbf16ps, "vdpbf16ps", IUM_WR, BAD_CODE, 
BAD_CODE, PSSE38(0xF3, 0x52), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_16Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Dot Product of BF16 Pairs Accumulated Into Packed Single Precision INST3(vfcmaddcph, "vfcmaddcph", IUM_RW, BAD_CODE, BAD_CODE, SSEDBLMAP(0x06, 0x56), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Complex Multiply and Accumulate Packed FP16 Values INST3(vfcmaddcsh, "vfcmaddcsh", IUM_RW, BAD_CODE, BAD_CODE, SSEDBLMAP(0x06, 0x57), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Complex Multiply and Accumulate Scalar FP16 Values INST3(vfcmulcph, "vfcmulcph", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x06, 0xD6), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Complex Multiply Packed FP16 Values INST3(vfcmulcsh, "vfcmulcsh", IUM_WR, BAD_CODE, BAD_CODE, SSEDBLMAP(0x06, 0xD7), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_32Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Complex Multiply Scalar FP16 Values - -#define FIRST_AVX10V1_FMA_INSTR INS_vfmadd132ph - INST3(vfmadd132ph, "vvfmadd132ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x98), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Packed FP16 Values INST3(vfmadd132sh, "vvfmadd132sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x99), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Scalar FP16 Values -INST3(vfmadd213ph, "vvfmadd213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xA8), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed FP16 Values -INST3(vfmadd213sh, "vvfmadd213sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xA9), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | 
INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar FP16 Values +INST3(vfmadd213ph, "vvfmadd213ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xA8), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Packed FP16 Values +INST3(vfmadd213sh, "vvfmadd213sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xA9), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Scalar FP16 Values INST3(vfmadd231ph, "vvfmadd231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xB8), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Packed FP16 Values INST3(vfmadd231sh, "vvfmadd231sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xB9), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Multiply-Add of Scalar FP16 Values INST3(vfmaddcph, "vfmaddcph", IUM_RW, BAD_CODE, BAD_CODE, SSEFLTMAP(0x06, 0x56), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Complex Multiply and Accumulate Packed FP16 Values @@ -1074,9 +1071,6 @@ INST3(vfnmsub213ph, "vvfnmsub213ph", IUM_RW, BAD_CODE, BAD_ INST3(vfnmsub213sh, "vvfnmsub213sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xAF), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Scalar FP16 Values INST3(vfnmsub231ph, "vvfnmsub231ph", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBE), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Packed FP16 Values INST3(vfnmsub231sh, "vvfnmsub231sh", IUM_RW, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0xBF), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Fused Negative Multiply-Subtract of Scalar FP16 Values - -#define 
LAST_AVX10V1_FMA_INSTR INS_vfnmsub231sh - INST3(vfpclassph, "vfpclassph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x66), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Test Types of Packed FP16 Values INST3(vfpclasssh, "vfpclasssh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x67), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Test Types of Scalar FP16 Values INST3(vgetexpph, "vgetexpph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x42), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Convert Exponents of Packed FP16 Values to FP16 Values @@ -1084,27 +1078,27 @@ INST3(vgetexpsh, "vgetexpsh", IUM_WR, BAD_CODE, BAD_ INST3(vgetmantph, "vgetmantph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x26), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Extract Normalized Mantissas from Packed FP16 Values INST3(vgetmantsh, "vgetmantsh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x27), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Extract Normalized Mantissas from Scalar FP16 Values INST3(vmaxph, "vmaxph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5F), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Return Maximum of Packed FP16 Values -INST3(vmaxsh, "vmaxsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Maximum of Scalar FP16 Values -INST3(vminph, "vminph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Return Minimum of Packed FP16 Values -INST3(vminsh, "vminsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 
| REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Return Minimum of Scalar FP16 Values -INST3(vmovsh, "vmovsh", IUM_WR, SSEFLTMAP(0x05, 0x11), BAD_CODE, SSEFLTMAP(0x05, 0x10), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Move Scalar FP16 Value +INST3(vmaxsh, "vmaxsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Return Maximum of Scalar FP16 Values +INST3(vminph, "vminph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5D), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Return Minimum of Packed FP16 Values +INST3(vminsh, "vminsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Return Minimum of Scalar FP16 Values +INST3(vmovsh, "vmovsh", IUM_WR, SSEFLTMAP(0x05, 0x11), BAD_CODE, SSEFLTMAP(0x05, 0x10), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Move Scalar FP16 Value INST3(vmovw, "vmovw", IUM_WR, PCKDBLMAP(0x06, 0x7E), BAD_CODE, PCKDBLMAP(0x00, 0x6E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_WIG | Encoding_EVEX) // Move Word INST3(vmulph, "vmulph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x59), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Multiply Packed FP16 Values -INST3(vmulsh, "vmulsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x59), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Multiply Scalar FP16 Values +INST3(vmulsh, "vmulsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x59), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Multiply Scalar FP16 Values INST3(vrcpph, "vrcpph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4C), ILLEGAL,
ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Compute REciprocals of Packed FP16 Values -INST3(vrcpsh, "vrcpsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute REciprocals of Scalar FP16 Values +INST3(vrcpsh, "vrcpsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Compute REciprocals of Scalar FP16 Values INST3(vreduceph, "vreduceph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x56), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Perform Reduction Transformation on Packed FP16 Values INST3(vreducesh, "vreducesh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x57), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Perform Reduction Transformation on Scalar FP16 Values INST3(vrndscaleph, "vrndscaleph", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x08), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Round Packed FP16 Values to Include a Given Number of Fraction Bits -INST3(vrndscalesh, "vrndscalesh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x0A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round Scalar FP16 Values to Include a Given Number of Fraction Bits +INST3(vrndscalesh, "vrndscalesh", IUM_WR, BAD_CODE, BAD_CODE, PSSE3A(0x00, 0x0A), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Round Scalar FP16 Values to Include a Given Number of Fraction Bits INST3(vrsqrtph, "vrsqrtph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4E), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Compute REciprocals of 
Square Roots of Packed FP16 Values -INST3(vrsqrtsh, "vrsqrtsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute REciprocals of Square Roots of Scalar FP16 Values +INST3(vrsqrtsh, "vrsqrtsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x4F), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Compute REciprocals of Square Roots of Scalar FP16 Values INST3(vscalefph, "vscalefph", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x2C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Scale Packed FP16 Values with FP16 Values INST3(vscalefsh, "vscalefsh", IUM_WR, BAD_CODE, BAD_CODE, PCKDBLMAP(0x06, 0x2D), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Scale Scalar FP16 Values with FP16 Values INST3(vsqrtph, "vsqrtph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x51), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Compute Square Root of Packed FP16 Values -INST3(vsqrtsh, "vsqrtsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x51), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute Square Root of Scalar FP16 Values +INST3(vsqrtsh, "vsqrtsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x51), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Compute Square Root of Scalar FP16 Values INST3(vsubph, "vsubph", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x5C), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | KMask_Base8 | REX_W0 | Encoding_EVEX) // Subtract Packed FP16 Values -INST3(vsubsh, "vsubsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5C), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | 
Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Subtract Scalar FP16 Values +INST3(vsubsh, "vsubsh", IUM_WR, BAD_CODE, BAD_CODE, SSEFLTMAP(0x05, 0x5C), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | KMask_Base1 | REX_W0 | Encoding_EVEX) // Subtract Scalar FP16 Values INST3(vucomish, "vucomish", IUM_RD, BAD_CODE, BAD_CODE, PCKFLTMAP(0x05, 0x2E), ILLEGAL, ILLEGAL, INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_EVEX) // Compare Scalar Unordered FP16 Values and Set EFLAGS // AVX512-VP2INTERSECT @@ -1143,7 +1137,6 @@ INST3(vmovw_simd, "vmovw", IUM_WR, SSEFLTMAP(0x05, 0x7E), BAD_ INST3(vmpsadbw, "vmpsadbw", IUM_WR, BAD_CODE, BAD_CODE, AVX3A(0x42), 4C, 2C, INS_TT_FULL_MEM, KMask_Base8 | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference INST3(vucomxsd, "vucomxsd", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2f), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of double precision floating point values and set flags INST3(vucomxss, "vucomxss", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2E), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of single precision floating point values and set flags - #define LAST_AVX512_INSTRUCTION INS_vucomxss diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 4c7a21f0e2bcc4..c53e8a69078cc4 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1580,7 +1580,7 @@ var_types Compiler::StructPromotionHelper::TryPromoteValueClassAsPrimitive(CORIN // We will only promote fields of SIMD types that fit into a SIMD register. 
if (simdBaseType != TYP_UNDEF) { - if (m_compiler->structSizeMightRepresentAcceleratedType(simdSize)) + if (m_compiler->structSizeMightRepresentSIMDType(simdSize)) { return m_compiler->getSIMDTypeForSize(simdSize); } diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 22c4e945872696..e66cf2cbfebb20 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -6230,7 +6230,7 @@ void Lowering::LowerCallStruct(GenTreeCall* call) case GT_STOREIND: #ifdef FEATURE_SIMD - if (varTypeIsSIMD(user) || user->TypeIs(TYP_HALF)) + if (varTypeIsSIMD(user)) { user->ChangeType(returnType); break; @@ -11917,7 +11917,7 @@ void Lowering::TryRetypingFloatingPointStoreToIntegerStore(GenTree* store) { assert(store->OperIsStore()); - if (!varTypeIsFloating(store) || store->TypeIs(TYP_HALF)) + if (!varTypeIsFloating(store)) { return; } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 27e20499e121b8..8fbed3c79d41d3 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -1337,8 +1337,6 @@ void Lowering::LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIn { case NI_X86Base_COMIS: case NI_X86Base_UCOMIS: - case NI_AVX10v1_VCOMISH: - case NI_AVX10v1_VUCOMISH: // In some cases we can generate better code if we swap the operands: // - If the condition is not one of the "preferred" floating point conditions we can swap // the operands and change the condition to avoid generating an extra JP/JNP branch. 
@@ -2794,43 +2792,6 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } break; } - case NI_AVX10v1_CompareScalarOrderedEqual: - LowerHWIntrinsicCC(node, NI_AVX10v1_VCOMISH, GenCondition::FEQ); - break; - case NI_AVX10v1_CompareScalarOrderedNotEqual: - LowerHWIntrinsicCC(node, NI_AVX10v1_VCOMISH, GenCondition::FNEU); - break; - case NI_AVX10v1_CompareScalarOrderedLessThan: - LowerHWIntrinsicCC(node, NI_AVX10v1_VCOMISH, GenCondition::FLT); - break; - case NI_AVX10v1_CompareScalarOrderedLessThanOrEqual: - LowerHWIntrinsicCC(node, NI_AVX10v1_VCOMISH, GenCondition::FLE); - break; - case NI_AVX10v1_CompareScalarOrderedGreaterThan: - LowerHWIntrinsicCC(node, NI_AVX10v1_VCOMISH, GenCondition::FGT); - break; - case NI_AVX10v1_CompareScalarOrderedGreaterThanOrEqual: - LowerHWIntrinsicCC(node, NI_AVX10v1_VCOMISH, GenCondition::FGE); - break; - - case NI_AVX10v1_CompareScalarUnorderedEqual: - LowerHWIntrinsicCC(node, NI_AVX10v1_VUCOMISH, GenCondition::FEQ); - break; - case NI_AVX10v1_CompareScalarUnorderedNotEqual: - LowerHWIntrinsicCC(node, NI_AVX10v1_VUCOMISH, GenCondition::FNEU); - break; - case NI_AVX10v1_CompareScalarUnorderedLessThan: - LowerHWIntrinsicCC(node, NI_AVX10v1_VUCOMISH, GenCondition::FLT); - break; - case NI_AVX10v1_CompareScalarUnorderedLessThanOrEqual: - LowerHWIntrinsicCC(node, NI_AVX10v1_VUCOMISH, GenCondition::FLE); - break; - case NI_AVX10v1_CompareScalarUnorderedGreaterThan: - LowerHWIntrinsicCC(node, NI_AVX10v1_VUCOMISH, GenCondition::FGT); - break; - case NI_AVX10v1_CompareScalarUnorderedGreaterThanOrEqual: - LowerHWIntrinsicCC(node, NI_AVX10v1_VUCOMISH, GenCondition::FGE); - break; default: break; @@ -6344,7 +6305,7 @@ GenTree* Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) assert(HWIntrinsicInfo::IsVectorToScalar(intrinsicId)); assert(varTypeIsSIMD(simdType)); - assert(varTypeIsArithmetic(simdBaseType) || TypeGet(simdBaseType) == TYP_HALF); + assert(varTypeIsArithmetic(simdBaseType)); assert(simdSize != 0); GenTree* op1 
= node->Op(1); @@ -9834,7 +9795,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX512_GetMantissaScalar: case NI_AVX512_RoundScaleScalar: case NI_AVX512_ReduceScalar: - case NI_AVX10v1_RoundScaleScalar: { // These intrinsics have both 2 and 3-operand overloads. // @@ -10515,7 +10475,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AES_CarrylessMultiply: case NI_AES_V256_CarrylessMultiply: case NI_AES_V512_CarrylessMultiply: - case NI_AVX10v1_RoundScaleScalar: case NI_AVX10v2_MinMax: case NI_AVX10v2_MinMaxScalar: case NI_AVX10v2_MultipleSumAbsoluteDifferences: diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 653bace93d8078..e6e9a6a67ee26b 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -4382,7 +4382,6 @@ int LinearScan::BuildReturn(GenTree* tree) case TYP_VOID: useCandidates = RBM_NONE; break; - case TYP_HALF: case TYP_FLOAT: #ifdef TARGET_X86 useCandidates = RBM_FLOATRET; diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index af50fb5f873d6a..947e838fcd799a 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2503,7 +2503,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou case NI_AVX512_FusedMultiplySubtractAdd: case NI_AVX512_FusedMultiplySubtractNegated: case NI_AVX512_FusedMultiplySubtractNegatedScalar: - case NI_AVX10v1_FusedMultiplyAddScalar: { assert((numArgs == 3) || (intrinsicTree->OperIsEmbRoundingEnabled())); assert(isRMW); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index a307224f46242a..4f370aae0883d5 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -2926,7 +2926,7 @@ GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr) } #ifdef FEATURE_SIMD - if (varTypeIsStruct(elemTyp) && structSizeMightRepresentAcceleratedType(elemSize)) + if (varTypeIsStruct(elemTyp) && 
structSizeMightRepresentSIMDType(elemSize)) { elemTyp = impNormStructType(elemStructType); } @@ -14974,7 +14974,7 @@ PhaseStatus Compiler::fgPromoteStructs() } tooManyLocalsReported = true; } - else if (varTypeIsStruct(varDsc) && TypeGet(varDsc) != TYP_HALF) + else if (varTypeIsStruct(varDsc)) { assert(structPromotionHelper != nullptr); promotedVar = structPromotionHelper->TryPromoteStructVar(lclNum); diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h index fd46de8f2caf95..5856f644e20e89 100644 --- a/src/coreclr/jit/namedintrinsiclist.h +++ b/src/coreclr/jit/namedintrinsiclist.h @@ -168,45 +168,10 @@ enum NamedIntrinsic : unsigned short NI_System_Numerics_Intrinsic, NI_System_Runtime_Intrinsics_Intrinsic, - NI_System_Half_FusedMultiplyAdd, - NI_System_Half_Max, - NI_System_Half_Min, - NI_System_Half_ReciprocalEstimate, - NI_System_Half_ReciprocalSqrtEstimate, - NI_System_Half_Round, - NI_System_Half_Sqrt, - - NI_System_Half_op_Addition, - NI_System_Half_op_Decrement, - NI_System_Half_op_Division, - NI_System_Half_op_Equality, - NI_System_Half_op_Explicit, - NI_System_Half_op_GreaterThan, - NI_System_Half_op_GreaterThanOrEqual, - NI_System_Half_op_Increment, - NI_System_Half_op_Inequality, - NI_System_Half_op_LessThan, - NI_System_Half_op_LessThanOrEqual, - NI_System_Half_op_Multiply, - NI_System_Half_op_Subtraction, - - NI_System_Half_get_MinValue, - NI_System_Half_get_MaxValue, - NI_System_Half_get_Epsilon, - NI_System_Half_get_NaN, - NI_System_Half_get_PositiveInfinity, - NI_System_Half_get_NegativeInfinity, - NI_System_Half_get_One, - NI_System_Half_get_Zero, - - NI_System_Half_Ceiling, - NI_System_Half_Floor, - NI_System_Half_Truncate, - #ifdef FEATURE_HW_INTRINSICS NI_HW_INTRINSIC_START, #if defined(TARGET_XARCH) -#define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, category, flag) \ +#define HARDWARE_INTRINSIC(isa, name, size, numarg, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, 
category, flag) \ NI_##isa##_##name, #include "hwintrinsiclistxarch.h" #elif defined(TARGET_ARM64) diff --git a/src/coreclr/jit/regalloc.cpp b/src/coreclr/jit/regalloc.cpp index b035519f69d456..315bedffc55a4a 100644 --- a/src/coreclr/jit/regalloc.cpp +++ b/src/coreclr/jit/regalloc.cpp @@ -433,7 +433,6 @@ bool RegAllocImpl::isRegCandidate(LclVarDsc* varDsc) switch (genActualType(varDsc->TypeGet())) { - case TYP_HALF: case TYP_FLOAT: case TYP_DOUBLE: return !compiler->opts.compDbgCode; diff --git a/src/coreclr/jit/registeropswasm.cpp b/src/coreclr/jit/registeropswasm.cpp index d69d0f3a541917..8a2dff3fa2113a 100644 --- a/src/coreclr/jit/registeropswasm.cpp +++ b/src/coreclr/jit/registeropswasm.cpp @@ -79,7 +79,6 @@ WasmValueType TypeToWasmValueType(var_types type) WasmValueType::Invalid, // TYP_ULONG, WasmValueType::F32, // TYP_FLOAT, WasmValueType::F64, // TYP_DOUBLE, - WasmValueType::Invalid, // TYP_HALF, WasmValueType::I, // TYP_REF, WasmValueType::I, // TYP_BYREF, WasmValueType::Invalid, // TYP_STRUCT @@ -120,7 +119,6 @@ WasmValueType ActualTypeToWasmValueType(var_types type) WasmValueType::Invalid, // TYP_ULONG, WasmValueType::F32, // TYP_FLOAT, WasmValueType::F64, // TYP_DOUBLE, - WasmValueType::Invalid, // TYP_HALF, WasmValueType::I, // TYP_REF, WasmValueType::I, // TYP_BYREF, WasmValueType::Invalid, // TYP_STRUCT diff --git a/src/coreclr/jit/scopeinfo.cpp b/src/coreclr/jit/scopeinfo.cpp index a24434998e5421..8d173971d84849 100644 --- a/src/coreclr/jit/scopeinfo.cpp +++ b/src/coreclr/jit/scopeinfo.cpp @@ -297,9 +297,6 @@ void CodeGenInterface::siVarLoc::siFillStackVarLoc( case TYP_SIMD64: #endif // TARGET_XARCH #endif // FEATURE_SIMD -#if defined(TARGET_XARCH) - case TYP_HALF: -#endif #ifdef TARGET_64BIT case TYP_LONG: case TYP_DOUBLE: @@ -410,7 +407,6 @@ void CodeGenInterface::siVarLoc::siFillRegisterVarLoc( #endif // !TARGET_64BIT #ifdef TARGET_64BIT - case TYP_HALF: case TYP_FLOAT: case TYP_DOUBLE: // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a 
definition of RegNum that only goes up to R15, @@ -420,7 +416,7 @@ void CodeGenInterface::siVarLoc::siFillRegisterVarLoc( break; #else // !TARGET_64BIT - case TYP_HALF: + case TYP_FLOAT: case TYP_DOUBLE: if (isFloatRegType(type)) diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 1c87864b0930e8..66a65af1403865 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -174,7 +174,7 @@ var_types Compiler::getBaseTypeForPrimitiveNumericClass(CORINFO_CLASS_HANDLE cls // sizeBytes if non-null is set to size in bytes. // // Notes: -// If the size of the struct is already known call structSizeMightRepresentAcceleratedType +// If the size of the struct is already known call structSizeMightRepresentSIMDType // to determine if this api needs to be called. // // The type handle passed here can only be used in a subset of JIT-EE calls @@ -352,40 +352,6 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u break; } - default: - { - return TYP_UNDEF; - } - } - } - else if (strcmp(namespaceName, "System") == 0) - { - switch (className[0]) - { - case 'H': - { - if (strcmp(className, "Half") != 0) - { - return TYP_UNDEF; - } - - JITDUMP(" Known type Half\n"); - -#if defined(TARGET_XARCH) - if (!compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - // We must treat as a regular struct if AVX isn't supported - return TYP_UNDEF; - } - - simdBaseType = TYP_HALF; - size = genTypeSize(TYP_HALF); - break; -#else - return TYP_UNDEF; -#endif - } - default: { return TYP_UNDEF; diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 9fc4b0bc4600f3..43999e89edcebf 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -4,12 +4,6 @@ #ifndef _SIMD_H_ #define _SIMD_H_ -#include "float16.h" - -#ifndef SIZE_UNKNOWN -#define SIZE_UNKNOWN UINT8_MAX -#endif - template static bool ElementsAreSame(const T* array, size_t size) { @@ -148,18 +142,17 @@ struct simd16_t { union { - float16_t f16[8]; - float 
f32[4]; - double f64[2]; - int8_t i8[16]; - int16_t i16[8]; - int32_t i32[4]; - int64_t i64[2]; - uint8_t u8[16]; - uint16_t u16[8]; - uint32_t u32[4]; - uint64_t u64[2]; - simd8_t v64[2]; + float f32[4]; + double f64[2]; + int8_t i8[16]; + int16_t i16[8]; + int32_t i32[4]; + int64_t i64[2]; + uint8_t u8[16]; + uint16_t u16[8]; + uint32_t u32[4]; + uint64_t u64[2]; + simd8_t v64[2]; }; bool operator==(const simd16_t& other) const @@ -204,19 +197,18 @@ struct simd32_t { union { - float16_t f16[16]; - float f32[8]; - double f64[4]; - int8_t i8[32]; - int16_t i16[16]; - int32_t i32[8]; - int64_t i64[4]; - uint8_t u8[32]; - uint16_t u16[16]; - uint32_t u32[8]; - uint64_t u64[4]; - simd8_t v64[4]; - simd16_t v128[2]; + float f32[8]; + double f64[4]; + int8_t i8[32]; + int16_t i16[16]; + int32_t i32[8]; + int64_t i64[4]; + uint8_t u8[32]; + uint16_t u16[16]; + uint32_t u32[8]; + uint64_t u64[4]; + simd8_t v64[4]; + simd16_t v128[2]; }; bool operator==(const simd32_t& other) const @@ -260,20 +252,19 @@ struct simd64_t { union { - float16_t f16[32]; - float f32[16]; - double f64[8]; - int8_t i8[64]; - int16_t i16[32]; - int32_t i32[16]; - int64_t i64[8]; - uint8_t u8[64]; - uint16_t u16[32]; - uint32_t u32[16]; - uint64_t u64[8]; - simd8_t v64[8]; - simd16_t v128[4]; - simd32_t v256[2]; + float f32[16]; + double f64[8]; + int8_t i8[64]; + int16_t i16[32]; + int32_t i32[16]; + int64_t i64[8]; + uint8_t u8[64]; + uint16_t u16[32]; + uint32_t u32[16]; + uint64_t u64[8]; + simd8_t v64[8]; + simd16_t v128[4]; + simd32_t v256[2]; }; bool operator==(const simd64_t& other) const diff --git a/src/coreclr/jit/typelist.h b/src/coreclr/jit/typelist.h index c6428d9b31df49..b2cf2ffa1390f4 100644 --- a/src/coreclr/jit/typelist.h +++ b/src/coreclr/jit/typelist.h @@ -54,8 +54,6 @@ DEF_TP(ULONG ,"ulong" , TYP_LONG, 8,EPS,EPS, 2, 8, VTR_INT, available DEF_TP(FLOAT ,"float" , TYP_FLOAT, 4, 4, 4, 1, 4, VTR_FLOAT, availableFloatRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_FLT) 
DEF_TP(DOUBLE ,"double" , TYP_DOUBLE, 8, 8, 8, 2, 8, VTR_FLOAT, availableDoubleRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_FLT) -DEF_TP(HALF ,"half" , TYP_HALF, 2, 2, 2, 1, 2, VTR_FLOAT, availableFloatRegs, RBM_FLT_CALLEE_SAVED, RBM_FLT_CALLEE_TRASH, VTF_S) - DEF_TP(REF ,"ref" , TYP_REF, PS,GCS,GCS, PST,PS,VTR_INT, availableIntRegs, RBM_INT_CALLEE_SAVED, RBM_INT_CALLEE_TRASH, VTF_ANY|VTF_GCR|VTF_I) DEF_TP(BYREF ,"byref" , TYP_BYREF, PS,BRS,BRS, PST,PS,VTR_INT, availableIntRegs, RBM_INT_CALLEE_SAVED, RBM_INT_CALLEE_TRASH, VTF_ANY|VTF_BYR|VTF_I) DEF_TP(STRUCT ,"struct" , TYP_STRUCT, 0, 0, 0, 1, 4, VTR_INT, availableIntRegs, RBM_INT_CALLEE_SAVED, RBM_INT_CALLEE_TRASH, VTF_S) diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index f19d170b9e063e..f1380f35a176c0 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -2186,89 +2186,6 @@ uint64_t FloatingPointUtils::convertDoubleToUInt64(double d) return (uint64_t)d; } -static constexpr uint64_t DBL_SIGN_MASK = 0x8000000000000000ULL; -static constexpr uint64_t DBL_EXP_MASK = 0x7FF0000000000000ULL; -static constexpr uint64_t DBL_MANTISSA_MASK = 0x000FFFFFFFFFFFFFULL; -static constexpr int DBL_EXP_SHIFT = 52; -static constexpr int DBL_MAX_BIASED_EXP = 0x7FF; - -static constexpr uint16_t HALF_SIGN_MASK = 0x8000; -static constexpr uint16_t HALF_EXP_MASK = 0x7C00; - -static constexpr uint64_t HALF_POSITIVE_INFINITY_BITS = 0x7C00; -static constexpr uint64_t HALF_NEGATIVE_INFINITY_BITS = 0xFC00; - -static constexpr int16_t HALF_EXP_BIAS_ADJUST = 0x3F1; - -static constexpr uint16_t IMPLICIT_BIT = 0x4000; // Leading 1 for normalized numbers - -static uint64_t shiftRightJam(uint64_t l, int32_t dist) -{ - return dist < 63 ? (l >> dist) | (l << (-dist & 63) != 0 ? 1UL : 0UL) : (l != 0 ? 
1UL : 0UL); -} - -static float16_t roundPackToHalf(bool sign, int16_t exp, uint16_t sig) -{ - const int RoundIncrement = 0x8; - int roundBits = sig & 0xF; - - if ((uint16_t)exp >= 0x1D) - { - if (exp < 0) - { - sig = (uint16_t)shiftRightJam(sig, -exp); - exp = 0; - roundBits = sig & 0xF; - } - else if (exp > 0x1D || sig + RoundIncrement >= 0x8000) // Overflow - { - return sign ? HALF_NEGATIVE_INFINITY_BITS : HALF_POSITIVE_INFINITY_BITS; - } - } - - sig = (uint16_t)((sig + RoundIncrement) >> 4); - sig &= (uint16_t) ~(((roundBits ^ 8) != 0 ? 0 : 1) & 1); - - if (sig == 0) - { - exp = 0; - } - - return (((sign ? 1 : 0) << 15) + (exp << 10) + sig); -} - -float16_t FloatingPointUtils::convertDoubleToFloat16(double value) -{ - uint64_t doubleInt; - memcpy(&doubleInt, &value, sizeof(double)); - - bool sign = (doubleInt & DBL_SIGN_MASK) != 0; - int exp = static_cast((doubleInt & DBL_EXP_MASK) >> DBL_EXP_SHIFT); - uint64_t sig = doubleInt & DBL_MANTISSA_MASK; - - // Handle NaN / Infinity - if (exp == DBL_MAX_BIASED_EXP) - { - if (sig != 0) - { - const uint16_t NaNBits = HALF_EXP_MASK | 0x200; // Most significant significand bit - uint16_t signInt = (sign ? 1U : 0U) << 15; - uint16_t sigInt = (uint16_t)((sig << 12) >> 54); - return signInt | NaNBits | sigInt; - } - return (sign ? HALF_NEGATIVE_INFINITY_BITS : HALF_POSITIVE_INFINITY_BITS); - } - - uint32_t sigHalf = static_cast(shiftRightJam(sig, 38)); - if ((exp | (int32_t)sigHalf) == 0) - { - return (sign ? HALF_SIGN_MASK : 0); - } - - return roundPackToHalf(sign, static_cast(exp - HALF_EXP_BIAS_ADJUST), - static_cast(sigHalf | IMPLICIT_BIT)); -} - //------------------------------------------------------------------------ // convertToDouble: Convert a single to a double with platform independent // preservation of payload bits. 
@@ -3301,22 +3218,6 @@ uint64_t BitOperations::DoubleToUInt64Bits(double value) return result; } -//------------------------------------------------------------------------ -// BitOperations::HalfToUInt16Bits: Gets the underlying bits for a half-precision floating-point value. -// -// Arguments: -// value - The number to convert -// -// Return Value: -// The underlying bits for value. -// -uint16_t BitOperations::HalfToUInt16Bits(float16_t value) -{ - uint16_t result; - memcpy(&result, &value, sizeof(float16_t)); - return result; -} - //------------------------------------------------------------------------ // BitOperations::LeadingZeroCount: Count the number of leading zero bits in a mask. // @@ -3668,22 +3569,6 @@ uint32_t BitOperations::TrailingZeroCount(uint64_t value) #endif } -//------------------------------------------------------------------------ -// BitOperations::UInt16BitsToHalf: Gets a half-precision floating-point from its underlying bit value. -// -// Arguments: -// value - The underlying bit value. -// -// Return Value: -// The half-precision floating-point from value. -// -float16_t BitOperations::UInt16BitsToHalf(uint16_t value) -{ - float16_t result; - memcpy(&result, &value, sizeof(uint16_t)); - return result; -} - //------------------------------------------------------------------------ // BitOperations::UInt32BitsToSingle: Gets a single-precision floating-point from its underlying bit value. 
// diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index be4ca4638dfdee..38c819e0672896 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -21,7 +21,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "hostallocator.h" #include "cycletimer.h" #include "vartypesdef.h" -#include "float16.h" // Needed for unreached() #include "error.h" @@ -843,8 +842,6 @@ class FloatingPointUtils static uint64_t convertDoubleToUInt64(double d); - static float16_t convertDoubleToFloat16(double d); - static double convertToDouble(float f); static float convertToSingle(double d); @@ -993,8 +990,6 @@ class BitOperations static uint64_t DoubleToUInt64Bits(double value); - static uint16_t HalfToUInt16Bits(float16_t value); - static uint32_t LeadingZeroCount(uint32_t value); static uint32_t LeadingZeroCount(uint64_t value); @@ -1025,8 +1020,6 @@ class BitOperations static uint32_t TrailingZeroCount(uint64_t value); - static float16_t UInt16BitsToHalf(uint16_t value); - static float UInt32BitsToSingle(uint32_t value); static double UInt64BitsToDouble(uint64_t value); diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 7e0e50688cd267..3d76fd6333b04f 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -437,7 +437,6 @@ ValueNumStore::ValueNumStore(Compiler* comp, CompAllocator alloc) , m_handleMap(nullptr) , m_embeddedToCompileTimeHandleMap(alloc) , m_fieldAddressToFieldSeqMap(alloc) - , m_halfCnsMap(nullptr) , m_floatCnsMap(nullptr) , m_doubleCnsMap(nullptr) , m_byrefCnsMap(nullptr) @@ -1671,9 +1670,6 @@ ValueNumStore::Chunk::Chunk(CompAllocator alloc, ValueNum* pNextBaseVN, var_type case TYP_INT: m_defs = new (alloc) Alloc::Type[ChunkSize]; break; - case TYP_HALF: - m_defs = new (alloc) Alloc::Type[ChunkSize]; - break; case TYP_FLOAT: m_defs = new (alloc) Alloc::Type[ChunkSize]; break; @@ -1862,11 +1858,6 @@ ValueNum ValueNumStore::VNForLongCon(INT64 cnsVal) return 
VnForConst(cnsVal, GetLongCnsMap(), TYP_LONG); } -ValueNum ValueNumStore::VNForHalfCon(float16_t cnsVal) -{ - return VnForConst(cnsVal, GetHalfCnsMap(), TYP_HALF); -} - ValueNum ValueNumStore::VNForFloatCon(float cnsVal) { return VnForConst(cnsVal, GetFloatCnsMap(), TYP_FLOAT); @@ -2140,8 +2131,6 @@ ValueNum ValueNumStore::VNZeroForType(var_types typ) case TYP_LONG: case TYP_ULONG: return VNForLongCon(0); - case TYP_HALF: - return VNForHalfCon(static_cast(0)); case TYP_FLOAT: return VNForFloatCon(0.0f); case TYP_DOUBLE: @@ -3763,7 +3752,7 @@ ValueNum ValueNumStore::VNForFieldSelector(CORINFO_FIELD_HANDLE fieldHnd, var_ty structSize = m_compiler->info.compCompHnd->getClassSize(structHnd); // We have to normalize here since there is no CorInfoType for vectors... - if (m_compiler->structSizeMightRepresentAcceleratedType(structSize)) + if (m_compiler->structSizeMightRepresentSIMDType(structSize)) { fieldType = m_compiler->impNormStructType(structHnd); } diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index 90804ca11311d9..47c7af240ce290 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -469,7 +469,6 @@ class ValueNumStore ValueNum VNForIntCon(INT32 cnsVal); ValueNum VNForIntPtrCon(ssize_t cnsVal); ValueNum VNForLongCon(INT64 cnsVal); - ValueNum VNForHalfCon(float16_t cnsVal); ValueNum VNForFloatCon(float cnsVal); ValueNum VNForDoubleCon(double cnsVal); ValueNum VNForByrefCon(target_size_t byrefVal); @@ -1211,7 +1210,6 @@ class ValueNumStore case TYP_INT: case TYP_LONG: - case TYP_HALF: case TYP_FLOAT: case TYP_DOUBLE: if (c->m_attribs == CEA_Handle) @@ -1694,17 +1692,6 @@ class ValueNumStore typedef SmallHashTable FieldAddressToFieldSeqMap; FieldAddressToFieldSeqMap m_fieldAddressToFieldSeqMap; - typedef VNMap HalfToValueNumMap; - HalfToValueNumMap* m_halfCnsMap; - HalfToValueNumMap* GetHalfCnsMap() - { - if (m_halfCnsMap == nullptr) - { - m_halfCnsMap = new (m_alloc) HalfToValueNumMap(m_alloc); - } - return m_halfCnsMap; 
- } - struct LargePrimitiveKeyFuncsFloat : public JitLargePrimitiveKeyFuncs { static bool Equals(float x, float y) @@ -2086,14 +2073,6 @@ struct ValueNumStore::VarTypConv typedef INT32 Type; typedef int Lang; }; - -template <> -struct ValueNumStore::VarTypConv -{ - typedef float16_t Type; - typedef float16_t Lang; -}; - template <> struct ValueNumStore::VarTypConv { @@ -2185,8 +2164,6 @@ FORCEINLINE T ValueNumStore::SafeGetConstantValue(Chunk* c, unsigned offset) return static_cast(reinterpret_cast::Type*>(c->m_defs)[offset]); case TYP_LONG: return static_cast(reinterpret_cast::Type*>(c->m_defs)[offset]); - case TYP_HALF: - return static_cast(reinterpret_cast::Type*>(c->m_defs)[offset]); case TYP_FLOAT: return static_cast(reinterpret_cast::Lang*>(c->m_defs)[offset]); case TYP_DOUBLE: diff --git a/src/coreclr/jit/valuenumfuncs.h b/src/coreclr/jit/valuenumfuncs.h index b60d43f9025a7b..1b78e12315f572 100644 --- a/src/coreclr/jit/valuenumfuncs.h +++ b/src/coreclr/jit/valuenumfuncs.h @@ -190,11 +190,11 @@ ValueNumFuncDef(SimdType, 2, false, false) // A value number function to compos // In VN all HW intrinsics encode an extra arg for the base type (except when // they are variadic), hence the +1 to the arg count below here. #if defined(TARGET_XARCH) -#define HARDWARE_INTRINSIC(isa, name, size, argCount, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, category, flag) \ +#define HARDWARE_INTRINSIC(isa, name, size, argCount, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ ValueNumFuncDef(HWI_##isa##_##name, ((argCount == -1) ? 
-1 : (argCount + 1)), ((flag) & HW_Flag_Commutative) >> 0, false) // All of the HARDWARE_INTRINSICS for x86/x64 #include "hwintrinsiclistxarch.h" #define VNF_HWI_FIRST VNF_HWI_Vector128_Abs -#define VNF_HWI_LAST VNF_HWI_AVX10v1_VUCOMISH +#define VNF_HWI_LAST VNF_HWI_AVX512_XnorMask #elif defined(TARGET_ARM64) #define HARDWARE_INTRINSIC(isa, name, size, argCount, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, category, flag) \ diff --git a/src/coreclr/jit/vartype.h b/src/coreclr/jit/vartype.h index 7820d49ce639cc..c501dea656ebda 100644 --- a/src/coreclr/jit/vartype.h +++ b/src/coreclr/jit/vartype.h @@ -77,17 +77,6 @@ inline bool varTypeIsSIMD(T vt) #endif } -template -inline bool varTypeIsStructPrimitive(T vt) -{ -#ifdef FEATURE_SIMD - return (varTypeIsSIMD(vt) || (TypeGet(vt) == TYP_HALF)); -#else - // Always return false if FEATURE_SIMD is not enabled - return false; -#endif -} - template inline bool varTypeIsMask(T vt) { @@ -352,7 +341,7 @@ inline bool varTypeUsesFloatArgReg(T vt) return varTypeUsesFloatReg(vt); #else // Other targets pass them as regular structs - by reference or by value. 
- return varTypeIsFloating(vt) || TypeGet(vt) == TYP_HALF; + return varTypeIsFloating(vt); #endif } diff --git a/src/coreclr/vm/amd64/profiler.cpp b/src/coreclr/vm/amd64/profiler.cpp index 16bd695c901b02..bf01c87aba6503 100644 --- a/src/coreclr/vm/amd64/profiler.cpp +++ b/src/coreclr/vm/amd64/profiler.cpp @@ -324,19 +324,7 @@ LPVOID ProfileArgIterator::GetNextArgAddr() _ASSERTE(regStructOfs < ARGUMENTREGISTERS_SIZE); _ASSERTE(IS_ALIGNED(regStructOfs, sizeof(SLOT))); - - bool isFloatArg = (argType == ELEMENT_TYPE_R4 || argType == ELEMENT_TYPE_R8); -#ifdef TARGET_XARCH - if (!isFloatArg && argType == ELEMENT_TYPE_VALUETYPE) - { - TypeHandle argTypeHandle; - m_argIterator.GetArgType(&argTypeHandle); - if (argTypeHandle.IsNativeHalfType()) - isFloatArg = true; - } -#endif // TARGET_XARCH - - if (isFloatArg) + if (argType == ELEMENT_TYPE_R4 || argType == ELEMENT_TYPE_R8) { return (LPBYTE)&pData->flt0 + regStructOfs; } @@ -510,13 +498,7 @@ LPVOID ProfileArgIterator::GetReturnBufferAddr(void) } #endif // UNIX_AMD64_ABI - bool isFloatReturn = (ELEMENT_TYPE_R4 == t || ELEMENT_TYPE_R8 == t); -#ifdef TARGET_XARCH - if (!isFloatReturn && ELEMENT_TYPE_VALUETYPE == t && thReturnType.IsNativeHalfType()) - isFloatReturn = true; -#endif // TARGET_XARCH - - if (isFloatReturn) + if (ELEMENT_TYPE_R4 == t || ELEMENT_TYPE_R8 == t) { pData->rax = pData->flt0; } diff --git a/src/coreclr/vm/callhelpers.cpp b/src/coreclr/vm/callhelpers.cpp index ecead367c6b90e..2568a1483d5714 100644 --- a/src/coreclr/vm/callhelpers.cpp +++ b/src/coreclr/vm/callhelpers.cpp @@ -370,20 +370,7 @@ void MethodDescCallSite::CallTargetWorker(const ARG_SLOT *pArguments, ARG_SLOT * for (; TransitionBlock::InvalidOffset != (ofs = m_argIt.GetNextOffset()); arg++) { #ifdef CALLDESCR_REGTYPEMAP - { - CorElementType regMapType = m_argIt.GetArgType(); -#if defined(TARGET_AMD64) || defined(TARGET_X86) - // System.Half is passed in floating point registers like a float - if (regMapType == ELEMENT_TYPE_VALUETYPE) - { - 
TypeHandle th; - m_argIt.GetArgType(&th); - if (th.IsNativeHalfType()) - regMapType = ELEMENT_TYPE_R4; - } -#endif // TARGET_AMD64 || TARGET_X86 - FillInRegTypeMap(ofs, regMapType, pMap); - } + FillInRegTypeMap(ofs, m_argIt.GetArgType(), pMap); #endif #ifdef CALLDESCR_FPARGREGS diff --git a/src/coreclr/vm/callingconvention.h b/src/coreclr/vm/callingconvention.h index 085e1a58fb7e82..f7a76c516713d5 100644 --- a/src/coreclr/vm/callingconvention.h +++ b/src/coreclr/vm/callingconvention.h @@ -878,12 +878,7 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE { #if !defined(UNIX_AMD64_ABI) // On Windows x64, we re-use the location in the transition block for both the integer and floating point registers - if ((m_argType == ELEMENT_TYPE_R4) || (m_argType == ELEMENT_TYPE_R8) -#if defined(TARGET_AMD64) || defined(TARGET_X86) - // System.Half is passed in floating point registers like a float - || (m_argType == ELEMENT_TYPE_VALUETYPE && m_argTypeHandle.IsNativeHalfType()) -#endif // TARGET_XARCH - ) + if ((m_argType == ELEMENT_TYPE_R4) || (m_argType == ELEMENT_TYPE_R8)) { pLoc->m_idxFloatReg = TransitionBlock::GetArgumentIndexFromOffset(argOffset); pLoc->m_cFloatReg = 1; @@ -2040,15 +2035,6 @@ void ArgIteratorTemplate::ComputeReturnFlags() } #endif -#if defined(TARGET_AMD64) || defined(TARGET_X86) - // System.Half is returned in xmm0 like a float - if (thValueType.IsNativeHalfType()) - { - flags |= sizeof(float) << RETURN_FP_SIZE_SHIFT; - break; - } -#endif // TARGET_XARCH - size_t size = thValueType.GetSize(); #if defined(TARGET_X86) || defined(TARGET_AMD64) diff --git a/src/coreclr/vm/class.cpp b/src/coreclr/vm/class.cpp index b608613e3d22a1..d80b89a354b7e9 100644 --- a/src/coreclr/vm/class.cpp +++ b/src/coreclr/vm/class.cpp @@ -1712,39 +1712,6 @@ int MethodTable::GetVectorSize() return 0; } -#if defined(TARGET_AMD64) || defined(TARGET_X86) -//******************************************************************************* -// Returns true if this is the System.Half type 
and the CPU supports AVX10v1. -// System.Half is passed and returned in floating point registers on xarch -// when AVX10v1 is available, matching the JIT's behavior in isNativeHalfStructType(). -bool MethodTable::IsNativeHalfType() -{ - CONTRACTL - { - WRAPPER(THROWS); - WRAPPER(GC_TRIGGERS); - } - CONTRACTL_END; - - if (!IsIntrinsicType()) - return false; - - if (GetNumInstanceFieldBytes() != 2) - return false; - - // The JIT only passes Half in FP registers when AVX10v1 is available. - // The VM must match to avoid calling convention mismatches. - CORJIT_FLAGS cpuFlags = ExecutionManager::GetEEJitManager()->GetCPUCompileFlags(); - if (!cpuFlags.IsSet(InstructionSet_AVX10v1)) - return false; - - LPCUTF8 namespaceName; - LPCUTF8 className = GetFullyQualifiedNameInfo(&namespaceName); - - return (strcmp(className, "Half") == 0) && (strcmp(namespaceName, "System") == 0); -} -#endif // TARGET_XARCH - //******************************************************************************* CorInfoHFAElemType MethodTable::GetHFAType() { diff --git a/src/coreclr/vm/invokeutil.cpp b/src/coreclr/vm/invokeutil.cpp index 186d2616591374..ea289619bcf902 100644 --- a/src/coreclr/vm/invokeutil.cpp +++ b/src/coreclr/vm/invokeutil.cpp @@ -220,17 +220,6 @@ void InvokeUtil::CopyArg(TypeHandle th, PVOID argRef, ArgDestination *argDest) { case ELEMENT_TYPE_VALUETYPE: { MethodTable* pMT = th.GetMethodTable(); -#if defined(TARGET_AMD64) || defined(TARGET_X86) - // System.Half is passed in floating point registers. Zero-extend the 2-byte value - // to 4 bytes so that the movss load in CallDescrWorker reads clean data. 
- if (pMT->IsNativeHalfType()) - { - _ASSERTE(argRef != NULL); - *(UINT32 *)pArgDst = 0; - *(UINT16 *)pArgDst = *(UINT16 *)argRef; - break; - } -#endif // TARGET_XARCH CopyValueClassArg(argDest, argRef, pMT, 0); break; } diff --git a/src/coreclr/vm/methodtable.h b/src/coreclr/vm/methodtable.h index 16bc652bb890a5..88cd6ceacbb1ae 100644 --- a/src/coreclr/vm/methodtable.h +++ b/src/coreclr/vm/methodtable.h @@ -2057,12 +2057,6 @@ class MethodTable bool IsNativeHFA(); CorInfoHFAElemType GetNativeHFAType(); -#if defined(TARGET_AMD64) || defined(TARGET_X86) - // Returns true if this is the System.Half type, which is passed and returned - // in floating point registers on xarch platforms. - bool IsNativeHalfType(); -#endif // TARGET_XARCH - #ifdef UNIX_AMD64_ABI inline bool IsRegPassedStruct() { diff --git a/src/coreclr/vm/reflectioninvocation.cpp b/src/coreclr/vm/reflectioninvocation.cpp index 4720f0a9019737..37343b4fcb9cc2 100644 --- a/src/coreclr/vm/reflectioninvocation.cpp +++ b/src/coreclr/vm/reflectioninvocation.cpp @@ -554,15 +554,7 @@ extern "C" void QCALLTYPE RuntimeMethodHandle_InvokeMethod( _ASSERTE(ofs != TransitionBlock::InvalidOffset); #ifdef CALLDESCR_REGTYPEMAP - { - CorElementType regMapType = argit.GetArgType(); -#if defined(TARGET_AMD64) || defined(TARGET_X86) - // System.Half is passed in floating point registers like a float - if (regMapType == ELEMENT_TYPE_VALUETYPE && th.IsNativeHalfType()) - regMapType = ELEMENT_TYPE_R4; -#endif // TARGET_XARCH - FillInRegTypeMap(ofs, regMapType, (BYTE *)&callDescrData.dwRegTypeMap); - } + FillInRegTypeMap(ofs, argit.GetArgType(), (BYTE *)&callDescrData.dwRegTypeMap); #endif #ifdef CALLDESCR_FPARGREGS diff --git a/src/coreclr/vm/typehandle.cpp b/src/coreclr/vm/typehandle.cpp index c098fe868094df..5dd7d4e85d468a 100644 --- a/src/coreclr/vm/typehandle.cpp +++ b/src/coreclr/vm/typehandle.cpp @@ -456,15 +456,6 @@ bool TypeHandle::IsFloatHfa() const return (GetHFAType() == CORINFO_HFA_ELEM_FLOAT); } -#if 
defined(TARGET_AMD64) || defined(TARGET_X86) -bool TypeHandle::IsNativeHalfType() const -{ - WRAPPER_NO_CONTRACT; - - return !IsNull() && AsMethodTable()->IsNativeHalfType(); -} -#endif // TARGET_XARCH - #ifdef FEATURE_64BIT_ALIGNMENT bool TypeHandle::RequiresAlign8() const diff --git a/src/coreclr/vm/typehandle.h b/src/coreclr/vm/typehandle.h index c95cded9c94b6f..1fb2f944bb509d 100644 --- a/src/coreclr/vm/typehandle.h +++ b/src/coreclr/vm/typehandle.h @@ -357,10 +357,6 @@ class TypeHandle bool IsFloatHfa() const; -#if defined(TARGET_AMD64) || defined(TARGET_X86) - bool IsNativeHalfType() const; -#endif // TARGET_XARCH - #ifdef FEATURE_64BIT_ALIGNMENT bool RequiresAlign8() const; #endif // FEATURE_64BIT_ALIGNMENT diff --git a/src/libraries/System.Private.CoreLib/src/System/Half.cs b/src/libraries/System.Private.CoreLib/src/System/Half.cs index f2d3d2f8aa975e..de4b6b925a544d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Half.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Half.cs @@ -8,7 +8,6 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; namespace System { @@ -17,7 +16,6 @@ namespace System /// /// Represents a half-precision floating-point number. 
/// - [Intrinsic] [StructLayout(LayoutKind.Sequential)] public readonly struct Half : IComparable, @@ -88,43 +86,19 @@ public readonly struct Half // Well-defined and commonly used values - public static Half Epsilon - { - [Intrinsic] - get => new Half(EpsilonBits); // 5.9604645E-08 - } + public static Half Epsilon => new Half(EpsilonBits); // 5.9604645E-08 - public static Half PositiveInfinity - { - [Intrinsic] - get => new Half(PositiveInfinityBits); // 1.0 / 0.0; - } + public static Half PositiveInfinity => new Half(PositiveInfinityBits); // 1.0 / 0.0; - public static Half NegativeInfinity - { - [Intrinsic] - get => new Half(NegativeInfinityBits); // -1.0 / 0.0 - } + public static Half NegativeInfinity => new Half(NegativeInfinityBits); // -1.0 / 0.0 - public static Half NaN - { - [Intrinsic] - get => new Half(NegativeQNaNBits); // 0.0 / 0.0 - } + public static Half NaN => new Half(NegativeQNaNBits); // 0.0 / 0.0 /// - public static Half MinValue - { - [Intrinsic] - get => new Half(MinValueBits); // -65504 - } + public static Half MinValue => new Half(MinValueBits); // -65504 /// - public static Half MaxValue - { - [Intrinsic] - get => new Half(MaxValueBits); // 65504 - } + public static Half MaxValue => new Half(MaxValueBits); // 65504 internal readonly ushort _value; @@ -180,7 +154,6 @@ internal static ushort ExtractTrailingSignificandFromBits(ushort bits) } /// - [Intrinsic] public static bool operator <(Half left, Half right) { if (IsNaN(left) || IsNaN(right)) @@ -203,14 +176,12 @@ internal static ushort ExtractTrailingSignificandFromBits(ushort bits) } /// - [Intrinsic] public static bool operator >(Half left, Half right) { return right < left; } /// - [Intrinsic] public static bool operator <=(Half left, Half right) { if (IsNaN(left) || IsNaN(right)) @@ -233,14 +204,12 @@ internal static ushort ExtractTrailingSignificandFromBits(ushort bits) } /// - [Intrinsic] public static bool operator >=(Half left, Half right) { return right <= left; } /// - 
[Intrinsic] public static bool operator ==(Half left, Half right) { if (IsNaN(left) || IsNaN(right)) @@ -254,7 +223,6 @@ internal static ushort ExtractTrailingSignificandFromBits(ushort bits) } /// - [Intrinsic] public static bool operator !=(Half left, Half right) { return !(left == right); @@ -582,6 +550,7 @@ public bool TryFormat(Span utf8Destination, out int bytesWritten, [StringS { return Number.TryFormatFloat(this, format, NumberFormatInfo.GetInstance(provider), utf8Destination, out bytesWritten); } + // // Explicit Convert To Half // @@ -599,7 +568,6 @@ public bool TryFormat(Span utf8Destination, out int bytesWritten, [StringS /// Explicitly converts a value to its nearest representable half-precision floating-point value. /// The value to convert. /// converted to its nearest representable half-precision floating-point value. - [Intrinsic] public static explicit operator Half(double value) { const int DoubleMaxExponent = 0x7FF; @@ -634,13 +602,11 @@ public static explicit operator Half(double value) /// Explicitly converts a value to its nearest representable half-precision floating-point value. /// The value to convert. /// converted to its nearest representable half-precision floating-point value. - [Intrinsic] public static explicit operator Half(int value) => (Half)(float)value; /// Explicitly converts a value to its nearest representable half-precision floating-point value. /// The value to convert. /// converted to its nearest representable half-precision floating-point value. - [Intrinsic] public static explicit operator Half(long value) => (Half)(float)value; /// Explicitly converts a value to its nearest representable half-precision floating-point value. @@ -651,7 +617,6 @@ public static explicit operator Half(double value) /// Explicitly converts a value to its nearest representable half-precision floating-point value. /// The value to convert. /// converted to its nearest representable half-precision floating-point value. 
- [Intrinsic] public static explicit operator Half(float value) { #region Explanation of this algorithm @@ -825,14 +790,12 @@ public static explicit operator Half(float value) /// The value to convert. /// converted to its nearest representable half-precision floating-point value. [CLSCompliant(false)] - [Intrinsic] public static explicit operator Half(uint value) => (Half)(float)value; /// Explicitly converts a value to its nearest representable half-precision floating-point value. /// The value to convert. /// converted to its nearest representable half-precision floating-point value. [CLSCompliant(false)] - [Intrinsic] public static explicit operator Half(ulong value) => (Half)(float)value; /// Explicitly converts a value to its nearest representable half-precision floating-point value. @@ -886,7 +849,6 @@ public static explicit operator Half(float value) /// Explicitly converts a half-precision floating-point value to its nearest representable value. /// The value to convert. /// converted to its nearest representable value. - [Intrinsic] public static explicit operator int(Half value) => (int)(float)value; /// Explicitly converts a half-precision floating-point value to its nearest representable value, throwing an overflow exception for any values that fall outside the representable range. @@ -898,7 +860,6 @@ public static explicit operator Half(float value) /// Explicitly converts a half-precision floating-point value to its nearest representable value. /// The value to convert. /// converted to its nearest representable value. - [Intrinsic] public static explicit operator long(Half value) => (long)(float)value; /// Explicitly converts a half-precision floating-point value to its nearest representable value, throwing an overflow exception for any values that fall outside the representable range. @@ -959,7 +920,6 @@ public static explicit operator Half(float value) /// The value to convert. /// converted to its nearest representable value. 
[CLSCompliant(false)] - [Intrinsic] public static explicit operator uint(Half value) => (uint)(float)value; /// Explicitly converts a half-precision floating-point value to its nearest representable value, throwing an overflow exception for any values that fall outside the representable range. @@ -973,7 +933,6 @@ public static explicit operator Half(float value) /// The value to convert. /// converted to its nearest representable value. [CLSCompliant(false)] - [Intrinsic] public static explicit operator ulong(Half value) => (ulong)(float)value; /// Explicitly converts a half-precision floating-point value to its nearest representable value, throwing an overflow exception for any values that fall outside the representable range. @@ -1031,7 +990,6 @@ public static explicit operator Half(float value) /// Explicitly converts a half-precision floating-point value to its nearest representable value. /// The value to convert. /// converted to its nearest representable value. - [Intrinsic] public static explicit operator double(Half value) { bool sign = IsNegative(value); @@ -1063,7 +1021,6 @@ public static explicit operator double(Half value) /// Explicitly converts a half-precision floating-point value to its nearest representable value. /// The value to convert. /// converted to its nearest representable value. 
- [Intrinsic] public static explicit operator float(Half value) { #region Explanation of this algorithm @@ -1227,7 +1184,6 @@ private static double CreateDoubleNaN(bool sign, ulong significand) // /// - [Intrinsic] public static Half operator +(Half left, Half right) => (Half)((float)left + (float)right); // @@ -1309,7 +1265,6 @@ public static bool IsPow2(Half value) // /// - [Intrinsic] public static Half operator --(Half value) { var tmp = (float)value; @@ -1322,7 +1277,6 @@ public static bool IsPow2(Half value) // /// - [Intrinsic] public static Half operator /(Half left, Half right) => (Half)((float)left / (float)right); // @@ -1352,7 +1306,6 @@ public static bool IsPow2(Half value) // /// - [Intrinsic] public static Half Ceiling(Half x) => (Half)MathF.Ceiling((float)x); /// @@ -1364,11 +1317,9 @@ public static TInteger ConvertToIntegerNative(Half value) where TInteger : IBinaryInteger => TInteger.CreateSaturating(value); /// - [Intrinsic] public static Half Floor(Half x) => (Half)MathF.Floor((float)x); /// - [Intrinsic] public static Half Round(Half x) => (Half)MathF.Round((float)x); /// @@ -1381,7 +1332,6 @@ public static TInteger ConvertToIntegerNative(Half value) public static Half Round(Half x, int digits, MidpointRounding mode) => (Half)MathF.Round((float)x, digits, mode); /// - [Intrinsic] public static Half Truncate(Half x) => (Half)MathF.Truncate((float)x); /// @@ -1555,7 +1505,6 @@ public static Half BitIncrement(Half x) } /// - [Intrinsic] public static Half FusedMultiplyAdd(Half left, Half right, Half addend) => (Half)MathF.FusedMultiplyAdd((float)left, (float)right, (float)addend); /// @@ -1593,11 +1542,9 @@ public static int ILogB(Half x) public static Half Lerp(Half value1, Half value2, Half amount) => (Half)float.Lerp((float)value1, (float)value2, (float)amount); /// - [Intrinsic] public static Half ReciprocalEstimate(Half x) => (Half)MathF.ReciprocalEstimate((float)x); /// - [Intrinsic] public static Half ReciprocalSqrtEstimate(Half x) => 
(Half)MathF.ReciprocalSqrtEstimate((float)x); /// @@ -1633,7 +1580,6 @@ public static int ILogB(Half x) // /// - [Intrinsic] public static Half operator ++(Half value) { var tmp = (float)value; @@ -1682,7 +1628,6 @@ public static int ILogB(Half x) // /// - [Intrinsic] public static Half operator *(Half left, Half right) => (Half)((float)left * (float)right); // @@ -1716,7 +1661,6 @@ public static Half CopySign(Half value, Half sign) } /// - [Intrinsic] public static Half Max(Half x, Half y) => (Half)float.Max((float)x, (float)y); /// @@ -1745,7 +1689,6 @@ public static Half MaxNumber(Half x, Half y) } /// - [Intrinsic] public static Half Min(Half x, Half y) => (Half)float.Min((float)x, (float)y); /// @@ -1798,21 +1741,13 @@ public static int Sign(Half value) // /// - public static Half One - { - [Intrinsic] - get => new Half(PositiveOneBits); - } + public static Half One => new Half(PositiveOneBits); /// static int INumberBase.Radix => 2; /// - public static Half Zero - { - [Intrinsic] - get => new Half(PositiveZeroBits); - } + public static Half Zero => new Half(PositiveZeroBits); /// public static Half Abs(Half value) => new Half((ushort)(value._value & ~SignMask)); @@ -2257,7 +2192,6 @@ private static bool TryConvertTo(Half value, [MaybeNullWhen(false)] out public static Half RootN(Half x, int n) => (Half)float.RootN((float)x, n); /// - [Intrinsic] public static Half Sqrt(Half x) => (Half)MathF.Sqrt((float)x); // @@ -2282,7 +2216,6 @@ private static bool TryConvertTo(Half value, [MaybeNullWhen(false)] out // /// - [Intrinsic] public static Half operator -(Half left, Half right) => (Half)((float)left - (float)right); // @@ -2296,7 +2229,6 @@ private static bool TryConvertTo(Half value, [MaybeNullWhen(false)] out public static Half AcosPi(Half x) => (Half)float.AcosPi((float)x); /// - [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Half Asin(Half x) => (Half)MathF.Asin((float)x); ///