From 7c21b16d74fe06e24bd4bbed4e0e5da70888d250 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 25 May 2025 18:20:20 -0700 Subject: [PATCH 1/7] Rework how EVEX+AVX512+AVX10 are supported by the JIT to greatly simplify things --- src/coreclr/inc/clrconfigvalues.h | 28 +- src/coreclr/inc/corinfoinstructionset.h | 446 +++------ src/coreclr/inc/jiteeversionguid.h | 10 +- src/coreclr/inc/readytoruninstructionset.h | 13 +- src/coreclr/jit/codegencommon.cpp | 99 +- src/coreclr/jit/codegenxarch.cpp | 84 +- src/coreclr/jit/compiler.cpp | 90 +- src/coreclr/jit/compiler.h | 149 +-- src/coreclr/jit/decomposelongs.cpp | 27 +- src/coreclr/jit/emit.cpp | 4 +- src/coreclr/jit/gentree.cpp | 896 +++++++----------- src/coreclr/jit/gentree.h | 4 +- src/coreclr/jit/hwintrinsic.cpp | 155 +-- src/coreclr/jit/hwintrinsic.h | 20 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 190 ++-- src/coreclr/jit/hwintrinsiclistxarch.h | 814 +++++----------- src/coreclr/jit/hwintrinsicxarch.cpp | 458 ++++----- src/coreclr/jit/importer.cpp | 2 +- src/coreclr/jit/importercalls.cpp | 69 +- src/coreclr/jit/instr.cpp | 2 +- src/coreclr/jit/jitconfigvalues.h | 28 +- src/coreclr/jit/lclmorph.cpp | 2 +- src/coreclr/jit/lower.cpp | 2 +- src/coreclr/jit/lowerxarch.cpp | 583 ++++-------- src/coreclr/jit/lsraxarch.cpp | 57 +- src/coreclr/jit/morph.cpp | 8 +- src/coreclr/jit/simd.cpp | 4 +- src/coreclr/jit/valuenum.cpp | 5 +- .../Compiler/HardwareIntrinsicHelpers.cs | 49 +- .../Common/Compiler/InstructionSetSupport.cs | 28 +- .../tools/Common/InstructionSetHelpers.cs | 16 +- .../Runtime/ReadyToRunInstructionSet.cs | 13 +- .../Runtime/ReadyToRunInstructionSetHelper.cs | 40 +- .../JitInterface/CorInfoInstructionSet.cs | 727 ++++---------- .../ThunkGenerator/InstructionSetDesc.txt | 149 ++- .../ThunkGenerator/InstructionSetGenerator.cs | 50 +- src/coreclr/vm/codeman.cpp | 35 +- src/native/minipal/cpufeatures.c | 46 +- src/native/minipal/cpufeatures.h | 9 +- .../HardwareIntrinsics/X86/X86Base/CpuId.cs | 37 +- 
.../HardwareIntrinsics/X64Avx512.csproj | 2 +- .../X64Avx512_VectorT512.csproj | 2 +- .../X86/CpuId_R2R_Avx512.csproj | 2 +- 43 files changed, 1777 insertions(+), 3677 deletions(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 8571280acc06c2..dc7570512bd0fc 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -682,16 +682,8 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntri RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAES, W("EnableAES"), 1, "Allows AES+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX, W("EnableAVX"), 1, "Allows AVX+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX2, W("EnableAVX2"), 1, "Allows AVX2+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512BW, W("EnableAVX512BW"), 1, "Allows AVX512BW+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512BW_VL, W("EnableAVX512BW_VL"), 1, "Allows AVX512BW_VL+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512CD, W("EnableAVX512CD"), 1, "Allows AVX512CD+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512CD_VL, W("EnableAVX512CD_VL"), 1, "Allows AVX512CD_VL+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512DQ, W("EnableAVX512DQ"), 1, "Allows AVX512DQ+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512DQ_VL, W("EnableAVX512DQ_VL"), 1, "Allows AVX512DQ_VL+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F, W("EnableAVX512F"), 1, "Allows AVX512F+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F_VL, W("EnableAVX512F_VL"), 1, "Allows AVX512F_VL+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512, W("EnableAVX512"), 1, "Allows AVX512+ hardware 
intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VBMI, W("EnableAVX512VBMI"), 1, "Allows AVX512VBMI+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VBMI_VL, W("EnableAVX512VBMI_VL"), 1, "Allows AVX512VBMI_VL+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX10v1, W("EnableAVX10v1"), 1, "Allows AVX10v1+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX10v2, W("EnableAVX10v2"), 0, "Allows AVX10v2+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI, W("EnableAVXVNNI"), 1, "Allows AVXVNNI+ hardware intrinsics to be disabled") @@ -732,6 +724,24 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableRiscV64Zba, W("EnableRiscV64 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableRiscV64Zbb, W("EnableRiscV64Zbb"), 1, "Allows RiscV64 Zbb hardware intrinsics to be disabled") #endif +// +// These are "legacy" ISA enablement knobs that aren't recommended for use anymore +// +#if defined(TARGET_AMD64) || defined(TARGET_X86) +// These have been superceded by EnableAVX512 as you get all of them or none of them +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512BW, W("EnableAVX512BW"), 1, "Allows AVX512BW+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512BW_VL, W("EnableAVX512BW_VL"), 1, "Allows AVX512BW_VL+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512CD, W("EnableAVX512CD"), 1, "Allows AVX512CD+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512CD_VL, W("EnableAVX512CD_VL"), 1, "Allows AVX512CD_VL+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512DQ, W("EnableAVX512DQ"), 1, "Allows AVX512DQ+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512DQ_VL, W("EnableAVX512DQ_VL"), 1, "Allows AVX512DQ_VL+ hardware intrinsics to be disabled") 
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F, W("EnableAVX512F"), 1, "Allows AVX512F+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F_VL, W("EnableAVX512F_VL"), 1, "Allows AVX512F_VL+ hardware intrinsics to be disabled") + +// These have been superceded by EnableAVX512VBMI as you get all of them or none of them +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VBMI_VL, W("EnableAVX512VBMI_VL"), 1, "Allows AVX512VBMI_VL+ hardware intrinsics to be disabled") +#endif + /// /// Uncategorized /// diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index cc446a6571f586..73676c6e84dfc3 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -74,56 +74,40 @@ enum CORINFO_InstructionSet InstructionSet_AVXVNNI=22, InstructionSet_MOVBE=23, InstructionSet_X86Serialize=24, - InstructionSet_EVEX=25, - InstructionSet_AVX512F=26, - InstructionSet_AVX512F_VL=27, - InstructionSet_AVX512BW=28, - InstructionSet_AVX512BW_VL=29, - InstructionSet_AVX512CD=30, - InstructionSet_AVX512CD_VL=31, - InstructionSet_AVX512DQ=32, - InstructionSet_AVX512DQ_VL=33, - InstructionSet_AVX512VBMI=34, - InstructionSet_AVX512VBMI_VL=35, - InstructionSet_AVX10v1=36, - InstructionSet_AVX10v1_V512=37, - InstructionSet_VectorT128=38, - InstructionSet_VectorT256=39, - InstructionSet_VectorT512=40, - InstructionSet_APX=41, - InstructionSet_AVX10v2=42, - InstructionSet_AVX10v2_V512=43, - InstructionSet_GFNI=44, - InstructionSet_GFNI_V256=45, - InstructionSet_GFNI_V512=46, - InstructionSet_X86Base_X64=47, - InstructionSet_SSE_X64=48, - InstructionSet_SSE2_X64=49, - InstructionSet_SSE3_X64=50, - InstructionSet_SSSE3_X64=51, - InstructionSet_SSE41_X64=52, - InstructionSet_SSE42_X64=53, - InstructionSet_AVX_X64=54, - InstructionSet_AVX2_X64=55, - InstructionSet_AES_X64=56, - InstructionSet_BMI1_X64=57, - InstructionSet_BMI2_X64=58, - InstructionSet_FMA_X64=59, - InstructionSet_LZCNT_X64=60, 
- InstructionSet_PCLMULQDQ_X64=61, - InstructionSet_POPCNT_X64=62, - InstructionSet_AVXVNNI_X64=63, - InstructionSet_X86Serialize_X64=64, - InstructionSet_AVX512F_X64=65, - InstructionSet_AVX512BW_X64=66, - InstructionSet_AVX512CD_X64=67, - InstructionSet_AVX512DQ_X64=68, - InstructionSet_AVX512VBMI_X64=69, - InstructionSet_AVX10v1_X64=70, - InstructionSet_AVX10v1_V512_X64=71, - InstructionSet_AVX10v2_X64=72, - InstructionSet_AVX10v2_V512_X64=73, - InstructionSet_GFNI_X64=74, + InstructionSet_AVX512=25, + InstructionSet_AVX512VBMI=26, + InstructionSet_AVX10v1=27, + InstructionSet_VectorT128=28, + InstructionSet_VectorT256=29, + InstructionSet_VectorT512=30, + InstructionSet_APX=31, + InstructionSet_AVX10v2=32, + InstructionSet_GFNI=33, + InstructionSet_GFNI_V256=34, + InstructionSet_GFNI_V512=35, + InstructionSet_X86Base_X64=36, + InstructionSet_SSE_X64=37, + InstructionSet_SSE2_X64=38, + InstructionSet_SSE3_X64=39, + InstructionSet_SSSE3_X64=40, + InstructionSet_SSE41_X64=41, + InstructionSet_SSE42_X64=42, + InstructionSet_AVX_X64=43, + InstructionSet_AVX2_X64=44, + InstructionSet_AES_X64=45, + InstructionSet_BMI1_X64=46, + InstructionSet_BMI2_X64=47, + InstructionSet_FMA_X64=48, + InstructionSet_LZCNT_X64=49, + InstructionSet_PCLMULQDQ_X64=50, + InstructionSet_POPCNT_X64=51, + InstructionSet_AVXVNNI_X64=52, + InstructionSet_X86Serialize_X64=53, + InstructionSet_AVX512_X64=54, + InstructionSet_AVX512VBMI_X64=55, + InstructionSet_AVX10v1_X64=56, + InstructionSet_AVX10v2_X64=57, + InstructionSet_GFNI_X64=58, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -150,56 +134,40 @@ enum CORINFO_InstructionSet InstructionSet_AVXVNNI=22, InstructionSet_MOVBE=23, InstructionSet_X86Serialize=24, - InstructionSet_EVEX=25, - InstructionSet_AVX512F=26, - InstructionSet_AVX512F_VL=27, - InstructionSet_AVX512BW=28, - InstructionSet_AVX512BW_VL=29, - InstructionSet_AVX512CD=30, - InstructionSet_AVX512CD_VL=31, - InstructionSet_AVX512DQ=32, - 
InstructionSet_AVX512DQ_VL=33, - InstructionSet_AVX512VBMI=34, - InstructionSet_AVX512VBMI_VL=35, - InstructionSet_AVX10v1=36, - InstructionSet_AVX10v1_V512=37, - InstructionSet_VectorT128=38, - InstructionSet_VectorT256=39, - InstructionSet_VectorT512=40, - InstructionSet_APX=41, - InstructionSet_AVX10v2=42, - InstructionSet_AVX10v2_V512=43, - InstructionSet_GFNI=44, - InstructionSet_GFNI_V256=45, - InstructionSet_GFNI_V512=46, - InstructionSet_X86Base_X64=47, - InstructionSet_SSE_X64=48, - InstructionSet_SSE2_X64=49, - InstructionSet_SSE3_X64=50, - InstructionSet_SSSE3_X64=51, - InstructionSet_SSE41_X64=52, - InstructionSet_SSE42_X64=53, - InstructionSet_AVX_X64=54, - InstructionSet_AVX2_X64=55, - InstructionSet_AES_X64=56, - InstructionSet_BMI1_X64=57, - InstructionSet_BMI2_X64=58, - InstructionSet_FMA_X64=59, - InstructionSet_LZCNT_X64=60, - InstructionSet_PCLMULQDQ_X64=61, - InstructionSet_POPCNT_X64=62, - InstructionSet_AVXVNNI_X64=63, - InstructionSet_X86Serialize_X64=64, - InstructionSet_AVX512F_X64=65, - InstructionSet_AVX512BW_X64=66, - InstructionSet_AVX512CD_X64=67, - InstructionSet_AVX512DQ_X64=68, - InstructionSet_AVX512VBMI_X64=69, - InstructionSet_AVX10v1_X64=70, - InstructionSet_AVX10v1_V512_X64=71, - InstructionSet_AVX10v2_X64=72, - InstructionSet_AVX10v2_V512_X64=73, - InstructionSet_GFNI_X64=74, + InstructionSet_AVX512=25, + InstructionSet_AVX512VBMI=26, + InstructionSet_AVX10v1=27, + InstructionSet_VectorT128=28, + InstructionSet_VectorT256=29, + InstructionSet_VectorT512=30, + InstructionSet_APX=31, + InstructionSet_AVX10v2=32, + InstructionSet_GFNI=33, + InstructionSet_GFNI_V256=34, + InstructionSet_GFNI_V512=35, + InstructionSet_X86Base_X64=36, + InstructionSet_SSE_X64=37, + InstructionSet_SSE2_X64=38, + InstructionSet_SSE3_X64=39, + InstructionSet_SSSE3_X64=40, + InstructionSet_SSE41_X64=41, + InstructionSet_SSE42_X64=42, + InstructionSet_AVX_X64=43, + InstructionSet_AVX2_X64=44, + InstructionSet_AES_X64=45, + InstructionSet_BMI1_X64=46, + 
InstructionSet_BMI2_X64=47, + InstructionSet_FMA_X64=48, + InstructionSet_LZCNT_X64=49, + InstructionSet_PCLMULQDQ_X64=50, + InstructionSet_POPCNT_X64=51, + InstructionSet_AVXVNNI_X64=52, + InstructionSet_X86Serialize_X64=53, + InstructionSet_AVX512_X64=54, + InstructionSet_AVX512VBMI_X64=55, + InstructionSet_AVX10v1_X64=56, + InstructionSet_AVX10v2_X64=57, + InstructionSet_GFNI_X64=58, #endif // TARGET_X86 }; @@ -355,24 +323,14 @@ struct CORINFO_InstructionSetFlags AddInstructionSet(InstructionSet_AVXVNNI_X64); if (HasInstructionSet(InstructionSet_X86Serialize)) AddInstructionSet(InstructionSet_X86Serialize_X64); - if (HasInstructionSet(InstructionSet_AVX512F)) - AddInstructionSet(InstructionSet_AVX512F_X64); - if (HasInstructionSet(InstructionSet_AVX512BW)) - AddInstructionSet(InstructionSet_AVX512BW_X64); - if (HasInstructionSet(InstructionSet_AVX512CD)) - AddInstructionSet(InstructionSet_AVX512CD_X64); - if (HasInstructionSet(InstructionSet_AVX512DQ)) - AddInstructionSet(InstructionSet_AVX512DQ_X64); + if (HasInstructionSet(InstructionSet_AVX512)) + AddInstructionSet(InstructionSet_AVX512_X64); if (HasInstructionSet(InstructionSet_AVX512VBMI)) AddInstructionSet(InstructionSet_AVX512VBMI_X64); if (HasInstructionSet(InstructionSet_AVX10v1)) AddInstructionSet(InstructionSet_AVX10v1_X64); - if (HasInstructionSet(InstructionSet_AVX10v1_V512)) - AddInstructionSet(InstructionSet_AVX10v1_V512_X64); if (HasInstructionSet(InstructionSet_AVX10v2)) AddInstructionSet(InstructionSet_AVX10v2_X64); - if (HasInstructionSet(InstructionSet_AVX10v2_V512)) - AddInstructionSet(InstructionSet_AVX10v2_V512_X64); if (HasInstructionSet(InstructionSet_GFNI)) AddInstructionSet(InstructionSet_GFNI_X64); #endif // TARGET_AMD64 @@ -539,22 +497,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_X86Serialize); if (resultflags.HasInstructionSet(InstructionSet_X86Serialize_X64) && 
!resultflags.HasInstructionSet(InstructionSet_X86Serialize)) resultflags.RemoveInstructionSet(InstructionSet_X86Serialize_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_X64); + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_AVX512_X64)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512_X64); if 
(resultflags.HasInstructionSet(InstructionSet_AVX512VBMI) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_X64)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) @@ -563,18 +509,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512_X64); if (resultflags.HasInstructionSet(InstructionSet_AVX10v2) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2_X64)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_X64); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512_X64)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512_X64) && !resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512_X64); if (resultflags.HasInstructionSet(InstructionSet_GFNI) && !resultflags.HasInstructionSet(InstructionSet_GFNI_X64)) resultflags.RemoveInstructionSet(InstructionSet_GFNI); if 
(resultflags.HasInstructionSet(InstructionSet_GFNI_X64) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) @@ -607,38 +545,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_LZCNT); if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) resultflags.RemoveInstructionSet(InstructionSet_MOVBE); - if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) - resultflags.RemoveInstructionSet(InstructionSet_EVEX); - if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_FMA)) - resultflags.RemoveInstructionSet(InstructionSet_EVEX); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_EVEX)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && 
!resultflags.HasInstructionSet(InstructionSet_AVX512CD)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_FMA)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet_AES) && 
!resultflags.HasInstructionSet(InstructionSet_SSE2)) resultflags.RemoveInstructionSet(InstructionSet_AES); if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) @@ -649,7 +561,7 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V256); if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256)) resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); - if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); @@ -663,47 +575,25 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256); if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); - if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_EVEX)) + if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) + 
resultflags.RemoveInstructionSet(InstructionSet_AVX10v1); + if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v1); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if 
(resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); if (resultflags.HasInstructionSet(InstructionSet_AVX10v2) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_SSE)) resultflags.RemoveInstructionSet(InstructionSet_Vector128); if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) resultflags.RemoveInstructionSet(InstructionSet_Vector256); - if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_Vector512); if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) resultflags.RemoveInstructionSet(InstructionSet_VectorT128); if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_VectorT256); - if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && 
!resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_VectorT512); #endif // TARGET_AMD64 #ifdef TARGET_X86 @@ -735,38 +625,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_LZCNT); if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_SSE42)) resultflags.RemoveInstructionSet(InstructionSet_MOVBE); - if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) - resultflags.RemoveInstructionSet(InstructionSet_EVEX); - if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_FMA)) - resultflags.RemoveInstructionSet(InstructionSet_EVEX); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_EVEX)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet_AVX512F_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512F_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && 
!resultflags.HasInstructionSet(InstructionSet_AVX512CD)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512) && !resultflags.HasInstructionSet(InstructionSet_FMA)) + resultflags.RemoveInstructionSet(InstructionSet_AVX512); + if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); - if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet_AES) && 
!resultflags.HasInstructionSet(InstructionSet_SSE2)) resultflags.RemoveInstructionSet(InstructionSet_AES); if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) @@ -777,7 +641,7 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V256); if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256)) resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); - if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI); @@ -791,47 +655,25 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_GFNI_V256); if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); - if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet_GFNI_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_GFNI_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_EVEX)) + if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) + 
resultflags.RemoveInstructionSet(InstructionSet_AVX10v1); + if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v1); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if 
(resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512); if (resultflags.HasInstructionSet(InstructionSet_AVX10v2) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); - if (resultflags.HasInstructionSet(InstructionSet_AVX10v2_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512)) - resultflags.RemoveInstructionSet(InstructionSet_AVX10v2_V512); if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_SSE)) resultflags.RemoveInstructionSet(InstructionSet_Vector128); if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) resultflags.RemoveInstructionSet(InstructionSet_Vector256); - if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_Vector512); if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) resultflags.RemoveInstructionSet(InstructionSet_VectorT128); if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_VectorT256); - if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && 
!resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_VectorT512); #endif // TARGET_X86 @@ -997,46 +839,18 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "X86Serialize"; case InstructionSet_X86Serialize_X64 : return "X86Serialize_X64"; - case InstructionSet_EVEX : - return "EVEX"; - case InstructionSet_AVX512F : - return "AVX512F"; - case InstructionSet_AVX512F_X64 : - return "AVX512F_X64"; - case InstructionSet_AVX512F_VL : - return "AVX512F_VL"; - case InstructionSet_AVX512BW : - return "AVX512BW"; - case InstructionSet_AVX512BW_X64 : - return "AVX512BW_X64"; - case InstructionSet_AVX512BW_VL : - return "AVX512BW_VL"; - case InstructionSet_AVX512CD : - return "AVX512CD"; - case InstructionSet_AVX512CD_X64 : - return "AVX512CD_X64"; - case InstructionSet_AVX512CD_VL : - return "AVX512CD_VL"; - case InstructionSet_AVX512DQ : - return "AVX512DQ"; - case InstructionSet_AVX512DQ_X64 : - return "AVX512DQ_X64"; - case InstructionSet_AVX512DQ_VL : - return "AVX512DQ_VL"; + case InstructionSet_AVX512 : + return "AVX512"; + case InstructionSet_AVX512_X64 : + return "AVX512_X64"; case InstructionSet_AVX512VBMI : return "AVX512VBMI"; case InstructionSet_AVX512VBMI_X64 : return "AVX512VBMI_X64"; - case InstructionSet_AVX512VBMI_VL : - return "AVX512VBMI_VL"; case InstructionSet_AVX10v1 : return "AVX10v1"; case InstructionSet_AVX10v1_X64 : return "AVX10v1_X64"; - case InstructionSet_AVX10v1_V512 : - return "AVX10v1_V512"; - case InstructionSet_AVX10v1_V512_X64 : - return "AVX10v1_V512_X64"; case InstructionSet_VectorT128 : return "VectorT128"; case InstructionSet_VectorT256 : @@ -1049,10 +863,6 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "AVX10v2"; case InstructionSet_AVX10v2_X64 : return "AVX10v2_X64"; - case InstructionSet_AVX10v2_V512 : - return "AVX10v2_V512"; - case InstructionSet_AVX10v2_V512_X64 : - return "AVX10v2_V512_X64"; case 
InstructionSet_GFNI : return "GFNI"; case InstructionSet_GFNI_X64 : @@ -1111,32 +921,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "MOVBE"; case InstructionSet_X86Serialize : return "X86Serialize"; - case InstructionSet_EVEX : - return "EVEX"; - case InstructionSet_AVX512F : - return "AVX512F"; - case InstructionSet_AVX512F_VL : - return "AVX512F_VL"; - case InstructionSet_AVX512BW : - return "AVX512BW"; - case InstructionSet_AVX512BW_VL : - return "AVX512BW_VL"; - case InstructionSet_AVX512CD : - return "AVX512CD"; - case InstructionSet_AVX512CD_VL : - return "AVX512CD_VL"; - case InstructionSet_AVX512DQ : - return "AVX512DQ"; - case InstructionSet_AVX512DQ_VL : - return "AVX512DQ_VL"; + case InstructionSet_AVX512 : + return "AVX512"; case InstructionSet_AVX512VBMI : return "AVX512VBMI"; - case InstructionSet_AVX512VBMI_VL : - return "AVX512VBMI_VL"; case InstructionSet_AVX10v1 : return "AVX10v1"; - case InstructionSet_AVX10v1_V512 : - return "AVX10v1_V512"; case InstructionSet_VectorT128 : return "VectorT128"; case InstructionSet_VectorT256 : @@ -1147,8 +937,6 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "APX"; case InstructionSet_AVX10v2 : return "AVX10v2"; - case InstructionSet_AVX10v2_V512 : - return "AVX10v2_V512"; case InstructionSet_GFNI : return "GFNI"; case InstructionSet_GFNI_V256 : @@ -1217,25 +1005,14 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE; case READYTORUN_INSTRUCTION_X86Serialize: return InstructionSet_X86Serialize; - case READYTORUN_INSTRUCTION_EVEX: return InstructionSet_EVEX; - case READYTORUN_INSTRUCTION_Avx512F: return InstructionSet_AVX512F; - case READYTORUN_INSTRUCTION_Avx512F_VL: return InstructionSet_AVX512F_VL; - case READYTORUN_INSTRUCTION_Avx512BW: return 
InstructionSet_AVX512BW; - case READYTORUN_INSTRUCTION_Avx512BW_VL: return InstructionSet_AVX512BW_VL; - case READYTORUN_INSTRUCTION_Avx512CD: return InstructionSet_AVX512CD; - case READYTORUN_INSTRUCTION_Avx512CD_VL: return InstructionSet_AVX512CD_VL; - case READYTORUN_INSTRUCTION_Avx512DQ: return InstructionSet_AVX512DQ; - case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL; + case READYTORUN_INSTRUCTION_Avx512: return InstructionSet_AVX512; case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI; - case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL; case READYTORUN_INSTRUCTION_Avx10v1: return InstructionSet_AVX10v1; - case READYTORUN_INSTRUCTION_Avx10v1_V512: return InstructionSet_AVX10v1_V512; case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX; case READYTORUN_INSTRUCTION_Avx10v2: return InstructionSet_AVX10v2; - case READYTORUN_INSTRUCTION_Avx10v2_V512: return InstructionSet_AVX10v2_V512; case READYTORUN_INSTRUCTION_Gfni: return InstructionSet_GFNI; case READYTORUN_INSTRUCTION_Gfni_V256: return InstructionSet_GFNI_V256; case READYTORUN_INSTRUCTION_Gfni_V512: return InstructionSet_GFNI_V512; @@ -1262,25 +1039,14 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE; case READYTORUN_INSTRUCTION_X86Serialize: return InstructionSet_X86Serialize; - case READYTORUN_INSTRUCTION_EVEX: return InstructionSet_EVEX; - case READYTORUN_INSTRUCTION_Avx512F: return InstructionSet_AVX512F; - case READYTORUN_INSTRUCTION_Avx512F_VL: return InstructionSet_AVX512F_VL; - case READYTORUN_INSTRUCTION_Avx512BW: return 
InstructionSet_AVX512BW; - case READYTORUN_INSTRUCTION_Avx512BW_VL: return InstructionSet_AVX512BW_VL; - case READYTORUN_INSTRUCTION_Avx512CD: return InstructionSet_AVX512CD; - case READYTORUN_INSTRUCTION_Avx512CD_VL: return InstructionSet_AVX512CD_VL; - case READYTORUN_INSTRUCTION_Avx512DQ: return InstructionSet_AVX512DQ; - case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL; + case READYTORUN_INSTRUCTION_Avx512: return InstructionSet_AVX512; case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI; - case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL; case READYTORUN_INSTRUCTION_Avx10v1: return InstructionSet_AVX10v1; - case READYTORUN_INSTRUCTION_Avx10v1_V512: return InstructionSet_AVX10v1_V512; case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX; case READYTORUN_INSTRUCTION_Avx10v2: return InstructionSet_AVX10v2; - case READYTORUN_INSTRUCTION_Avx10v2_V512: return InstructionSet_AVX10v2_V512; case READYTORUN_INSTRUCTION_Gfni: return InstructionSet_GFNI; case READYTORUN_INSTRUCTION_Gfni_V256: return InstructionSet_GFNI_V256; case READYTORUN_INSTRUCTION_Gfni_V512: return InstructionSet_GFNI_V512; diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index cec97de5e8d212..b887aebc6d5ec5 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -37,11 +37,11 @@ #include -constexpr GUID JITEEVersionIdentifier = { /* bffedb4e-ed47-4df3-8156-7ad8fc8521f1 */ - 0xbffedb4e, - 0xed47, - 0x4df3, - {0x81, 0x56, 0x7a, 0xd8, 0xfc, 0x85, 0x21, 0xf1} +constexpr GUID JITEEVersionIdentifier = { /* 124f7514-194f-4924-9d70-25d41ca17947 */ + 0x124f7514, + 0x194f, + 0x4924, + {0x9d, 0x70, 0x25, 0xd4, 0x1c, 0xa1, 0x79, 0x47} }; 
#endif // JIT_EE_VERSIONING_GUID_H diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index ea250df0125e47..592b602d1f762a 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -37,29 +37,18 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Rcpc=26, READYTORUN_INSTRUCTION_Movbe=27, READYTORUN_INSTRUCTION_X86Serialize=28, - READYTORUN_INSTRUCTION_Avx512F=29, - READYTORUN_INSTRUCTION_Avx512F_VL=30, - READYTORUN_INSTRUCTION_Avx512BW=31, - READYTORUN_INSTRUCTION_Avx512BW_VL=32, - READYTORUN_INSTRUCTION_Avx512CD=33, - READYTORUN_INSTRUCTION_Avx512CD_VL=34, - READYTORUN_INSTRUCTION_Avx512DQ=35, - READYTORUN_INSTRUCTION_Avx512DQ_VL=36, + READYTORUN_INSTRUCTION_Avx512=29, READYTORUN_INSTRUCTION_Avx512Vbmi=37, - READYTORUN_INSTRUCTION_Avx512Vbmi_VL=38, READYTORUN_INSTRUCTION_VectorT128=39, READYTORUN_INSTRUCTION_VectorT256=40, READYTORUN_INSTRUCTION_VectorT512=41, READYTORUN_INSTRUCTION_Rcpc2=42, READYTORUN_INSTRUCTION_Sve=43, READYTORUN_INSTRUCTION_Avx10v1=44, - READYTORUN_INSTRUCTION_Avx10v1_V512=46, - READYTORUN_INSTRUCTION_EVEX=47, READYTORUN_INSTRUCTION_Apx=48, READYTORUN_INSTRUCTION_Pclmulqdq_V256=49, READYTORUN_INSTRUCTION_Pclmulqdq_V512=50, READYTORUN_INSTRUCTION_Avx10v2=51, - READYTORUN_INSTRUCTION_Avx10v2_V512=52, READYTORUN_INSTRUCTION_Gfni=53, READYTORUN_INSTRUCTION_Gfni_V256=54, READYTORUN_INSTRUCTION_Gfni_V512=55, diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index a56f8edcc9e391..13a642edba04b2 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -1826,91 +1826,40 @@ void CodeGen::genGenerateMachineCode() printf(" for "); -#if defined(TARGET_X86) +#if defined(TARGET_XARCH) +#if defined(TARGET_64BIT) + printf("generic X64"); +#else + printf("generic X86"); +#endif + // Check ISA directly here instead of using // compOpportunisticallyDependsOn to avoid JIT-EE calls that could make 
// us miss in SPMI - if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_EVEX)) - { - if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_AVX10v2)) - { - if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_AVX10v2_V512)) - { - printf("X86 with AVX10.2/512"); - } - else - { - printf("X86 with AVX10.2/256"); - } - } - else if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_AVX10v1)) - { - if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_AVX10v1_V512)) - { - printf("X86 with AVX10.1/512"); - } - else - { - printf("X86 with AVX10.1/256"); - } - } - else - { - assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - printf("X86 with AVX512"); - } - } - else if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_AVX)) - { - printf("X86 with AVX"); - } - else + + if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_AVX)) { - printf("generic X86"); + printf(" + VEX"); } -#elif defined(TARGET_AMD64) - if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_EVEX)) - { - if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_AVX10v2)) - { - if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_AVX10v2_V512)) - { - printf("X64 with AVX10.2/512"); - } - else - { - printf("X64 with AVX10.2/256"); - } - } - else if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_AVX10v1)) - { - if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_AVX10v1_V512)) - { - printf("X64 with AVX10.1/512"); - } - else - { - printf("X64 with AVX10.1/256"); - } - } - else - { - assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - printf("X64 with AVX512"); - } - } - else if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_AVX)) + + if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_AVX512)) { - printf("X64 with AVX"); + printf(" + EVEX"); } - else + + if 
(compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_APX)) { - printf("generic X64"); + printf(" + APX"); } #elif defined(TARGET_ARM) printf("generic ARM"); #elif defined(TARGET_ARM64) printf("generic ARM64"); + + if (compiler->opts.compSupportsISA.HasInstructionSet(InstructionSet_Sve)) + { + printf(" + SVE"); + } #elif defined(TARGET_LOONGARCH64) printf("generic LOONGARCH64"); #elif defined(TARGET_RISCV64) @@ -1921,15 +1870,15 @@ void CodeGen::genGenerateMachineCode() if (TargetOS::IsWindows) { - printf(" - Windows"); + printf(" on Windows"); } else if (TargetOS::IsApplePlatform) { - printf(" - Apple"); + printf(" on Apple"); } else if (TargetOS::IsUnix) { - printf(" - Unix"); + printf(" on Unix"); } printf("\n"); diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 0fb9e384053533..3b15589856c0ac 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -522,7 +522,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t case TYP_SIMD64: { simd64_t val64 = *(simd64_t*)val; - if (val64.IsAllBitsSet() && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + if (val64.IsAllBitsSet() && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg, static_cast(0xFF), INS_OPTS_NONE); @@ -5723,12 +5723,8 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) case NI_SSE41_X64_Extract: case NI_AVX_ExtractVector128: case NI_AVX2_ExtractVector128: - case NI_AVX512F_ExtractVector128: - case NI_AVX512F_ExtractVector256: - case NI_AVX512DQ_ExtractVector128: - case NI_AVX512DQ_ExtractVector256: - case NI_AVX10v1_V512_ExtractVector128: - case NI_AVX10v1_V512_ExtractVector256: + case NI_AVX512_ExtractVector128: + case NI_AVX512_ExtractVector256: { // These intrinsics are "ins reg/mem, xmm, imm8" ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); @@ -5753,59 
+5749,35 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) break; } - case NI_AVX512F_ConvertToVector256Int32: - case NI_AVX512F_ConvertToVector256UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation: - case NI_AVX10v1_ConvertToVector128UInt32: - case NI_AVX10v1_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector128UInt32: + case NI_AVX512_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector256Int32: + case NI_AVX512_ConvertToVector256UInt32: { assert(!varTypeIsFloating(baseType)); FALLTHROUGH; } - case NI_AVX512F_ConvertToVector128Byte: - case NI_AVX512F_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_ConvertToVector128Int16: - case NI_AVX512F_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_ConvertToVector128SByte: - case NI_AVX512F_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_ConvertToVector128UInt16: - case NI_AVX512F_ConvertToVector128UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256Int16: - case NI_AVX512F_ConvertToVector256Int16WithSaturation: - case NI_AVX512F_ConvertToVector256Int32WithSaturation: - case NI_AVX512F_ConvertToVector256UInt16: - case NI_AVX512F_ConvertToVector256UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256UInt32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Byte: - case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int16: - case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int32: - case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128SByte: - case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128UInt16: - case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation: - case NI_AVX512BW_ConvertToVector256Byte: - case NI_AVX512BW_ConvertToVector256ByteWithSaturation: - case 
NI_AVX512BW_ConvertToVector256SByte: - case NI_AVX512BW_ConvertToVector256SByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128Byte: - case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128SByte: - case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Byte: - case NI_AVX10v1_ConvertToVector128ByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Int16: - case NI_AVX10v1_ConvertToVector128Int16WithSaturation: - case NI_AVX10v1_ConvertToVector128Int32: - case NI_AVX10v1_ConvertToVector128Int32WithSaturation: - case NI_AVX10v1_ConvertToVector128SByte: - case NI_AVX10v1_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128UInt16: - case NI_AVX10v1_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector128Byte: + case NI_AVX512_ConvertToVector128ByteWithSaturation: + case NI_AVX512_ConvertToVector128Int16: + case NI_AVX512_ConvertToVector128Int16WithSaturation: + case NI_AVX512_ConvertToVector128Int32: + case NI_AVX512_ConvertToVector128Int32WithSaturation: + case NI_AVX512_ConvertToVector128SByte: + case NI_AVX512_ConvertToVector128SByteWithSaturation: + case NI_AVX512_ConvertToVector128UInt16: + case NI_AVX512_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector256Byte: + case NI_AVX512_ConvertToVector256ByteWithSaturation: + case NI_AVX512_ConvertToVector256Int16: + case NI_AVX512_ConvertToVector256Int16WithSaturation: + case NI_AVX512_ConvertToVector256Int32WithSaturation: + case NI_AVX512_ConvertToVector256SByte: + case NI_AVX512_ConvertToVector256SByteWithSaturation: + case NI_AVX512_ConvertToVector256UInt16: + case NI_AVX512_ConvertToVector256UInt16WithSaturation: + case NI_AVX512_ConvertToVector256UInt32WithSaturation: { // These intrinsics are "ins reg/mem, xmm" ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); @@ -7411,7 +7383,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) 
noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG)))); // We shouldn't be seeing uint64 here as it should have been converted - // into a helper call by either front-end or lowering phase, unless we have AVX512F/AVX10.x + // into a helper call by either front-end or lowering phase, unless we have AVX512 // accelerated conversions. assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) || compiler->canUseEvexEncodingDebugOnly()); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index a4b3ae17b3d9f4..43c2fea00cd1f7 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2025,32 +2025,9 @@ void Compiler::compSetProcessor() instructionSetFlags.AddInstructionSet(InstructionSet_Vector256); } - if (instructionSetFlags.HasInstructionSet(InstructionSet_EVEX)) + if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512)) { - if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F)) - { - // x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL - // These have been shipped together historically and at the time of this writing - // there exists no hardware which doesn't support the entire feature set. To simplify - // the overall JIT implementation, we currently require the entire set of ISAs to be - // supported and disable AVX512 support otherwise. 
- - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL)); - - instructionSetFlags.AddInstructionSet(InstructionSet_Vector512); - } - else - { - // We shouldn't have EVEX enabled if neither AVX512 nor AVX10v1 are supported - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX10v1)); - } + instructionSetFlags.AddInstructionSet(InstructionSet_Vector512); } #elif defined(TARGET_ARM64) if (instructionSetFlags.HasInstructionSet(InstructionSet_AdvSimd)) @@ -6209,62 +6186,33 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, instructionSetFlags.AddInstructionSet(InstructionSet_AVXVNNI); } - if (JitConfig.EnableAVX512F() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512F); - instructionSetFlags.AddInstructionSet(InstructionSet_EVEX); - } - - if (JitConfig.EnableAVX512F_VL() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512F_VL); - } - - if (JitConfig.EnableAVX512BW() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512BW); - } - - if (JitConfig.EnableAVX512BW_VL() != 0) + if ((JitConfig.EnableAVX512() != 0) && + (JitConfig.EnableAVX512F() != 0) && + (JitConfig.EnableAVX512F_VL() != 0) && + (JitConfig.EnableAVX512BW() != 0) && + (JitConfig.EnableAVX512BW_VL() != 0) && + (JitConfig.EnableAVX512CD() != 0) && + (JitConfig.EnableAVX512CD_VL() != 0) && + (JitConfig.EnableAVX512DQ() != 0) && + (JitConfig.EnableAVX512DQ_VL() != 0)) { - 
instructionSetFlags.AddInstructionSet(InstructionSet_AVX512BW_VL); + // These ISAs are grouped together and if any are disabled then + // you lose access to all of them. We recommend modern code just + // use EnableAVX512, but we continue checking the older knobs for + // back-compat + instructionSetFlags.AddInstructionSet(InstructionSet_AVX512); } - if (JitConfig.EnableAVX512CD() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512CD); - } - - if (JitConfig.EnableAVX512CD_VL() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512CD_VL); - } - - if (JitConfig.EnableAVX512DQ() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512DQ); - } - - if (JitConfig.EnableAVX512DQ_VL() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512DQ_VL); - } - - if (JitConfig.EnableAVX512VBMI() != 0) + if ((JitConfig.EnableAVX512VBMI() != 0) && + (JitConfig.EnableAVX512VBMI_VL() != 0)) { + // These ISAs are likewise grouped together instructionSetFlags.AddInstructionSet(InstructionSet_AVX512VBMI); } - if (JitConfig.EnableAVX512VBMI_VL() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_AVX512VBMI_VL); - } - if (JitConfig.EnableAVX10v1() != 0) { instructionSetFlags.AddInstructionSet(InstructionSet_AVX10v1); - instructionSetFlags.AddInstructionSet(InstructionSet_AVX10v1_V512); - instructionSetFlags.AddInstructionSet(InstructionSet_EVEX); } if (JitConfig.EnableAPX() != 0) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 70edd4d4ae2e57..8b3f435562b415 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9222,11 +9222,11 @@ class Compiler // X86.SSE: 16-byte Vector and Vector128 // X86.AVX: 16-byte Vector and Vector256 // X86.AVX2: 32-byte Vector and Vector256 - // X86.AVX512F: 32-byte Vector and Vector512 + // X86.AVX512: 32-byte Vector and Vector512 uint32_t getMaxVectorByteLength() const { #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) - 
if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { return ZMM_REGSIZE_BYTES; } @@ -9716,45 +9716,7 @@ class Compiler return opts.compSupportsISA.HasInstructionSet(isa); } - // Following cases should be taken into consideration when using the below APIs: - // InstructionSet_EVEX implies Avx10v1 -or- Avx512F+CD+DQ+BW+VL and can be used for 128-bit or 256-bit EVEX encoding - // instructions in these instruction sets InstructionSet_Avx10v1_V512 should never be queried directly, it is - // covered by querying Avx512* InstructionSet_Avx512F (and same for BW, CD, DQ) is only queried for 512-bit EVEX - // encoded instructions - // InstructionSet_Avx10v1 is only queried for cases like 128-bit/256-bit instructions that wouldn't be in - // F+CD+DQ+BW+VL (such as VBMI) and should appear with a corresponding query around AVX512*_VL (i.e. AVX512_VBMI_VL) - #ifdef DEBUG - //------------------------------------------------------------------------ - // IsBaselineVector256IsaSupportedDebugOnly - Does isa support exist for Vector256. - // - // Returns: - // `true` if AVX. - // - bool IsBaselineVector256IsaSupportedDebugOnly() const - { -#ifdef TARGET_XARCH - return compIsaSupportedDebugOnly(InstructionSet_AVX); -#else - return false; -#endif - } - - //------------------------------------------------------------------------ - // IsBaselineVector512IsaSupportedDebugOnly - Does isa support exist for Vector512. - // - // Returns: - // `true` if AVX512F, AVX512BW, AVX512CD, AVX512DQ, and AVX512VL are supported. - // - bool IsBaselineVector512IsaSupportedDebugOnly() const - { -#ifdef TARGET_XARCH - return compIsaSupportedDebugOnly(InstructionSet_AVX512F); -#else - return false; -#endif - } - //------------------------------------------------------------------------ // canUseEvexEncodingDebugOnly - Answer the question: Is Evex encoding supported on this target. 
// @@ -9764,64 +9726,13 @@ class Compiler bool canUseEvexEncodingDebugOnly() const { #ifdef TARGET_XARCH - return (compIsaSupportedDebugOnly(InstructionSet_EVEX)); -#else - return false; -#endif - } - - //------------------------------------------------------------------------ - // IsAvx10OrIsaSupportedDebugOnly - Answer the question: Is AVX10v1 or the given ISA supported. - // - // Returns: - // `true` if AVX10v1 or the given ISA is supported, `false` if not. - // - bool IsAvx10OrIsaSupportedDebugOnly(CORINFO_InstructionSet isa) const - { -#ifdef TARGET_XARCH - // For the below cases, check for evex encoding should be used. - assert(isa != InstructionSet_AVX512F || isa != InstructionSet_AVX512F_VL || isa != InstructionSet_AVX512BW || - isa != InstructionSet_AVX512BW_VL || isa != InstructionSet_AVX512CD || - isa != InstructionSet_AVX512CD_VL || isa != InstructionSet_AVX512DQ || - isa != InstructionSet_AVX512DQ_VL); - - return (compIsaSupportedDebugOnly(InstructionSet_AVX10v1) || compIsaSupportedDebugOnly(isa)); + return compIsaSupportedDebugOnly(InstructionSet_AVX512); #else return false; #endif } #endif // DEBUG - //------------------------------------------------------------------------ - // IsBaselineVector512IsaSupportedOpportunistically - Does opportunistic isa support exist for Vector512. - // - // Returns: - // `true` if AVX512F, AVX512BW, AVX512CD, AVX512DQ, and AVX512VL are supported. - // - bool IsBaselineVector512IsaSupportedOpportunistically() const - { -#ifdef TARGET_XARCH - return compOpportunisticallyDependsOn(InstructionSet_AVX512F); -#else - return false; -#endif - } - - //------------------------------------------------------------------------ - // IsAvx10OrIsaSupportedOpportunistically - Does opportunistic isa support exist for AVX10v1 or the given ISA. - // - // Returns: - // `true` if AVX10v1 or the given ISA is supported, `false` if not. 
- // - bool IsAvx10OrIsaSupportedOpportunistically(CORINFO_InstructionSet isa) const - { -#ifdef TARGET_XARCH - return (compOpportunisticallyDependsOn(InstructionSet_AVX10v1) || compOpportunisticallyDependsOn(isa)); -#else - return false; -#endif - } - bool canUseEmbeddedBroadcast() const { return JitConfig.EnableEmbeddedBroadcast(); @@ -9835,34 +9746,6 @@ class Compiler #ifdef TARGET_XARCH public: - //------------------------------------------------------------------------ - // compIsEvexOpportunisticallySupported - Checks for whether AVX10v1 or avx512InstructionSet is supported - // opportunistically. - // - // Returns: - // returns true if AVX10v1 or avx512InstructionSet is supported opportunistically and - // sets isV512Supported to true if AVX512F is supported, false otherwise. - // - bool compIsEvexOpportunisticallySupported(bool& isV512Supported, - CORINFO_InstructionSet avx512InstructionSet = InstructionSet_AVX512F) - { - assert(avx512InstructionSet == InstructionSet_AVX512F || avx512InstructionSet == InstructionSet_AVX512F_VL || - avx512InstructionSet == InstructionSet_AVX512BW || avx512InstructionSet == InstructionSet_AVX512BW_VL || - avx512InstructionSet == InstructionSet_AVX512CD || avx512InstructionSet == InstructionSet_AVX512CD_VL || - avx512InstructionSet == InstructionSet_AVX512DQ || avx512InstructionSet == InstructionSet_AVX512DQ_VL || - avx512InstructionSet == InstructionSet_AVX512VBMI || - avx512InstructionSet == InstructionSet_AVX512VBMI_VL); - - if (compOpportunisticallyDependsOn(avx512InstructionSet)) - { - isV512Supported = true; - return true; - } - - isV512Supported = false; - return compOpportunisticallyDependsOn(InstructionSet_AVX10v1); - } - bool canUseVexEncoding() const { return compOpportunisticallyDependsOn(InstructionSet_AVX); @@ -9876,7 +9759,7 @@ class Compiler // bool canUseEvexEncoding() const { - return (compOpportunisticallyDependsOn(InstructionSet_EVEX)); + return compOpportunisticallyDependsOn(InstructionSet_AVX512); } 
//------------------------------------------------------------------------ @@ -9902,25 +9785,7 @@ class Compiler #ifdef DEBUG // Using JitStressEVEXEncoding flag will force instructions which would // otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding - // This requires AVX512F, AVX512BW, AVX512CD, AVX512DQ, and AVX512VL support - - if (JitStressEvexEncoding() && IsBaselineVector512IsaSupportedOpportunistically()) - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F_VL)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW_VL)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512CD)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512CD_VL)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ_VL)); - - return true; - } - else if (JitStressEvexEncoding() && compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - return true; - } + return JitStressEvexEncoding() && canUseEvexEncoding(); #endif // DEBUG return false; @@ -11640,7 +11505,7 @@ class Compiler #if defined(TARGET_AMD64) private: // The following are for initializing register allocator "constants" defined in targetamd64.h - // that now depend upon runtime ISA information, e.g., the presence of AVX512F/VL, which increases + // that now depend upon runtime ISA information, e.g., the presence of AVX512, which increases // the number of SIMD (xmm, ymm, and zmm) registers from 16 to 32. // As only 64-bit xarch has the capability to have the additional registers, we limit the changes // to TARGET_AMD64 only. 
@@ -11705,7 +11570,7 @@ class Compiler #if defined(TARGET_XARCH) private: // The following are for initializing register allocator "constants" defined in targetamd64.h - // that now depend upon runtime ISA information, e.g., the presence of AVX512F/VL, which adds + // that now depend upon runtime ISA information, e.g., the presence of AVX512, which adds // 8 mask registers for use. // // Users of these values need to define four accessor functions: diff --git a/src/coreclr/jit/decomposelongs.cpp b/src/coreclr/jit/decomposelongs.cpp index ddc55e633292fa..12466cc8cb390c 100644 --- a/src/coreclr/jit/decomposelongs.cpp +++ b/src/coreclr/jit/decomposelongs.cpp @@ -596,18 +596,9 @@ GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use) CorInfoType baseIntegralType = cast->IsUnsigned() ? CORINFO_TYPE_ULONG : CORINFO_TYPE_LONG; assert(!cast->gtOverflow()); + assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512)); - if (m_compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ_VL)) - { - intrinsicId = (dstType == TYP_FLOAT) ? NI_AVX512DQ_VL_ConvertToVector128Single - : NI_AVX512DQ_VL_ConvertToVector128Double; - } - else - { - assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v1)); - intrinsicId = - (dstType == TYP_FLOAT) ? NI_AVX10v1_ConvertToVector128Single : NI_AVX10v1_ConvertToVector128Double; - } + intrinsicId = (dstType == TYP_FLOAT) ? 
NI_AVX512_ConvertToVector128Single : NI_AVX512_ConvertToVector128Double; GenTree* createScalar = m_compiler->gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, srcOp, baseIntegralType, 16); GenTree* convert = @@ -1799,7 +1790,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsic(LIR::Use& use) return DecomposeHWIntrinsicToScalar(use, hwintrinsicTree); } - case NI_EVEX_MoveMask: + case NI_AVX512_MoveMask: { return DecomposeHWIntrinsicMoveMask(use, hwintrinsicTree); } @@ -1995,7 +1986,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicToScalar(LIR::Use& use, GenTreeHWIn } //------------------------------------------------------------------------ -// DecomposeHWIntrinsicMoveMask: Decompose GT_HWINTRINSIC -- NI_EVEX_MoveMask +// DecomposeHWIntrinsicMoveMask: Decompose GT_HWINTRINSIC -- NI_AVX512_MoveMask // // Decompose a MoveMask(x) node on Vector512<*>. For: // @@ -2024,7 +2015,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicMoveMask(LIR::Use& use, GenTreeHWIn { assert(node == use.Def()); assert(varTypeIsLong(node)); - assert(node->GetHWIntrinsicId() == NI_EVEX_MoveMask); + assert(node->GetHWIntrinsicId() == NI_AVX512_MoveMask); GenTree* op1 = node->Op(1); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); @@ -2054,7 +2045,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicMoveMask(LIR::Use& use, GenTreeHWIn // Create: // loResult = GT_HWINTRINSIC{MoveMask}(simdTmpVar) - loResult = m_compiler->gtNewSimdHWIntrinsicNode(TYP_INT, simdTmpVar, NI_EVEX_MoveMask, simdBaseJitType, 32); + loResult = m_compiler->gtNewSimdHWIntrinsicNode(TYP_INT, simdTmpVar, NI_AVX512_MoveMask, simdBaseJitType, 32); Range().InsertBefore(node, loResult); simdTmpVar = m_compiler->gtNewLclLNode(simdTmpVarNum, simdTmpVar->TypeGet()); @@ -2067,11 +2058,11 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicMoveMask(LIR::Use& use, GenTreeHWIn GenTree* shiftIcon = m_compiler->gtNewIconNode(32, TYP_INT); Range().InsertBefore(node, shiftIcon); - simdTmpVar = m_compiler->gtNewSimdHWIntrinsicNode(TYP_MASK, 
simdTmpVar, shiftIcon, NI_EVEX_ShiftRightMask, + simdTmpVar = m_compiler->gtNewSimdHWIntrinsicNode(TYP_MASK, simdTmpVar, shiftIcon, NI_AVX512_ShiftRightMask, simdBaseJitType, 64); Range().InsertBefore(node, simdTmpVar); - hiResult = m_compiler->gtNewSimdHWIntrinsicNode(TYP_INT, simdTmpVar, NI_EVEX_MoveMask, simdBaseJitType, 32); + hiResult = m_compiler->gtNewSimdHWIntrinsicNode(TYP_INT, simdTmpVar, NI_AVX512_MoveMask, simdBaseJitType, 32); Range().InsertBefore(node, hiResult); } else @@ -2079,7 +2070,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicMoveMask(LIR::Use& use, GenTreeHWIn // Create: // loResult = GT_HWINTRINSIC{MoveMask}(op1) - loResult = m_compiler->gtNewSimdHWIntrinsicNode(TYP_INT, op1, NI_EVEX_MoveMask, simdBaseJitType, simdSize); + loResult = m_compiler->gtNewSimdHWIntrinsicNode(TYP_INT, op1, NI_AVX512_MoveMask, simdBaseJitType, simdSize); Range().InsertBefore(node, loResult); // Create: diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 5184c86e2e45f2..66039545e92a9d 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -8251,14 +8251,14 @@ void emitter::emitSimdConstCompressedLoad(simd_t* constValue, emitAttr attr, reg if ((dataSize == 64) && (constValue->v256[1] == constValue->v256[0])) { - assert(emitComp->IsBaselineVector512IsaSupportedDebugOnly()); + assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX512)); dataSize = 32; ins = INS_vbroadcastf32x8; } if ((dataSize == 32) && (constValue->v128[1] == constValue->v128[0])) { - assert(emitComp->IsBaselineVector256IsaSupportedDebugOnly()); + assert(emitComp->compIsaSupportedDebugOnly(InstructionSet_AVX)); dataSize = 16; ins = INS_vbroadcastf32x4; } diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 241f741eb1bd01..faae11cfec630c 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20310,14 +20310,14 @@ bool GenTree::isCommutativeHWIntrinsic() const return false; } - case NI_AVX512F_Max: - case 
NI_AVX512F_Min: + case NI_AVX512_Max: + case NI_AVX512_Min: { return !varTypeIsFloating(node->GetSimdBaseType()); } - case NI_AVX512F_Add: - case NI_AVX512F_Multiply: + case NI_AVX512_Add: + case NI_AVX512_Multiply: case NI_BMI2_MultiplyNoFlags: case NI_BMI2_X64_MultiplyNoFlags: { @@ -20347,18 +20347,16 @@ bool GenTree::isContainableHWIntrinsic() const case NI_SSE2_LoadAlignedVector128: case NI_SSE2_LoadScalarVector128: case NI_AVX_LoadAlignedVector256: - case NI_AVX512F_LoadAlignedVector512: + case NI_AVX512_LoadAlignedVector512: { // These loads are contained as part of a HWIntrinsic operation return true; } - case NI_AVX512F_ConvertToVector256Int32: - case NI_AVX512F_ConvertToVector256UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation: - case NI_AVX10v1_ConvertToVector128UInt32: - case NI_AVX10v1_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector128UInt32: + case NI_AVX512_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector256Int32: + case NI_AVX512_ConvertToVector256UInt32: { if (varTypeIsFloating(AsHWIntrinsic()->GetSimdBaseType())) { @@ -20382,54 +20380,28 @@ bool GenTree::isContainableHWIntrinsic() const case NI_AVX2_ConvertToInt32: case NI_AVX2_ConvertToUInt32: case NI_AVX2_ExtractVector128: - case NI_AVX512F_ExtractVector128: - case NI_AVX512F_ExtractVector256: - case NI_AVX512F_ConvertToVector128Byte: - case NI_AVX512F_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_ConvertToVector128Int16: - case NI_AVX512F_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_ConvertToVector128SByte: - case NI_AVX512F_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_ConvertToVector128UInt16: - case NI_AVX512F_ConvertToVector128UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256Int16: - case NI_AVX512F_ConvertToVector256Int16WithSaturation: - case NI_AVX512F_ConvertToVector256Int32WithSaturation: - case NI_AVX512F_ConvertToVector256UInt16: 
- case NI_AVX512F_ConvertToVector256UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256UInt32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Byte: - case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int16: - case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int32: - case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128SByte: - case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128UInt16: - case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation: - case NI_AVX512BW_ConvertToVector256Byte: - case NI_AVX512BW_ConvertToVector256ByteWithSaturation: - case NI_AVX512BW_ConvertToVector256SByte: - case NI_AVX512BW_ConvertToVector256SByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128Byte: - case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128SByte: - case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX512DQ_ExtractVector128: - case NI_AVX512DQ_ExtractVector256: - case NI_AVX10v1_ConvertToVector128Byte: - case NI_AVX10v1_ConvertToVector128ByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Int16: - case NI_AVX10v1_ConvertToVector128Int16WithSaturation: - case NI_AVX10v1_ConvertToVector128Int32: - case NI_AVX10v1_ConvertToVector128Int32WithSaturation: - case NI_AVX10v1_ConvertToVector128SByte: - case NI_AVX10v1_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128UInt16: - case NI_AVX10v1_ConvertToVector128UInt16WithSaturation: - case NI_AVX10v1_V512_ExtractVector128: - case NI_AVX10v1_V512_ExtractVector256: + case NI_AVX512_ConvertToVector128Byte: + case NI_AVX512_ConvertToVector128ByteWithSaturation: + case NI_AVX512_ConvertToVector128Int16: + case NI_AVX512_ConvertToVector128Int16WithSaturation: + case NI_AVX512_ConvertToVector128Int32: + case 
NI_AVX512_ConvertToVector128Int32WithSaturation: + case NI_AVX512_ConvertToVector128SByte: + case NI_AVX512_ConvertToVector128SByteWithSaturation: + case NI_AVX512_ConvertToVector128UInt16: + case NI_AVX512_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector256Byte: + case NI_AVX512_ConvertToVector256ByteWithSaturation: + case NI_AVX512_ConvertToVector256Int16: + case NI_AVX512_ConvertToVector256Int16WithSaturation: + case NI_AVX512_ConvertToVector256Int32WithSaturation: + case NI_AVX512_ConvertToVector256SByte: + case NI_AVX512_ConvertToVector256SByteWithSaturation: + case NI_AVX512_ConvertToVector256UInt16: + case NI_AVX512_ConvertToVector256UInt16WithSaturation: + case NI_AVX512_ConvertToVector256UInt32WithSaturation: + case NI_AVX512_ExtractVector128: + case NI_AVX512_ExtractVector256: { // These HWIntrinsic operations are contained as part of a store return true; @@ -20449,10 +20421,10 @@ bool GenTree::isContainableHWIntrinsic() const case NI_SSE3_LoadAndDuplicateToVector128: case NI_SSE3_MoveAndDuplicate: case NI_AVX_BroadcastScalarToVector128: - case NI_AVX2_BroadcastScalarToVector128: case NI_AVX_BroadcastScalarToVector256: + case NI_AVX2_BroadcastScalarToVector128: case NI_AVX2_BroadcastScalarToVector256: - case NI_AVX512F_BroadcastScalarToVector512: + case NI_AVX512_BroadcastScalarToVector512: { // These intrinsic operations are contained as part of the operand of embedded broadcast compatible // instruction @@ -20492,7 +20464,7 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp) switch (intrinsicId) { - case NI_EVEX_BlendVariableMask: + case NI_AVX512_BlendVariableMask: { GenTree* op2 = hwintrinsic->Op(2); @@ -20514,11 +20486,8 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp) return false; } - case NI_AVX512F_Fixup: - case NI_AVX512F_FixupScalar: - case NI_AVX512F_VL_Fixup: - case NI_AVX10v1_Fixup: - case NI_AVX10v1_FixupScalar: + case NI_AVX512_Fixup: + case NI_AVX512_FixupScalar: { // We are actually only RMW in the case where the 
lookup table // has any value that could result in `op1` being picked. So @@ -20546,7 +20515,7 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp) uint32_t count = simdSize / sizeof(uint32_t); uint32_t incSize = (simdBaseType == TYP_FLOAT) ? 1 : 2; - if (intrinsicId == NI_AVX512F_FixupScalar || intrinsicId == NI_AVX10v1_FixupScalar) + if (intrinsicId == NI_AVX512_FixupScalar) { // Upper elements come from op2 count = 1; @@ -20567,9 +20536,7 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp) return false; } - case NI_AVX512F_TernaryLogic: - case NI_AVX512F_VL_TernaryLogic: - case NI_AVX10v1_TernaryLogic: + case NI_AVX512_TernaryLogic: { // We may not be RMW depending on the control byte as there // are many operations that do not use all three inputs. @@ -20842,18 +20809,13 @@ GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType si if (simdBaseType == TYP_LONG) { - if (simdSize == 64) - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_Abs; - } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - intrinsic = NI_AVX10v1_Abs; + intrinsic = NI_AVX512_Abs; } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) + else { - intrinsic = NI_AVX512F_VL_Abs; + assert(simdSize != 64); } } else if (simdSize == 32) @@ -20863,17 +20825,8 @@ GenTree* Compiler::gtNewSimdAbsNode(var_types type, GenTree* op1, CorInfoType si } else if (simdSize == 64) { - if (simdBaseType == TYP_INT) - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_Abs; - } - else - { - assert(varTypeIsSmall(simdBaseType)); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); - intrinsic = NI_AVX512BW_Abs; - } + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_Abs; } else if (compOpportunisticallyDependsOn(InstructionSet_SSSE3)) { @@ -21215,8 +21168,8 @@ GenTree* 
Compiler::gtNewSimdBinOpNode( { if (simdBaseType == TYP_INT) { - assert(compOpportunisticallyDependsOn(InstructionSet_AVX) || - compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX) || + compIsaSupportedDebugOnly(InstructionSet_AVX512)); assert(simdSize == 16 || simdSize == 32); @@ -21236,32 +21189,31 @@ GenTree* Compiler::gtNewSimdBinOpNode( #if defined(TARGET_XARCH) if (varTypeIsByte(simdBaseType)) { - assert((simdSize != 64) || IsBaselineVector512IsaSupportedDebugOnly()); + assert((simdSize != 64) || compIsaSupportedDebugOnly(InstructionSet_AVX512)); CorInfoType widenedSimdBaseJitType; NamedIntrinsic widenIntrinsic; NamedIntrinsic narrowIntrinsic; var_types widenedType; unsigned widenedSimdSize; - bool isV512Supported = false; - if (simdSize == 32 && IsBaselineVector512IsaSupportedOpportunistically()) + if (simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - // Input is SIMD32 [U]Byte and AVX512BW is supported: + // Input is SIMD32 [U]Byte and AVX512 is supported: // - Widen inputs as SIMD64 [U]Short // - Multiply widened inputs (SIMD64 [U]Short) as widened product (SIMD64 [U]Short) // - Narrow widened product (SIMD64 [U]Short) as SIMD32 [U]Byte if (simdBaseType == TYP_BYTE) { widenedSimdBaseJitType = CORINFO_TYPE_SHORT; - widenIntrinsic = NI_AVX512BW_ConvertToVector512Int16; - narrowIntrinsic = NI_AVX512BW_ConvertToVector256SByte; + widenIntrinsic = NI_AVX512_ConvertToVector512Int16; + narrowIntrinsic = NI_AVX512_ConvertToVector256SByte; } else { widenedSimdBaseJitType = CORINFO_TYPE_USHORT; - widenIntrinsic = NI_AVX512BW_ConvertToVector512UInt16; - narrowIntrinsic = NI_AVX512BW_ConvertToVector256Byte; + widenIntrinsic = NI_AVX512_ConvertToVector512UInt16; + narrowIntrinsic = NI_AVX512_ConvertToVector256Byte; } widenedType = TYP_SIMD64; @@ -21285,9 +21237,9 @@ GenTree* Compiler::gtNewSimdBinOpNode( } else if (simdSize == 16 && compOpportunisticallyDependsOn(InstructionSet_AVX2)) 
{ - if (compIsEvexOpportunisticallySupported(isV512Supported)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - // Input is SIMD16 [U]Byte and AVX512BW_VL is supported: + // Input is SIMD16 [U]Byte and AVX512 is supported: // - Widen inputs as SIMD32 [U]Short // - Multiply widened inputs (SIMD32 [U]Short) as widened product (SIMD32 [U]Short) // - Narrow widened product (SIMD32 [U]Short) as SIMD16 [U]Byte @@ -21296,14 +21248,12 @@ GenTree* Compiler::gtNewSimdBinOpNode( if (simdBaseType == TYP_BYTE) { widenedSimdBaseJitType = CORINFO_TYPE_SHORT; - narrowIntrinsic = !isV512Supported ? NI_AVX10v1_ConvertToVector128SByte - : NI_AVX512BW_VL_ConvertToVector128SByte; + narrowIntrinsic = NI_AVX512_ConvertToVector128SByte; } else { widenedSimdBaseJitType = CORINFO_TYPE_USHORT; - narrowIntrinsic = !isV512Supported ? NI_AVX10v1_ConvertToVector128Byte - : NI_AVX512BW_VL_ConvertToVector128Byte; + narrowIntrinsic = NI_AVX512_ConvertToVector128Byte; } widenedType = TYP_SIMD32; @@ -21327,7 +21277,7 @@ GenTree* Compiler::gtNewSimdBinOpNode( } else { - // Input is SIMD16 [U]Byte and AVX512BW_VL is NOT supported (only AVX2 will be used): + // Input is SIMD16 [U]Byte and AVX512 is NOT supported (only AVX2 will be used): // - Widen inputs as SIMD32 [U]Short // - Multiply widened inputs (SIMD32 [U]Short) as widened product (SIMD32 [U]Short) // - Mask widened product (SIMD32 [U]Short) to select relevant bits @@ -21578,9 +21528,9 @@ GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType s } else if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); GenTree* op2 = gtNewIconNode(static_cast(FloatRoundingMode::ToPositiveInfinity)); - return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512F_RoundScale, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512_RoundScale, simdBaseJitType, simdSize); } else { @@ -21627,7 +21577,7 @@ 
GenTree* Compiler::gtNewSimdCvtMaskToVectorNode(var_types type, compMaskConvertUsed = true; #if defined(TARGET_XARCH) - return gtNewSimdHWIntrinsicNode(type, op1, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, NI_AVX512_ConvertMaskToVector, simdBaseJitType, simdSize); #elif defined(TARGET_ARM64) return gtNewSimdHWIntrinsicNode(type, op1, NI_Sve_ConvertMaskToVector, simdBaseJitType, simdSize); #else @@ -21655,13 +21605,11 @@ GenTree* Compiler::gtNewSimdCvtNode(var_types type, assert(IsBaselineSimdIsaSupportedDebugOnly()); #if defined(TARGET_XARCH) - assert(IsBaselineVector512IsaSupportedDebugOnly() || - (simdSize != 64 && compIsaSupportedDebugOnly(InstructionSet_AVX10v1)) || + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512) || ((simdTargetBaseType == TYP_INT) && ((simdSize == 16 && compIsaSupportedDebugOnly(InstructionSet_SSE41)) || (simdSize == 32 && compIsaSupportedDebugOnly(InstructionSet_AVX))))); GenTree* fixupVal; - bool isV512Supported = false; if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2)) { @@ -21669,23 +21617,19 @@ GenTree* Compiler::gtNewSimdCvtNode(var_types type, switch (simdTargetBaseType) { case TYP_INT: - cvtIntrinsic = (simdSize == 64) ? NI_AVX10v2_V512_ConvertToVectorInt32WithTruncationSaturation - : NI_AVX10v2_ConvertToVectorInt32WithTruncationSaturation; + cvtIntrinsic = NI_AVX10v2_ConvertToVectorInt32WithTruncationSaturation; break; case TYP_UINT: - cvtIntrinsic = (simdSize == 64) ? NI_AVX10v2_V512_ConvertToVectorUInt32WithTruncationSaturation - : NI_AVX10v2_ConvertToVectorUInt32WithTruncationSaturation; + cvtIntrinsic = NI_AVX10v2_ConvertToVectorUInt32WithTruncationSaturation; break; case TYP_LONG: - cvtIntrinsic = (simdSize == 64) ? 
NI_AVX10v2_V512_ConvertToVectorInt64WithTruncationSaturation - : NI_AVX10v2_ConvertToVectorInt64WithTruncationSaturation; + cvtIntrinsic = NI_AVX10v2_ConvertToVectorInt64WithTruncationSaturation; break; case TYP_ULONG: - cvtIntrinsic = (simdSize == 64) ? NI_AVX10v2_V512_ConvertToVectorUInt64WithTruncationSaturation - : NI_AVX10v2_ConvertToVectorUInt64WithTruncationSaturation; + cvtIntrinsic = NI_AVX10v2_ConvertToVectorUInt64WithTruncationSaturation; break; default: @@ -21695,7 +21639,7 @@ GenTree* Compiler::gtNewSimdCvtNode(var_types type, } return gtNewSimdHWIntrinsicNode(type, op1, cvtIntrinsic, simdSourceBaseJitType, simdSize); } - else if (compIsEvexOpportunisticallySupported(isV512Supported)) + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { /*Generate the control table for VFIXUPIMMSD/SS - For conversion to unsigned @@ -21721,17 +21665,8 @@ GenTree* Compiler::gtNewSimdCvtNode(var_types type, GenTree* tblCon = gtNewSimdCreateBroadcastNode(type, gtNewIconNode(iconVal), simdTargetBaseJitType, simdSize); // We need op1Clone to run fixup - GenTree* op1Clone = fgMakeMultiUse(&op1); - NamedIntrinsic fixupHwIntrinsicID; - - if (simdSize == 64) - { - fixupHwIntrinsicID = NI_AVX512F_Fixup; - } - else - { - fixupHwIntrinsicID = !isV512Supported ? 
NI_AVX10v1_Fixup : NI_AVX512F_VL_Fixup; - } + GenTree* op1Clone = fgMakeMultiUse(&op1); + NamedIntrinsic fixupHwIntrinsicID = NI_AVX512_Fixup; // run vfixupimmsd base on table and no flags reporting fixupVal = gtNewSimdHWIntrinsicNode(type, op1, op1Clone, tblCon, gtNewIconNode(0), fixupHwIntrinsicID, simdSourceBaseJitType, simdSize); @@ -21813,8 +21748,7 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, NamedIntrinsic hwIntrinsicID = NI_Illegal; #if defined(TARGET_XARCH) - assert(IsBaselineVector512IsaSupportedDebugOnly() || - (simdSize != 64 && compIsaSupportedDebugOnly(InstructionSet_AVX10v1)) || + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512) || ((simdTargetBaseType == TYP_INT) && ((simdSize == 16) || (simdSize == 32 && compIsaSupportedDebugOnly(InstructionSet_AVX))))); @@ -21830,7 +21764,7 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, { case 64: { - hwIntrinsicID = NI_AVX512F_ConvertToVector512Int32WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector512Int32WithTruncation; break; } @@ -21858,23 +21792,19 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, { case 64: { - hwIntrinsicID = NI_AVX512F_ConvertToVector512UInt32WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector512UInt32WithTruncation; break; } case 32: { - hwIntrinsicID = compOpportunisticallyDependsOn(InstructionSet_AVX10v1) - ? NI_AVX10v1_ConvertToVector256UInt32WithTruncation - : NI_AVX512F_VL_ConvertToVector256UInt32WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector256UInt32WithTruncation; break; } case 16: { - hwIntrinsicID = compOpportunisticallyDependsOn(InstructionSet_AVX10v1) - ? 
NI_AVX10v1_ConvertToVector128UInt32WithTruncation - : NI_AVX512F_VL_ConvertToVector128UInt32WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector128UInt32WithTruncation; break; } @@ -21900,23 +21830,19 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, { case 64: { - hwIntrinsicID = NI_AVX512DQ_ConvertToVector512Int64WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector512Int64WithTruncation; break; } case 32: { - hwIntrinsicID = compOpportunisticallyDependsOn(InstructionSet_AVX10v1) - ? NI_AVX10v1_ConvertToVector256Int64WithTruncation - : NI_AVX512DQ_VL_ConvertToVector256Int64WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector256Int64WithTruncation; break; } case 16: { - hwIntrinsicID = compOpportunisticallyDependsOn(InstructionSet_AVX10v1) - ? NI_AVX10v1_ConvertToVector128Int64WithTruncation - : NI_AVX512DQ_VL_ConvertToVector128Int64WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector128Int64WithTruncation; break; } @@ -21932,23 +21858,19 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, { case 64: { - hwIntrinsicID = NI_AVX512DQ_ConvertToVector512UInt64WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector512UInt64WithTruncation; break; } case 32: { - hwIntrinsicID = compOpportunisticallyDependsOn(InstructionSet_AVX10v1) - ? NI_AVX10v1_ConvertToVector256UInt64WithTruncation - : NI_AVX512DQ_VL_ConvertToVector256UInt64WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector256UInt64WithTruncation; break; } case 16: { - hwIntrinsicID = compOpportunisticallyDependsOn(InstructionSet_AVX10v1) - ? 
NI_AVX10v1_ConvertToVector128UInt64WithTruncation - : NI_AVX512DQ_VL_ConvertToVector128UInt64WithTruncation; + hwIntrinsicID = NI_AVX512_ConvertToVector128UInt64WithTruncation; break; } @@ -22052,7 +21974,7 @@ GenTree* Compiler::gtNewSimdCvtVectorToMaskNode(var_types type, compMaskConvertUsed = true; #if defined(TARGET_XARCH) - return gtNewSimdHWIntrinsicNode(TYP_MASK, op1, NI_EVEX_ConvertVectorToMask, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(TYP_MASK, op1, NI_AVX512_ConvertVectorToMask, simdBaseJitType, simdSize); #elif defined(TARGET_ARM64) // We use cmpne which requires an embedded mask. GenTree* trueMask = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); @@ -22394,7 +22316,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( } else if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); intrinsic = NI_Vector512_op_Equality; } else @@ -22423,7 +22345,7 @@ GenTree* Compiler::gtNewSimdCmpOpAllNode( } else if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); intrinsic = NI_Vector512_op_Equality; } else @@ -22543,7 +22465,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( } else if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); intrinsic = NI_Vector512_op_Inequality; } else @@ -22571,7 +22493,7 @@ GenTree* Compiler::gtNewSimdCmpOpAnyNode( { if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); intrinsic = NI_Vector512_op_Inequality; } else if (simdSize == 32) @@ -23301,9 +23223,9 @@ GenTree* Compiler::gtNewSimdFloorNode(var_types type, GenTree* op1, CorInfoType } else if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); GenTree* op2 = 
gtNewIconNode(static_cast(FloatRoundingMode::ToNegativeInfinity)); - return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512F_RoundScale, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512_RoundScale, simdBaseJitType, simdSize); } else { @@ -23350,8 +23272,8 @@ GenTree* Compiler::gtNewSimdFmaNode( #if defined(TARGET_XARCH) if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_FusedMultiplyAdd; + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_FusedMultiplyAdd; } else { @@ -24235,8 +24157,8 @@ GenTree* Compiler::gtNewSimdLoadAlignedNode(var_types type, if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_LoadAlignedVector512; + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_LoadAlignedVector512; } else if (simdSize == 32) { @@ -24315,11 +24237,9 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(var_types type, } else if (simdSize == 64) { - if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) - { - intrinsic = NI_AVX512F_LoadAlignedVector512NonTemporal; - isNonTemporal = true; - } + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_LoadAlignedVector512NonTemporal; + isNonTemporal = true; } else if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) { @@ -24386,7 +24306,7 @@ GenTree* Compiler::gtNewSimdMaxNode( { if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2)) { - NamedIntrinsic minMaxIntrinsic = (simdSize == 64) ? 
NI_AVX10v2_V512_MinMax : NI_AVX10v2_MinMax; + NamedIntrinsic minMaxIntrinsic = NI_AVX10v2_MinMax; return gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x05), minMaxIntrinsic, simdBaseJitType, simdSize); } @@ -24450,28 +24370,16 @@ GenTree* Compiler::gtNewSimdMaxNativeNode( { intrinsic = NI_AVX2_Max; } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - intrinsic = NI_AVX10v1_Max; - } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) - { - intrinsic = NI_AVX512F_VL_Max; + intrinsic = NI_AVX512_Max; } } } else if (simdSize == 64) { - if (varTypeIsSmall(simdBaseType)) - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); - intrinsic = NI_AVX512BW_Max; - } - else - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_Max; - } + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_Max; } else { @@ -24567,13 +24475,9 @@ GenTree* Compiler::gtNewSimdMaxNativeNode( case TYP_LONG: case TYP_ULONG: { - if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - intrinsic = NI_AVX10v1_Max; - } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - intrinsic = NI_AVX512F_VL_Max; + intrinsic = NI_AVX512_Max; } break; } @@ -24651,7 +24555,7 @@ GenTree* Compiler::gtNewSimdMinNode( { if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2)) { - NamedIntrinsic minMaxIntrinsic = (simdSize == 64) ? 
NI_AVX10v2_V512_MinMax : NI_AVX10v2_MinMax; + NamedIntrinsic minMaxIntrinsic = NI_AVX10v2_MinMax; return gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x04), minMaxIntrinsic, simdBaseJitType, simdSize); } @@ -24715,28 +24619,16 @@ GenTree* Compiler::gtNewSimdMinNativeNode( { intrinsic = NI_AVX2_Min; } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - intrinsic = NI_AVX10v1_Min; - } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) - { - intrinsic = NI_AVX512F_VL_Min; + intrinsic = NI_AVX512_Min; } } } else if (simdSize == 64) { - if (varTypeIsSmall(simdBaseType)) - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); - intrinsic = NI_AVX512BW_Min; - } - else - { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_Min; - } + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_Min; } else { @@ -24828,13 +24720,9 @@ GenTree* Compiler::gtNewSimdMinNativeNode( case TYP_LONG: case TYP_ULONG: { - if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - intrinsic = NI_AVX10v1_Min; - } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - intrinsic = NI_AVX512F_VL_Min; + intrinsic = NI_AVX512_Min; } break; } @@ -24910,12 +24798,10 @@ GenTree* Compiler::gtNewSimdNarrowNode( GenTree* tmp1; GenTree* tmp2; - bool isV512Supported = false; - #if defined(TARGET_XARCH) GenTree* tmp3; GenTree* tmp4; - if (compIsEvexOpportunisticallySupported(isV512Supported)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { // This is the same in principle to the other comments below, however due to // code formatting, its too long to reasonably display here. 
@@ -24932,12 +24818,11 @@ GenTree* Compiler::gtNewSimdNarrowNode( { if (simdSize == 64) { - intrinsicId = NI_AVX512BW_ConvertToVector256SByte; + intrinsicId = NI_AVX512_ConvertToVector256SByte; } else { - intrinsicId = - !isV512Supported ? NI_AVX10v1_ConvertToVector128SByte : NI_AVX512BW_VL_ConvertToVector128SByte; + intrinsicId = NI_AVX512_ConvertToVector128SByte; } opBaseJitType = CORINFO_TYPE_SHORT; @@ -24948,12 +24833,11 @@ GenTree* Compiler::gtNewSimdNarrowNode( { if (simdSize == 64) { - intrinsicId = NI_AVX512BW_ConvertToVector256Byte; + intrinsicId = NI_AVX512_ConvertToVector256Byte; } else { - intrinsicId = - !isV512Supported ? NI_AVX10v1_ConvertToVector128Byte : NI_AVX512BW_VL_ConvertToVector128Byte; + intrinsicId = NI_AVX512_ConvertToVector128Byte; } opBaseJitType = CORINFO_TYPE_USHORT; @@ -24964,12 +24848,11 @@ GenTree* Compiler::gtNewSimdNarrowNode( { if (simdSize == 64) { - intrinsicId = NI_AVX512F_ConvertToVector256Int16; + intrinsicId = NI_AVX512_ConvertToVector256Int16; } else { - intrinsicId = - !isV512Supported ? NI_AVX10v1_ConvertToVector128Int16 : NI_AVX512F_VL_ConvertToVector128Int16; + intrinsicId = NI_AVX512_ConvertToVector128Int16; } opBaseJitType = CORINFO_TYPE_INT; @@ -24980,12 +24863,11 @@ GenTree* Compiler::gtNewSimdNarrowNode( { if (simdSize == 64) { - intrinsicId = NI_AVX512F_ConvertToVector256UInt16; + intrinsicId = NI_AVX512_ConvertToVector256UInt16; } else { - intrinsicId = - !isV512Supported ? NI_AVX10v1_ConvertToVector128UInt16 : NI_AVX512F_VL_ConvertToVector128UInt16; + intrinsicId = NI_AVX512_ConvertToVector128UInt16; } opBaseJitType = CORINFO_TYPE_UINT; @@ -24996,12 +24878,11 @@ GenTree* Compiler::gtNewSimdNarrowNode( { if (simdSize == 64) { - intrinsicId = NI_AVX512F_ConvertToVector256Int32; + intrinsicId = NI_AVX512_ConvertToVector256Int32; } else { - intrinsicId = - !isV512Supported ? 
NI_AVX10v1_ConvertToVector128Int32 : NI_AVX512F_VL_ConvertToVector128Int32; + intrinsicId = NI_AVX512_ConvertToVector128Int32; } opBaseJitType = CORINFO_TYPE_LONG; @@ -25012,12 +24893,11 @@ GenTree* Compiler::gtNewSimdNarrowNode( { if (simdSize == 64) { - intrinsicId = NI_AVX512F_ConvertToVector256UInt32; + intrinsicId = NI_AVX512_ConvertToVector256UInt32; } else { - intrinsicId = - !isV512Supported ? NI_AVX10v1_ConvertToVector128UInt32 : NI_AVX512F_VL_ConvertToVector128UInt32; + intrinsicId = NI_AVX512_ConvertToVector128UInt32; } opBaseJitType = CORINFO_TYPE_ULONG; @@ -25028,7 +24908,7 @@ GenTree* Compiler::gtNewSimdNarrowNode( { if (simdSize == 64) { - intrinsicId = NI_AVX512F_ConvertToVector256Single; + intrinsicId = NI_AVX512_ConvertToVector256Single; } else if (simdSize == 32) { @@ -25435,9 +25315,9 @@ GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, CorInfoType } else if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); GenTree* op2 = gtNewIconNode(static_cast(FloatRoundingMode::ToNearestInteger)); - return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512F_RoundScale, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512_RoundScale, simdBaseJitType, simdSize); } else { @@ -25523,7 +25403,6 @@ GenTree* Compiler::gtNewSimdShuffleVariableNode( // TODO-XARCH-CQ: If we have known set/unset bits for the indices, we could further optimise many cases // below. 
- bool isV512Supported = false; if (simdSize == 64) { if (elementSize == 1) @@ -25536,27 +25415,27 @@ GenTree* Compiler::gtNewSimdShuffleVariableNode( } else if (elementSize == 2) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); // swap the operands to match the encoding requirements - retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512BW_PermuteVar32x16, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512_PermuteVar32x16, simdBaseJitType, simdSize); retNode->SetReverseOp(); } else if (elementSize == 4) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); // swap the operands to match the encoding requirements - retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512F_PermuteVar16x32, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512_PermuteVar16x32, simdBaseJitType, simdSize); retNode->SetReverseOp(); } else { assert(elementSize == 8); - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); // swap the operands to match the encoding requirements - retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512F_PermuteVar8x64, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512_PermuteVar8x64, simdBaseJitType, simdSize); retNode->SetReverseOp(); } } @@ -25569,27 +25448,18 @@ GenTree* Compiler::gtNewSimdShuffleVariableNode( // high bit on index gives 0 already canUseSignedComparisonHint = true; } - else if ((elementSize == 1) && (simdSize == 32) && - compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512VBMI_VL)) + else if ((elementSize == 1) && (simdSize == 32) && compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI)) { - NamedIntrinsic intrinsic = isV512Supported ? 
NI_AVX512VBMI_VL_PermuteVar32x8 : NI_AVX10v1_PermuteVar32x8; + NamedIntrinsic intrinsic = NI_AVX512VBMI_PermuteVar32x8; // swap the operands to match the encoding requirements retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, intrinsic, simdBaseJitType, simdSize); retNode->SetReverseOp(); } - else if ((elementSize == 2) && compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512BW_VL)) + else if ((elementSize == 2) && compOpportunisticallyDependsOn(InstructionSet_AVX512)) { assert((simdSize == 16) || (simdSize == 32)); - NamedIntrinsic intrinsic; - if (isV512Supported) - { - intrinsic = (simdSize == 16) ? NI_AVX512BW_VL_PermuteVar8x16 : NI_AVX512BW_VL_PermuteVar16x16; - } - else - { - intrinsic = (simdSize == 16) ? NI_AVX10v1_PermuteVar8x16 : NI_AVX10v1_PermuteVar16x16; - } + NamedIntrinsic intrinsic = (simdSize == 16) ? NI_AVX512_PermuteVar8x16 : NI_AVX512_PermuteVar16x16; // swap the operands to match the encoding requirements retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, intrinsic, simdBaseJitType, simdSize); @@ -25614,20 +25484,18 @@ GenTree* Compiler::gtNewSimdShuffleVariableNode( retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX_PermuteVar, CORINFO_TYPE_FLOAT, simdSize); } } - else if ((elementSize == 8) && (simdSize == 32) && - compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512F_VL)) + else if ((elementSize == 8) && (simdSize == 32) && compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - NamedIntrinsic intrinsic = isV512Supported ? 
NI_AVX512F_VL_PermuteVar4x64 : NI_AVX10v1_PermuteVar4x64; + NamedIntrinsic intrinsic = NI_AVX512_PermuteVar4x64; // swap the operands to match the encoding requirements retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, intrinsic, simdBaseJitType, simdSize); retNode->SetReverseOp(); } - else if ((elementSize == 8) && (simdSize == 16) && - compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512F_VL)) + else if ((elementSize == 8) && (simdSize == 16) && compOpportunisticallyDependsOn(InstructionSet_AVX512)) { GenTree* op1Copy = fgMakeMultiUse(&op1); // just use op1 again for the other variable - NamedIntrinsic intrinsic = isV512Supported ? NI_AVX512F_VL_PermuteVar2x64x2 : NI_AVX10v1_PermuteVar2x64x2; + NamedIntrinsic intrinsic = NI_AVX512_PermuteVar2x64x2; retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, op1Copy, intrinsic, simdBaseJitType, simdSize); } else @@ -26035,10 +25903,7 @@ GenTree* Compiler::gtNewSimdShuffleVariableNode( #if defined(TARGET_XARCH) // check if we have hardware accelerated unsigned comparison - bool hardwareAcceleratedUnsignedComparison = - (simdSize == 64) || - ((elementSize < 4) && compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512BW_VL)) || - ((elementSize >= 4) && compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512F_VL)); + bool hardwareAcceleratedUnsignedComparison = compOpportunisticallyDependsOn(InstructionSet_AVX512); // if the hardware doesn't support direct unsigned comparison, we attempt to use signed comparison if (!hardwareAcceleratedUnsignedComparison) @@ -26262,11 +26127,8 @@ GenTree* Compiler::gtNewSimdShuffleNode( if (simdSize == 32) { assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); - bool isV512Supported = false; - if ((varTypeIsByte(simdBaseType) && - (!compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512VBMI_VL))) || - (varTypeIsShort(simdBaseType) && - (!compIsEvexOpportunisticallySupported(isV512Supported, 
InstructionSet_AVX512BW_VL))) || + if ((varTypeIsByte(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI)) || + (varTypeIsShort(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512)) || // This condition is the condition for when we'd have to emit something slower than what we can do with // NI_AVX2_Shuffle directly: ((!crossLane) && (needsZero || (elementSize < 4) || ((elementSize == 4) && differsByLane)))) @@ -26440,23 +26302,17 @@ GenTree* Compiler::gtNewSimdShuffleNode( op2->AsVecCon()->gtSimdVal = vecCns; // swap the operands to match the encoding requirements - retNode = !isV512Supported ? gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX10v1_PermuteVar16x16, - simdBaseJitType, simdSize) - : gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512BW_VL_PermuteVar16x16, - simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512_PermuteVar16x16, simdBaseJitType, simdSize); } else if (elementSize == 1) { assert(crossLane); - assert(IsAvx10OrIsaSupportedDebugOnly(InstructionSet_AVX512VBMI_VL)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512VBMI)); op2 = gtNewVconNode(type); op2->AsVecCon()->gtSimdVal = vecCns; // swap the operands to match the encoding requirements - retNode = !isV512Supported ? gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX10v1_PermuteVar32x8, - simdBaseJitType, simdSize) - : gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512VBMI_VL_PermuteVar32x8, - simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512VBMI_PermuteVar32x8, simdBaseJitType, simdSize); } else { @@ -26487,7 +26343,8 @@ GenTree* Compiler::gtNewSimdShuffleNode( } else if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + if (!crossLane) { // if element size is 64-bit, try to use vshufpd instead of vpshufb. 
@@ -26501,7 +26358,7 @@ GenTree* Compiler::gtNewSimdShuffleNode( } op2 = gtNewIconNode(immediate); GenTree* op1Copy = fgMakeMultiUse(&op1); - return gtNewSimdHWIntrinsicNode(type, op1, op1Copy, op2, NI_AVX512F_Shuffle, CORINFO_TYPE_DOUBLE, + return gtNewSimdHWIntrinsicNode(type, op1, op1Copy, op2, NI_AVX512_Shuffle, CORINFO_TYPE_DOUBLE, simdSize); } @@ -26519,13 +26376,13 @@ GenTree* Compiler::gtNewSimdShuffleNode( { op2 = gtNewIconNode(immediate); GenTree* op1Copy = fgMakeMultiUse(&op1); - return gtNewSimdHWIntrinsicNode(type, op1, op1Copy, op2, NI_AVX512F_Shuffle, simdBaseJitType, + return gtNewSimdHWIntrinsicNode(type, op1, op1Copy, op2, NI_AVX512_Shuffle, simdBaseJitType, simdSize); } else { op2 = gtNewIconNode(immediate); - return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512F_Shuffle, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512_Shuffle, simdBaseJitType, simdSize); } } @@ -26533,7 +26390,7 @@ GenTree* Compiler::gtNewSimdShuffleNode( op2->AsVecCon()->gtSimdVal = vecCns; simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? 
CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; - return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512BW_Shuffle, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512_Shuffle, simdBaseJitType, simdSize); } else if (elementSize == 4) { @@ -26546,7 +26403,7 @@ GenTree* Compiler::gtNewSimdShuffleNode( op2->AsVecCon()->gtSimdVal = vecCns; // swap the operands to match the encoding requirements - retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512F_PermuteVar16x32, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512_PermuteVar16x32, simdBaseJitType, simdSize); } else if (elementSize == 2) { @@ -26559,7 +26416,7 @@ GenTree* Compiler::gtNewSimdShuffleNode( op2->AsVecCon()->gtSimdVal = vecCns; // swap the operands to match the encoding requirements - retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512BW_PermuteVar32x16, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512_PermuteVar32x16, simdBaseJitType, simdSize); } else if (elementSize == 1) { @@ -26583,7 +26440,7 @@ GenTree* Compiler::gtNewSimdShuffleNode( op2->AsVecCon()->gtSimdVal = vecCns; // swap the operands to match the encoding requirements - retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512F_PermuteVar8x64, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512_PermuteVar8x64, simdBaseJitType, simdSize); } assert(retNode != nullptr); @@ -26728,8 +26585,8 @@ GenTree* Compiler::gtNewSimdSqrtNode(var_types type, GenTree* op1, CorInfoType s } else if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_Sqrt; + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_Sqrt; } else if (simdBaseType == TYP_FLOAT) { @@ -26817,8 +26674,8 @@ GenTree* Compiler::gtNewSimdStoreAlignedNode(GenTree* op1, GenTree* op2, CorInfo } else if (simdSize == 64) { - 
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_StoreAligned; + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_StoreAligned; } else if (simdBaseType != TYP_FLOAT) { @@ -26875,8 +26732,8 @@ GenTree* Compiler::gtNewSimdStoreNonTemporalNode(GenTree* op1, if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - intrinsic = NI_AVX512F_StoreAlignedNonTemporal; + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); + intrinsic = NI_AVX512_StoreAlignedNonTemporal; } else if (simdSize == 32) { @@ -26925,7 +26782,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); GenTree* op1Dup = fgMakeMultiUse(&op1); op1 = gtNewSimdGetLowerNode(TYP_SIMD32, op1, simdBaseJitType, simdSize); @@ -26949,7 +26806,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si if (simdSize == 32) { - assert(IsBaselineVector256IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); GenTree* op1Dup = fgMakeMultiUse(&op1); op1 = gtNewSimdGetLowerNode(TYP_SIMD16, op1, simdBaseJitType, simdSize); @@ -27153,8 +27010,7 @@ GenTree* Compiler::gtNewSimdTernaryLogicNode(var_types type, CorInfoType simdBaseJitType, unsigned simdSize) { - assert(IsBaselineVector512IsaSupportedDebugOnly() || - ((simdSize != 64) && compIsaSupportedDebugOnly(InstructionSet_AVX10v1))); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); assert(varTypeIsSIMD(type)); assert(getSIMDTypeForSize(simdSize) == type); @@ -27174,22 +27030,7 @@ GenTree* Compiler::gtNewSimdTernaryLogicNode(var_types type, var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); assert(varTypeIsArithmetic(simdBaseType)); - NamedIntrinsic intrinsic = NI_Illegal; - - if (simdSize == 64) - { - intrinsic = 
NI_AVX512F_TernaryLogic; - } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - assert((simdSize == 16) || (simdSize == 32)); - intrinsic = NI_AVX10v1_TernaryLogic; - } - else - { - assert((simdSize == 16) || (simdSize == 32)); - intrinsic = NI_AVX512F_VL_TernaryLogic; - } + NamedIntrinsic intrinsic = NI_AVX512_TernaryLogic; return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, op4, intrinsic, simdBaseJitType, simdSize); } @@ -27223,7 +27064,7 @@ GenTree* Compiler::gtNewSimdToScalarNode(var_types type, GenTree* op1, CorInfoTy #ifdef TARGET_XARCH if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); intrinsic = NI_Vector512_ToScalar; } else if (simdSize == 32) @@ -27287,9 +27128,9 @@ GenTree* Compiler::gtNewSimdTruncNode(var_types type, GenTree* op1, CorInfoType } else if (simdSize == 64) { - assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); GenTree* op2 = gtNewIconNode(static_cast(FloatRoundingMode::ToZero)); - return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512F_RoundScale, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX512_RoundScale, simdBaseJitType, simdSize); } else { @@ -27431,7 +27272,7 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo #if defined(TARGET_XARCH) if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); tmp1 = gtNewSimdGetLowerNode(TYP_SIMD32, op1, simdBaseJitType, simdSize); @@ -27439,43 +27280,43 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo { case TYP_BYTE: { - intrinsic = NI_AVX512BW_ConvertToVector512Int16; + intrinsic = NI_AVX512_ConvertToVector512Int16; break; } case TYP_UBYTE: { - intrinsic = NI_AVX512BW_ConvertToVector512UInt16; + intrinsic = 
NI_AVX512_ConvertToVector512UInt16; break; } case TYP_SHORT: { - intrinsic = NI_AVX512F_ConvertToVector512Int32; + intrinsic = NI_AVX512_ConvertToVector512Int32; break; } case TYP_USHORT: { - intrinsic = NI_AVX512F_ConvertToVector512UInt32; + intrinsic = NI_AVX512_ConvertToVector512UInt32; break; } case TYP_INT: { - intrinsic = NI_AVX512F_ConvertToVector512Int64; + intrinsic = NI_AVX512_ConvertToVector512Int64; break; } case TYP_UINT: { - intrinsic = NI_AVX512F_ConvertToVector512UInt64; + intrinsic = NI_AVX512_ConvertToVector512UInt64; break; } case TYP_FLOAT: { - intrinsic = NI_AVX512F_ConvertToVector512Double; + intrinsic = NI_AVX512_ConvertToVector512Double; break; } @@ -27645,7 +27486,7 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfo #if defined(TARGET_XARCH) if (simdSize == 64) { - assert(IsBaselineVector512IsaSupportedDebugOnly()); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); tmp1 = gtNewSimdGetUpperNode(TYP_SIMD32, op1, simdBaseJitType, simdSize); @@ -27653,43 +27494,43 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfo { case TYP_BYTE: { - intrinsic = NI_AVX512BW_ConvertToVector512Int16; + intrinsic = NI_AVX512_ConvertToVector512Int16; break; } case TYP_UBYTE: { - intrinsic = NI_AVX512BW_ConvertToVector512UInt16; + intrinsic = NI_AVX512_ConvertToVector512UInt16; break; } case TYP_SHORT: { - intrinsic = NI_AVX512F_ConvertToVector512Int32; + intrinsic = NI_AVX512_ConvertToVector512Int32; break; } case TYP_USHORT: { - intrinsic = NI_AVX512F_ConvertToVector512UInt32; + intrinsic = NI_AVX512_ConvertToVector512UInt32; break; } case TYP_INT: { - intrinsic = NI_AVX512F_ConvertToVector512Int64; + intrinsic = NI_AVX512_ConvertToVector512Int64; break; } case TYP_UINT: { - intrinsic = NI_AVX512F_ConvertToVector512UInt64; + intrinsic = NI_AVX512_ConvertToVector512UInt64; break; } case TYP_FLOAT: { - intrinsic = NI_AVX512F_ConvertToVector512Double; + intrinsic = 
NI_AVX512_ConvertToVector512Double; break; } @@ -28537,7 +28378,7 @@ bool GenTreeHWIntrinsic::OperIsBroadcastScalar() const case NI_AVX_BroadcastScalarToVector256: case NI_SSE3_LoadAndDuplicateToVector128: case NI_SSE3_MoveAndDuplicate: - case NI_AVX512F_BroadcastScalarToVector512: + case NI_AVX512_BroadcastScalarToVector512: return true; default: return false; @@ -28595,86 +28436,64 @@ bool GenTreeHWIntrinsic::OperIsEmbRoundingEnabled() const switch (intrinsicId) { // these intrinsics only have the embedded rounding enabled implementation. - case NI_AVX512F_AddScalar: - case NI_AVX512F_DivideScalar: - case NI_AVX512F_MultiplyScalar: - case NI_AVX512F_SubtractScalar: - case NI_AVX512F_SqrtScalar: - case NI_AVX10v1_AddScalar: - case NI_AVX10v1_DivideScalar: - case NI_AVX10v1_MultiplyScalar: - case NI_AVX10v1_SubtractScalar: - case NI_AVX10v1_SqrtScalar: + case NI_AVX512_AddScalar: + case NI_AVX512_DivideScalar: + case NI_AVX512_MultiplyScalar: + case NI_AVX512_SubtractScalar: + case NI_AVX512_SqrtScalar: { return true; } - case NI_AVX512F_FusedMultiplyAdd: - case NI_AVX512F_FusedMultiplyAddScalar: - case NI_AVX512F_FusedMultiplyAddNegated: - case NI_AVX512F_FusedMultiplyAddNegatedScalar: - case NI_AVX512F_FusedMultiplyAddSubtract: - case NI_AVX512F_FusedMultiplySubtract: - case NI_AVX512F_FusedMultiplySubtractAdd: - case NI_AVX512F_FusedMultiplySubtractNegated: - case NI_AVX512F_FusedMultiplySubtractNegatedScalar: - case NI_AVX512F_FusedMultiplySubtractScalar: - case NI_AVX10v1_FusedMultiplyAddScalar: - case NI_AVX10v1_FusedMultiplyAddNegatedScalar: - case NI_AVX10v1_FusedMultiplySubtractScalar: - case NI_AVX10v1_FusedMultiplySubtractNegatedScalar: + case NI_AVX512_FusedMultiplyAdd: + case NI_AVX512_FusedMultiplyAddScalar: + case NI_AVX512_FusedMultiplyAddNegated: + case NI_AVX512_FusedMultiplyAddNegatedScalar: + case NI_AVX512_FusedMultiplyAddSubtract: + case NI_AVX512_FusedMultiplySubtract: + case NI_AVX512_FusedMultiplySubtractAdd: + case 
NI_AVX512_FusedMultiplySubtractNegated: + case NI_AVX512_FusedMultiplySubtractNegatedScalar: + case NI_AVX512_FusedMultiplySubtractScalar: { return numArgs == 4; } - case NI_AVX512F_Add: - case NI_AVX512F_Divide: - case NI_AVX512F_Multiply: - case NI_AVX512F_Subtract: + case NI_AVX512_Add: + case NI_AVX512_Divide: + case NI_AVX512_Multiply: + case NI_AVX512_Subtract: - case NI_AVX512F_Scale: - case NI_AVX512F_ScaleScalar: - case NI_AVX10v1_ScaleScalar: + case NI_AVX512_Scale: + case NI_AVX512_ScaleScalar: - case NI_AVX512F_ConvertScalarToVector128Single: + case NI_AVX512_ConvertScalarToVector128Single: #if defined(TARGET_AMD64) - case NI_AVX512F_X64_ConvertScalarToVector128Double: - case NI_AVX512F_X64_ConvertScalarToVector128Single: - case NI_AVX10v1_X64_ConvertScalarToVector128Double: - case NI_AVX10v1_X64_ConvertScalarToVector128Single: + case NI_AVX512_X64_ConvertScalarToVector128Double: + case NI_AVX512_X64_ConvertScalarToVector128Single: #endif // TARGET_AMD64 - case NI_AVX10v1_ConvertScalarToVector128Single: { return numArgs == 3; } - case NI_AVX512F_Sqrt: - case NI_AVX512F_ConvertToInt32: - case NI_AVX512F_ConvertToUInt32: - case NI_AVX512F_ConvertToVector256Int32: - case NI_AVX512F_ConvertToVector256Single: - case NI_AVX512F_ConvertToVector256UInt32: - case NI_AVX512F_ConvertToVector512Single: - case NI_AVX512F_ConvertToVector512UInt32: - case NI_AVX512F_ConvertToVector512Int32: + case NI_AVX512_ConvertToInt32: + case NI_AVX512_ConvertToUInt32: + case NI_AVX512_ConvertToVector256Int32: + case NI_AVX512_ConvertToVector256Single: + case NI_AVX512_ConvertToVector256UInt32: + case NI_AVX512_ConvertToVector512Double: + case NI_AVX512_ConvertToVector512Int32: + case NI_AVX512_ConvertToVector512Int64: + case NI_AVX512_ConvertToVector512Single: + case NI_AVX512_ConvertToVector512UInt32: + case NI_AVX512_ConvertToVector512UInt64: + case NI_AVX512_Sqrt: #if defined(TARGET_AMD64) - case NI_AVX512F_X64_ConvertToInt64: - case NI_AVX512F_X64_ConvertToUInt64: - case 
NI_AVX10v1_X64_ConvertToInt64: - case NI_AVX10v1_X64_ConvertToUInt64: + case NI_AVX512_X64_ConvertToInt64: + case NI_AVX512_X64_ConvertToUInt64: #endif // TARGET_AMD64 - case NI_AVX512DQ_ConvertToVector256Single: - case NI_AVX512DQ_ConvertToVector512Double: - case NI_AVX512DQ_ConvertToVector512Int64: - case NI_AVX512DQ_ConvertToVector512UInt64: - case NI_AVX10v1_ConvertToInt32: - case NI_AVX10v1_ConvertToUInt32: - case NI_AVX10v1_V512_ConvertToVector256Single: - case NI_AVX10v1_V512_ConvertToVector512Double: - case NI_AVX10v1_V512_ConvertToVector512Int64: - case NI_AVX10v1_V512_ConvertToVector512UInt64: - case NI_AVX10v2_V512_ConvertToSByteWithSaturationAndZeroExtendToInt32: - case NI_AVX10v2_V512_ConvertToByteWithSaturationAndZeroExtendToInt32: + case NI_AVX10v2_ConvertToSByteWithSaturationAndZeroExtendToInt32: + case NI_AVX10v2_ConvertToByteWithSaturationAndZeroExtendToInt32: { return numArgs == 2; } @@ -28992,10 +28811,8 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty case NI_SSE2_And: case NI_AVX_And: case NI_AVX2_And: - case NI_AVX512F_And: - case NI_AVX512DQ_And: - case NI_AVX10v1_V512_And: - case NI_EVEX_AndMask: + case NI_AVX512_And: + case NI_AVX512_AndMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_And: #endif @@ -29004,7 +28821,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_EVEX_NotMask: + case NI_AVX512_NotMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_Not: #endif @@ -29017,10 +28834,8 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty case NI_SSE2_Xor: case NI_AVX_Xor: case NI_AVX2_Xor: - case NI_AVX512F_Xor: - case NI_AVX512DQ_Xor: - case NI_AVX10v1_V512_Xor: - case NI_EVEX_XorMask: + case NI_AVX512_Xor: + case NI_AVX512_XorMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_Xor: #endif @@ -29033,10 +28848,8 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty case 
NI_SSE2_Or: case NI_AVX_Or: case NI_AVX2_Or: - case NI_AVX512F_Or: - case NI_AVX512DQ_Or: - case NI_AVX10v1_V512_Or: - case NI_EVEX_OrMask: + case NI_AVX512_Or: + case NI_AVX512_OrMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_Or: #endif @@ -29049,10 +28862,8 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty case NI_SSE2_AndNot: case NI_AVX_AndNot: case NI_AVX2_AndNot: - case NI_AVX512F_AndNot: - case NI_AVX512DQ_AndNot: - case NI_AVX10v1_V512_AndNot: - case NI_EVEX_AndNotMask: + case NI_AVX512_AndNot: + case NI_AVX512_AndNotMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_BitwiseClear: #endif @@ -29065,8 +28876,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty case NI_SSE2_Add: case NI_AVX_Add: case NI_AVX2_Add: - case NI_AVX512F_Add: - case NI_AVX512BW_Add: + case NI_AVX512_Add: #elif defined(TARGET_ARM64) case NI_AdvSimd_Add: case NI_AdvSimd_Arm64_Add: @@ -29078,8 +28888,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #if defined(TARGET_XARCH) case NI_SSE_AddScalar: case NI_SSE2_AddScalar: - case NI_AVX512F_AddScalar: - case NI_AVX10v1_AddScalar: + case NI_AVX512_AddScalar: { *isScalar = true; return GT_ADD; @@ -29101,7 +28910,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty case NI_SSE_Divide: case NI_SSE2_Divide: case NI_AVX_Divide: - case NI_AVX512F_Divide: + case NI_AVX512_Divide: #elif defined(TARGET_ARM64) case NI_AdvSimd_Arm64_Divide: #endif @@ -29112,8 +28921,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #if defined(TARGET_XARCH) case NI_SSE_DivideScalar: case NI_SSE2_DivideScalar: - case NI_AVX512F_DivideScalar: - case NI_AVX10v1_DivideScalar: + case NI_AVX512_DivideScalar: { *isScalar = true; return GT_DIV; @@ -29137,12 +28945,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty case NI_SSE41_MultiplyLow: case NI_AVX_Multiply: case 
NI_AVX2_MultiplyLow: - case NI_AVX512F_MultiplyLow: - case NI_AVX512BW_MultiplyLow: - case NI_AVX512DQ_MultiplyLow: - case NI_AVX512DQ_VL_MultiplyLow: - case NI_AVX10v1_MultiplyLow: - case NI_AVX10v1_V512_MultiplyLow: + case NI_AVX512_MultiplyLow: #elif defined(TARGET_ARM64) case NI_AdvSimd_Multiply: case NI_AdvSimd_Arm64_Multiply: @@ -29153,7 +28956,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #if defined(TARGET_XARCH) case NI_SSE2_Multiply: - case NI_AVX512F_Multiply: + case NI_AVX512_Multiply: { if (varTypeIsFloating(simdBaseType)) { @@ -29166,8 +28969,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #if defined(TARGET_XARCH) case NI_SSE_MultiplyScalar: case NI_SSE2_MultiplyScalar: - case NI_AVX512F_MultiplyScalar: - case NI_AVX10v1_MultiplyScalar: + case NI_AVX512_MultiplyScalar: { *isScalar = true; return GT_MUL; @@ -29204,22 +29006,14 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_AVX512F_RotateLeft: - case NI_AVX512F_RotateLeftVariable: - case NI_AVX512F_VL_RotateLeft: - case NI_AVX512F_VL_RotateLeftVariable: - case NI_AVX10v1_RotateLeft: - case NI_AVX10v1_RotateLeftVariable: + case NI_AVX512_RotateLeft: + case NI_AVX512_RotateLeftVariable: { return GT_ROL; } - case NI_AVX512F_RotateRight: - case NI_AVX512F_RotateRightVariable: - case NI_AVX512F_VL_RotateRight: - case NI_AVX512F_VL_RotateRightVariable: - case NI_AVX10v1_RotateRight: - case NI_AVX10v1_RotateRightVariable: + case NI_AVX512_RotateRight: + case NI_AVX512_RotateRightVariable: { return GT_ROR; } @@ -29229,12 +29023,8 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty case NI_SSE2_ShiftLeftLogical: case NI_AVX2_ShiftLeftLogical: case NI_AVX2_ShiftLeftLogicalVariable: - case NI_AVX512F_ShiftLeftLogical: - case NI_AVX512F_ShiftLeftLogicalVariable: - case NI_AVX512BW_ShiftLeftLogical: - case 
NI_AVX512BW_ShiftLeftLogicalVariable: - case NI_AVX512BW_VL_ShiftLeftLogicalVariable: - case NI_AVX10v1_ShiftLeftLogicalVariable: + case NI_AVX512_ShiftLeftLogical: + case NI_AVX512_ShiftLeftLogicalVariable: #elif defined(TARGET_ARM64) case NI_AdvSimd_ShiftLeftLogical: #endif @@ -29257,15 +29047,8 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty case NI_SSE2_ShiftRightArithmetic: case NI_AVX2_ShiftRightArithmetic: case NI_AVX2_ShiftRightArithmeticVariable: - case NI_AVX512F_ShiftRightArithmetic: - case NI_AVX512F_ShiftRightArithmeticVariable: - case NI_AVX512F_VL_ShiftRightArithmetic: - case NI_AVX512F_VL_ShiftRightArithmeticVariable: - case NI_AVX512BW_ShiftRightArithmetic: - case NI_AVX512BW_ShiftRightArithmeticVariable: - case NI_AVX512BW_VL_ShiftRightArithmeticVariable: - case NI_AVX10v1_ShiftRightArithmetic: - case NI_AVX10v1_ShiftRightArithmeticVariable: + case NI_AVX512_ShiftRightArithmetic: + case NI_AVX512_ShiftRightArithmeticVariable: #elif defined(TARGET_ARM64) case NI_AdvSimd_ShiftRightArithmetic: #endif @@ -29288,12 +29071,8 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty case NI_SSE2_ShiftRightLogical: case NI_AVX2_ShiftRightLogical: case NI_AVX2_ShiftRightLogicalVariable: - case NI_AVX512F_ShiftRightLogical: - case NI_AVX512F_ShiftRightLogicalVariable: - case NI_AVX512BW_ShiftRightLogical: - case NI_AVX512BW_ShiftRightLogicalVariable: - case NI_AVX512BW_VL_ShiftRightLogicalVariable: - case NI_AVX10v1_ShiftRightLogicalVariable: + case NI_AVX512_ShiftRightLogical: + case NI_AVX512_ShiftRightLogicalVariable: #elif defined(TARGET_ARM64) case NI_AdvSimd_ShiftRightLogical: #endif @@ -29317,8 +29096,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty case NI_SSE2_Subtract: case NI_AVX_Subtract: case NI_AVX2_Subtract: - case NI_AVX512F_Subtract: - case NI_AVX512BW_Subtract: + case NI_AVX512_Subtract: #elif defined(TARGET_ARM64) case NI_AdvSimd_Subtract: 
case NI_AdvSimd_Arm64_Subtract: @@ -29330,8 +29108,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #if defined(TARGET_XARCH) case NI_SSE_SubtractScalar: case NI_SSE2_SubtractScalar: - case NI_AVX512F_SubtractScalar: - case NI_AVX10v1_SubtractScalar: + case NI_AVX512_SubtractScalar: { *isScalar = true; return GT_SUB; @@ -29355,7 +29132,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty case NI_SSE41_CompareEqual: case NI_AVX_CompareEqual: case NI_AVX2_CompareEqual: - case NI_EVEX_CompareEqualMask: + case NI_AVX512_CompareEqualMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_CompareEqual: case NI_AdvSimd_Arm64_CompareEqual: @@ -29390,7 +29167,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty case NI_SSE42_CompareGreaterThan: case NI_AVX_CompareGreaterThan: case NI_AVX2_CompareGreaterThan: - case NI_EVEX_CompareGreaterThanMask: + case NI_AVX512_CompareGreaterThanMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_CompareGreaterThan: case NI_AdvSimd_Arm64_CompareGreaterThan: @@ -29423,7 +29200,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty case NI_SSE_CompareGreaterThanOrEqual: case NI_SSE2_CompareGreaterThanOrEqual: case NI_AVX_CompareGreaterThanOrEqual: - case NI_EVEX_CompareGreaterThanOrEqualMask: + case NI_AVX512_CompareGreaterThanOrEqualMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_CompareGreaterThanOrEqual: case NI_AdvSimd_Arm64_CompareGreaterThanOrEqual: @@ -29458,7 +29235,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty case NI_SSE42_CompareLessThan: case NI_AVX_CompareLessThan: case NI_AVX2_CompareLessThan: - case NI_EVEX_CompareLessThanMask: + case NI_AVX512_CompareLessThanMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_CompareLessThan: case NI_AdvSimd_Arm64_CompareLessThan: @@ -29491,7 +29268,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, 
var_ty case NI_SSE_CompareLessThanOrEqual: case NI_SSE2_CompareLessThanOrEqual: case NI_AVX_CompareLessThanOrEqual: - case NI_EVEX_CompareLessThanOrEqualMask: + case NI_AVX512_CompareLessThanOrEqualMask: #elif defined(TARGET_ARM64) case NI_AdvSimd_CompareLessThanOrEqual: case NI_AdvSimd_Arm64_CompareLessThanOrEqual: @@ -29524,7 +29301,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty case NI_SSE_CompareNotEqual: case NI_SSE2_CompareNotEqual: case NI_AVX_CompareNotEqual: - case NI_EVEX_CompareNotEqualMask: + case NI_AVX512_CompareNotEqualMask: { return GT_NE; } @@ -29566,17 +29343,19 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForUnOp( assert(varTypeIsArithmetic(simdBaseType)); assert(varTypeIsSIMD(simdType)); +#if defined(TARGET_XARCH) if (simdSize == 64) { assert(!isScalar); - assert(comp->IsBaselineVector512IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512)); } else if (simdSize == 32) { assert(!isScalar); - assert(comp->IsBaselineVector256IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); } else +#endif // TARGET_XARCH { #if defined(TARGET_ARM64) assert(!isScalar || (simdSize == 8)); @@ -29669,17 +29448,19 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, assert(op1->TypeIs(simdType)); assert(op2 != nullptr); +#if defined(TARGET_XARCH) if (simdSize == 64) { assert(!isScalar); - assert(comp->IsBaselineVector512IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512)); } else if (simdSize == 32) { assert(!isScalar); - assert(comp->IsBaselineVector256IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); } else +#endif // TARGET_XARCH { #if defined(TARGET_ARM64) assert(!isScalar || (simdSize == 8)); @@ -29702,11 +29483,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsSmall(simdBaseType)) { - id = 
NI_AVX512BW_Add; + id = NI_AVX512_Add; } else { - id = NI_AVX512F_Add; + id = NI_AVX512_Add; } } else if (simdSize == 32) @@ -29757,11 +29538,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsFloating(simdBaseType)) { - id = NI_AVX512DQ_And; + id = NI_AVX512_And; } else { - id = NI_AVX512F_And; + id = NI_AVX512_And; } } else if (simdSize == 32) @@ -29809,11 +29590,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsFloating(simdBaseType)) { - id = NI_AVX512DQ_AndNot; + id = NI_AVX512_AndNot; } else { - id = NI_AVX512F_AndNot; + id = NI_AVX512_AndNot; } } else if (simdSize == 32) @@ -29858,7 +29639,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (simdSize == 64) { - id = NI_AVX512F_Divide; + id = NI_AVX512_Divide; } else if (simdSize == 32) { @@ -29898,11 +29679,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsShort(simdBaseType)) { - id = varTypeIsInt(op2) ? NI_AVX512BW_ShiftLeftLogical : NI_AVX512BW_ShiftLeftLogicalVariable; + id = varTypeIsInt(op2) ? NI_AVX512_ShiftLeftLogical : NI_AVX512_ShiftLeftLogicalVariable; } else if (!varTypeIsByte(simdBaseType)) { - id = varTypeIsInt(op2) ? NI_AVX512F_ShiftLeftLogical : NI_AVX512F_ShiftLeftLogicalVariable; + id = varTypeIsInt(op2) ? NI_AVX512_ShiftLeftLogical : NI_AVX512_ShiftLeftLogicalVariable; } } else if (varTypeIsShort(simdBaseType)) @@ -29920,14 +29701,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_SSE2_ShiftLeftLogical; } } - else + else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - bool isV512Supported = false; - if (comp->compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512BW_VL)) - { - id = isV512Supported ? 
NI_AVX512BW_VL_ShiftLeftLogicalVariable - : NI_AVX10v1_ShiftLeftLogicalVariable; - } + id = NI_AVX512_ShiftLeftLogicalVariable; } } else if (!varTypeIsByte(simdBaseType)) @@ -29969,27 +29745,26 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsFloating(simdBaseType)) { - id = NI_AVX512F_Multiply; + id = NI_AVX512_Multiply; } else if (varTypeIsLong(simdBaseType)) { - id = NI_AVX512DQ_MultiplyLow; + id = NI_AVX512_MultiplyLow; } else if (varTypeIsInt(simdBaseType)) { - id = NI_AVX512F_MultiplyLow; + id = NI_AVX512_MultiplyLow; } else if (varTypeIsShort(simdBaseType)) { - id = NI_AVX512BW_MultiplyLow; + id = NI_AVX512_MultiplyLow; } } else if (varTypeIsLong(simdBaseType)) { - bool isV512Supported = false; - if (comp->compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512DQ_VL)) + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - id = isV512Supported ? NI_AVX512DQ_VL_MultiplyLow : NI_AVX10v1_MultiplyLow; + id = NI_AVX512_MultiplyLow; } } else if (simdSize == 32) @@ -30052,11 +29827,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsFloating(simdBaseType)) { - id = NI_AVX512DQ_Or; + id = NI_AVX512_Or; } else { - id = NI_AVX512F_Or; + id = NI_AVX512_Or; } } else if (simdSize == 32) @@ -30098,22 +29873,14 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (!varTypeIsSmall(simdBaseType)) { - id = varTypeIsInt(op2) ? NI_AVX512F_RotateLeft : NI_AVX512F_RotateLeftVariable; + id = varTypeIsInt(op2) ? NI_AVX512_RotateLeft : NI_AVX512_RotateLeftVariable; } } else if (!varTypeIsSmall(simdBaseType)) { - bool isV512Supported = false; - if (comp->compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512DQ_VL)) + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - if (isV512Supported) - { - id = varTypeIsInt(op2) ? 
NI_AVX512F_VL_RotateLeft : NI_AVX512F_VL_RotateLeftVariable; - } - else - { - id = varTypeIsInt(op2) ? NI_AVX10v1_RotateLeft : NI_AVX10v1_RotateLeftVariable; - } + id = varTypeIsInt(op2) ? NI_AVX512_RotateLeft : NI_AVX512_RotateLeftVariable; } } #endif // TARGET_XARCH @@ -30131,22 +29898,14 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (!varTypeIsSmall(simdBaseType)) { - id = varTypeIsInt(op2) ? NI_AVX512F_RotateRight : NI_AVX512F_RotateRightVariable; + id = varTypeIsInt(op2) ? NI_AVX512_RotateRight : NI_AVX512_RotateRightVariable; } } else if (!varTypeIsSmall(simdBaseType)) { - bool isV512Supported = false; - if (comp->compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512DQ_VL)) + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - if (isV512Supported) - { - id = varTypeIsInt(op2) ? NI_AVX512F_VL_RotateRight : NI_AVX512F_VL_RotateRightVariable; - } - else - { - id = varTypeIsInt(op2) ? NI_AVX10v1_RotateRight : NI_AVX10v1_RotateRightVariable; - } + id = varTypeIsInt(op2) ? NI_AVX512_RotateRight : NI_AVX512_RotateRightVariable; } } #endif // TARGET_XARCH @@ -30164,29 +29923,18 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsShort(simdBaseType)) { - id = - varTypeIsInt(op2) ? NI_AVX512BW_ShiftRightArithmetic : NI_AVX512BW_ShiftRightArithmeticVariable; + id = varTypeIsInt(op2) ? NI_AVX512_ShiftRightArithmetic : NI_AVX512_ShiftRightArithmeticVariable; } else if (!varTypeIsByte(simdBaseType)) { - id = varTypeIsInt(op2) ? NI_AVX512F_ShiftRightArithmetic : NI_AVX512F_ShiftRightArithmeticVariable; + id = varTypeIsInt(op2) ? 
NI_AVX512_ShiftRightArithmetic : NI_AVX512_ShiftRightArithmeticVariable; } } else if (genTypeSize(simdBaseType) == 8) { - bool isV512Supported = false; - if (comp->compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512F_VL)) + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - if (isV512Supported) - { - id = varTypeIsInt(op2) ? NI_AVX512F_VL_ShiftRightArithmetic - : NI_AVX512F_VL_ShiftRightArithmeticVariable; - } - else - { - id = varTypeIsInt(op2) ? NI_AVX10v1_ShiftRightArithmetic - : NI_AVX10v1_ShiftRightArithmeticVariable; - } + id = varTypeIsInt(op2) ? NI_AVX512_ShiftRightArithmetic : NI_AVX512_ShiftRightArithmeticVariable; } } else if (varTypeIsShort(simdBaseType)) @@ -30204,14 +29952,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_SSE2_ShiftRightArithmetic; } } - else + else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - bool isV512Supported = false; - if (comp->compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512BW_VL)) - { - id = isV512Supported ? NI_AVX512BW_VL_ShiftRightArithmeticVariable - : NI_AVX10v1_ShiftRightArithmeticVariable; - } + id = NI_AVX512_ShiftRightArithmeticVariable; } } else if (!varTypeIsByte(simdBaseType)) @@ -30255,11 +29998,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsShort(simdBaseType)) { - id = varTypeIsInt(op2) ? NI_AVX512BW_ShiftRightLogical : NI_AVX512BW_ShiftRightLogicalVariable; + id = varTypeIsInt(op2) ? NI_AVX512_ShiftRightLogical : NI_AVX512_ShiftRightLogicalVariable; } else if (!varTypeIsByte(simdBaseType)) { - id = varTypeIsInt(op2) ? NI_AVX512F_ShiftRightLogical : NI_AVX512F_ShiftRightLogicalVariable; + id = varTypeIsInt(op2) ? 
NI_AVX512_ShiftRightLogical : NI_AVX512_ShiftRightLogicalVariable; } } else if (varTypeIsShort(simdBaseType)) @@ -30277,14 +30020,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_SSE2_ShiftRightLogical; } } - else + else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - bool isV512Supported = false; - if (comp->compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512BW_VL)) - { - id = isV512Supported ? NI_AVX512BW_VL_ShiftRightLogicalVariable - : NI_AVX10v1_ShiftRightLogicalVariable; - } + id = NI_AVX512_ShiftRightLogicalVariable; } } else if (!varTypeIsByte(simdBaseType)) @@ -30326,11 +30064,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsSmall(simdBaseType)) { - id = NI_AVX512BW_Subtract; + id = NI_AVX512_Subtract; } else { - id = NI_AVX512F_Subtract; + id = NI_AVX512_Subtract; } } else if (simdSize == 32) @@ -30381,11 +30119,11 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { if (varTypeIsFloating(simdBaseType)) { - id = NI_AVX512DQ_Xor; + id = NI_AVX512_Xor; } else { - id = NI_AVX512F_Xor; + id = NI_AVX512_Xor; } } else if (simdSize == 32) @@ -30459,6 +30197,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, assert(op1->TypeIs(simdType)); assert(op2 != nullptr); +#if defined(TARGET_XARCH) if (varTypeIsMask(type)) { assert(!isScalar); @@ -30467,9 +30206,10 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, else if (simdSize == 32) { assert(!isScalar); - assert(comp->IsBaselineVector256IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); } else +#endif // TARGET_XARCH { assert((simdSize == 8) || (simdSize == 12) || (simdSize == 16)); @@ -30492,7 +30232,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, #if defined(TARGET_XARCH) if (varTypeIsMask(type)) { - id = 
NI_EVEX_CompareEqualMask; + id = NI_AVX512_CompareEqualMask; } else if (simdSize == 32) { @@ -30542,7 +30282,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, #if defined(TARGET_XARCH) if (varTypeIsMask(type)) { - id = NI_EVEX_CompareGreaterThanOrEqualMask; + id = NI_AVX512_CompareGreaterThanOrEqualMask; } else if (varTypeIsIntegral(simdBaseType)) { @@ -30583,7 +30323,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, #if defined(TARGET_XARCH) if (varTypeIsMask(type)) { - id = NI_EVEX_CompareGreaterThanMask; + id = NI_AVX512_CompareGreaterThanMask; } else if (varTypeIsIntegral(simdBaseType)) { @@ -30646,7 +30386,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, #if defined(TARGET_XARCH) if (varTypeIsMask(type)) { - id = NI_EVEX_CompareLessThanOrEqualMask; + id = NI_AVX512_CompareLessThanOrEqualMask; } else if (varTypeIsIntegral(simdBaseType)) { @@ -30687,7 +30427,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, #if defined(TARGET_XARCH) if (varTypeIsMask(type)) { - id = NI_EVEX_CompareLessThanMask; + id = NI_AVX512_CompareLessThanMask; } else if (varTypeIsIntegral(simdBaseType)) { @@ -30750,7 +30490,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, #if defined(TARGET_XARCH) if (varTypeIsMask(type)) { - id = NI_EVEX_CompareNotEqualMask; + id = NI_AVX512_CompareNotEqualMask; } else if (varTypeIsIntegral(simdBaseType)) { @@ -30927,8 +30667,8 @@ bool GenTreeHWIntrinsic::ShouldConstantProp(GenTree* operand, GenTreeVecCon* vec return (simdBaseType == TYP_FLOAT) && vecCon->IsZero(); } - case NI_EVEX_CompareEqualMask: - case NI_EVEX_CompareNotEqualMask: + case NI_AVX512_CompareEqualMask: + case NI_AVX512_CompareNotEqualMask: { // We can optimize when the constant is zero, but only // for non floating-point since +0.0 == -0.0 @@ -30964,9 +30704,7 @@ bool GenTreeHWIntrinsic::ShouldConstantProp(GenTree* operand, 
GenTreeVecCon* vec case NI_SSE2_Xor: case NI_AVX_Xor: case NI_AVX2_Xor: - case NI_AVX512F_Xor: - case NI_AVX512DQ_Xor: - case NI_AVX10v1_V512_Xor: + case NI_AVX512_Xor: { // We recognize this as GT_NOT which can enable other optimizations assert(GetOperandCount() == 2); @@ -31748,7 +31486,7 @@ unsigned GenTreeHWIntrinsic::GetResultOpNumForRmwIntrinsic(GenTree* use, GenTree { #if defined(TARGET_XARCH) assert(HWIntrinsicInfo::IsFmaIntrinsic(gtHWIntrinsicId) || HWIntrinsicInfo::IsPermuteVar2x(gtHWIntrinsicId) || - HWIntrinsicInfo::IsTernaryLogic(gtHWIntrinsicId)); + (gtHWIntrinsicId == NI_AVX512_TernaryLogic)); #elif defined(TARGET_ARM64) assert(HWIntrinsicInfo::IsFmaIntrinsic(gtHWIntrinsicId)); #endif diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index c6efe050d7e743..bdab2cbfb6cda5 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6464,7 +6464,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic bool OperIsConvertMaskToVector() const { #if defined(TARGET_XARCH) - return OperIsHWIntrinsic(NI_EVEX_ConvertMaskToVector); + return OperIsHWIntrinsic(NI_AVX512_ConvertMaskToVector); #elif defined(TARGET_ARM64) return OperIsHWIntrinsic(NI_Sve_ConvertMaskToVector); #else @@ -6475,7 +6475,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic bool OperIsConvertVectorToMask() const { #if defined(TARGET_XARCH) - return OperIsHWIntrinsic(NI_EVEX_ConvertVectorToMask); + return OperIsHWIntrinsic(NI_AVX512_ConvertVectorToMask); #elif defined(TARGET_ARM64) return OperIsHWIntrinsic(NI_Sve_ConvertVectorToMask); #else diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 3c1a51f0724244..00d15b6749eaaa 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -926,25 +926,14 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { FIRST_NI_AVXVNNI, LAST_NI_AVXVNNI }, { NI_Illegal, NI_Illegal }, // MOVBE { FIRST_NI_X86Serialize, LAST_NI_X86Serialize }, - { 
NI_Illegal, NI_Illegal }, // EVEX - { FIRST_NI_AVX512F, LAST_NI_AVX512F }, - { FIRST_NI_AVX512F_VL, LAST_NI_AVX512F_VL }, - { FIRST_NI_AVX512BW, LAST_NI_AVX512BW }, - { FIRST_NI_AVX512BW_VL, LAST_NI_AVX512BW_VL }, - { FIRST_NI_AVX512CD, LAST_NI_AVX512CD }, - { FIRST_NI_AVX512CD_VL, LAST_NI_AVX512CD_VL }, - { FIRST_NI_AVX512DQ, LAST_NI_AVX512DQ }, - { FIRST_NI_AVX512DQ_VL, LAST_NI_AVX512DQ_VL }, + { FIRST_NI_AVX512, LAST_NI_AVX512 }, { FIRST_NI_AVX512VBMI, LAST_NI_AVX512VBMI }, - { FIRST_NI_AVX512VBMI_VL, LAST_NI_AVX512VBMI_VL }, - { FIRST_NI_AVX10v1, LAST_NI_AVX10v1 }, - { FIRST_NI_AVX10v1_V512, LAST_NI_AVX10v1_V512 }, + { NI_Illegal, NI_Illegal }, // AVX10v1 { NI_Illegal, NI_Illegal }, // VectorT128 { NI_Illegal, NI_Illegal }, // VectorT256 { NI_Illegal, NI_Illegal }, // VectorT512 { NI_Illegal, NI_Illegal }, // APX { FIRST_NI_AVX10v2, LAST_NI_AVX10v2 }, // AVX10v2 - { FIRST_NI_AVX10v2_V512, LAST_NI_AVX10v2_V512 }, // AVX10v2_V512 { FIRST_NI_GFNI, LAST_NI_GFNI }, { FIRST_NI_GFNI_V256, LAST_NI_GFNI_V256 }, { FIRST_NI_GFNI_V512, LAST_NI_GFNI_V512 }, @@ -966,15 +955,10 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { FIRST_NI_POPCNT_X64, LAST_NI_POPCNT_X64 }, { NI_Illegal, NI_Illegal }, // AVXVNNI_X64 { NI_Illegal, NI_Illegal }, // X86Serialize_X64 - { FIRST_NI_AVX512F_X64, LAST_NI_AVX512F_X64 }, - { NI_Illegal, NI_Illegal }, // AVX512BW_X64 - { NI_Illegal, NI_Illegal }, // AVX512CD_X64 - { NI_Illegal, NI_Illegal }, // AVX512DQ_X64 + { FIRST_NI_AVX512_X64, LAST_NI_AVX512_X64 }, { NI_Illegal, NI_Illegal }, // AVX512VBMI_X64 - { FIRST_NI_AVX10v1_X64, LAST_NI_AVX10v1_X64 }, - { NI_Illegal, NI_Illegal }, // AVX10v1_V512_X64 + { NI_Illegal, NI_Illegal }, // AVX10v1_X64 { NI_Illegal, NI_Illegal }, // AVX10v2_X64 - { NI_Illegal, NI_Illegal }, // AVX10v2_V512_X64 { NI_Illegal, NI_Illegal }, // GFNI_X64 #elif defined (TARGET_ARM64) { FIRST_NI_ArmBase, LAST_NI_ArmBase }, @@ -1104,6 +1088,75 @@ static void ValidateHWIntrinsicIsaRangeArray() } #endif 
+//------------------------------------------------------------------------ +// binarySearchId: Does a binary search through a given ISA for the NamedIntrinsic matching a given name +// +// Arguments: +// isa -- The instruction set to search +// sig -- The signature of the intrinsic +// methodName -- The name of the method associated with the HWIntrinsic to lookup +// isLimitedVector256Isa -- true if Vector256 has limited acceleration support +// +// Return Value: +// The NamedIntrinsic associated with methodName and isa +static NamedIntrinsic binarySearchId(CORINFO_InstructionSet isa, + CORINFO_SIG_INFO* sig, + const char* methodName, + bool isLimitedVector256Isa) +{ + size_t isaIndex = static_cast(isa) - 1; + assert(isaIndex < ARRAY_SIZE(hwintrinsicIsaRangeArray)); + + const HWIntrinsicIsaRange& isaRange = hwintrinsicIsaRangeArray[isaIndex]; + + if (isaRange.FirstId == NI_Illegal) + { + return NI_Illegal; + } + + size_t rangeLower = isaRange.FirstId; + size_t rangeUpper = isaRange.LastId; + + while (rangeLower <= rangeUpper) + { + // This is safe since rangeLower and rangeUpper will never be negative + size_t rangeIndex = (rangeUpper + rangeLower) / 2; + + NamedIntrinsic ni = static_cast(rangeIndex); + const HWIntrinsicInfo& intrinsicInfo = HWIntrinsicInfo::lookup(ni); + + int sortOrder = strcmp(methodName, intrinsicInfo.name); + + if (sortOrder < 0) + { + rangeUpper = rangeIndex - 1; + } + else if (sortOrder > 0) + { + rangeLower = rangeIndex + 1; + } + else + { + assert(sortOrder == 0); + assert((intrinsicInfo.numArgs == -1) || (sig->numArgs == static_cast(intrinsicInfo.numArgs))); + +#if defined(TARGET_XARCH) + // on AVX1-only CPUs we only support a subset of intrinsics in Vector256 + if (isLimitedVector256Isa && !HWIntrinsicInfo::AvxOnlyCompatible(ni)) + { + return NI_Illegal; + } +#endif // TARGET_XARCH + + return ni; + } + } + + // There are several helper intrinsics that are implemented in managed code + // Those intrinsics will hit this code path and need 
to return NI_Illegal + return NI_Illegal; +} + //------------------------------------------------------------------------ // lookupId: Gets the NamedIntrinsic for a given method name and InstructionSet // @@ -1183,7 +1236,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, } else if (isa == InstructionSet_Vector512) { - isa = InstructionSet_AVX512F; + isa = InstructionSet_AVX512; vectorByteLength = 64; } else @@ -1273,7 +1326,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, } else if (isa == InstructionSet_Vector512) { - if (!comp->IsBaselineVector512IsaSupportedOpportunistically()) + if (!comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { return NI_Illegal; } @@ -1288,57 +1341,29 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, } #endif - size_t isaIndex = static_cast(isa) - 1; - assert(isaIndex < ARRAY_SIZE(hwintrinsicIsaRangeArray)); - - const HWIntrinsicIsaRange& isaRange = hwintrinsicIsaRangeArray[isaIndex]; - - if (isaRange.FirstId == NI_Illegal) - { - return NI_Illegal; - } - - size_t rangeLower = isaRange.FirstId; - size_t rangeUpper = isaRange.LastId; +#if defined(TARGET_XARCH) + // AVX10v1 is a strict superset of all AVX512 ISAs + // + // The original design was that it exposed the AVX512VL instructions without requiring V512 support + // however, later iterations changed this and it is now just a unifying ISA instead - while (rangeLower <= rangeUpper) + if (isa == InstructionSet_AVX10v1) { - // This is safe since rangeLower and rangeUpper will never be negative - size_t rangeIndex = (rangeUpper + rangeLower) / 2; - - NamedIntrinsic ni = static_cast(rangeIndex); - const HWIntrinsicInfo& intrinsicInfo = HWIntrinsicInfo::lookup(ni); + NamedIntrinsic ni = binarySearchId(InstructionSet_AVX512, sig, methodName, isLimitedVector256Isa); - int sortOrder = strcmp(methodName, intrinsicInfo.name); - - if (sortOrder < 0) + if (ni != NI_Illegal) { - rangeUpper = rangeIndex - 1; - } - else if (sortOrder > 0) - { - 
rangeLower = rangeIndex + 1; - } - else - { - assert(sortOrder == 0); - assert((intrinsicInfo.numArgs == -1) || (sig->numArgs == static_cast(intrinsicInfo.numArgs))); - -#if defined(TARGET_XARCH) - // on AVX1-only CPUs we only support a subset of intrinsics in Vector256 - if (isLimitedVector256Isa && !AvxOnlyCompatible(ni)) - { - return NI_Illegal; - } -#endif // TARGET_XARCH - return ni; } + return binarySearchId(InstructionSet_AVX512VBMI, sig, methodName, isLimitedVector256Isa); } + else if (isa == InstructionSet_AVX10v1_X64) + { + return binarySearchId(InstructionSet_AVX512_X64, sig, methodName, isLimitedVector256Isa); + } +#endif // TARGET_XARCH - // There are several helper intrinsics that are implemented in managed code - // Those intrinsics will hit this code path and need to return NI_Illegal - return NI_Illegal; + return binarySearchId(isa, sig, methodName, isLimitedVector256Isa); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index ec7003474c5d66..113b06e8dfc459 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -885,19 +885,12 @@ struct HWIntrinsicInfo #ifdef TARGET_XARCH switch (id) { + case NI_AVX2_ShiftLeftLogicalVariable: case NI_AVX2_ShiftRightArithmeticVariable: - case NI_AVX512F_ShiftRightArithmeticVariable: - case NI_AVX512F_VL_ShiftRightArithmeticVariable: - case NI_AVX512BW_ShiftRightArithmeticVariable: - case NI_AVX512BW_VL_ShiftRightArithmeticVariable: - case NI_AVX10v1_ShiftRightArithmeticVariable: case NI_AVX2_ShiftRightLogicalVariable: - case NI_AVX512F_ShiftRightLogicalVariable: - case NI_AVX512BW_ShiftRightLogicalVariable: - case NI_AVX512BW_VL_ShiftRightLogicalVariable: - case NI_AVX10v1_ShiftRightLogicalVariable: - case NI_AVX2_ShiftLeftLogicalVariable: - case NI_AVX512BW_VL_ShiftLeftLogicalVariable: + case NI_AVX512_ShiftLeftLogicalVariable: + case NI_AVX512_ShiftRightArithmeticVariable: + case 
NI_AVX512_ShiftRightLogicalVariable: return true; default: return false; @@ -1215,11 +1208,6 @@ struct HWIntrinsicInfo HWIntrinsicFlag flags = lookupFlags(id); return (flags & HW_Flag_PermuteVar2x) != 0; } - - static bool IsTernaryLogic(NamedIntrinsic id) - { - return (id == NI_AVX512F_TernaryLogic) || (id == NI_AVX512F_VL_TernaryLogic) || (id == NI_AVX10v1_TernaryLogic); - } #endif // TARGET_XARCH #if defined(TARGET_ARM64) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index fdee25c5c6b078..0d35a5178e5487 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -419,7 +419,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) if (op2->IsEmbMaskOp()) { - assert(intrinsicId == NI_EVEX_BlendVariableMask); + assert(intrinsicId == NI_AVX512_BlendVariableMask); assert(op2->isContained()); assert(op2->OperIsHWIntrinsic()); @@ -860,7 +860,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE41_BlendVariable: case NI_AVX_BlendVariable: case NI_AVX2_BlendVariable: - case NI_EVEX_BlendVariableMask: + case NI_AVX512_BlendVariableMask: { genHWIntrinsic_R_R_RM_R(node, ins, simdSize, instOptions); break; @@ -1000,18 +1000,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case InstructionSet_AVX: case InstructionSet_AVX2: - case InstructionSet_AVX512F: - case InstructionSet_AVX512F_VL: - case InstructionSet_AVX512F_X64: - case InstructionSet_AVX512BW: - case InstructionSet_AVX512BW_VL: + case InstructionSet_AVX512: + case InstructionSet_AVX512_X64: case InstructionSet_AVX512VBMI: - case InstructionSet_AVX512VBMI_VL: - case InstructionSet_AVX10v1: - case InstructionSet_AVX10v1_X64: - case InstructionSet_AVX10v1_V512: - case InstructionSet_AVX10v1_V512_X64: - case InstructionSet_EVEX: { genAvxFamilyIntrinsic(node, instOptions); break; @@ -1153,8 +1144,7 @@ void CodeGen::genHWIntrinsic_R_RM( break; } - case 
NI_AVX512F_BroadcastScalarToVector512: - case NI_AVX512BW_BroadcastScalarToVector512: + case NI_AVX512_BroadcastScalarToVector512: { needsInstructionFixup = true; break; @@ -1524,7 +1514,7 @@ void CodeGen::genHWIntrinsic_R_R_R_RM_I( // non-RMW based codegen. #if defined(DEBUG) - assert(HWIntrinsicInfo::IsTernaryLogic(node->GetHWIntrinsicId())); + assert(node->GetHWIntrinsicId() == NI_AVX512_TernaryLogic); uint8_t control = static_cast(ival); const TernaryLogicInfo& info = TernaryLogicInfo::lookup(control); @@ -1538,7 +1528,7 @@ void CodeGen::genHWIntrinsic_R_R_R_RM_I( else { #if defined(DEBUG) - if (HWIntrinsicInfo::IsTernaryLogic(node->GetHWIntrinsicId())) + if (node->GetHWIntrinsicId() == NI_AVX512_TernaryLogic) { uint8_t control = static_cast(ival); const TernaryLogicInfo& info = TernaryLogicInfo::lookup(control); @@ -1727,8 +1717,8 @@ void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic* insOpts instOptions = INS_OPTS_NONE; switch (intrinsicId) { - case NI_AVX512F_ConvertToVector256Int32: - case NI_AVX512F_ConvertToVector256UInt32: + case NI_AVX512_ConvertToVector256Int32: + case NI_AVX512_ConvertToVector256UInt32: { // This intrinsic has several overloads, only the ones with floating number inputs should reach this part. 
assert(varTypeIsFloating(baseType)); @@ -1743,15 +1733,11 @@ void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic* break; } - case NI_AVX512F_ConvertToInt32: - case NI_AVX512F_ConvertToUInt32: - case NI_AVX10v1_ConvertToInt32: - case NI_AVX10v1_ConvertToUInt32: + case NI_AVX512_ConvertToInt32: + case NI_AVX512_ConvertToUInt32: #if defined(TARGET_AMD64) - case NI_AVX512F_X64_ConvertToInt64: - case NI_AVX512F_X64_ConvertToUInt64: - case NI_AVX10v1_X64_ConvertToInt64: - case NI_AVX10v1_X64_ConvertToUInt64: + case NI_AVX512_X64_ConvertToInt64: + case NI_AVX512_X64_ConvertToUInt64: #endif // TARGET_AMD64 { assert(varTypeIsFloating(baseType)); @@ -1768,10 +1754,8 @@ void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic* break; } - case NI_AVX512F_X64_ConvertScalarToVector128Single: - case NI_AVX512F_X64_ConvertScalarToVector128Double: - case NI_AVX10v1_X64_ConvertScalarToVector128Single: - case NI_AVX10v1_X64_ConvertScalarToVector128Double: + case NI_AVX512_X64_ConvertScalarToVector128Single: + case NI_AVX512_X64_ConvertScalarToVector128Double: { assert(varTypeIsLong(baseType)); auto emitSwCase = [&](int8_t i) { @@ -1784,20 +1768,16 @@ void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic* break; } - case NI_AVX512F_FusedMultiplyAdd: - case NI_AVX512F_FusedMultiplyAddScalar: - case NI_AVX512F_FusedMultiplyAddNegated: - case NI_AVX512F_FusedMultiplyAddNegatedScalar: - case NI_AVX512F_FusedMultiplyAddSubtract: - case NI_AVX512F_FusedMultiplySubtract: - case NI_AVX512F_FusedMultiplySubtractAdd: - case NI_AVX512F_FusedMultiplySubtractNegated: - case NI_AVX512F_FusedMultiplySubtractNegatedScalar: - case NI_AVX512F_FusedMultiplySubtractScalar: - case NI_AVX10v1_FusedMultiplyAddScalar: - case NI_AVX10v1_FusedMultiplyAddNegatedScalar: - case NI_AVX10v1_FusedMultiplySubtractScalar: - case NI_AVX10v1_FusedMultiplySubtractNegatedScalar: + case NI_AVX512_FusedMultiplyAdd: + case 
NI_AVX512_FusedMultiplyAddScalar: + case NI_AVX512_FusedMultiplyAddNegated: + case NI_AVX512_FusedMultiplyAddNegatedScalar: + case NI_AVX512_FusedMultiplyAddSubtract: + case NI_AVX512_FusedMultiplySubtract: + case NI_AVX512_FusedMultiplySubtractAdd: + case NI_AVX512_FusedMultiplySubtractNegated: + case NI_AVX512_FusedMultiplySubtractNegatedScalar: + case NI_AVX512_FusedMultiplySubtractScalar: { // For FMA intrinsics, since it is not possible to get any contained operand in this case: embedded rounding // is limited in register-to-register form, and the control byte is dynamic, we don't need to do any swap. @@ -2959,7 +2939,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_AddMask: + case NI_AVX512_AddMask: { assert(instOptions == INS_OPTS_NONE); @@ -2999,7 +2979,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_AndMask: + case NI_AVX512_AndMask: { assert(instOptions == INS_OPTS_NONE); @@ -3039,7 +3019,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_AndNotMask: + case NI_AVX512_AndNotMask: { assert(instOptions == INS_OPTS_NONE); @@ -3079,7 +3059,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_MoveMask: + case NI_AVX512_MoveMask: { assert(instOptions == INS_OPTS_NONE); @@ -3116,7 +3096,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_KORTEST: + case NI_AVX512_KORTEST: { assert(instOptions == INS_OPTS_NONE); @@ -3158,7 +3138,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_KTEST: + case NI_AVX512_KTEST: { assert(instOptions == INS_OPTS_NONE); @@ -3196,7 +3176,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_NotMask: + case NI_AVX512_NotMask: { 
assert(instOptions == INS_OPTS_NONE); @@ -3231,7 +3211,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_OrMask: + case NI_AVX512_OrMask: { assert(instOptions == INS_OPTS_NONE); @@ -3271,7 +3251,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_ShiftLeftMask: + case NI_AVX512_ShiftLeftMask: { assert(instOptions == INS_OPTS_NONE); @@ -3312,7 +3292,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_ShiftRightMask: + case NI_AVX512_ShiftRightMask: { assert(instOptions == INS_OPTS_NONE); @@ -3353,7 +3333,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_XorMask: + case NI_AVX512_XorMask: { assert(instOptions == INS_OPTS_NONE); @@ -3393,7 +3373,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_EVEX_XnorMask: + case NI_AVX512_XnorMask: { assert(instOptions == INS_OPTS_NONE); @@ -3433,18 +3413,12 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_AVX512F_ConvertToInt32: - case NI_AVX512F_ConvertToUInt32: - case NI_AVX512F_ConvertToUInt32WithTruncation: - case NI_AVX512F_X64_ConvertToInt64: - case NI_AVX512F_X64_ConvertToUInt64: - case NI_AVX512F_X64_ConvertToUInt64WithTruncation: - case NI_AVX10v1_X64_ConvertToInt64: - case NI_AVX10v1_X64_ConvertToUInt64: - case NI_AVX10v1_X64_ConvertToUInt64WithTruncation: - case NI_AVX10v1_ConvertToInt32: - case NI_AVX10v1_ConvertToUInt32: - case NI_AVX10v1_ConvertToUInt32WithTruncation: + case NI_AVX512_ConvertToInt32: + case NI_AVX512_ConvertToUInt32: + case NI_AVX512_ConvertToUInt32WithTruncation: + case NI_AVX512_X64_ConvertToInt64: + case NI_AVX512_X64_ConvertToUInt64: + case NI_AVX512_X64_ConvertToUInt64WithTruncation: { assert(baseType == TYP_DOUBLE || baseType == TYP_FLOAT); emitAttr attr 
= emitTypeSize(targetType); @@ -3454,12 +3428,10 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_AVX512F_ConvertToVector256Int32: - case NI_AVX512F_ConvertToVector256UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation: - case NI_AVX10v1_ConvertToVector128UInt32: - case NI_AVX10v1_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector128UInt32: + case NI_AVX512_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector256Int32: + case NI_AVX512_ConvertToVector256UInt32: { if (varTypeIsFloating(baseType)) { @@ -3470,48 +3442,26 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption FALLTHROUGH; } - case NI_AVX512F_ConvertToVector128Byte: - case NI_AVX512F_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_ConvertToVector128Int16: - case NI_AVX512F_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_ConvertToVector128SByte: - case NI_AVX512F_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_ConvertToVector128UInt16: - case NI_AVX512F_ConvertToVector128UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256Int16: - case NI_AVX512F_ConvertToVector256Int16WithSaturation: - case NI_AVX512F_ConvertToVector256Int32WithSaturation: - case NI_AVX512F_ConvertToVector256UInt16: - case NI_AVX512F_ConvertToVector256UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256UInt32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Byte: - case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int16: - case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int32: - case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128SByte: - case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128UInt16: - case 
NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation: - case NI_AVX512BW_ConvertToVector256Byte: - case NI_AVX512BW_ConvertToVector256ByteWithSaturation: - case NI_AVX512BW_ConvertToVector256SByte: - case NI_AVX512BW_ConvertToVector256SByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128Byte: - case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128SByte: - case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Byte: - case NI_AVX10v1_ConvertToVector128ByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Int16: - case NI_AVX10v1_ConvertToVector128Int16WithSaturation: - case NI_AVX10v1_ConvertToVector128Int32: - case NI_AVX10v1_ConvertToVector128Int32WithSaturation: - case NI_AVX10v1_ConvertToVector128SByte: - case NI_AVX10v1_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128UInt16: - case NI_AVX10v1_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector128Byte: + case NI_AVX512_ConvertToVector128ByteWithSaturation: + case NI_AVX512_ConvertToVector128Int16: + case NI_AVX512_ConvertToVector128Int16WithSaturation: + case NI_AVX512_ConvertToVector128Int32: + case NI_AVX512_ConvertToVector128Int32WithSaturation: + case NI_AVX512_ConvertToVector128SByte: + case NI_AVX512_ConvertToVector128SByteWithSaturation: + case NI_AVX512_ConvertToVector128UInt16: + case NI_AVX512_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector256Byte: + case NI_AVX512_ConvertToVector256ByteWithSaturation: + case NI_AVX512_ConvertToVector256Int16: + case NI_AVX512_ConvertToVector256Int16WithSaturation: + case NI_AVX512_ConvertToVector256Int32WithSaturation: + case NI_AVX512_ConvertToVector256SByte: + case NI_AVX512_ConvertToVector256SByteWithSaturation: + case NI_AVX512_ConvertToVector256UInt16: + case NI_AVX512_ConvertToVector256UInt16WithSaturation: + case NI_AVX512_ConvertToVector256UInt32WithSaturation: { instruction ins = 
HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); @@ -3523,10 +3473,8 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption break; } - case NI_AVX512F_X64_ConvertScalarToVector128Double: - case NI_AVX512F_X64_ConvertScalarToVector128Single: - case NI_AVX10v1_X64_ConvertScalarToVector128Double: - case NI_AVX10v1_X64_ConvertScalarToVector128Single: + case NI_AVX512_X64_ConvertScalarToVector128Double: + case NI_AVX512_X64_ConvertScalarToVector128Single: { assert(baseType == TYP_ULONG || baseType == TYP_LONG); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index fcab210ae2bb12..ce25ed1ad81c83 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -914,362 +914,210 @@ HARDWARE_INTRINSIC(AVX2, Xor, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512F Intrinsics -#define FIRST_NI_AVX512F NI_AVX512F_Abs -HARDWARE_INTRINSIC(AVX512F, Abs, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pabsd, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, Add, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbRoundingCompatible) 
-HARDWARE_INTRINSIC(AVX512F, AddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, AlignRight32, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, AlignRight64, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, And, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_vpandq, INS_vpandq, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, AndNot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_vpandnq, INS_vpandnq, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, BlendVariable, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, BroadcastScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, BroadcastVector128ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_invalid, INS_invalid, INS_vbroadcastf32x4, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, 
BroadcastVector256ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti64x4, INS_vbroadcasti64x4, INS_invalid, INS_vbroadcastf64x4}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, Compare, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareGreaterThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareGreaterThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareLessThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareLessThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareNotEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareNotGreaterThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareNotGreaterThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareNotLessThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareNotLessThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareOrdered, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, CompareUnordered, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_vcvtusi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_vcvtusi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si32, INS_cvtsd2si32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi32, INS_vcvtsd2usi32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi32, INS_vcvttsd2usi32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Byte, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector128ByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Int16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Int16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector128SByte, 64, 1, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector128SByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdb, INS_invalid, INS_vpmovsqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector128UInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector128UInt16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Single, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt32, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt32WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Double, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_vcvtudq2pd, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int32, 64, -1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int64, 64, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Single, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt32, 64, -1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt64, 64, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, Divide, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, DivideScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, DuplicateEvenIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, DuplicateOddIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, ExtractVector128, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti64x2, INS_vextracti64x2, INS_vextractf32x4, INS_vextractf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, ExtractVector256, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x8, INS_vextracti32x8, INS_vextracti64x4, INS_vextracti64x4, INS_vextractf32x8, INS_vextractf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, Fixup, 64, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd}, 
HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, FixupScalar, 16, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmss, INS_vfixupimmsd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAdd, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAddNegated, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAddNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAddSubtract, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) 
-HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtract, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtractAdd, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtractNegated, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtractNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, GetExponent, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpps, INS_vgetexppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, GetExponentScalar, 16, -1, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpss, INS_vgetexpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, GetMantissa, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd}, HW_Category_IMM, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, GetMantissaScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantss, INS_vgetmantsd}, HW_Category_IMM, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, InsertVector128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti64x2, INS_vinserti64x2, INS_vinsertf32x4, INS_vinsertf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, InsertVector256, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x8, INS_vinserti32x8, INS_vinserti64x4, INS_vinserti64x4, INS_vinsertf32x8, INS_vinsertf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512NonTemporal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, LoadVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F, Max, 64, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_pmaxsd, INS_pmaxud, INS_vpmaxsq, INS_vpmaxuq, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(AVX512F, Min, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pminsd, INS_pminud, INS_vpminsq, INS_vpminuq, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(AVX512F, Multiply, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, MultiplyLow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512F, MultiplyScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, Or, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_vporq, INS_vporq, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, Permute2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, Permute4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, Permute4x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) 
-HARDWARE_INTRINSIC(AVX512F, PermuteVar16x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512F, PermuteVar16x32x2, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512F, PermuteVar2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpdvar}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, PermuteVar4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, PermuteVar8x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512F, PermuteVar8x64x2, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512F, Reciprocal14, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ps, INS_vrcp14pd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, Reciprocal14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ss, INS_vrcp14sd}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, ReciprocalSqrt14, 
64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ps, INS_vrsqrt14pd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, ReciprocalSqrt14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ss, INS_vrsqrt14sd}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, RotateLeft, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprold, INS_vprold, INS_vprolq, INS_vprolq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, RotateLeftVariable, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprolvd, INS_vprolvd, INS_vprolvq, INS_vprolvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, RotateRight, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprord, INS_vprord, INS_vprorq, INS_vprorq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, RotateRightVariable, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, RoundScale, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, RoundScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaless, INS_vrndscalesd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, Scale, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefss, INS_vscalefsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ShiftLeftLogical, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, ShiftLeftLogicalVariable, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, ShiftRightArithmetic, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrad, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, ShiftRightArithmeticVariable, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravd, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, ShiftRightLogical, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, ShiftRightLogicalVariable, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, Shuffle, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, 
INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, Shuffle4x128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, Sqrt, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, Store, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512F, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, Subtract, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, SubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_subss, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, TernaryLogic, 64, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX512F, UnpackHigh, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, UnpackLow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, Xor, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_vpxorq, INS_vpxorq, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) -#define LAST_NI_AVX512F NI_AVX512F_Xor +#define FIRST_NI_AVX512 NI_AVX512_Abs +HARDWARE_INTRINSIC(AVX512, Abs, -1, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, Add, 64, -1, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, AddSaturate, 64, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, AddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, AlignRight, 64, 3, {INS_palignr, INS_palignr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, AlignRight32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, AlignRight64, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, And, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_vpandq, INS_vpandq, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, AndNot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_vpandnq, INS_vpandnq, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, Average, 64, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, BlendVariable, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) 
+HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastPairScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastScalarToVector512, 64, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_SIMDScalar, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastVector128ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x4, INS_vbroadcasti32x4, INS_vbroadcasti64x2, INS_vbroadcasti64x2, INS_vbroadcastf32x4, INS_vbroadcastf64x2}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, BroadcastVector256ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x8, INS_vbroadcasti32x8, INS_vbroadcasti64x4, INS_vbroadcasti64x4, INS_vbroadcastf32x8, INS_vbroadcastf64x4}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Compare, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareGreaterThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareGreaterThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareLessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareLessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotLessThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareNotLessThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, 
CompareOrdered, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, CompareUnordered, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_vcvtusi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_vcvtusi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si32, INS_cvtsd2si32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToUInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi32, INS_vcvtsd2usi32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToUInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi32, INS_vcvttsd2usi32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Byte, -1, 1, {INS_invalid, 
INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128ByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_vpmovsqw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Int64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128SByte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128SByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_vpmovsdb, INS_invalid, INS_vpmovsqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128Single, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_vpmovusqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt32WithTruncation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector128UInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Byte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256ByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int16, 64, 1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Int64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256SByte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256SByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256Single, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt16WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32WithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt32WithTruncation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector256UInt64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Double, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int32, 64, -1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int64, 64, -1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Int64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512Single, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt32, 64, -1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt32WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt64, 64, -1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ConvertToVector512UInt64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512, 
DetectConflicts, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Divide, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, DivideScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, DuplicateEvenIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, DuplicateOddIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ExtractVector128, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti64x2, INS_vextracti64x2, INS_vextractf32x4, INS_vextractf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, ExtractVector256, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x8, INS_vextracti32x8, INS_vextracti64x4, INS_vextracti64x4, INS_vextractf32x8, INS_vextractf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, Fixup, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM) 
+HARDWARE_INTRINSIC(AVX512, FixupScalar, 16, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmss, INS_vfixupimmsd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAdd, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddNegated, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, FusedMultiplyAddSubtract, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtract, 64, -1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractAdd, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractNegated, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, FusedMultiplySubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, GetExponent, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpps, INS_vgetexppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, GetExponentScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_vgetexpss, INS_vgetexpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, GetMantissa, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd}, HW_Category_IMM, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, GetMantissaScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantss, INS_vgetmantsd}, HW_Category_IMM, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, InsertVector128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x4, INS_vinserti32x4, INS_vinserti64x2, INS_vinserti64x2, INS_vinsertf32x4, INS_vinsertf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, InsertVector256, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x8, INS_vinserti32x8, INS_vinserti64x4, INS_vinserti64x4, INS_vinsertf32x8, INS_vinsertf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, LeadingZeroCount, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, LoadAlignedVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, LoadAlignedVector512NonTemporal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, LoadVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512, Max, -1, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_vpmaxsq, INS_vpmaxuq, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX512, Min, -1, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_vpminsq, INS_vpminuq, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX512, Multiply, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, MultiplyAddAdjacent, 64, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, MultiplyHigh, 64, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, MultiplyHighRoundScale, 64, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, MultiplyLow, -1, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, MultiplyScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, Or, 64, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_pord, INS_pord, INS_vporq, INS_vporq, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, PackSignedSaturate, 64, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, PackUnsignedSaturate, 64, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Permute2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Permute4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Permute4x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, PermuteVar16x16, 32, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar16x16x2, 32, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar16x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar16x32x2, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpdvar}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, PermuteVar2x64x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar32x16, 64, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar32x16x2, 64, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, PermuteVar4x32x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar4x64, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, 
INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar4x64x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x16 , 16, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x16x2, 16, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x32x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512, PermuteVar8x64x2, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512, Range, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd}, HW_Category_IMM, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, RangeScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_vrangess, INS_vrangesd}, HW_Category_IMM, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, Reciprocal14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ps, INS_vrcp14pd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Reciprocal14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ss, INS_vrcp14sd}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, ReciprocalSqrt14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ps, INS_vrsqrt14pd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ReciprocalSqrt14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ss, INS_vrsqrt14sd}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, Reduce, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ReduceScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreducess, INS_vreducesd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, RotateLeft, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprold, INS_vprold, INS_vprolq, INS_vprolq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, RotateLeftVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprolvd, INS_vprolvd, INS_vprolvq, INS_vprolvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, 
RotateRight, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprord, INS_vprord, INS_vprorq, INS_vprorq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, RotateRightVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, RoundScale, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, RoundScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaless, INS_vrndscalesd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512, Scale, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefss, INS_vscalefsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, ShiftLeftLogical, 64, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftLeftLogical128BitLane, 64, 2, {INS_pslldq, INS_pslldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftLeftLogicalVariable, -1, 2, {INS_invalid, 
INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ShiftRightArithmetic, -1, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsravw, INS_invalid, INS_vpsravd, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, ShiftRightLogical, 64, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftRightLogical128BitLane, 64, 2, {INS_psrldq, INS_psrldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShiftRightLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsrlvw, INS_vpsrlvw, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Shuffle, 64, -1, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Shuffle2x128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Shuffle4x128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, 
INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShuffleHigh, 64, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, ShuffleLow, 64, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, Sqrt, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, Store, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, Subtract, 64, -1, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, 
HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, SubtractSaturate, 64, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, SubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512, SumAbsoluteDifferences, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, SumAbsoluteDifferencesInBlock32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512, TernaryLogic, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(AVX512, UnpackHigh, 64, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, UnpackLow, 64, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, Xor, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_vpxorq, INS_vpxorq, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, 
HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) +#define LAST_NI_AVX512 NI_AVX512_Xor // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512F.VL Intrinsics -#define FIRST_NI_AVX512F_VL NI_AVX512F_VL_Abs -HARDWARE_INTRINSIC(AVX512F_VL, Abs, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F_VL, AlignRight32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F_VL, AlignRight64, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F_VL, CompareGreaterThan, -1, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F_VL, CompareGreaterThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F_VL, CompareLessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F_VL, CompareLessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F_VL, CompareNotEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Byte, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128ByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Int16, -1, 1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Int16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_vpmovsqw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Int32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Int32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128SByte, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128SByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdb, INS_invalid, INS_vpmovsqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_vpmovusqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt32WithTruncation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256UInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256UInt32WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F_VL, Fixup, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F_VL, GetExponent, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpps, INS_vgetexppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F_VL, GetMantissa, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd}, HW_Category_IMM, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F_VL, Max, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaxsq, INS_vpmaxuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512F_VL, Min, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpminsq, INS_vpminuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar2x64x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar4x32x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar4x64, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar4x64x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar8x32x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512F_VL, Reciprocal14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ps, INS_vrcp14pd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F_VL, ReciprocalSqrt14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ps, INS_vrsqrt14pd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F_VL, RotateLeft, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprold, INS_vprold, INS_vprolq, INS_vprolq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F_VL, RotateLeftVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprolvd, INS_vprolvd, INS_vprolvq, INS_vprolvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F_VL, RotateRight, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprord, INS_vprord, INS_vprorq, INS_vprorq, INS_invalid, 
INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F_VL, RotateRightVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F_VL, RoundScale, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F_VL, Scale, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F_VL, ShiftRightArithmetic, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F_VL, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F_VL, Shuffle2x128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F_VL, TernaryLogic, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -#define LAST_NI_AVX512F_VL NI_AVX512F_VL_TernaryLogic - -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512F.X64 Intrinsics -#define FIRST_NI_AVX512F_X64 NI_AVX512F_X64_ConvertScalarToVector128Double -HARDWARE_INTRINSIC(AVX512F_X64, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_vcvtusi2sd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F_X64, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_vcvtusi2ss64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F_X64, ConvertToInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si64, 
INS_cvtsd2si64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi64, INS_vcvtsd2usi64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi64, INS_vcvttsd2usi64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -#define LAST_NI_AVX512F_X64 NI_AVX512F_X64_ConvertToUInt64WithTruncation - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512BW Intrinsics -#define FIRST_NI_AVX512BW NI_AVX512BW_Abs -HARDWARE_INTRINSIC(AVX512BW, Abs, 64, 1, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512BW, Add, 64, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512BW, AddSaturate, 64, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512BW, AlignRight, 64, 3, {INS_palignr, INS_palignr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, Average, 64, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512BW, BlendVariable, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW, BroadcastScalarToVector512, 64, 1, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, CompareEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW, CompareGreaterThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW, 
CompareGreaterThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW, CompareLessThan, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW, CompareLessThanOrEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW, CompareNotEqual, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256Byte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256ByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256SByte, 64, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256SByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW, ConvertToVector512Int16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512BW, ConvertToVector512UInt16, 64, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512BW, LoadVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW, Max, 64, 2, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512BW, Min, 64, 2, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512BW, MultiplyAddAdjacent, 64, 2, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, MultiplyHigh, 64, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512BW, MultiplyHighRoundScale, 64, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, MultiplyLow, 64, 2, {INS_invalid, INS_invalid, INS_pmullw, 
INS_pmullw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512BW, PackSignedSaturate, 64, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, PackUnsignedSaturate, 64, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, PermuteVar32x16, 64, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512BW, PermuteVar32x16x2, 64, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512BW, ShiftLeftLogical, 64, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, ShiftLeftLogical128BitLane, 64, 2, {INS_pslldq, INS_pslldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, ShiftLeftLogicalVariable, 64, 2, {INS_invalid, INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, ShiftRightArithmetic, 64, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, ShiftRightArithmeticVariable, 64, 2, {INS_invalid, INS_invalid, INS_vpsravw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, ShiftRightLogical, 64, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, ShiftRightLogical128BitLane, 64, 2, {INS_psrldq, INS_psrldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, ShiftRightLogicalVariable, 64, 2, {INS_invalid, INS_invalid, INS_vpsrlvw, INS_vpsrlvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, Shuffle, 64, 2, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, ShuffleHigh, 64, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, ShuffleLow, 64, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, Store, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) 
-HARDWARE_INTRINSIC(AVX512BW, Subtract, 64, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, SubtractSaturate, 64, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, SumAbsoluteDifferences, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, SumAbsoluteDifferencesInBlock32, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, UnpackHigh, 64, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, UnpackLow, 64, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -#define LAST_NI_AVX512BW NI_AVX512BW_UnpackLow - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, 
TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512BW.VL Intrinsics -#define FIRST_NI_AVX512BW_VL NI_AVX512BW_VL_CompareGreaterThan -HARDWARE_INTRINSIC(AVX512BW_VL, CompareGreaterThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW_VL, CompareGreaterThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW_VL, CompareLessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW_VL, CompareLessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW_VL, CompareNotEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX512BW_VL, ConvertToVector128Byte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW_VL, ConvertToVector128ByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW_VL, ConvertToVector128SByte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW_VL, ConvertToVector128SByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar16x16, 32, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar16x16x2, 32, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar8x16 , 16, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar8x16x2, 16, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512BW_VL, ShiftLeftLogicalVariable, 
-1, 2, {INS_invalid, INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW_VL, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsravw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW_VL, ShiftRightLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsrlvw, INS_vpsrlvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW_VL, SumAbsoluteDifferencesInBlock32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -#define LAST_NI_AVX512BW_VL NI_AVX512BW_VL_SumAbsoluteDifferencesInBlock32 - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512CD Intrinsics -#define FIRST_NI_AVX512CD NI_AVX512CD_DetectConflicts -HARDWARE_INTRINSIC(AVX512CD, DetectConflicts, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512CD, LeadingZeroCount, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -#define LAST_NI_AVX512CD NI_AVX512CD_LeadingZeroCount - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512CD.VL Intrinsics -#define FIRST_NI_AVX512CD_VL NI_AVX512CD_VL_DetectConflicts -HARDWARE_INTRINSIC(AVX512CD_VL, DetectConflicts, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512CD_VL, LeadingZeroCount, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -#define LAST_NI_AVX512CD_VL NI_AVX512CD_VL_LeadingZeroCount - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512DQ Intrinsics -#define FIRST_NI_AVX512DQ NI_AVX512DQ_And -HARDWARE_INTRINSIC(AVX512DQ, And, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512DQ, AndNot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512DQ, BroadcastPairScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512DQ, BroadcastVector128ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti64x2, INS_vbroadcasti64x2, INS_invalid, INS_vbroadcastf64x2}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512DQ, BroadcastVector256ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x8, INS_vbroadcasti32x8, INS_invalid, INS_invalid, INS_vbroadcastf32x8, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector256Single, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Double, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Int64, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Int64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512UInt64, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512UInt64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ, ExtractVector128, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti64x2, INS_vextracti64x2, INS_invalid, INS_vextractf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512DQ, ExtractVector256, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x8, INS_vextracti32x8, INS_invalid, INS_invalid, INS_vextractf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512DQ, InsertVector128, 64, 3, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti64x2, INS_vinserti64x2, INS_invalid, INS_vinsertf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512DQ, InsertVector256, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x8, INS_vinserti32x8, INS_invalid, INS_invalid, INS_vinsertf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512DQ, MultiplyLow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512DQ, Or, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512DQ, Range, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd}, HW_Category_IMM, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512DQ, RangeScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangess, INS_vrangesd}, HW_Category_IMM, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512DQ, Reduce, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512DQ, ReduceScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreducess, INS_vreducesd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512DQ, Xor, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, 
HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -#define LAST_NI_AVX512DQ NI_AVX512DQ_Xor - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512DQ.VL Intrinsics -#define FIRST_NI_AVX512DQ_VL NI_AVX512DQ_VL_BroadcastPairScalarToVector128 -HARDWARE_INTRINSIC(AVX512DQ_VL, BroadcastPairScalarToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512DQ_VL, BroadcastPairScalarToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Int64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Single, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128UInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Int64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Int64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256UInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256UInt64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, MultiplyLow, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512DQ_VL, Range, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd}, HW_Category_IMM, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512DQ_VL, Reduce, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -#define LAST_NI_AVX512DQ_VL NI_AVX512DQ_VL_Reduce +// AVX512.X64 Intrinsics +#define FIRST_NI_AVX512_X64 NI_AVX512_X64_ConvertScalarToVector128Double +HARDWARE_INTRINSIC(AVX512_X64, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_vcvtusi2sd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512_X64, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, 
INS_vcvtusi2ss64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512_X64, ConvertToInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si64, INS_cvtsd2si64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512_X64, ConvertToUInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi64, INS_vcvtsd2usi64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512_X64, ConvertToUInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi64, INS_vcvttsd2usi64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +#define LAST_NI_AVX512_X64 NI_AVX512_X64_ConvertToUInt64WithTruncation // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags @@ -1277,186 +1125,24 @@ HARDWARE_INTRINSIC(AVX512DQ_VL, Reduce, // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512VBMI Intrinsics #define FIRST_NI_AVX512VBMI NI_AVX512VBMI_MultiShift -HARDWARE_INTRINSIC(AVX512VBMI, MultiShift, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512VBMI, MultiShift, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar16x8, 16, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar16x8x2, 16, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar32x8, 32, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar32x8x2, 32, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8, 64, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8x2, 64, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) #define LAST_NI_AVX512VBMI NI_AVX512VBMI_PermuteVar64x8x2 -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512VBMI.VL Intrinsics -#define FIRST_NI_AVX512VBMI_VL NI_AVX512VBMI_VL_MultiShift -HARDWARE_INTRINSIC(AVX512VBMI_VL, MultiShift, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar16x8, 16, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar16x8x2, 16, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar32x8, 32, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar32x8x2, 32, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -#define LAST_NI_AVX512VBMI_VL NI_AVX512VBMI_VL_PermuteVar32x8x2 - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX10V1 Intrinsics -#define FIRST_NI_AVX10v1 NI_AVX10v1_Abs -HARDWARE_INTRINSIC(AVX10v1, Abs, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1, AddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, AlignRight32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v1, AlignRight64, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v1, BroadcastPairScalarToVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, BroadcastPairScalarToVector256, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, CompareGreaterThan, -1, 2, {INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX10v1, CompareGreaterThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX10v1, CompareLessThan, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX10v1, CompareLessThanOrEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX10v1, CompareNotEqual, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(AVX10v1, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_vcvtusi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_vcvtusi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si32, INS_cvtsd2si32}, HW_Category_SIMDScalar, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToUInt32, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi32, INS_vcvtsd2usi32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, ConvertToUInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi32, INS_vcvttsd2usi32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Byte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128ByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Int16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Int16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_vpmovsqw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Int32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Int32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Int64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128SByte, -1, 1, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128SByteWithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_vpmovsdb, INS_invalid, INS_vpmovsqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128Single, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128UInt16, -1, 
1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128UInt16WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_vpmovusqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128UInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128UInt32WithSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128UInt32WithTruncation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector128UInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector256Double, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_vcvtudq2pd, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector256Int64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector256Int64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector256Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector256UInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector256UInt32WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector256UInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1, ConvertToVector256UInt64WithTruncation, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1, DetectConflicts, -1, 1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, DivideScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, Fixup, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v1, FixupScalar, 16, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmss, INS_vfixupimmsd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, FusedMultiplyAddNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, FusedMultiplyAddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, FusedMultiplySubtractNegatedScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, 
HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, FusedMultiplySubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, GetExponent, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpps, INS_vgetexppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, GetExponentScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpss, INS_vgetexpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, GetMantissa, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd}, HW_Category_IMM, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, GetMantissaScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantss, INS_vgetmantsd}, HW_Category_IMM, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, LeadingZeroCount, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, Max, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaxsq, INS_vpmaxuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX10v1, Min, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpminsq, INS_vpminuq, INS_invalid, 
INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX10v1, MultiShift, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX10v1, MultiplyLow, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX10v1, MultiplyScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x16, 32, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x16x2, 32, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x8, 16, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x8x2, 16, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar2x64x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, 
HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar32x8, 32, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar32x8x2, 32, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar4x32x2, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar4x64, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar4x64x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar8x16, 16, 2, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar8x16x2, 16, 3, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX10v1, PermuteVar8x32x2, 32, 3, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX10v1, Range, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd}, HW_Category_IMM, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, RangeScalar, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangess, INS_vrangesd}, HW_Category_IMM, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, Reciprocal14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ps, INS_vrcp14pd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, Reciprocal14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ss, INS_vrcp14sd}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, ReciprocalSqrt14, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ps, INS_vrsqrt14pd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, ReciprocalSqrt14Scalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ss, INS_vrsqrt14sd}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, Reduce, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v1, ReduceScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreducess, INS_vreducesd}, HW_Category_IMM, 
HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, RotateLeft, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprold, INS_vprold, INS_vprolq, INS_vprolq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v1, RotateLeftVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprolvd, INS_vprolvd, INS_vprolvq, INS_vprolvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, RotateRight, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprord, INS_vprord, INS_vprorq, INS_vprorq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v1, RotateRightVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, RoundScale, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v1, RoundScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaless, INS_vrndscalesd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX10v1, Scale, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, ScaleScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefss, INS_vscalefsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) 
-HARDWARE_INTRINSIC(AVX10v1, ShiftLeftLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, ShiftRightArithmetic, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v1, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsravw, INS_invalid, INS_invalid, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, ShiftRightLogicalVariable, -1, 2, {INS_invalid, INS_invalid, INS_vpsrlvw, INS_vpsrlvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1, Shuffle2x128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v1, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, SubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1, SumAbsoluteDifferencesInBlock32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v1, 
TernaryLogic, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -#define LAST_NI_AVX10v1 NI_AVX10v1_TernaryLogic - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX10V1_V512 Intrinsics -#define FIRST_NI_AVX10v1_V512 NI_AVX10v1_V512_And -HARDWARE_INTRINSIC(AVX10v1_V512, And, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX10v1_V512, AndNot, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX10v1_V512, BroadcastPairScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1_V512, BroadcastVector128ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti64x2, INS_vbroadcasti64x2, INS_invalid, INS_vbroadcastf64x2}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1_V512, BroadcastVector256ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x8, INS_vbroadcasti32x8, INS_invalid, INS_invalid, INS_vbroadcastf32x8, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1_V512, ConvertToVector256Single, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, ConvertToVector512Double, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, ConvertToVector512Int64, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, ConvertToVector512Int64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1_V512, ConvertToVector512UInt64, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1_V512, ConvertToVector512UInt64WithTruncation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v1_V512, DetectConflicts, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1_V512, ExtractVector128, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti64x2, INS_vextracti64x2, INS_invalid, INS_vextractf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v1_V512, ExtractVector256, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x8, INS_vextracti32x8, INS_invalid, INS_invalid, INS_vextractf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v1_V512, InsertVector128, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti64x2, INS_vinserti64x2, INS_invalid, INS_vinsertf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v1_V512, InsertVector256, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x8, INS_vinserti32x8, INS_invalid, INS_invalid, INS_vinsertf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v1_V512, LeadingZeroCount, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1_V512, MultiShift, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX10v1_V512, MultiplyLow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX10v1_V512, Or, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX10v1_V512, PermuteVar64x8, 64, 2, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX10v1_V512, PermuteVar64x8x2, 64, 3, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX10v1_V512, Range, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd}, HW_Category_IMM, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX10v1_V512, Reduce, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX10v1_V512, Xor, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -#define LAST_NI_AVX10v1_V512 NI_AVX10v1_V512_Xor - -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX512F.X64 Intrinsics -#define FIRST_NI_AVX10v1_X64 NI_AVX10v1_X64_ConvertScalarToVector128Double -HARDWARE_INTRINSIC(AVX10v1_X64, ConvertScalarToVector128Double, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_vcvtusi2sd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1_X64, ConvertScalarToVector128Single, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_vcvtusi2ss64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1_X64, ConvertToInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si64, 
INS_cvtsd2si64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1_X64, ConvertToUInt64, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi64, INS_vcvtsd2usi64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v1_X64, ConvertToUInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi64, INS_vcvttsd2usi64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -#define LAST_NI_AVX10v1_X64 NI_AVX10v1_X64_ConvertToUInt64WithTruncation - // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX10v2 Intrinsics #define FIRST_NI_AVX10v2 NI_AVX10v2_ConvertToByteWithSaturationAndZeroExtendToInt32 -HARDWARE_INTRINSIC(AVX10v2, 
ConvertToByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX10v2, ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithSaturationAndZeroExtendToInt32, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX10v2, ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt32WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2dqs, INS_vcvttpd2dqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) 
HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorInt64WithTruncationSaturation, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qqs, INS_vcvttpd2qqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) @@ -1465,27 +1151,10 @@ HARDWARE_INTRINSIC(AVX10v2, ConvertToVectorUInt64WithTruncationSaturatio HARDWARE_INTRINSIC(AVX10v2, MinMax, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxps, INS_vminmaxpd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX10v2, MinMaxScalar, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxss, INS_vminmaxsd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX10v2, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_vmovw, INS_vmovw, INS_vmovd, INS_vmovd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX10v2, MultipleSumAbsoluteDifferences, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vmpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX10v2, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_vmovw, INS_vmovw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) #define LAST_NI_AVX10v2 NI_AVX10v2_StoreScalar -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// AVX10v2_V512 Intrinsics -#define FIRST_NI_AVX10v2_V512 NI_AVX10v2_V512_ConvertToByteWithSaturationAndZeroExtendToInt32 -HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToByteWithSaturationAndZeroExtendToInt32, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToByteWithTruncatedSaturationAndZeroExtendToInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2iubs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToSByteWithSaturationAndZeroExtendToInt32, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ibs, INS_invalid}, 
HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToSByteWithTruncatedSaturationAndZeroExtendToInt32, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2ibs, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToVectorInt32WithTruncationSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2dqs, INS_vcvttpd2dqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToVectorInt64WithTruncationSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qqs, INS_vcvttpd2qqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToVectorUInt32WithTruncationSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udqs, INS_vcvttpd2udqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2_V512, ConvertToVectorUInt64WithTruncationSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqqs, INS_vcvttpd2uqqs}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2_V512, MinMax, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vminmaxps, INS_vminmaxpd}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX10v2_V512, MultipleSumAbsoluteDifferences, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vmpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -#define 
LAST_NI_AVX10v2_V512 NI_AVX10v2_V512_MultipleSumAbsoluteDifferences - // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} @@ -1694,42 +1363,41 @@ HARDWARE_INTRINSIC(SSE, COMISS, HARDWARE_INTRINSIC(SSE, UCOMISS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, COMISD, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, UCOMISD, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE41, PTEST, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(AVX, PTEST, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(EVEX, KORTEST, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(EVEX, KTEST, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(EVEX, PTESTM, 0, 2, {INS_vptestmb, INS_vptestmb, INS_vptestmw, INS_vptestmw, INS_vptestmd, INS_vptestmd, INS_vptestmq, INS_vptestmq, INS_vptestmd, INS_vptestmq}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(EVEX, PTESTNM, 0, 2, {INS_vptestnmb, INS_vptestnmb, INS_vptestnmw, INS_vptestnmw, INS_vptestnmd, INS_vptestnmd, INS_vptestnmq, INS_vptestnmq, INS_vptestnmd, INS_vptestnmq}, HW_Category_SimpleSIMD, HW_Flag_Commutative) - -HARDWARE_INTRINSIC(EVEX, AddMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, AndMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, AndNotMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, BlendVariableMask, -1, 3, {INS_vpblendmb, INS_vpblendmb, INS_vpblendmw, INS_vpblendmw, INS_vpblendmd, INS_vpblendmd, INS_vpblendmq, INS_vpblendmq, INS_vblendmps, INS_vblendmpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(EVEX, CompareMask, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, 
INS_vcmppd}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, CompareEqualMask, -1, 2, {INS_vpcmpeqb, INS_vpcmpeqb, INS_vpcmpeqw, INS_vpcmpeqw, INS_vpcmpeqd, INS_vpcmpeqd, INS_vpcmpeqq, INS_vpcmpeqq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(EVEX, CompareGreaterThanMask, -1, 2, {INS_vpcmpgtb, INS_vpcmpub, INS_vpcmpgtw, INS_vpcmpuw, INS_vpcmpgtd, INS_vpcmpud, INS_vpcmpgtq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, CompareGreaterThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, CompareLessThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, CompareLessThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, CompareNotEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(EVEX, CompareNotGreaterThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, CompareNotGreaterThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, 
INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, CompareNotLessThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, CompareNotLessThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, CompareOrderedMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, CompareUnorderedMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, ConvertMaskToVector, -1, 1, {INS_vpmovm2b, INS_vpmovm2b, INS_vpmovm2w, INS_vpmovm2w, INS_vpmovm2d, INS_vpmovm2d, INS_vpmovm2q, INS_vpmovm2q, INS_vpmovm2d, INS_vpmovm2q}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, ConvertVectorToMask, -1, 1, {INS_vpmovb2m, INS_vpmovb2m, INS_vpmovw2m, INS_vpmovw2m, INS_vpmovd2m, INS_vpmovd2m, INS_vpmovq2m, INS_vpmovq2m, INS_vpmovd2m, INS_vpmovq2m}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, MoveMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(EVEX, NotMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, op_EqualityMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative) -HARDWARE_INTRINSIC(EVEX, op_InequalityMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative) -HARDWARE_INTRINSIC(EVEX, OrMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, ShiftLeftMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(EVEX, ShiftRightMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(EVEX, XorMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(EVEX, XnorMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(SSE41, PTEST, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, 
INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, PTEST, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX512, KORTEST, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX512, KTEST, 0, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX512, PTESTM, 0, 2, {INS_vptestmb, INS_vptestmb, INS_vptestmw, INS_vptestmw, INS_vptestmd, INS_vptestmd, INS_vptestmq, INS_vptestmq, INS_vptestmd, INS_vptestmq}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, PTESTNM, 0, 2, {INS_vptestnmb, INS_vptestnmb, INS_vptestnmw, INS_vptestnmw, INS_vptestnmd, INS_vptestnmd, INS_vptestnmq, INS_vptestnmq, INS_vptestnmd, INS_vptestnmq}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, AddMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, AndMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, AndNotMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, 
HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, BlendVariableMask, -1, 3, {INS_vpblendmb, INS_vpblendmb, INS_vpblendmw, INS_vpblendmw, INS_vpblendmd, INS_vpblendmd, INS_vpblendmq, INS_vpblendmq, INS_vblendmps, INS_vblendmpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512, CompareMask, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareEqualMask, -1, 2, {INS_vpcmpeqb, INS_vpcmpeqb, INS_vpcmpeqw, INS_vpcmpeqw, INS_vpcmpeqd, INS_vpcmpeqd, INS_vpcmpeqq, INS_vpcmpeqq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AVX512, CompareGreaterThanMask, -1, 2, {INS_vpcmpgtb, INS_vpcmpub, INS_vpcmpgtw, INS_vpcmpuw, INS_vpcmpgtd, INS_vpcmpud, INS_vpcmpgtq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareGreaterThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareLessThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareLessThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareNotEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, 
HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareNotGreaterThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareNotLessThanMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareNotLessThanOrEqualMask, -1, 2, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareOrderedMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, CompareUnorderedMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, ConvertMaskToVector, -1, 1, {INS_vpmovm2b, INS_vpmovm2b, INS_vpmovm2w, INS_vpmovm2w, INS_vpmovm2d, INS_vpmovm2d, INS_vpmovm2q, INS_vpmovm2q, INS_vpmovm2d, INS_vpmovm2q}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, ConvertVectorToMask, -1, 1, {INS_vpmovb2m, INS_vpmovb2m, INS_vpmovw2m, INS_vpmovw2m, INS_vpmovd2m, INS_vpmovd2m, 
INS_vpmovq2m, INS_vpmovq2m, INS_vpmovd2m, INS_vpmovq2m}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, MoveMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(AVX512, NotMask, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, op_EqualityMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, op_InequalityMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512, OrMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, ShiftLeftMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, ShiftRightMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512, XorMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512, XnorMask, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) #endif // FEATURE_HW_INTRINSIC #undef HARDWARE_INTRINSIC diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 29fe70b74b62a6..b3f99b252e504c 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -36,24 +36,14 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) return InstructionSet_AVX_X64; case InstructionSet_AVX2: return InstructionSet_AVX2_X64; - case InstructionSet_AVX512BW: - return InstructionSet_AVX512BW_X64; - case InstructionSet_AVX512CD: - return InstructionSet_AVX512CD_X64; - case InstructionSet_AVX512DQ: - return InstructionSet_AVX512DQ_X64; - case InstructionSet_AVX512F: - return InstructionSet_AVX512F_X64; + case InstructionSet_AVX512: + return InstructionSet_AVX512_X64; case InstructionSet_AVX512VBMI: return InstructionSet_AVX512VBMI_X64; case InstructionSet_AVX10v1: return InstructionSet_AVX10v1_X64; - case InstructionSet_AVX10v1_V512: - return InstructionSet_AVX10v1_V512_X64; case InstructionSet_AVX10v2: return InstructionSet_AVX10v2_X64; - case InstructionSet_AVX10v2_V512: - return InstructionSet_AVX10v2_V512_X64; case InstructionSet_AVXVNNI: return InstructionSet_AVXVNNI_X64; case InstructionSet_AES: @@ -91,18 +81,17 @@ static CORINFO_InstructionSet VLVersionOfIsa(CORINFO_InstructionSet isa) { switch (isa) { - case InstructionSet_AVX512BW: - return InstructionSet_AVX512BW_VL; - case InstructionSet_AVX512CD: - return InstructionSet_AVX512CD_VL; - case InstructionSet_AVX512DQ: - return InstructionSet_AVX512DQ_VL; - case InstructionSet_AVX512F: 
- return InstructionSet_AVX512F_VL; + case InstructionSet_AVX512: case InstructionSet_AVX512VBMI: - return InstructionSet_AVX512VBMI_VL; + { + // These nested ISAs aren't tracked by the JIT support + return isa; + } + default: + { return InstructionSet_NONE; + } } } @@ -119,11 +108,19 @@ static CORINFO_InstructionSet V256VersionOfIsa(CORINFO_InstructionSet isa) switch (isa) { case InstructionSet_GFNI: + { return InstructionSet_GFNI_V256; + } + case InstructionSet_PCLMULQDQ: + { return InstructionSet_PCLMULQDQ_V256; + } + default: + { return InstructionSet_NONE; + } } } @@ -140,19 +137,28 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa) switch (isa) { case InstructionSet_AVX10v1: - return InstructionSet_AVX10v1_V512; case InstructionSet_AVX10v1_X64: - return InstructionSet_AVX10v1_V512_X64; case InstructionSet_AVX10v2: - return InstructionSet_AVX10v2_V512; case InstructionSet_AVX10v2_X64: - return InstructionSet_AVX10v2_V512_X64; + { + // These nested ISAs aren't tracked by the JIT support + return isa; + } + case InstructionSet_GFNI: + { return InstructionSet_GFNI_V512; + } + case InstructionSet_PCLMULQDQ: + { return InstructionSet_PCLMULQDQ_V512; + } + default: + { return InstructionSet_NONE; + } } } @@ -197,21 +203,10 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) } else if (strncmp(className + 3, "512", 3) == 0) { - if (strcmp(className + 6, "BW") == 0) - { - return InstructionSet_AVX512BW; - } - else if (strcmp(className + 6, "CD") == 0) + if ((strcmp(className + 6, "BW") == 0) || (strcmp(className + 6, "CD") == 0) || + (strcmp(className + 6, "DQ") == 0) || (strcmp(className + 6, "F") == 0)) { - return InstructionSet_AVX512CD; - } - else if (strcmp(className + 6, "DQ") == 0) - { - return InstructionSet_AVX512DQ; - } - else if (strcmp(className + 6, "F") == 0) - { - return InstructionSet_AVX512F; + return InstructionSet_AVX512; } else if (strcmp(className + 6, "Vbmi") == 0) { @@ -425,11 +420,10 @@ int 
HWIntrinsicInfo::lookupImmUpperBound(NamedIntrinsic id) { case NI_AVX_Compare: case NI_AVX_CompareScalar: - case NI_AVX512F_Compare: - case NI_EVEX_CompareMask: + case NI_AVX512_Compare: + case NI_AVX512_CompareMask: case NI_AVX10v2_MinMaxScalar: case NI_AVX10v2_MinMax: - case NI_AVX10v2_V512_MinMax: { assert(!HWIntrinsicInfo::HasFullRangeImm(id)); return 31; // enum FloatComparisonMode has 32 values @@ -444,17 +438,10 @@ int HWIntrinsicInfo::lookupImmUpperBound(NamedIntrinsic id) return 8; } - case NI_AVX512F_GetMantissa: - case NI_AVX512F_GetMantissaScalar: - case NI_AVX512F_VL_GetMantissa: - case NI_AVX512DQ_Range: - case NI_AVX512DQ_RangeScalar: - case NI_AVX512DQ_VL_Range: - case NI_AVX10v1_GetMantissa: - case NI_AVX10v1_GetMantissaScalar: - case NI_AVX10v1_Range: - case NI_AVX10v1_RangeScalar: - case NI_AVX10v1_V512_Range: + case NI_AVX512_GetMantissa: + case NI_AVX512_GetMantissaScalar: + case NI_AVX512_Range: + case NI_AVX512_RangeScalar: { assert(!HWIntrinsicInfo::HasFullRangeImm(id)); return 15; @@ -604,9 +591,9 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { case FloatComparisonMode::OrderedEqualNonSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareEqualMask; + return NI_AVX512_CompareEqualMask; } else if (intrinsic == NI_AVX_CompareScalar) { @@ -624,9 +611,9 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic case FloatComparisonMode::OrderedGreaterThanSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareGreaterThanMask; + return NI_AVX512_CompareGreaterThanMask; } else if (intrinsic == NI_AVX_CompareScalar) { @@ -644,9 +631,9 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic case FloatComparisonMode::OrderedGreaterThanOrEqualSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - 
return NI_EVEX_CompareGreaterThanOrEqualMask; + return NI_AVX512_CompareGreaterThanOrEqualMask; } else if (intrinsic == NI_AVX_CompareScalar) { @@ -665,9 +652,9 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic case FloatComparisonMode::OrderedLessThanSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareLessThanMask; + return NI_AVX512_CompareLessThanMask; } else if (intrinsic == NI_AVX_CompareScalar) { @@ -685,9 +672,9 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic case FloatComparisonMode::OrderedLessThanOrEqualSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareLessThanOrEqualMask; + return NI_AVX512_CompareLessThanOrEqualMask; } else if (intrinsic == NI_AVX_CompareScalar) { @@ -706,9 +693,9 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic case FloatComparisonMode::UnorderedNotEqualNonSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareNotEqualMask; + return NI_AVX512_CompareNotEqualMask; } else if (intrinsic == NI_AVX_CompareScalar) { @@ -726,9 +713,9 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic case FloatComparisonMode::UnorderedNotGreaterThanSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareNotGreaterThanMask; + return NI_AVX512_CompareNotGreaterThanMask; } else if (intrinsic == NI_AVX_CompareScalar) { @@ -747,9 +734,9 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic case FloatComparisonMode::UnorderedNotGreaterThanOrEqualSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareNotGreaterThanOrEqualMask; + return NI_AVX512_CompareNotGreaterThanOrEqualMask; } else 
if (intrinsic == NI_AVX_CompareScalar) { @@ -769,9 +756,9 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic case FloatComparisonMode::UnorderedNotLessThanSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareNotLessThanMask; + return NI_AVX512_CompareNotLessThanMask; } else if (intrinsic == NI_AVX_CompareScalar) { @@ -789,9 +776,9 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic case FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareNotLessThanOrEqualMask; + return NI_AVX512_CompareNotLessThanOrEqualMask; } else if (intrinsic == NI_AVX_CompareScalar) { @@ -810,9 +797,9 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic case FloatComparisonMode::OrderedNonSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareOrderedMask; + return NI_AVX512_CompareOrderedMask; } else if (intrinsic == NI_AVX_CompareScalar) { @@ -830,9 +817,9 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic case FloatComparisonMode::UnorderedNonSignaling: { - if (intrinsic == NI_EVEX_CompareMask) + if (intrinsic == NI_AVX512_CompareMask) { - return NI_EVEX_CompareUnorderedMask; + return NI_AVX512_CompareUnorderedMask; } else if (intrinsic == NI_AVX_CompareScalar) { @@ -874,20 +861,9 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) case InstructionSet_AVX_X64: case InstructionSet_AVX2: case InstructionSet_AVX2_X64: - case InstructionSet_AVX512F: - case InstructionSet_AVX512F_VL: - case InstructionSet_AVX512F_X64: - case InstructionSet_AVX512BW: - case InstructionSet_AVX512BW_VL: - case InstructionSet_AVX512BW_X64: - case InstructionSet_AVX512CD: - case InstructionSet_AVX512CD_VL: - case 
InstructionSet_AVX512CD_X64: - case InstructionSet_AVX512DQ: - case InstructionSet_AVX512DQ_VL: - case InstructionSet_AVX512DQ_X64: + case InstructionSet_AVX512: + case InstructionSet_AVX512_X64: case InstructionSet_AVX512VBMI: - case InstructionSet_AVX512VBMI_VL: case InstructionSet_AVX512VBMI_X64: case InstructionSet_AVXVNNI: case InstructionSet_AVXVNNI_X64: @@ -926,13 +902,8 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) case InstructionSet_X86Serialize_X64: case InstructionSet_AVX10v1: case InstructionSet_AVX10v1_X64: - case InstructionSet_AVX10v1_V512: - case InstructionSet_AVX10v1_V512_X64: case InstructionSet_AVX10v2: case InstructionSet_AVX10v2_X64: - case InstructionSet_AVX10v2_V512: - case InstructionSet_AVX10v2_V512_X64: - case InstructionSet_EVEX: case InstructionSet_GFNI: case InstructionSet_GFNI_X64: case InstructionSet_GFNI_V256: @@ -1001,7 +972,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim case NI_SSE2_CompareEqual: case NI_SSE2_CompareScalarEqual: case NI_AVX_CompareEqual: - case NI_EVEX_CompareEqualMask: + case NI_AVX512_CompareEqualMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1019,7 +990,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim case NI_SSE2_CompareGreaterThan: case NI_SSE2_CompareScalarGreaterThan: case NI_AVX_CompareGreaterThan: - case NI_EVEX_CompareGreaterThanMask: + case NI_AVX512_CompareGreaterThanMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1029,7 +1000,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); return static_cast(FloatComparisonMode::OrderedGreaterThanSignaling); } - else if ((id == NI_EVEX_CompareGreaterThanMask) && varTypeIsUnsigned(simdBaseType)) + else if ((id == NI_AVX512_CompareGreaterThanMask) && varTypeIsUnsigned(simdBaseType)) { // TODO-XARCH-CQ: Allow the other integer paths to use the EVEX encoding 
return static_cast(IntComparisonMode::GreaterThan); @@ -1042,13 +1013,13 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim case NI_SSE2_CompareLessThan: case NI_SSE2_CompareScalarLessThan: case NI_AVX_CompareLessThan: - case NI_EVEX_CompareLessThanMask: + case NI_AVX512_CompareLessThanMask: { if (varTypeIsFloating(simdBaseType)) { return static_cast(FloatComparisonMode::OrderedLessThanSignaling); } - else if (id == NI_EVEX_CompareLessThanMask) + else if (id == NI_AVX512_CompareLessThanMask) { // TODO-XARCH-CQ: Allow the other integer paths to use the EVEX encoding return static_cast(IntComparisonMode::LessThan); @@ -1061,7 +1032,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim case NI_SSE2_CompareGreaterThanOrEqual: case NI_SSE2_CompareScalarGreaterThanOrEqual: case NI_AVX_CompareGreaterThanOrEqual: - case NI_EVEX_CompareGreaterThanOrEqualMask: + case NI_AVX512_CompareGreaterThanOrEqualMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1073,7 +1044,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim } else { - assert(id == NI_EVEX_CompareGreaterThanOrEqualMask); + assert(id == NI_AVX512_CompareGreaterThanOrEqualMask); return static_cast(IntComparisonMode::GreaterThanOrEqual); } break; @@ -1084,7 +1055,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim case NI_SSE2_CompareLessThanOrEqual: case NI_SSE2_CompareScalarLessThanOrEqual: case NI_AVX_CompareLessThanOrEqual: - case NI_EVEX_CompareLessThanOrEqualMask: + case NI_AVX512_CompareLessThanOrEqualMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1092,7 +1063,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim } else { - assert(id == NI_EVEX_CompareLessThanOrEqualMask); + assert(id == NI_AVX512_CompareLessThanOrEqualMask); return static_cast(IntComparisonMode::LessThanOrEqual); } break; @@ -1103,7 +1074,7 @@ int 
HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim case NI_SSE2_CompareNotEqual: case NI_SSE2_CompareScalarNotEqual: case NI_AVX_CompareNotEqual: - case NI_EVEX_CompareNotEqualMask: + case NI_AVX512_CompareNotEqualMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1111,7 +1082,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim } else { - assert(id == NI_EVEX_CompareNotEqualMask); + assert(id == NI_AVX512_CompareNotEqualMask); return static_cast(IntComparisonMode::NotEqual); } break; @@ -1122,7 +1093,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim case NI_SSE2_CompareNotGreaterThan: case NI_SSE2_CompareScalarNotGreaterThan: case NI_AVX_CompareNotGreaterThan: - case NI_EVEX_CompareNotGreaterThanMask: + case NI_AVX512_CompareNotGreaterThanMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1134,7 +1105,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim } else { - assert(id == NI_EVEX_CompareNotGreaterThanMask); + assert(id == NI_AVX512_CompareNotGreaterThanMask); return static_cast(IntComparisonMode::LessThanOrEqual); } break; @@ -1145,7 +1116,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim case NI_SSE2_CompareNotLessThan: case NI_SSE2_CompareScalarNotLessThan: case NI_AVX_CompareNotLessThan: - case NI_EVEX_CompareNotLessThanMask: + case NI_AVX512_CompareNotLessThanMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1153,7 +1124,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim } else { - assert(id == NI_EVEX_CompareNotLessThanMask); + assert(id == NI_AVX512_CompareNotLessThanMask); return static_cast(IntComparisonMode::GreaterThanOrEqual); } break; @@ -1164,7 +1135,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim case NI_SSE2_CompareNotGreaterThanOrEqual: case NI_SSE2_CompareScalarNotGreaterThanOrEqual: case 
NI_AVX_CompareNotGreaterThanOrEqual: - case NI_EVEX_CompareNotGreaterThanOrEqualMask: + case NI_AVX512_CompareNotGreaterThanOrEqualMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1176,7 +1147,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim } else { - assert(id == NI_EVEX_CompareNotGreaterThanOrEqualMask); + assert(id == NI_AVX512_CompareNotGreaterThanOrEqualMask); return static_cast(IntComparisonMode::LessThan); } break; @@ -1187,7 +1158,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim case NI_SSE2_CompareNotLessThanOrEqual: case NI_SSE2_CompareScalarNotLessThanOrEqual: case NI_AVX_CompareNotLessThanOrEqual: - case NI_EVEX_CompareNotLessThanOrEqualMask: + case NI_AVX512_CompareNotLessThanOrEqualMask: { if (varTypeIsFloating(simdBaseType)) { @@ -1195,7 +1166,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim } else { - assert(id == NI_EVEX_CompareNotLessThanOrEqualMask); + assert(id == NI_AVX512_CompareNotLessThanOrEqualMask); return static_cast(IntComparisonMode::GreaterThan); } break; @@ -1206,7 +1177,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim case NI_SSE2_CompareOrdered: case NI_SSE2_CompareScalarOrdered: case NI_AVX_CompareOrdered: - case NI_EVEX_CompareOrderedMask: + case NI_AVX512_CompareOrderedMask: { assert(varTypeIsFloating(simdBaseType)); return static_cast(FloatComparisonMode::OrderedNonSignaling); @@ -1217,7 +1188,7 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim case NI_SSE2_CompareUnordered: case NI_SSE2_CompareScalarUnordered: case NI_AVX_CompareUnordered: - case NI_EVEX_CompareUnorderedMask: + case NI_AVX512_CompareUnorderedMask: { assert(varTypeIsFloating(simdBaseType)); return static_cast(FloatComparisonMode::UnorderedNonSignaling); @@ -1309,14 +1280,9 @@ GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdT case 
NI_AVX2_ShiftLeftLogical: case NI_AVX2_ShiftRightArithmetic: case NI_AVX2_ShiftRightLogical: - case NI_AVX512F_ShiftLeftLogical: - case NI_AVX512F_ShiftRightArithmetic: - case NI_AVX512F_ShiftRightLogical: - case NI_AVX512F_VL_ShiftRightArithmetic: - case NI_AVX512BW_ShiftLeftLogical: - case NI_AVX512BW_ShiftRightArithmetic: - case NI_AVX512BW_ShiftRightLogical: - case NI_AVX10v1_ShiftRightArithmetic: + case NI_AVX512_ShiftLeftLogical: + case NI_AVX512_ShiftRightArithmetic: + case NI_AVX512_ShiftRightLogical: { // These intrinsics have overloads that take op2 in a simd register and just read the lowest 8-bits @@ -1329,24 +1295,16 @@ GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdT return gtNewSimdHWIntrinsicNode(simdType, op1, tmpOp, intrinsic, simdBaseJitType, genTypeSize(simdType)); } - case NI_AVX512F_RotateLeft: - case NI_AVX512F_RotateRight: - case NI_AVX512F_VL_RotateLeft: - case NI_AVX512F_VL_RotateRight: - case NI_AVX10v1_RotateLeft: - case NI_AVX10v1_RotateRight: + case NI_AVX512_RotateLeft: + case NI_AVX512_RotateRight: { var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); // These intrinsics have variants that take op2 in a simd register and read a unique shift per element intrinsic = static_cast(intrinsic + 1); - static_assert_no_msg(NI_AVX512F_RotateLeftVariable == (NI_AVX512F_RotateLeft + 1)); - static_assert_no_msg(NI_AVX512F_RotateRightVariable == (NI_AVX512F_RotateRight + 1)); - static_assert_no_msg(NI_AVX512F_VL_RotateLeftVariable == (NI_AVX512F_VL_RotateLeft + 1)); - static_assert_no_msg(NI_AVX512F_VL_RotateRightVariable == (NI_AVX512F_VL_RotateRight + 1)); - static_assert_no_msg(NI_AVX10v1_RotateLeftVariable == (NI_AVX10v1_RotateLeft + 1)); - static_assert_no_msg(NI_AVX10v1_RotateRightVariable == (NI_AVX10v1_RotateRight + 1)); + static_assert_no_msg(NI_AVX512_RotateLeftVariable == (NI_AVX512_RotateLeft + 1)); + static_assert_no_msg(NI_AVX512_RotateRightVariable == (NI_AVX512_RotateRight + 1)); 
impSpillSideEffect(true, stackState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); @@ -1461,9 +1419,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_SSE2_AndNot: case NI_AVX_AndNot: case NI_AVX2_AndNot: - case NI_AVX512F_AndNot: - case NI_AVX512DQ_AndNot: - case NI_AVX10v1_V512_AndNot: + case NI_AVX512_AndNot: { assert(sig->numArgs == 2); @@ -1532,7 +1488,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { if (simdSize == 64) { - intrinsic = NI_AVX512BW_AddSaturate; + intrinsic = NI_AVX512_AddSaturate; } else if (simdSize == 32) { @@ -1610,7 +1566,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, // The mask we need is ((a ^ b) & ~(b ^ c)) < 0 - if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { // tmpDup1 = a: 0xF0 // op1Dup1 = b: 0xCC @@ -2014,25 +1970,21 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); assert(varTypeIsLong(simdBaseType)); - if (IsBaselineVector512IsaSupportedOpportunistically() || - ((simdSize != 64) && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))) + + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { if (simdSize == 64) { - intrinsic = NI_AVX512DQ_ConvertToVector512Double; + intrinsic = NI_AVX512_ConvertToVector512Double; } else if (simdSize == 32) { - intrinsic = compOpportunisticallyDependsOn(InstructionSet_AVX10v1) - ? NI_AVX10v1_ConvertToVector256Double - : NI_AVX512DQ_VL_ConvertToVector256Double; + intrinsic = NI_AVX512_ConvertToVector256Double; } else { assert(simdSize == 16); - intrinsic = compOpportunisticallyDependsOn(InstructionSet_AVX10v1) - ? 
NI_AVX10v1_ConvertToVector128Double - : NI_AVX512DQ_VL_ConvertToVector128Double; + intrinsic = NI_AVX512_ConvertToVector128Double; } op1 = impSIMDPopStack(); retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); @@ -2079,8 +2031,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, assert(sig->numArgs == 1); assert(simdBaseType == TYP_DOUBLE); - if (IsBaselineVector512IsaSupportedOpportunistically() || - (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_LONG, simdBaseJitType, simdSize); @@ -2100,8 +2051,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - if (IsBaselineVector512IsaSupportedOpportunistically() || - (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_LONG, simdBaseJitType, simdSize); @@ -2127,39 +2077,24 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, intrinsic = NI_AVX_ConvertToVector256Single; break; case 64: - intrinsic = NI_AVX512F_ConvertToVector512Single; - break; - default: - unreached(); - } - } - else if (simdBaseType == TYP_UINT && simdSize != 64 && - compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - switch (simdSize) - { - case 16: - intrinsic = NI_AVX10v1_ConvertToVector128Single; - break; - case 32: - intrinsic = NI_AVX10v1_ConvertToVector256Single; + intrinsic = NI_AVX512_ConvertToVector512Single; break; default: unreached(); } } - else if (simdBaseType == TYP_UINT && IsBaselineVector512IsaSupportedOpportunistically()) + else if (simdBaseType == TYP_UINT && compOpportunisticallyDependsOn(InstructionSet_AVX512)) { switch (simdSize) { case 16: - intrinsic = NI_AVX512F_VL_ConvertToVector128Single; 
+ intrinsic = NI_AVX512_ConvertToVector128Single; break; case 32: - intrinsic = NI_AVX512F_VL_ConvertToVector256Single; + intrinsic = NI_AVX512_ConvertToVector256Single; break; case 64: - intrinsic = NI_AVX512F_ConvertToVector512Single; + intrinsic = NI_AVX512_ConvertToVector512Single; break; default: unreached(); @@ -2180,8 +2115,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, assert(sig->numArgs == 1); assert(simdBaseType == TYP_FLOAT); - if (IsBaselineVector512IsaSupportedOpportunistically() || - (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_UINT, simdBaseJitType, simdSize); @@ -2201,8 +2135,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - if (IsBaselineVector512IsaSupportedOpportunistically() || - (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_UINT, simdBaseJitType, simdSize); @@ -2216,8 +2149,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { assert(sig->numArgs == 1); assert(simdBaseType == TYP_DOUBLE); - if (IsBaselineVector512IsaSupportedOpportunistically() || - (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))) + + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { op1 = impSIMDPopStack(); retNode = gtNewSimdCvtNode(retType, op1, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize); @@ -2237,8 +2170,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - if (IsBaselineVector512IsaSupportedOpportunistically() || - (simdSize != 64 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { op1 = impSIMDPopStack(); retNode = 
gtNewSimdCvtNativeNode(retType, op1, CORINFO_TYPE_ULONG, simdBaseJitType, simdSize); @@ -2487,7 +2419,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, // Check to see if it is possible to emulate the integer division if (!(simdBaseType == TYP_INT && ((simdSize == 16 && compOpportunisticallyDependsOn(InstructionSet_AVX)) || - (simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX512F))))) + (simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX512))))) { break; } @@ -2609,7 +2541,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { op1 = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseJitType, simdSize); } - retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_EVEX_MoveMask, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_AVX512_MoveMask, simdBaseJitType, simdSize); break; } @@ -3240,8 +3172,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_SSE_LoadVector128: case NI_SSE2_LoadVector128: case NI_AVX_LoadVector256: - case NI_AVX512F_LoadVector512: - case NI_AVX512BW_LoadVector512: + case NI_AVX512_LoadVector512: case NI_Vector128_LoadUnsafe: case NI_Vector256_LoadUnsafe: case NI_Vector512_LoadUnsafe: @@ -3492,7 +3423,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, intrinsic = NI_SSE2_PackSignedSaturate; retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { if ((simdSize == 32) || (simdSize == 64)) { @@ -3508,37 +3439,37 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { case TYP_SHORT: { - intrinsic = NI_AVX512BW_ConvertToVector256SByteWithSaturation; + intrinsic = NI_AVX512_ConvertToVector256SByteWithSaturation; break; } case TYP_USHORT: { - intrinsic = NI_AVX512BW_ConvertToVector256ByteWithSaturation; + intrinsic = 
NI_AVX512_ConvertToVector256ByteWithSaturation; break; } case TYP_INT: { - intrinsic = NI_AVX512F_ConvertToVector256Int16WithSaturation; + intrinsic = NI_AVX512_ConvertToVector256Int16WithSaturation; break; } case TYP_UINT: { - intrinsic = NI_AVX512F_ConvertToVector256UInt16WithSaturation; + intrinsic = NI_AVX512_ConvertToVector256UInt16WithSaturation; break; } case TYP_LONG: { - intrinsic = NI_AVX512F_ConvertToVector256Int32WithSaturation; + intrinsic = NI_AVX512_ConvertToVector256Int32WithSaturation; break; } case TYP_ULONG: { - intrinsic = NI_AVX512F_ConvertToVector256UInt32WithSaturation; + intrinsic = NI_AVX512_ConvertToVector256UInt32WithSaturation; break; } @@ -3560,25 +3491,25 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { case TYP_USHORT: { - intrinsic = NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation; + intrinsic = NI_AVX512_ConvertToVector128ByteWithSaturation; break; } case TYP_UINT: { - intrinsic = NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation; + intrinsic = NI_AVX512_ConvertToVector128UInt16WithSaturation; break; } case TYP_LONG: { - intrinsic = NI_AVX512F_VL_ConvertToVector128Int32WithSaturation; + intrinsic = NI_AVX512_ConvertToVector128Int32WithSaturation; break; } case TYP_ULONG: { - intrinsic = NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation; + intrinsic = NI_AVX512_ConvertToVector128UInt32WithSaturation; break; } @@ -3844,7 +3775,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (simdSize == 64) { - intrinsic = NI_AVX512F_ShiftLeftLogicalVariable; + intrinsic = NI_AVX512_ShiftLeftLogicalVariable; } else { @@ -3943,8 +3874,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_SSE_Store: case NI_SSE2_Store: case NI_AVX_Store: - case NI_AVX512F_Store: - case NI_AVX512BW_Store: + case NI_AVX512_Store: { assert(retType == TYP_VOID); assert(sig->numArgs == 2); @@ -4070,7 +4000,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { if (simdSize == 64) 
{ - intrinsic = NI_AVX512BW_SubtractSaturate; + intrinsic = NI_AVX512_SubtractSaturate; } else if (simdSize == 32) { @@ -4148,7 +4078,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, // The mask we need is ((a ^ b) & (b ^ c)) < 0 - if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { // tmpDup1 = a: 0xF0 // op1Dup1 = b: 0xCC @@ -4241,7 +4171,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || compOpportunisticallyDependsOn(InstructionSet_AVX2)) { - assert((simdSize != 64) || IsBaselineVector512IsaSupportedDebugOnly()); + assert((simdSize != 64) || compIsaSupportedDebugOnly(InstructionSet_AVX512)); op1 = impSIMDPopStack(); @@ -4573,21 +4503,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } case NI_AVX2_PermuteVar8x32: - case NI_AVX512BW_PermuteVar32x16: - case NI_AVX512BW_VL_PermuteVar8x16: - case NI_AVX512BW_VL_PermuteVar16x16: - case NI_AVX512F_PermuteVar8x64: - case NI_AVX512F_PermuteVar16x32: - case NI_AVX512F_VL_PermuteVar4x64: + case NI_AVX512_PermuteVar4x64: + case NI_AVX512_PermuteVar8x16: + case NI_AVX512_PermuteVar8x64: + case NI_AVX512_PermuteVar16x16: + case NI_AVX512_PermuteVar16x32: + case NI_AVX512_PermuteVar32x16: + case NI_AVX512VBMI_PermuteVar16x8: + case NI_AVX512VBMI_PermuteVar32x8: case NI_AVX512VBMI_PermuteVar64x8: - case NI_AVX512VBMI_VL_PermuteVar16x8: - case NI_AVX512VBMI_VL_PermuteVar32x8: - case NI_AVX10v1_PermuteVar16x8: - case NI_AVX10v1_PermuteVar8x16: - case NI_AVX10v1_PermuteVar16x16: - case NI_AVX10v1_PermuteVar32x8: - case NI_AVX10v1_PermuteVar4x64: - case NI_AVX10v1_V512_PermuteVar64x8: { simdBaseJitType = getBaseJitTypeOfSIMDType(sig->retTypeSigClass); @@ -4601,11 +4525,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_AVX512F_Fixup: - case NI_AVX512F_FixupScalar: - case NI_AVX512F_VL_Fixup: - case 
NI_AVX10v1_Fixup: - case NI_AVX10v1_FixupScalar: + case NI_AVX512_Fixup: + case NI_AVX512_FixupScalar: { assert(sig->numArgs == 4); @@ -4637,9 +4558,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_AVX512F_TernaryLogic: - case NI_AVX512F_VL_TernaryLogic: - case NI_AVX10v1_TernaryLogic: + case NI_AVX512_TernaryLogic: { assert(sig->numArgs == 4); @@ -5230,8 +5149,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_AVX512F_BlendVariable: - case NI_AVX512BW_BlendVariable: + case NI_AVX512_BlendVariable: { assert(sig->numArgs == 3); @@ -5243,20 +5161,20 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { op3 = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op3, simdBaseJitType, simdSize); } - retNode = - gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, NI_EVEX_BlendVariableMask, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, NI_AVX512_BlendVariableMask, simdBaseJitType, + simdSize); break; } case NI_AVX_Compare: case NI_AVX_CompareScalar: - case NI_AVX512F_Compare: + case NI_AVX512_Compare: { assert(sig->numArgs == 3); - if (intrinsic == NI_AVX512F_Compare) + if (intrinsic == NI_AVX512_Compare) { - intrinsic = NI_EVEX_CompareMask; + intrinsic = NI_AVX512_CompareMask; retType = TYP_MASK; } @@ -5298,58 +5216,46 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_AVX512F_CompareEqual: - case NI_AVX512BW_CompareEqual: + case NI_AVX512_CompareEqual: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareEqualMask, simdBaseJitType, simdSize); + retNode = + gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareEqualMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareGreaterThan: - case 
NI_AVX512F_VL_CompareGreaterThan: - case NI_AVX10v1_CompareGreaterThan: - case NI_AVX512BW_CompareGreaterThan: - case NI_AVX512BW_VL_CompareGreaterThan: + case NI_AVX512_CompareGreaterThan: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = - gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareGreaterThanMask, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareGreaterThanMask, simdBaseJitType, + simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareGreaterThanOrEqual: - case NI_AVX512F_VL_CompareGreaterThanOrEqual: - case NI_AVX512BW_CompareGreaterThanOrEqual: - case NI_AVX512BW_VL_CompareGreaterThanOrEqual: - case NI_AVX10v1_CompareGreaterThanOrEqual: + case NI_AVX512_CompareGreaterThanOrEqual: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareGreaterThanOrEqualMask, + retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareGreaterThanOrEqualMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareLessThan: - case NI_AVX512F_VL_CompareLessThan: - case NI_AVX512BW_CompareLessThan: - case NI_AVX512BW_VL_CompareLessThan: - case NI_AVX10v1_CompareLessThan: + case NI_AVX512_CompareLessThan: { assert(sig->numArgs == 2); @@ -5357,33 +5263,25 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = - gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareLessThanMask, simdBaseJitType, simdSize); + gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareLessThanMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareLessThanOrEqual: - case 
NI_AVX512F_VL_CompareLessThanOrEqual: - case NI_AVX512BW_CompareLessThanOrEqual: - case NI_AVX512BW_VL_CompareLessThanOrEqual: - case NI_AVX10v1_CompareLessThanOrEqual: + case NI_AVX512_CompareLessThanOrEqual: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareLessThanOrEqualMask, simdBaseJitType, - simdSize); + retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareLessThanOrEqualMask, + simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareNotEqual: - case NI_AVX512F_VL_CompareNotEqual: - case NI_AVX512BW_CompareNotEqual: - case NI_AVX512BW_VL_CompareNotEqual: - case NI_AVX10v1_CompareNotEqual: + case NI_AVX512_CompareNotEqual: { assert(sig->numArgs == 2); @@ -5391,64 +5289,64 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = - gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareNotEqualMask, simdBaseJitType, simdSize); + gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareNotEqualMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareNotGreaterThan: + case NI_AVX512_CompareNotGreaterThan: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareNotGreaterThanMask, simdBaseJitType, + retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareNotGreaterThanMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareNotGreaterThanOrEqual: + case NI_AVX512_CompareNotGreaterThanOrEqual: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = 
gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareNotGreaterThanOrEqualMask, + retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareNotGreaterThanOrEqualMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareNotLessThan: + case NI_AVX512_CompareNotLessThan: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = - gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareNotLessThanMask, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareNotLessThanMask, simdBaseJitType, + simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareNotLessThanOrEqual: + case NI_AVX512_CompareNotLessThanOrEqual: { assert(sig->numArgs == 2); op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareNotLessThanOrEqualMask, + retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareNotLessThanOrEqualMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareOrdered: + case NI_AVX512_CompareOrdered: { assert(sig->numArgs == 2); @@ -5456,12 +5354,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = - gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareOrderedMask, simdBaseJitType, simdSize); + gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareOrderedMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } - case NI_AVX512F_CompareUnordered: + case NI_AVX512_CompareUnordered: { assert(sig->numArgs == 2); @@ -5469,7 +5367,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = 
impSIMDPopStack(); retNode = - gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareUnorderedMask, simdBaseJitType, simdSize); + gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_AVX512_CompareUnorderedMask, simdBaseJitType, simdSize); retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 053042ef9c9604..05c4b8d57ecddd 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -3897,7 +3897,7 @@ GenTree* Compiler::impImportStaticReadOnlyField(CORINFO_FIELD_HANDLE field, CORI #ifdef TARGET_XARCH if (simdType == TYP_SIMD64) { - hwAccelerated = compOpportunisticallyDependsOn(InstructionSet_AVX512F); + hwAccelerated = compOpportunisticallyDependsOn(InstructionSet_AVX512); } else if (simdType == TYP_SIMD32) { diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index e328d43b50b0de..776856ceff7175 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -4297,7 +4297,7 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, case NI_System_Math_FusedMultiplyAdd: { #ifdef TARGET_XARCH - if (IsAvx10OrIsaSupportedOpportunistically(InstructionSet_FMA)) + if (compOpportunisticallyDependsOn(InstructionSet_FMA)) { assert(varTypeIsFloating(callType)); @@ -5583,13 +5583,9 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, { hwIntrinsicId = NI_SSE_ConvertToInt32WithTruncation; } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - hwIntrinsicId = NI_AVX10v1_ConvertToUInt32WithTruncation; - } - else if (IsBaselineVector512IsaSupportedOpportunistically()) - { - hwIntrinsicId = NI_AVX512F_ConvertToUInt32WithTruncation; + hwIntrinsicId = NI_AVX512_ConvertToUInt32WithTruncation; } } else @@ -5600,13 +5596,9 @@ GenTree* 
Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, { hwIntrinsicId = NI_SSE2_ConvertToInt32WithTruncation; } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - hwIntrinsicId = NI_AVX10v1_ConvertToUInt32WithTruncation; - } - else if (IsBaselineVector512IsaSupportedOpportunistically()) + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - hwIntrinsicId = NI_AVX512F_ConvertToUInt32WithTruncation; + hwIntrinsicId = NI_AVX512_ConvertToUInt32WithTruncation; } } } @@ -5621,13 +5613,9 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, { hwIntrinsicId = NI_SSE_X64_ConvertToInt64WithTruncation; } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) - { - hwIntrinsicId = NI_AVX10v1_X64_ConvertToUInt64WithTruncation; - } - else if (IsBaselineVector512IsaSupportedOpportunistically()) + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - hwIntrinsicId = NI_AVX512F_X64_ConvertToUInt64WithTruncation; + hwIntrinsicId = NI_AVX512_X64_ConvertToUInt64WithTruncation; } } else @@ -5638,13 +5626,9 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, { hwIntrinsicId = NI_SSE2_X64_ConvertToInt64WithTruncation; } - else if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) + else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - hwIntrinsicId = NI_AVX10v1_X64_ConvertToUInt64WithTruncation; - } - else if (IsBaselineVector512IsaSupportedOpportunistically()) - { - hwIntrinsicId = NI_AVX512F_X64_ConvertToUInt64WithTruncation; + hwIntrinsicId = NI_AVX512_X64_ConvertToUInt64WithTruncation; } } } @@ -9493,15 +9477,10 @@ GenTree* Compiler::impEstimateIntrinsic(CORINFO_METHOD_HANDLE method, assert(sig->numArgs == 1); #if defined(TARGET_XARCH) - if (compExactlyDependsOn(InstructionSet_AVX10v1)) - { - simdType = TYP_SIMD16; - intrinsicId = NI_AVX10v1_Reciprocal14Scalar; - } - else if (compExactlyDependsOn(InstructionSet_AVX512F)) + if 
(compExactlyDependsOn(InstructionSet_AVX512)) { simdType = TYP_SIMD16; - intrinsicId = NI_AVX512F_Reciprocal14Scalar; + intrinsicId = NI_AVX512_Reciprocal14Scalar; } else if ((callType == TYP_FLOAT) && compExactlyDependsOn(InstructionSet_SSE)) { @@ -9533,10 +9512,10 @@ GenTree* Compiler::impEstimateIntrinsic(CORINFO_METHOD_HANDLE method, assert(sig->numArgs == 1); #if defined(TARGET_XARCH) - if (compExactlyDependsOn(InstructionSet_AVX512F)) + if (compExactlyDependsOn(InstructionSet_AVX512)) { simdType = TYP_SIMD16; - intrinsicId = NI_AVX512F_ReciprocalSqrt14Scalar; + intrinsicId = NI_AVX512_ReciprocalSqrt14Scalar; } else if ((callType == TYP_FLOAT) && compExactlyDependsOn(InstructionSet_SSE)) { @@ -9943,9 +9922,8 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method, #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) if (!isMagnitude && compOpportunisticallyDependsOn(InstructionSet_SSE2)) { - bool needsFixup = false; - bool canHandle = false; - bool isV512Supported = false; + bool needsFixup = false; + bool canHandle = false; if (isMax) { @@ -9974,7 +9952,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method, needsFixup = cnsNode->IsFloatPositiveZero(); } - if (!needsFixup || compIsEvexOpportunisticallySupported(isV512Supported)) + if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512)) { // Given the checks, op1 can safely be the cns and op2 the other node @@ -10015,7 +9993,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method, needsFixup = cnsNode->IsFloatNegativeZero(); } - if (!needsFixup || compIsEvexOpportunisticallySupported(isV512Supported)) + if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512)) { // Given the checks, op1 can safely be the cns and op2 the other node @@ -10099,8 +10077,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method, tbl->gtSimdVal.i32[0] = 0x0700; } - NamedIntrinsic fixupScalarId = - isV512Supported ? 
NI_AVX512F_FixupScalar : NI_AVX10v1_FixupScalar; + NamedIntrinsic fixupScalarId = NI_AVX512_FixupScalar; retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, retNode, op2Clone, tbl, gtNewIconNode(0), fixupScalarId, callJitType, 16); @@ -10122,8 +10099,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method, } #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) - bool isV512Supported = false; - if (compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512DQ)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { // We are constructing a chain of intrinsics similar to: // var op1 = Vector128.CreateScalarUnsafe(x); @@ -10177,10 +10153,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method, GenTree* op1Clone; op1 = impCloneExpr(op1, &op1Clone, CHECK_SPILL_ALL, nullptr DEBUGARG("Cloning op1 for Math.Max/Min")); - GenTree* tmp = - !isV512Supported - ? gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_AVX10v1_RangeScalar, callJitType, 16) - : gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_AVX512DQ_RangeScalar, callJitType, 16); + GenTree* tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_AVX512_RangeScalar, callJitType, 16); // FixupScalar(left, right, table, control) computes the input type of right // adjusts it based on the table and then returns @@ -10199,7 +10172,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method, // * qnan, norm = norm // * norm, norm = norm - NamedIntrinsic fixupHwIntrinsicID = !isV512Supported ? 
NI_AVX10v1_FixupScalar : NI_AVX512F_FixupScalar; + NamedIntrinsic fixupHwIntrinsicID = NI_AVX512_FixupScalar; if (isNumber) { // We need to fixup the case of: diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 7d20cb7ca913e7..e28f7397ea33ed 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -928,7 +928,7 @@ CodeGen::OperandDesc CodeGen::genOperandDesc(instruction ins, GenTree* op) case NI_SSE3_MoveAndDuplicate: case NI_AVX2_BroadcastScalarToVector128: case NI_AVX2_BroadcastScalarToVector256: - case NI_AVX512F_BroadcastScalarToVector512: + case NI_AVX512_BroadcastScalarToVector512: { assert(hwintrinsic->isContained()); if (intrinsicId == NI_SSE3_MoveAndDuplicate) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 334b368c6d9d2b..d5a47bab27ab39 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -399,16 +399,8 @@ RELEASE_CONFIG_INTEGER(EnableHWIntrinsic, "EnableHWIntrinsic", RELEASE_CONFIG_INTEGER(EnableAES, "EnableAES", 1) // Allows AES+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAVX, "EnableAVX", 1) // Allows AVX+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAVX2, "EnableAVX2", 1) // Allows AVX2+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512BW, "EnableAVX512BW", 1) // Allows AVX512BW+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512BW_VL, "EnableAVX512BW_VL", 1) // Allows AVX512BW+ AVX512VL+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512CD, "EnableAVX512CD", 1) // Allows AVX512CD+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512CD_VL, "EnableAVX512CD_VL", 1) // Allows AVX512CD+ AVX512VL+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512DQ, "EnableAVX512DQ", 1) // Allows AVX512DQ+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512DQ_VL, "EnableAVX512DQ_VL", 1) // Allows 
AVX512DQ+ AVX512VL+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512F, "EnableAVX512F", 1) // Allows AVX512F+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512F_VL, "EnableAVX512F_VL", 1) // Allows AVX512F+ AVX512VL+ hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVX512, "EnableAVX512", 1) // Allows AVX512+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAVX512VBMI, "EnableAVX512VBMI", 1) // Allows AVX512VBMI+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512VBMI_VL, "EnableAVX512VBMI_VL", 1) // Allows AVX512VBMI_VL+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAVX10v1, "EnableAVX10v1", 1) // Allows AVX10v1+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAVX10v2, "EnableAVX10v2", 1) // Allows AVX10v2+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAVXVNNI, "EnableAVXVNNI", 1) // Allows AVXVNNI+ hardware intrinsics to be disabled @@ -450,6 +442,24 @@ RELEASE_CONFIG_INTEGER(EnableEmbeddedMasking, "EnableEmbeddedMasking", RELEASE_CONFIG_INTEGER(EnableApxNDD, "EnableApxNDD", 0) // Allows APX NDD feature to be disabled RELEASE_CONFIG_INTEGER(EnableApxConditionalChaining, "EnableApxConditionalChaining", 0) // Allows APX conditional compare chaining +// +// These are "legacy" ISA enablement knobs that aren't recommended for use anymore +// +#if defined(TARGET_AMD64) || defined(TARGET_X86) +// These have been superceded by EnableAVX512 as you get all of them or none of them +RELEASE_CONFIG_INTEGER(EnableAVX512BW, "EnableAVX512BW", 1) // Allows AVX512BW+ hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVX512BW_VL, "EnableAVX512BW_VL", 1) // Allows AVX512BW+ AVX512VL+ hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVX512CD, "EnableAVX512CD", 1) // Allows AVX512CD+ hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVX512CD_VL, "EnableAVX512CD_VL", 1) // Allows AVX512CD+ 
AVX512VL+ hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVX512DQ, "EnableAVX512DQ", 1) // Allows AVX512DQ+ hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVX512DQ_VL, "EnableAVX512DQ_VL", 1) // Allows AVX512DQ+ AVX512VL+ hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVX512F, "EnableAVX512F", 1) // Allows AVX512F+ hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVX512F_VL, "EnableAVX512F_VL", 1) // Allows AVX512F+ AVX512VL+ hardware intrinsics to be disabled + +// These have been superceded by EnableAVX512VBMI as you get all of them or none of them +RELEASE_CONFIG_INTEGER(EnableAVX512VBMI_VL, "EnableAVX512VBMI_VL", 1) // Allows AVX512VBMI_VL+ hardware intrinsics to be disabled +#endif + // clang-format on #ifdef FEATURE_SIMD diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp index 89d06d24505cbc..1dd9ff67d16e4d 100644 --- a/src/coreclr/jit/lclmorph.cpp +++ b/src/coreclr/jit/lclmorph.cpp @@ -1969,7 +1969,7 @@ class LocalAddressVisitor final : public GenTreeVisitor else if (((indir->TypeIs(TYP_SIMD16) && m_compiler->compOpportunisticallyDependsOn(InstructionSet_AVX)) || (indir->TypeIs(TYP_SIMD32) && - m_compiler->IsBaselineVector512IsaSupportedOpportunistically())) && + m_compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512))) && (genTypeSize(indir) * 2 == genTypeSize(varDsc)) && ((offset % genTypeSize(indir)) == 0)) { return isDef ? IndirTransform::WithElement : IndirTransform::GetElement; diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 5b13b6494351ae..bf0a19c31136b4 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2396,7 +2396,7 @@ bool Lowering::LowerCallMemcmp(GenTreeCall* call, GenTree** next) ssize_t MaxUnrollSize = comp->IsBaselineSimdIsaSupported() ? 
32 : 16; #if defined(FEATURE_SIMD) && defined(TARGET_XARCH) - if (comp->IsBaselineVector512IsaSupportedOpportunistically()) + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { MaxUnrollSize = 128; } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 061341e34a80b7..3f60c0cbeb1b71 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -1306,7 +1306,7 @@ void Lowering::LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIn { GenTreeCC* cc = LowerNodeCC(node, condition); - assert((HWIntrinsicInfo::lookupNumArgs(newIntrinsicId) == 2) || (newIntrinsicId == NI_EVEX_KORTEST)); + assert((HWIntrinsicInfo::lookupNumArgs(newIntrinsicId) == 2) || (newIntrinsicId == NI_AVX512_KORTEST)); node->ChangeHWIntrinsicId(newIntrinsicId); node->gtType = TYP_VOID; node->ClearUnusedValue(); @@ -1350,8 +1350,8 @@ void Lowering::LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIn break; } - case NI_EVEX_KORTEST: - case NI_EVEX_KTEST: + case NI_AVX512_KORTEST: + case NI_AVX512_KTEST: { // No containment support, so no reason to swap operands canSwapOperands = false; @@ -1499,8 +1499,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) GenTree* op3 = nullptr; // We want to specially recognize this pattern as GT_NOT - bool isOperNot = (oper == GT_XOR) && op2->IsVectorAllBitsSet(); - bool isV512Supported = false; + bool isOperNot = (oper == GT_XOR) && op2->IsVectorAllBitsSet(); LIR::Use use; if (BlockRange().TryGetUse(node, &use)) @@ -1560,7 +1559,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) return nextNode; } - if (comp->compIsEvexOpportunisticallySupported(isV512Supported)) + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { // For everything else we want to lower it to a standard TernaryLogic node GenTree* nextNode = node->gtNext; @@ -1724,12 +1723,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) controlByte = 
TernaryLogicInfo::GetTernaryControlByte(userOper, A, controlByte); } - NamedIntrinsic ternaryLogicId = NI_AVX512F_TernaryLogic; - - if (simdSize != 64) - { - ternaryLogicId = isV512Supported ? NI_AVX512F_VL_TernaryLogic : NI_AVX10v1_TernaryLogic; - } + NamedIntrinsic ternaryLogicId = NI_AVX512_TernaryLogic; GenTree* op4 = comp->gtNewIconNode(controlByte); BlockRange().InsertBefore(userIntrin, op4); @@ -1741,7 +1735,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } } - if (isOperNot && comp->compIsEvexOpportunisticallySupported(isV512Supported)) + if (isOperNot && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { // Lowering this to TernaryLogic(zero, zero, op1, ~C) is smaller // and faster than emitting the pcmpeqd; pxor sequence. @@ -1752,7 +1746,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { GenTreeHWIntrinsic* opIntrin = op1->AsHWIntrinsic(); - if (HWIntrinsicInfo::IsTernaryLogic(opIntrin->GetHWIntrinsicId())) + if (opIntrin->GetHWIntrinsicId() == NI_AVX512_TernaryLogic) { GenTree* opControl = opIntrin->Op(4); @@ -1781,12 +1775,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } } - NamedIntrinsic ternaryLogicId = NI_AVX512F_TernaryLogic; - - if (simdSize != 64) - { - ternaryLogicId = isV512Supported ? 
NI_AVX512F_VL_TernaryLogic : NI_AVX10v1_TernaryLogic; - } + NamedIntrinsic ternaryLogicId = NI_AVX512_TernaryLogic; op3 = op1; @@ -1869,15 +1858,10 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Vector512_GetUpper: { - assert(comp->IsBaselineVector512IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512)); var_types simdBaseType = node->GetSimdBaseType(); - intrinsicId = NI_AVX512F_ExtractVector256; - - if ((genTypeSize(simdBaseType) == 4) && !comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ)) - { - intrinsicId = NI_AVX512DQ_ExtractVector256; - } + intrinsicId = NI_AVX512_ExtractVector256; GenTree* op1 = node->Op(1); @@ -1926,16 +1910,11 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Vector512_WithLower: case NI_Vector512_WithUpper: { - assert(comp->IsBaselineVector512IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512)); var_types simdBaseType = node->GetSimdBaseType(); int index = (intrinsicId == NI_Vector512_WithUpper) ? 
1 : 0; - intrinsicId = NI_AVX512F_InsertVector256; - - if ((genTypeSize(simdBaseType) == 4) && !comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ)) - { - intrinsicId = NI_AVX512DQ_InsertVector256; - } + intrinsicId = NI_AVX512_InsertVector256; GenTree* op1 = node->Op(1); GenTree* op2 = node->Op(2); @@ -1962,11 +1941,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) return LowerHWIntrinsicCmpOp(node, GT_NE); } - case NI_AVX512F_Fixup: - case NI_AVX512F_FixupScalar: - case NI_AVX512F_VL_Fixup: - case NI_AVX10v1_Fixup: - case NI_AVX10v1_FixupScalar: + case NI_AVX512_Fixup: + case NI_AVX512_FixupScalar: { if (!node->isRMWHWIntrinsic(comp)) { @@ -1989,8 +1965,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_EVEX_CompareEqualMask: - case NI_EVEX_CompareNotEqualMask: + case NI_AVX512_CompareEqualMask: + case NI_AVX512_CompareNotEqualMask: { GenTree* op1 = node->Op(1); GenTree* op2 = node->Op(2); @@ -1999,14 +1975,14 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { NamedIntrinsic testIntrinsicId; - if (intrinsicId == NI_EVEX_CompareEqualMask) + if (intrinsicId == NI_AVX512_CompareEqualMask) { // We have `CompareEqual(x, Zero)` where a given element // equaling zero returns 1. 
We can therefore use `vptestnm(x, x)` // since it does `(x & x) == 0`, thus giving us `1` if zero and `0` // if non-zero - testIntrinsicId = NI_EVEX_PTESTNM; + testIntrinsicId = NI_AVX512_PTESTNM; } else { @@ -2015,8 +1991,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // since it does `(x & x) != 0`, thus giving us `1` if non-zero and `0` // if zero - assert(intrinsicId == NI_EVEX_CompareNotEqualMask); - testIntrinsicId = NI_EVEX_PTESTM; + assert(intrinsicId == NI_AVX512_CompareNotEqualMask); + testIntrinsicId = NI_AVX512_PTESTM; } node->Op(1) = op1; @@ -2036,7 +2012,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_EVEX_AndMask: + case NI_AVX512_AndMask: { // We want to recognize (~op1 & op2) and transform it // into Evex.AndNotMask(op1, op2) as well as (op1 & ~op2) @@ -2049,7 +2025,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) GenTree* op1 = node->Op(1); GenTree* op2 = node->Op(2); - if (op1->OperIsHWIntrinsic(NI_EVEX_NotMask)) + if (op1->OperIsHWIntrinsic(NI_AVX512_NotMask)) { GenTreeHWIntrinsic* opIntrin = op1->AsHWIntrinsic(); unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); @@ -2063,7 +2039,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } } - if (!transform && op2->OperIsHWIntrinsic(NI_EVEX_NotMask)) + if (!transform && op2->OperIsHWIntrinsic(NI_AVX512_NotMask)) { GenTreeHWIntrinsic* opIntrin = op2->AsHWIntrinsic(); unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); @@ -2081,27 +2057,27 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) if (transform) { - intrinsicId = NI_EVEX_AndNotMask; + intrinsicId = NI_AVX512_AndNotMask; node->ChangeHWIntrinsicId(intrinsicId, op1, op2); } break; } - case NI_EVEX_NotMask: + case NI_AVX512_NotMask: { // We want to recognize ~(op1 ^ op2) and transform it // into Evex.XnorMask(op1, op2) GenTree* op1 = node->Op(1); - if (op1->OperIsHWIntrinsic(NI_EVEX_XorMask)) + if 
(op1->OperIsHWIntrinsic(NI_AVX512_XorMask)) { GenTreeHWIntrinsic* opIntrin = op1->AsHWIntrinsic(); unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); if (genTypeSize(opIntrin->GetSimdBaseType()) == simdBaseTypeSize) { - intrinsicId = NI_EVEX_XnorMask; + intrinsicId = NI_AVX512_XnorMask; node->ResetHWIntrinsicId(intrinsicId, comp, opIntrin->Op(1), opIntrin->Op(2)); BlockRange().Remove(opIntrin); } @@ -2602,9 +2578,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) LowerFusedMultiplyAdd(node); break; - case NI_AVX512F_TernaryLogic: - case NI_AVX512F_VL_TernaryLogic: - case NI_AVX10v1_TernaryLogic: + case NI_AVX512_TernaryLogic: { return LowerHWIntrinsicTernaryLogic(node); } @@ -2833,7 +2807,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm // to detect and account for those differences is not likely to be worth the tradeoff. // // TODO-XARCH-AVX512: Given the above don't emit the PTEST path above when AVX-512 is available - // This will require exposing `NI_AVX512F_TestZ` so that we can keep codegen optimized to just + // This will require exposing `NI_AVX512_TestZ` so that we can keep codegen optimized to just // `vptestm` followed by `kortest`. This will be one instruction more than just `vptest` but // it has the advantages detailed above. 
// @@ -2843,7 +2817,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm GenTree* maskNode = node; GenTree* nextNode = node->gtNext; - NamedIntrinsic maskIntrinsicId = NI_EVEX_CompareEqualMask; + NamedIntrinsic maskIntrinsicId = NI_AVX512_CompareEqualMask; uint32_t count = simdSize / genTypeSize(maskBaseType); // KORTEST does a bitwise or on the result and sets ZF if it is zero and CF if it is all @@ -2945,75 +2919,75 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm switch (maskIntrinsicId) { - case NI_EVEX_CompareEqualMask: + case NI_AVX512_CompareEqualMask: { - maskIntrinsicId = NI_EVEX_CompareNotEqualMask; + maskIntrinsicId = NI_AVX512_CompareNotEqualMask; break; } - case NI_EVEX_CompareGreaterThanMask: + case NI_AVX512_CompareGreaterThanMask: { - maskIntrinsicId = NI_EVEX_CompareNotGreaterThanMask; + maskIntrinsicId = NI_AVX512_CompareNotGreaterThanMask; break; } - case NI_EVEX_CompareGreaterThanOrEqualMask: + case NI_AVX512_CompareGreaterThanOrEqualMask: { - maskIntrinsicId = NI_EVEX_CompareNotGreaterThanOrEqualMask; + maskIntrinsicId = NI_AVX512_CompareNotGreaterThanOrEqualMask; break; } - case NI_EVEX_CompareLessThanMask: + case NI_AVX512_CompareLessThanMask: { - maskIntrinsicId = NI_EVEX_CompareNotLessThanMask; + maskIntrinsicId = NI_AVX512_CompareNotLessThanMask; break; } - case NI_EVEX_CompareLessThanOrEqualMask: + case NI_AVX512_CompareLessThanOrEqualMask: { - maskIntrinsicId = NI_EVEX_CompareNotLessThanOrEqualMask; + maskIntrinsicId = NI_AVX512_CompareNotLessThanOrEqualMask; break; } - case NI_EVEX_CompareNotEqualMask: + case NI_AVX512_CompareNotEqualMask: { - maskIntrinsicId = NI_EVEX_CompareEqualMask; + maskIntrinsicId = NI_AVX512_CompareEqualMask; break; } - case NI_EVEX_CompareNotGreaterThanMask: + case NI_AVX512_CompareNotGreaterThanMask: { - maskIntrinsicId = NI_EVEX_CompareGreaterThanMask; + maskIntrinsicId = NI_AVX512_CompareGreaterThanMask; break; } - case 
NI_EVEX_CompareNotGreaterThanOrEqualMask: + case NI_AVX512_CompareNotGreaterThanOrEqualMask: { - maskIntrinsicId = NI_EVEX_CompareGreaterThanOrEqualMask; + maskIntrinsicId = NI_AVX512_CompareGreaterThanOrEqualMask; break; } - case NI_EVEX_CompareNotLessThanMask: + case NI_AVX512_CompareNotLessThanMask: { - maskIntrinsicId = NI_EVEX_CompareLessThanMask; + maskIntrinsicId = NI_AVX512_CompareLessThanMask; break; } - case NI_EVEX_CompareNotLessThanOrEqualMask: + case NI_AVX512_CompareNotLessThanOrEqualMask: { - maskIntrinsicId = NI_EVEX_CompareLessThanOrEqualMask; + maskIntrinsicId = NI_AVX512_CompareLessThanOrEqualMask; break; } - case NI_EVEX_CompareOrderedMask: + case NI_AVX512_CompareOrderedMask: { - maskIntrinsicId = NI_EVEX_CompareUnorderedMask; + maskIntrinsicId = NI_AVX512_CompareUnorderedMask; break; } - case NI_EVEX_CompareUnorderedMask: + case NI_AVX512_CompareUnorderedMask: { - maskIntrinsicId = NI_EVEX_CompareOrderedMask; + maskIntrinsicId = NI_AVX512_CompareOrderedMask; break; } @@ -3028,15 +3002,16 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm GenTree* cnsNode; - maskNode = comp->gtNewSimdHWIntrinsicNode(TYP_MASK, maskNode, NI_EVEX_NotMask, + maskNode = comp->gtNewSimdHWIntrinsicNode(TYP_MASK, maskNode, NI_AVX512_NotMask, maskBaseJitType, simdSize); BlockRange().InsertBefore(node, maskNode); cnsNode = comp->gtNewIconNode(8 - count); BlockRange().InsertAfter(maskNode, cnsNode); - maskNode = comp->gtNewSimdHWIntrinsicNode(TYP_MASK, maskNode, cnsNode, - NI_EVEX_ShiftLeftMask, maskBaseJitType, simdSize); + maskNode = + comp->gtNewSimdHWIntrinsicNode(TYP_MASK, maskNode, cnsNode, NI_AVX512_ShiftLeftMask, + maskBaseJitType, simdSize); BlockRange().InsertAfter(cnsNode, maskNode); LowerNode(maskNode); @@ -3044,11 +3019,11 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm BlockRange().InsertAfter(maskNode, cnsNode); maskNode = - comp->gtNewSimdHWIntrinsicNode(TYP_MASK, maskNode, cnsNode, 
NI_EVEX_ShiftRightMask, + comp->gtNewSimdHWIntrinsicNode(TYP_MASK, maskNode, cnsNode, NI_AVX512_ShiftRightMask, maskBaseJitType, simdSize); BlockRange().InsertAfter(cnsNode, maskNode); - maskIntrinsicId = NI_EVEX_ShiftRightMask; + maskIntrinsicId = NI_AVX512_ShiftRightMask; break; } } @@ -3095,7 +3070,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm if (!varTypeIsFloating(simdBaseType) && (op2 != nullptr) && op2->IsVectorZero()) { - NamedIntrinsic testIntrinsicId = NI_EVEX_PTESTM; + NamedIntrinsic testIntrinsicId = NI_AVX512_PTESTM; bool skipReplaceOperands = false; if (op1->OperIsHWIntrinsic()) @@ -3126,7 +3101,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm // will then set `ZF: 1` if all elements were 0 and `ZF: 0` if any elements were // non-zero. The default GenCondition then remain correct - assert(testIntrinsicId == NI_EVEX_PTESTM); + assert(testIntrinsicId == NI_AVX512_PTESTM); GenTree* nestedOp1 = op1Intrinsic->Op(1); GenTree* nestedOp2 = op1Intrinsic->Op(2); @@ -3139,7 +3114,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm if ((nestedIntrinId == NI_SSE3_MoveAndDuplicate) || (nestedIntrinId == NI_AVX2_BroadcastScalarToVector128) || (nestedIntrinId == NI_AVX2_BroadcastScalarToVector256) || - (nestedIntrinId == NI_AVX512F_BroadcastScalarToVector512)) + (nestedIntrinId == NI_AVX512_BroadcastScalarToVector512)) { // We need to rewrite the embedded broadcast back to a regular constant // so that the subsequent containment check for ptestm can determine @@ -3250,7 +3225,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm if (count < 8) { assert((count == 1) || (count == 2) || (count == 4)); - maskIntrinsicId = NI_EVEX_CompareNotEqualMask; + maskIntrinsicId = NI_AVX512_CompareNotEqualMask; } else { @@ -3262,7 +3237,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm } else { - 
maskIntrinsicId = NI_EVEX_CompareNotEqualMask; + maskIntrinsicId = NI_AVX512_CompareNotEqualMask; } } @@ -3278,11 +3253,11 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm { GenTreeHWIntrinsic* cc; - cc = comp->gtNewSimdHWIntrinsicNode(simdType, maskNode, NI_EVEX_KORTEST, maskBaseJitType, simdSize); + cc = comp->gtNewSimdHWIntrinsicNode(simdType, maskNode, NI_AVX512_KORTEST, maskBaseJitType, simdSize); BlockRange().InsertBefore(nextNode, cc); use.ReplaceWith(cc); - LowerHWIntrinsicCC(cc, NI_EVEX_KORTEST, cmpCnd); + LowerHWIntrinsicCC(cc, NI_AVX512_KORTEST, cmpCnd); nextNode = cc->gtNext; } @@ -3521,7 +3496,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) } assert(maskNode->TypeGet() == TYP_MASK); - blendVariableId = NI_EVEX_BlendVariableMask; + blendVariableId = NI_AVX512_BlendVariableMask; op1 = maskNode; } else if (op2->IsVectorZero() || op3->IsVectorZero()) @@ -3586,16 +3561,10 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node) } } - bool isV512Supported = false; - if (comp->compIsEvexOpportunisticallySupported(isV512Supported)) + if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) { // We can't use the mask, but we can emit a ternary logic node - NamedIntrinsic ternaryLogicId = NI_AVX512F_TernaryLogic; - - if (simdSize != 64) - { - ternaryLogicId = !isV512Supported ? 
NI_AVX10v1_TernaryLogic : NI_AVX512F_VL_TernaryLogic; - } + NamedIntrinsic ternaryLogicId = NI_AVX512_TernaryLogic; GenTree* control = comp->gtNewIconNode(0xCA); // (B & A) | (C & ~A) BlockRange().InsertBefore(node, control); @@ -3800,7 +3769,7 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) { case NI_AVX_Compare: { - cndId = NI_EVEX_CompareMask; + cndId = NI_AVX512_CompareMask; break; } @@ -3810,7 +3779,7 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) case NI_AVX_CompareEqual: case NI_AVX2_CompareEqual: { - cndId = NI_EVEX_CompareEqualMask; + cndId = NI_AVX512_CompareEqualMask; break; } @@ -3820,7 +3789,7 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) case NI_AVX_CompareGreaterThan: case NI_AVX2_CompareGreaterThan: { - cndId = NI_EVEX_CompareGreaterThanMask; + cndId = NI_AVX512_CompareGreaterThanMask; break; } @@ -3828,7 +3797,7 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) case NI_SSE2_CompareGreaterThanOrEqual: case NI_AVX_CompareGreaterThanOrEqual: { - cndId = NI_EVEX_CompareGreaterThanOrEqualMask; + cndId = NI_AVX512_CompareGreaterThanOrEqualMask; break; } @@ -3838,7 +3807,7 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) case NI_AVX_CompareLessThan: case NI_AVX2_CompareLessThan: { - cndId = NI_EVEX_CompareLessThanMask; + cndId = NI_AVX512_CompareLessThanMask; break; } @@ -3846,7 +3815,7 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) case NI_SSE2_CompareLessThanOrEqual: case NI_AVX_CompareLessThanOrEqual: { - cndId = NI_EVEX_CompareLessThanOrEqualMask; + cndId = NI_AVX512_CompareLessThanOrEqualMask; break; } @@ -3854,7 +3823,7 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) case NI_SSE2_CompareNotEqual: case NI_AVX_CompareNotEqual: { - cndId = NI_EVEX_CompareNotEqualMask; + cndId = NI_AVX512_CompareNotEqualMask; break; } @@ -3862,7 +3831,7 @@ GenTree* 
Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) case NI_SSE2_CompareNotGreaterThan: case NI_AVX_CompareNotGreaterThan: { - cndId = NI_EVEX_CompareGreaterThanMask; + cndId = NI_AVX512_CompareGreaterThanMask; break; } @@ -3870,7 +3839,7 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) case NI_SSE2_CompareNotGreaterThanOrEqual: case NI_AVX_CompareNotGreaterThanOrEqual: { - cndId = NI_EVEX_CompareNotGreaterThanOrEqualMask; + cndId = NI_AVX512_CompareNotGreaterThanOrEqualMask; break; } @@ -3878,7 +3847,7 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) case NI_SSE2_CompareNotLessThan: case NI_AVX_CompareNotLessThan: { - cndId = NI_EVEX_CompareNotLessThanMask; + cndId = NI_AVX512_CompareNotLessThanMask; break; } @@ -3886,7 +3855,7 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) case NI_SSE2_CompareNotLessThanOrEqual: case NI_AVX_CompareNotLessThanOrEqual: { - cndId = NI_EVEX_CompareNotLessThanOrEqualMask; + cndId = NI_AVX512_CompareNotLessThanOrEqualMask; break; } @@ -3894,7 +3863,7 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) case NI_SSE2_CompareOrdered: case NI_AVX_CompareOrdered: { - cndId = NI_EVEX_CompareOrderedMask; + cndId = NI_AVX512_CompareOrderedMask; break; } @@ -3902,7 +3871,7 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) case NI_SSE2_CompareUnordered: case NI_AVX_CompareUnordered: { - cndId = NI_EVEX_CompareUnorderedMask; + cndId = NI_AVX512_CompareUnorderedMask; break; } @@ -3937,7 +3906,7 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) node->SetSimdBaseJitType(condition->AsHWIntrinsic()->GetSimdBaseJitType()); - node->ResetHWIntrinsicId(NI_EVEX_BlendVariableMask, comp, selectFalse, selectTrue, condition); + node->ResetHWIntrinsicId(NI_AVX512_BlendVariableMask, comp, selectFalse, selectTrue, condition); BlockRange().Remove(op4); break; } @@ -4267,7 +4236,7 @@ 
GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) if (intrinsicId == NI_Vector512_Create) { - assert(comp->IsBaselineVector512IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512)); // We will be constructing the following parts: // /--* op1 T // tmp1 = * HWINTRINSIC simd32 T CreateScalarUnsafe @@ -4280,34 +4249,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp1 = InsertNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, simdBaseJitType, 16); LowerNode(tmp1); - switch (simdBaseType) - { - case TYP_BYTE: - case TYP_UBYTE: - case TYP_SHORT: - case TYP_USHORT: - { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); - node->ResetHWIntrinsicId(NI_AVX512BW_BroadcastScalarToVector512, tmp1); - break; - } - - case TYP_INT: - case TYP_UINT: - case TYP_FLOAT: - case TYP_DOUBLE: - case TYP_LONG: - case TYP_ULONG: - { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); - node->ResetHWIntrinsicId(NI_AVX512F_BroadcastScalarToVector512, tmp1); - break; - } - default: - { - unreached(); - } - } + node->ResetHWIntrinsicId(NI_AVX512_BroadcastScalarToVector512, tmp1); return LowerNode(node); } @@ -4652,8 +4594,8 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) if (intrinsicId == NI_Vector512_Create || intrinsicId == NI_Vector256_Create) { assert(argCnt >= (simdSize / genTypeSize(TYP_LONG))); - assert(((simdSize == 64) && comp->IsBaselineVector512IsaSupportedDebugOnly()) || - ((simdSize == 32) && comp->IsBaselineVector256IsaSupportedDebugOnly())); + assert(((simdSize == 64) && comp->compIsaSupportedDebugOnly(InstructionSet_AVX512)) || + ((simdSize == 32) && comp->compIsaSupportedDebugOnly(InstructionSet_AVX))); // The larger vector implementation is simplified by splitting the // job in half and delegating to the next smaller vector size. 
@@ -5429,7 +5371,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) if (intrinsicId == NI_Vector512_GetElement) { - assert(comp->IsBaselineVector512IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512)); if (simd16Idx == 0) { @@ -5467,12 +5409,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) BlockRange().InsertBefore(node, idx); LowerNode(idx); - NamedIntrinsic extractIntrinsicId = NI_AVX512F_ExtractVector128; - - if ((genTypeSize(simdBaseType) == 8) && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ)) - { - extractIntrinsicId = NI_AVX512DQ_ExtractVector128; - } + NamedIntrinsic extractIntrinsicId = NI_AVX512_ExtractVector128; tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, idx, extractIntrinsicId, simdBaseJitType, simdSize); BlockRange().InsertBefore(node, tmp1); @@ -5704,7 +5641,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) // it and then operate on that. At the end, we will insert the simd16 // result back into the simd64 local, producing our final value. - assert(comp->IsBaselineVector512IsaSupportedDebugOnly()); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX512)); // This copy of "node" will have the simd16 value we need. 
result = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, intrinsicId, simdBaseJitType, 16); @@ -5761,12 +5698,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) BlockRange().InsertAfter(op1, idx); LowerNode(idx); - NamedIntrinsic extractIntrinsicId = NI_AVX512F_ExtractVector128; - - if ((genTypeSize(simdBaseType) == 8) && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ)) - { - extractIntrinsicId = NI_AVX512DQ_ExtractVector128; - } + NamedIntrinsic extractIntrinsicId = NI_AVX512_ExtractVector128; tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, idx, extractIntrinsicId, simdBaseJitType, simdSize); BlockRange().InsertAfter(idx, tmp1); @@ -5781,12 +5713,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) BlockRange().InsertBefore(node, idx); LowerNode(idx); - NamedIntrinsic insertIntrinsicId = NI_AVX512F_InsertVector128; - - if ((genTypeSize(simdBaseType) == 8) && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ)) - { - insertIntrinsicId = NI_AVX512DQ_InsertVector128; - } + NamedIntrinsic insertIntrinsicId = NI_AVX512_InsertVector128; node->ResetHWIntrinsicId(insertIntrinsicId, comp, tmp64, result, idx); } @@ -6066,8 +5993,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) { // Now that we have finalized the shape of the tree, lower the insertion node as well. 
- assert((node->GetHWIntrinsicId() == NI_AVX512F_InsertVector128) || - (node->GetHWIntrinsicId() == NI_AVX512DQ_InsertVector128)); + assert(node->GetHWIntrinsicId() == NI_AVX512_InsertVector128); assert(node != result); nextNode = LowerNode(node); @@ -7971,12 +7897,8 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) case NI_SSE41_X64_Extract: case NI_AVX_ExtractVector128: case NI_AVX2_ExtractVector128: - case NI_AVX512F_ExtractVector128: - case NI_AVX512F_ExtractVector256: - case NI_AVX512DQ_ExtractVector128: - case NI_AVX512DQ_ExtractVector256: - case NI_AVX10v1_V512_ExtractVector128: - case NI_AVX10v1_V512_ExtractVector256: + case NI_AVX512_ExtractVector128: + case NI_AVX512_ExtractVector256: { // These intrinsics are "ins reg/mem, xmm, imm8" @@ -7994,12 +7916,10 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) break; } - case NI_AVX512F_ConvertToVector256Int32: - case NI_AVX512F_ConvertToVector256UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation: - case NI_AVX10v1_ConvertToVector128UInt32: - case NI_AVX10v1_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector128UInt32: + case NI_AVX512_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector256Int32: + case NI_AVX512_ConvertToVector256UInt32: { if (varTypeIsFloating(simdBaseType)) { @@ -8008,48 +7928,26 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) FALLTHROUGH; } - case NI_AVX512F_ConvertToVector128Byte: - case NI_AVX512F_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_ConvertToVector128Int16: - case NI_AVX512F_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_ConvertToVector128SByte: - case NI_AVX512F_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_ConvertToVector128UInt16: - case NI_AVX512F_ConvertToVector128UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256Int16: - case NI_AVX512F_ConvertToVector256Int16WithSaturation: - case 
NI_AVX512F_ConvertToVector256Int32WithSaturation: - case NI_AVX512F_ConvertToVector256UInt16: - case NI_AVX512F_ConvertToVector256UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256UInt32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Byte: - case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int16: - case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int32: - case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128SByte: - case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128UInt16: - case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation: - case NI_AVX512BW_ConvertToVector256Byte: - case NI_AVX512BW_ConvertToVector256ByteWithSaturation: - case NI_AVX512BW_ConvertToVector256SByte: - case NI_AVX512BW_ConvertToVector256SByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128Byte: - case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128SByte: - case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Byte: - case NI_AVX10v1_ConvertToVector128ByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Int16: - case NI_AVX10v1_ConvertToVector128Int16WithSaturation: - case NI_AVX10v1_ConvertToVector128Int32: - case NI_AVX10v1_ConvertToVector128Int32WithSaturation: - case NI_AVX10v1_ConvertToVector128SByte: - case NI_AVX10v1_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128UInt16: - case NI_AVX10v1_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector128Byte: + case NI_AVX512_ConvertToVector128ByteWithSaturation: + case NI_AVX512_ConvertToVector128Int16: + case NI_AVX512_ConvertToVector128Int16WithSaturation: + case NI_AVX512_ConvertToVector128Int32: + case NI_AVX512_ConvertToVector128Int32WithSaturation: + case NI_AVX512_ConvertToVector128SByte: + case 
NI_AVX512_ConvertToVector128SByteWithSaturation: + case NI_AVX512_ConvertToVector128UInt16: + case NI_AVX512_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector256Byte: + case NI_AVX512_ConvertToVector256ByteWithSaturation: + case NI_AVX512_ConvertToVector256Int16: + case NI_AVX512_ConvertToVector256Int16WithSaturation: + case NI_AVX512_ConvertToVector256Int32WithSaturation: + case NI_AVX512_ConvertToVector256SByte: + case NI_AVX512_ConvertToVector256SByteWithSaturation: + case NI_AVX512_ConvertToVector256UInt16: + case NI_AVX512_ConvertToVector256UInt16WithSaturation: + case NI_AVX512_ConvertToVector256UInt32WithSaturation: { // These intrinsics are "ins reg/mem, xmm" instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, simdBaseType, comp); @@ -9033,14 +8931,9 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre case NI_AVX2_ShiftLeftLogical: case NI_AVX2_ShiftRightArithmetic: case NI_AVX2_ShiftRightLogical: - case NI_AVX512BW_ShiftLeftLogical: - case NI_AVX512BW_ShiftRightArithmetic: - case NI_AVX512BW_ShiftRightLogical: - case NI_AVX512F_ShiftLeftLogical: - case NI_AVX512F_ShiftRightArithmetic: - case NI_AVX512F_ShiftRightLogical: - case NI_AVX512F_VL_ShiftRightArithmetic: - case NI_AVX10v1_ShiftRightArithmetic: + case NI_AVX512_ShiftLeftLogical: + case NI_AVX512_ShiftRightArithmetic: + case NI_AVX512_ShiftRightLogical: { assert((tupleType & INS_TT_MEM128) != 0); @@ -9206,8 +9099,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre case NI_AVX2_BroadcastScalarToVector128: case NI_AVX2_BroadcastScalarToVector256: - case NI_AVX512F_BroadcastScalarToVector512: - case NI_AVX512BW_BroadcastScalarToVector512: + case NI_AVX512_BroadcastScalarToVector512: { // These can have either pointer or vector operands. For the pointer case, we can't check // size, so just assume it matches. 
@@ -9339,7 +9231,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre case NI_SSE_LoadAlignedVector128: case NI_SSE2_LoadAlignedVector128: case NI_AVX_LoadAlignedVector256: - case NI_AVX512F_LoadAlignedVector512: + case NI_AVX512_LoadAlignedVector512: { // In minOpts, we need to ensure that an unaligned address will fault when an explicit LoadAligned is used. // Non-VEX encoded instructions will fault if an unaligned SIMD16 load is contained but will not for scalar @@ -9364,7 +9256,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre case NI_SSE3_MoveAndDuplicate: case NI_AVX2_BroadcastScalarToVector128: case NI_AVX2_BroadcastScalarToVector256: - case NI_AVX512F_BroadcastScalarToVector512: + case NI_AVX512_BroadcastScalarToVector512: { if (comp->opts.MinOpts() || !comp->canUseEmbeddedBroadcast()) { @@ -9486,7 +9378,7 @@ void Lowering::TryFoldCnsVecForEmbeddedBroadcast(GenTreeHWIntrinsic* parentNode, } else if (simdType == TYP_SIMD64) { - broadcastName = NI_AVX512F_BroadcastScalarToVector512; + broadcastName = NI_AVX512_BroadcastScalarToVector512; } else { @@ -9712,12 +9604,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE41_RoundToNegativeInfinityScalar: case NI_SSE41_RoundToPositiveInfinityScalar: case NI_SSE41_RoundToZeroScalar: - case NI_AVX512F_GetExponentScalar: - case NI_AVX512F_Reciprocal14Scalar: - case NI_AVX512F_ReciprocalSqrt14Scalar: - case NI_AVX10v1_GetExponentScalar: - case NI_AVX10v1_Reciprocal14Scalar: - case NI_AVX10v1_ReciprocalSqrt14Scalar: + case NI_AVX512_GetExponentScalar: + case NI_AVX512_Reciprocal14Scalar: + case NI_AVX512_ReciprocalSqrt14Scalar: { // These intrinsics have both 1 and 2-operand overloads. 
// @@ -9761,8 +9650,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX2_BroadcastScalarToVector128: case NI_AVX2_BroadcastScalarToVector256: - case NI_AVX512F_BroadcastScalarToVector512: - case NI_AVX512BW_BroadcastScalarToVector512: + case NI_AVX512_BroadcastScalarToVector512: { if (node->OperIsMemoryLoad()) { @@ -9820,12 +9708,10 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_AVX512F_ConvertToVector256Int32: - case NI_AVX512F_ConvertToVector256UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32: - case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation: - case NI_AVX10v1_ConvertToVector128UInt32: - case NI_AVX10v1_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector128UInt32: + case NI_AVX512_ConvertToVector128UInt32WithSaturation: + case NI_AVX512_ConvertToVector256Int32: + case NI_AVX512_ConvertToVector256UInt32: { if (varTypeIsFloating(simdBaseType)) { @@ -9836,48 +9722,26 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) FALLTHROUGH; } - case NI_AVX512F_ConvertToVector128Byte: - case NI_AVX512F_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_ConvertToVector128Int16: - case NI_AVX512F_ConvertToVector128Int16WithSaturation: - case NI_AVX512F_ConvertToVector128SByte: - case NI_AVX512F_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_ConvertToVector128UInt16: - case NI_AVX512F_ConvertToVector128UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256Int16: - case NI_AVX512F_ConvertToVector256Int16WithSaturation: - case NI_AVX512F_ConvertToVector256Int32WithSaturation: - case NI_AVX512F_ConvertToVector256UInt16: - case NI_AVX512F_ConvertToVector256UInt16WithSaturation: - case NI_AVX512F_ConvertToVector256UInt32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128Byte: - case NI_AVX512F_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128Int16: - case NI_AVX512F_VL_ConvertToVector128Int16WithSaturation: - 
case NI_AVX512F_VL_ConvertToVector128Int32: - case NI_AVX512F_VL_ConvertToVector128Int32WithSaturation: - case NI_AVX512F_VL_ConvertToVector128SByte: - case NI_AVX512F_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX512F_VL_ConvertToVector128UInt16: - case NI_AVX512F_VL_ConvertToVector128UInt16WithSaturation: - case NI_AVX512BW_ConvertToVector256Byte: - case NI_AVX512BW_ConvertToVector256ByteWithSaturation: - case NI_AVX512BW_ConvertToVector256SByte: - case NI_AVX512BW_ConvertToVector256SByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128Byte: - case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation: - case NI_AVX512BW_VL_ConvertToVector128SByte: - case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Byte: - case NI_AVX10v1_ConvertToVector128ByteWithSaturation: - case NI_AVX10v1_ConvertToVector128Int16: - case NI_AVX10v1_ConvertToVector128Int16WithSaturation: - case NI_AVX10v1_ConvertToVector128Int32: - case NI_AVX10v1_ConvertToVector128Int32WithSaturation: - case NI_AVX10v1_ConvertToVector128SByte: - case NI_AVX10v1_ConvertToVector128SByteWithSaturation: - case NI_AVX10v1_ConvertToVector128UInt16: - case NI_AVX10v1_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector128Byte: + case NI_AVX512_ConvertToVector128ByteWithSaturation: + case NI_AVX512_ConvertToVector128Int16: + case NI_AVX512_ConvertToVector128Int16WithSaturation: + case NI_AVX512_ConvertToVector128Int32: + case NI_AVX512_ConvertToVector128Int32WithSaturation: + case NI_AVX512_ConvertToVector128SByte: + case NI_AVX512_ConvertToVector128SByteWithSaturation: + case NI_AVX512_ConvertToVector128UInt16: + case NI_AVX512_ConvertToVector128UInt16WithSaturation: + case NI_AVX512_ConvertToVector256Byte: + case NI_AVX512_ConvertToVector256ByteWithSaturation: + case NI_AVX512_ConvertToVector256Int16: + case NI_AVX512_ConvertToVector256Int16WithSaturation: + case NI_AVX512_ConvertToVector256Int32WithSaturation: + case 
NI_AVX512_ConvertToVector256SByte: + case NI_AVX512_ConvertToVector256SByteWithSaturation: + case NI_AVX512_ConvertToVector256UInt16: + case NI_AVX512_ConvertToVector256UInt16WithSaturation: + case NI_AVX512_ConvertToVector256UInt32WithSaturation: { // These intrinsics are "ins reg/mem, xmm" and get // contained by the relevant store operation instead. @@ -10045,12 +9909,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE2_Extract: case NI_AVX_ExtractVector128: case NI_AVX2_ExtractVector128: - case NI_AVX512F_ExtractVector128: - case NI_AVX512F_ExtractVector256: - case NI_AVX512DQ_ExtractVector128: - case NI_AVX512DQ_ExtractVector256: - case NI_AVX10v1_V512_ExtractVector128: - case NI_AVX10v1_V512_ExtractVector256: + case NI_AVX512_ExtractVector128: + case NI_AVX512_ExtractVector256: { // These intrinsics are "ins reg/mem, xmm, imm8" and get // contained by the relevant store operation instead. @@ -10076,18 +9936,14 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX2_Permute4x64: case NI_AVX2_ShuffleHigh: case NI_AVX2_ShuffleLow: - case NI_AVX512F_Permute2x64: - case NI_AVX512F_Permute4x32: - case NI_AVX512F_Permute4x64: - case NI_AVX512F_Shuffle: - case NI_AVX512BW_ShuffleHigh: - case NI_AVX512BW_ShuffleLow: - case NI_AVX512F_RotateLeft: - case NI_AVX512F_RotateRight: - case NI_AVX512F_VL_RotateLeft: - case NI_AVX512F_VL_RotateRight: - case NI_AVX10v1_RotateLeft: - case NI_AVX10v1_RotateRight: + case NI_AVX512_Permute2x64: + case NI_AVX512_Permute4x32: + case NI_AVX512_Permute4x64: + case NI_AVX512_Shuffle: + case NI_AVX512_ShuffleHigh: + case NI_AVX512_ShuffleLow: + case NI_AVX512_RotateLeft: + case NI_AVX512_RotateRight: { // These intrinsics have op2 as an imm and op1 as a reg/mem @@ -10118,14 +9974,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX2_ShiftLeftLogical: case NI_AVX2_ShiftRightArithmetic: case NI_AVX2_ShiftRightLogical: - case NI_AVX512F_ShiftLeftLogical: - 
case NI_AVX512F_ShiftRightArithmetic: - case NI_AVX512F_ShiftRightLogical: - case NI_AVX512F_VL_ShiftRightArithmetic: - case NI_AVX512BW_ShiftLeftLogical: - case NI_AVX512BW_ShiftRightArithmetic: - case NI_AVX512BW_ShiftRightLogical: - case NI_AVX10v1_ShiftRightArithmetic: + case NI_AVX512_ShiftLeftLogical: + case NI_AVX512_ShiftRightArithmetic: + case NI_AVX512_ShiftRightLogical: { // These intrinsics can have op2 be imm or reg/mem // They also can have op1 be reg/mem and op2 be imm @@ -10148,16 +9999,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } case NI_AES_KeygenAssist: - case NI_AVX512F_GetMantissa: - case NI_AVX512F_VL_GetMantissa: - case NI_AVX512F_RoundScale: - case NI_AVX512F_VL_RoundScale: - case NI_AVX512DQ_Reduce: - case NI_AVX512DQ_VL_Reduce: - case NI_AVX10v1_GetMantissa: - case NI_AVX10v1_Reduce: - case NI_AVX10v1_RoundScale: - case NI_AVX10v1_V512_Reduce: + case NI_AVX512_GetMantissa: + case NI_AVX512_RoundScale: + case NI_AVX512_Reduce: { if (!isContainedImm) { @@ -10173,8 +10017,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE2_ShiftRightLogical128BitLane: case NI_AVX2_ShiftLeftLogical128BitLane: case NI_AVX2_ShiftRightLogical128BitLane: - case NI_AVX512BW_ShiftLeftLogical128BitLane: - case NI_AVX512BW_ShiftRightLogical128BitLane: + case NI_AVX512_ShiftLeftLogical128BitLane: + case NI_AVX512_ShiftRightLogical128BitLane: { // These intrinsics have op2 as an imm and op1 as a reg/mem when AVX512BW+VL is supported @@ -10188,12 +10032,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_AVX512F_GetMantissaScalar: - case NI_AVX512F_RoundScaleScalar: - case NI_AVX512DQ_ReduceScalar: - case NI_AVX10v1_GetMantissaScalar: - case NI_AVX10v1_ReduceScalar: - case NI_AVX10v1_RoundScaleScalar: + case NI_AVX512_GetMantissaScalar: + case NI_AVX512_RoundScaleScalar: + case NI_AVX512_ReduceScalar: { // These intrinsics have both 2 and 3-operand overloads. 
// @@ -10204,8 +10045,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) return; } - case NI_EVEX_ShiftLeftMask: - case NI_EVEX_ShiftRightMask: + case NI_AVX512_ShiftLeftMask: + case NI_AVX512_ShiftRightMask: { // These intrinsics don't support a memory operand and // we don't currently generate a jmp table fallback. @@ -10578,7 +10419,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_EVEX_BlendVariableMask: + case NI_AVX512_BlendVariableMask: { // BlendVariableMask represents one of the following instructions: // * vblendmpd @@ -10975,46 +10816,27 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX2_InsertVector128: case NI_AVX2_MultipleSumAbsoluteDifferences: case NI_AVX2_Permute2x128: - case NI_AVX512F_AlignRight32: - case NI_AVX512F_AlignRight64: - case NI_EVEX_CompareMask: - case NI_AVX512F_GetMantissaScalar: - case NI_AVX512F_InsertVector128: - case NI_AVX512F_InsertVector256: - case NI_AVX512F_RoundScaleScalar: - case NI_AVX512F_Shuffle: - case NI_AVX512F_Shuffle4x128: - case NI_AVX512F_VL_AlignRight32: - case NI_AVX512F_VL_AlignRight64: - case NI_AVX512F_VL_Shuffle2x128: - case NI_AVX512BW_AlignRight: - case NI_AVX512BW_SumAbsoluteDifferencesInBlock32: - case NI_AVX512BW_VL_SumAbsoluteDifferencesInBlock32: - case NI_AVX512DQ_InsertVector128: - case NI_AVX512DQ_InsertVector256: - case NI_AVX512DQ_Range: - case NI_AVX512DQ_RangeScalar: - case NI_AVX512DQ_VL_Range: - case NI_AVX512DQ_ReduceScalar: + case NI_AVX512_AlignRight32: + case NI_AVX512_AlignRight64: + case NI_AVX512_AlignRight: + case NI_AVX512_GetMantissaScalar: + case NI_AVX512_InsertVector128: + case NI_AVX512_InsertVector256: + case NI_AVX512_Range: + case NI_AVX512_RangeScalar: + case NI_AVX512_ReduceScalar: + case NI_AVX512_RoundScaleScalar: + case NI_AVX512_Shuffle2x128: + case NI_AVX512_Shuffle4x128: + case NI_AVX512_Shuffle: + case NI_AVX512_SumAbsoluteDifferencesInBlock32: + case NI_AVX512_CompareMask: case 
NI_PCLMULQDQ_CarrylessMultiply: case NI_PCLMULQDQ_V256_CarrylessMultiply: case NI_PCLMULQDQ_V512_CarrylessMultiply: - case NI_AVX10v1_AlignRight32: - case NI_AVX10v1_AlignRight64: - case NI_AVX10v1_GetMantissaScalar: - case NI_AVX10v1_Range: - case NI_AVX10v1_RangeScalar: - case NI_AVX10v1_ReduceScalar: - case NI_AVX10v1_RoundScaleScalar: - case NI_AVX10v1_SumAbsoluteDifferencesInBlock32: - case NI_AVX10v1_Shuffle2x128: - case NI_AVX10v1_V512_InsertVector128: - case NI_AVX10v1_V512_InsertVector256: - case NI_AVX10v1_V512_Range: - case NI_AVX10v2_MinMaxScalar: case NI_AVX10v2_MinMax: - case NI_AVX10v2_V512_MinMax: - case NI_AVX10v2_V512_MultipleSumAbsoluteDifferences: + case NI_AVX10v2_MinMaxScalar: + case NI_AVX10v2_MultipleSumAbsoluteDifferences: case NI_GFNI_GaloisFieldAffineTransform: case NI_GFNI_GaloisFieldAffineTransformInverse: case NI_GFNI_V256_GaloisFieldAffineTransform: @@ -11136,11 +10958,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { switch (intrinsicId) { - case NI_AVX512F_Fixup: - case NI_AVX512F_FixupScalar: - case NI_AVX512F_VL_Fixup: - case NI_AVX10v1_Fixup: - case NI_AVX10v1_FixupScalar: + case NI_AVX512_Fixup: + case NI_AVX512_FixupScalar: { if (!isContainedImm) { @@ -11163,9 +10982,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_AVX512F_TernaryLogic: - case NI_AVX512F_VL_TernaryLogic: - case NI_AVX10v1_TernaryLogic: + case NI_AVX512_TernaryLogic: { assert(comp->canUseEvexEncodingDebugOnly()); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 652584215f3242..b93051c5d7b2e1 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2522,20 +2522,16 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou case NI_FMA_MultiplySubtractNegated: case NI_FMA_MultiplySubtractNegatedScalar: case NI_FMA_MultiplySubtractScalar: - case NI_AVX512F_FusedMultiplyAdd: - case NI_AVX512F_FusedMultiplyAddScalar: - case 
NI_AVX512F_FusedMultiplyAddNegated: - case NI_AVX512F_FusedMultiplyAddNegatedScalar: - case NI_AVX512F_FusedMultiplyAddSubtract: - case NI_AVX512F_FusedMultiplySubtract: - case NI_AVX512F_FusedMultiplySubtractScalar: - case NI_AVX512F_FusedMultiplySubtractAdd: - case NI_AVX512F_FusedMultiplySubtractNegated: - case NI_AVX512F_FusedMultiplySubtractNegatedScalar: - case NI_AVX10v1_FusedMultiplyAddNegatedScalar: - case NI_AVX10v1_FusedMultiplyAddScalar: - case NI_AVX10v1_FusedMultiplySubtractNegatedScalar: - case NI_AVX10v1_FusedMultiplySubtractScalar: + case NI_AVX512_FusedMultiplyAdd: + case NI_AVX512_FusedMultiplyAddScalar: + case NI_AVX512_FusedMultiplyAddNegated: + case NI_AVX512_FusedMultiplyAddNegatedScalar: + case NI_AVX512_FusedMultiplyAddSubtract: + case NI_AVX512_FusedMultiplySubtract: + case NI_AVX512_FusedMultiplySubtractScalar: + case NI_AVX512_FusedMultiplySubtractAdd: + case NI_AVX512_FusedMultiplySubtractNegated: + case NI_AVX512_FusedMultiplySubtractNegatedScalar: { assert((numArgs == 3) || (intrinsicTree->OperIsEmbRoundingEnabled())); assert(isRMW); @@ -2659,7 +2655,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou break; } - case NI_EVEX_BlendVariableMask: + case NI_AVX512_BlendVariableMask: { assert(numArgs == 3); @@ -2703,27 +2699,18 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou break; } - case NI_AVX512F_PermuteVar8x64x2: - case NI_AVX512F_PermuteVar16x32x2: - case NI_AVX512F_VL_PermuteVar2x64x2: - case NI_AVX512F_VL_PermuteVar4x32x2: - case NI_AVX512F_VL_PermuteVar4x64x2: - case NI_AVX512F_VL_PermuteVar8x32x2: - case NI_AVX512BW_PermuteVar32x16x2: - case NI_AVX512BW_VL_PermuteVar8x16x2: - case NI_AVX512BW_VL_PermuteVar16x16x2: + case NI_AVX512_PermuteVar2x64x2: + case NI_AVX512_PermuteVar4x32x2: + case NI_AVX512_PermuteVar4x64x2: + case NI_AVX512_PermuteVar8x32x2: + case NI_AVX512_PermuteVar8x64x2: + case NI_AVX512_PermuteVar8x16x2: + case NI_AVX512_PermuteVar16x16x2: + 
case NI_AVX512_PermuteVar16x32x2: + case NI_AVX512_PermuteVar32x16x2: + case NI_AVX512VBMI_PermuteVar16x8x2: + case NI_AVX512VBMI_PermuteVar32x8x2: case NI_AVX512VBMI_PermuteVar64x8x2: - case NI_AVX512VBMI_VL_PermuteVar16x8x2: - case NI_AVX512VBMI_VL_PermuteVar32x8x2: - case NI_AVX10v1_PermuteVar16x8x2: - case NI_AVX10v1_PermuteVar2x64x2: - case NI_AVX10v1_PermuteVar4x32x2: - case NI_AVX10v1_PermuteVar8x16x2: - case NI_AVX10v1_PermuteVar32x8x2: - case NI_AVX10v1_PermuteVar4x64x2: - case NI_AVX10v1_PermuteVar8x32x2: - case NI_AVX10v1_PermuteVar16x16x2: - case NI_AVX10v1_V512_PermuteVar64x8x2: { assert(numArgs == 3); assert(isRMW); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 90c1a5ae7f4e03..56ccae10c4e475 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9652,25 +9652,25 @@ GenTreeHWIntrinsic* Compiler::fgOptimizeForMaskedIntrinsic(GenTreeHWIntrinsic* n { case GT_AND: { - maskIntrinsicId = NI_EVEX_AndMask; + maskIntrinsicId = NI_AVX512_AndMask; break; } case GT_NOT: { - maskIntrinsicId = NI_EVEX_NotMask; + maskIntrinsicId = NI_AVX512_NotMask; break; } case GT_OR: { - maskIntrinsicId = NI_EVEX_OrMask; + maskIntrinsicId = NI_AVX512_OrMask; break; } case GT_XOR: { - maskIntrinsicId = NI_EVEX_XorMask; + maskIntrinsicId = NI_AVX512_XorMask; break; } diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index b55d3fb4e8584d..e62560c3e8c715 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -437,9 +437,9 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH return CORINFO_TYPE_UNDEF; } - if (!compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + if (!compOpportunisticallyDependsOn(InstructionSet_AVX512)) { - // We must treat as a regular struct if AVX512F isn't supported + // We must treat as a regular struct if AVX512 isn't supported return CORINFO_TYPE_UNDEF; } diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 
896873d49da097..d0a86c8cdf4e28 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -7937,10 +7937,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(GenTreeHWIntrinsic* tree, #endif // TARGET_ARM64 #if defined(TARGET_XARCH) - case NI_AVX512CD_LeadingZeroCount: - case NI_AVX512CD_VL_LeadingZeroCount: - case NI_AVX10v1_V512_LeadingZeroCount: - case NI_AVX10v1_LeadingZeroCount: + case NI_AVX512_LeadingZeroCount: { return EvaluateUnarySimd(this, GT_LZCNT, /* scalar */ false, type, baseType, arg0VN); } diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index f552e32731ace1..9cf0feec10d2b2 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -80,11 +80,10 @@ private static class XArchIntrinsicConstants public const int Avx512Vbmi = 0x10000; public const int Serialize = 0x20000; public const int Avx10v1 = 0x40000; - public const int Evex = 0x80000; - public const int Apx = 0x100000; - public const int Vpclmulqdq = 0x200000; - public const int Avx10v2 = 0x400000; - public const int Gfni = 0x800000; + public const int Apx = 0x80000; + public const int Vpclmulqdq = 0x100000; + public const int Avx10v2 = 0x200000; + public const int Gfni = 0x400000; public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { @@ -119,29 +118,13 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) if ((flags & Movbe) != 0) builder.AddSupportedInstructionSet("movbe"); if ((flags & Avx512) != 0) - { - builder.AddSupportedInstructionSet("avx512f"); - builder.AddSupportedInstructionSet("avx512f_vl"); - builder.AddSupportedInstructionSet("avx512bw"); - builder.AddSupportedInstructionSet("avx512bw_vl"); - builder.AddSupportedInstructionSet("avx512cd"); - builder.AddSupportedInstructionSet("avx512cd_vl"); - 
builder.AddSupportedInstructionSet("avx512dq"); - builder.AddSupportedInstructionSet("avx512dq_vl"); - } + builder.AddSupportedInstructionSet("avx512"); if ((flags & Avx512Vbmi) != 0) - { builder.AddSupportedInstructionSet("avx512vbmi"); - builder.AddSupportedInstructionSet("avx512vbmi_vl"); - } if ((flags & Serialize) != 0) builder.AddSupportedInstructionSet("serialize"); if ((flags & Avx10v1) != 0) builder.AddSupportedInstructionSet("avx10v1"); - if (((flags & Avx10v1) != 0) && ((flags & Avx512) != 0)) - builder.AddSupportedInstructionSet("avx10v1_v512"); - if ((flags & Evex) != 0) - builder.AddSupportedInstructionSet("evex"); if ((flags & Apx) != 0) builder.AddSupportedInstructionSet("apx"); if ((flags & Vpclmulqdq) != 0) @@ -152,8 +135,6 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) } if ((flags & Avx10v2) != 0) builder.AddSupportedInstructionSet("avx10v2"); - if (((flags & Avx10v2) != 0) && ((flags & Avx512) != 0)) - builder.AddSupportedInstructionSet("avx10v2_v512"); if ((flags & Gfni) != 0) { builder.AddSupportedInstructionSet("gfni"); @@ -202,35 +183,19 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_AVXVNNI => AvxVnni, InstructionSet.X64_AVXVNNI_X64 => AvxVnni, InstructionSet.X64_MOVBE => Movbe, - InstructionSet.X64_AVX512F => Avx512, - InstructionSet.X64_AVX512F_X64 => Avx512, - InstructionSet.X64_AVX512F_VL => Avx512, - InstructionSet.X64_AVX512BW => Avx512, - InstructionSet.X64_AVX512BW_X64 => Avx512, - InstructionSet.X64_AVX512BW_VL => Avx512, - InstructionSet.X64_AVX512CD => Avx512, - InstructionSet.X64_AVX512CD_X64 => Avx512, - InstructionSet.X64_AVX512CD_VL => Avx512, - InstructionSet.X64_AVX512DQ => Avx512, - InstructionSet.X64_AVX512DQ_X64 => Avx512, - InstructionSet.X64_AVX512DQ_VL => Avx512, + InstructionSet.X64_AVX512 => Avx512, + InstructionSet.X64_AVX512_X64 => Avx512, InstructionSet.X64_AVX512VBMI => Avx512Vbmi, InstructionSet.X64_AVX512VBMI_X64 => Avx512Vbmi, - 
InstructionSet.X64_AVX512VBMI_VL => Avx512Vbmi, InstructionSet.X64_X86Serialize => Serialize, InstructionSet.X64_X86Serialize_X64 => Serialize, InstructionSet.X64_AVX10v1 => Avx10v1, InstructionSet.X64_AVX10v1_X64 => Avx10v1, - InstructionSet.X64_AVX10v1_V512 => (Avx10v1 | Avx512), - InstructionSet.X64_AVX10v1_V512_X64 => (Avx10v1 | Avx512), - InstructionSet.X64_EVEX => Evex, InstructionSet.X64_APX => Apx, InstructionSet.X64_PCLMULQDQ_V256 => Vpclmulqdq, InstructionSet.X64_PCLMULQDQ_V512 => (Vpclmulqdq | Avx512), InstructionSet.X64_AVX10v2 => Avx10v2, InstructionSet.X64_AVX10v2_X64 => Avx10v2, - InstructionSet.X64_AVX10v2_V512 => (Avx10v2 | Avx512), - InstructionSet.X64_AVX10v2_V512_X64 => (Avx10v2 | Avx512), InstructionSet.X64_GFNI => Gfni, InstructionSet.X64_GFNI_X64 => Gfni, InstructionSet.X64_GFNI_V256 => (Gfni | Avx), diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index de972aced7c565..2ead9f342c1678 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -332,33 +332,9 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, if (_supportedInstructionSets.Any(iSet => iSet.Contains("avx512"))) { - // We can simply try adding all of the AVX-512 ISA's here, - // since SortedSet just ignores the value if it is already present. - - _supportedInstructionSets.Add("avx512f"); - _supportedInstructionSets.Add("avx512f_vl"); - _supportedInstructionSets.Add("avx512bw"); - _supportedInstructionSets.Add("avx512bw_vl"); - _supportedInstructionSets.Add("avx512cd"); - _supportedInstructionSets.Add("avx512cd_vl"); - _supportedInstructionSets.Add("avx512dq"); - _supportedInstructionSets.Add("avx512dq_vl"); - - // If AVX-512VBMI is specified, then we have to include its VL - // counterpart as well. 
- - if (_supportedInstructionSets.Contains("avx512vbmi")) - _supportedInstructionSets.Add("avx512vbmi_vl"); - // These ISAs should automatically extend to 512-bit if // AVX-512 is enabled. - if (_supportedInstructionSets.Contains("avx10v1")) - _supportedInstructionSets.Add("avx10v1_v512"); - - if (_supportedInstructionSets.Contains("avx10v2")) - _supportedInstructionSets.Add("avx10v2_v512"); - if (_supportedInstructionSets.Contains("gfni")) _supportedInstructionSets.Add("gfni_v512"); @@ -417,7 +393,7 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, { Debug.Assert(InstructionSet.X86_SSE2 == InstructionSet.X64_SSE2); Debug.Assert(InstructionSet.X86_AVX2 == InstructionSet.X64_AVX2); - Debug.Assert(InstructionSet.X86_AVX512F == InstructionSet.X64_AVX512F); + Debug.Assert(InstructionSet.X86_AVX512 == InstructionSet.X64_AVX512); Debug.Assert(InstructionSet.X86_VectorT128 == InstructionSet.X64_VectorT128); Debug.Assert(InstructionSet.X86_VectorT256 == InstructionSet.X64_VectorT256); @@ -430,7 +406,7 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, Debug.Assert((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 128)); supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT128); - if (supportedInstructionSets.HasInstructionSet(InstructionSet.X86_AVX512F) && (maxVectorTBitWidth >= 512)) + if (supportedInstructionSets.HasInstructionSet(InstructionSet.X86_AVX512) && (maxVectorTBitWidth >= 512)) { supportedInstructionSets.RemoveInstructionSet(InstructionSet.X86_VectorT128); supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT512); diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index 3ba09ed8107370..e7994faeebff0b 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -218,17 +218,9 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru } } - 
Debug.Assert(InstructionSet.X64_AVX512F == InstructionSet.X86_AVX512F); - if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512F)) + Debug.Assert(InstructionSet.X64_AVX512 == InstructionSet.X86_AVX512); + if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512)) { - Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512F_VL)); - Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512BW)); - Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)); - Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512CD)); - Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512CD_VL)); - Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512DQ)); - Debug.Assert(supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL)); - optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vbmi"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vbmi_vl"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v1"); @@ -258,8 +250,8 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru if (throttleAvx512) { - Debug.Assert(InstructionSet.X86_AVX512F == InstructionSet.X64_AVX512F); - if (supportedInstructionSet.HasInstructionSet(InstructionSet.X86_AVX512F)) + Debug.Assert(InstructionSet.X86_AVX512 == InstructionSet.X64_AVX512); + if (supportedInstructionSet.HasInstructionSet(InstructionSet.X86_AVX512)) { Debug.Assert(InstructionSet.X86_Vector256 == InstructionSet.X64_Vector256); Debug.Assert(InstructionSet.X86_VectorT256 == InstructionSet.X64_VectorT256); diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index a2ab0bd6d39a25..adc572ce81a754 100644 --- 
a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -40,29 +40,18 @@ public enum ReadyToRunInstructionSet Rcpc=26, Movbe=27, X86Serialize=28, - Avx512F=29, - Avx512F_VL=30, - Avx512BW=31, - Avx512BW_VL=32, - Avx512CD=33, - Avx512CD_VL=34, - Avx512DQ=35, - Avx512DQ_VL=36, + Avx512=29, Avx512Vbmi=37, - Avx512Vbmi_VL=38, VectorT128=39, VectorT256=40, VectorT512=41, Rcpc2=42, Sve=43, Avx10v1=44, - Avx10v1_V512=46, - EVEX=47, Apx=48, Pclmulqdq_V256=49, Pclmulqdq_V512=50, Avx10v2=51, - Avx10v2_V512=52, Gfni=53, Gfni_V256=54, Gfni_V512=55, diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 36847ce414ed99..0ec9eca25e89e6 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -113,34 +113,18 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_MOVBE: return ReadyToRunInstructionSet.Movbe; case InstructionSet.X64_X86Serialize: return ReadyToRunInstructionSet.X86Serialize; case InstructionSet.X64_X86Serialize_X64: return ReadyToRunInstructionSet.X86Serialize; - case InstructionSet.X64_EVEX: return ReadyToRunInstructionSet.EVEX; - case InstructionSet.X64_AVX512F: return ReadyToRunInstructionSet.Avx512F; - case InstructionSet.X64_AVX512F_X64: return ReadyToRunInstructionSet.Avx512F; - case InstructionSet.X64_AVX512F_VL: return ReadyToRunInstructionSet.Avx512F_VL; - case InstructionSet.X64_AVX512BW: return ReadyToRunInstructionSet.Avx512BW; - case InstructionSet.X64_AVX512BW_X64: return ReadyToRunInstructionSet.Avx512BW; - case InstructionSet.X64_AVX512BW_VL: return ReadyToRunInstructionSet.Avx512BW_VL; - case InstructionSet.X64_AVX512CD: return ReadyToRunInstructionSet.Avx512CD; - case InstructionSet.X64_AVX512CD_X64: 
return ReadyToRunInstructionSet.Avx512CD; - case InstructionSet.X64_AVX512CD_VL: return ReadyToRunInstructionSet.Avx512CD_VL; - case InstructionSet.X64_AVX512DQ: return ReadyToRunInstructionSet.Avx512DQ; - case InstructionSet.X64_AVX512DQ_X64: return ReadyToRunInstructionSet.Avx512DQ; - case InstructionSet.X64_AVX512DQ_VL: return ReadyToRunInstructionSet.Avx512DQ_VL; + case InstructionSet.X64_AVX512: return ReadyToRunInstructionSet.Avx512; + case InstructionSet.X64_AVX512_X64: return ReadyToRunInstructionSet.Avx512; case InstructionSet.X64_AVX512VBMI: return ReadyToRunInstructionSet.Avx512Vbmi; case InstructionSet.X64_AVX512VBMI_X64: return ReadyToRunInstructionSet.Avx512Vbmi; - case InstructionSet.X64_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL; case InstructionSet.X64_AVX10v1: return ReadyToRunInstructionSet.Avx10v1; case InstructionSet.X64_AVX10v1_X64: return ReadyToRunInstructionSet.Avx10v1; - case InstructionSet.X64_AVX10v1_V512: return ReadyToRunInstructionSet.Avx10v1_V512; - case InstructionSet.X64_AVX10v1_V512_X64: return ReadyToRunInstructionSet.Avx10v1_V512; case InstructionSet.X64_VectorT128: return ReadyToRunInstructionSet.VectorT128; case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512; case InstructionSet.X64_APX: return ReadyToRunInstructionSet.Apx; case InstructionSet.X64_AVX10v2: return ReadyToRunInstructionSet.Avx10v2; case InstructionSet.X64_AVX10v2_X64: return ReadyToRunInstructionSet.Avx10v2; - case InstructionSet.X64_AVX10v2_V512: return ReadyToRunInstructionSet.Avx10v2_V512; - case InstructionSet.X64_AVX10v2_V512_X64: return ReadyToRunInstructionSet.Avx10v2_V512; case InstructionSet.X64_GFNI: return ReadyToRunInstructionSet.Gfni; case InstructionSet.X64_GFNI_X64: return ReadyToRunInstructionSet.Gfni; case InstructionSet.X64_GFNI_V256: return ReadyToRunInstructionSet.Gfni_V256; @@ -196,34 +180,18 @@ public static class 
ReadyToRunInstructionSetHelper case InstructionSet.X86_MOVBE: return ReadyToRunInstructionSet.Movbe; case InstructionSet.X86_X86Serialize: return ReadyToRunInstructionSet.X86Serialize; case InstructionSet.X86_X86Serialize_X64: return null; - case InstructionSet.X86_EVEX: return ReadyToRunInstructionSet.EVEX; - case InstructionSet.X86_AVX512F: return ReadyToRunInstructionSet.Avx512F; - case InstructionSet.X86_AVX512F_X64: return null; - case InstructionSet.X86_AVX512F_VL: return ReadyToRunInstructionSet.Avx512F_VL; - case InstructionSet.X86_AVX512BW: return ReadyToRunInstructionSet.Avx512BW; - case InstructionSet.X86_AVX512BW_X64: return null; - case InstructionSet.X86_AVX512BW_VL: return ReadyToRunInstructionSet.Avx512BW_VL; - case InstructionSet.X86_AVX512CD: return ReadyToRunInstructionSet.Avx512CD; - case InstructionSet.X86_AVX512CD_X64: return null; - case InstructionSet.X86_AVX512CD_VL: return ReadyToRunInstructionSet.Avx512CD_VL; - case InstructionSet.X86_AVX512DQ: return ReadyToRunInstructionSet.Avx512DQ; - case InstructionSet.X86_AVX512DQ_X64: return null; - case InstructionSet.X86_AVX512DQ_VL: return ReadyToRunInstructionSet.Avx512DQ_VL; + case InstructionSet.X86_AVX512: return ReadyToRunInstructionSet.Avx512; + case InstructionSet.X86_AVX512_X64: return null; case InstructionSet.X86_AVX512VBMI: return ReadyToRunInstructionSet.Avx512Vbmi; case InstructionSet.X86_AVX512VBMI_X64: return null; - case InstructionSet.X86_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL; case InstructionSet.X86_AVX10v1: return ReadyToRunInstructionSet.Avx10v1; case InstructionSet.X86_AVX10v1_X64: return null; - case InstructionSet.X86_AVX10v1_V512: return ReadyToRunInstructionSet.Avx10v1_V512; - case InstructionSet.X86_AVX10v1_V512_X64: return null; case InstructionSet.X86_VectorT128: return ReadyToRunInstructionSet.VectorT128; case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256; case InstructionSet.X86_VectorT512: return 
ReadyToRunInstructionSet.VectorT512; case InstructionSet.X86_APX: return ReadyToRunInstructionSet.Apx; case InstructionSet.X86_AVX10v2: return ReadyToRunInstructionSet.Avx10v2; case InstructionSet.X86_AVX10v2_X64: return null; - case InstructionSet.X86_AVX10v2_V512: return ReadyToRunInstructionSet.Avx10v2_V512; - case InstructionSet.X86_AVX10v2_V512_X64: return null; case InstructionSet.X86_GFNI: return ReadyToRunInstructionSet.Gfni; case InstructionSet.X86_GFNI_X64: return null; case InstructionSet.X86_GFNI_V256: return ReadyToRunInstructionSet.Gfni_V256; diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 758d50e1706507..b9059768c55984 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -70,25 +70,14 @@ public enum InstructionSet X64_AVXVNNI = InstructionSet_X64.AVXVNNI, X64_MOVBE = InstructionSet_X64.MOVBE, X64_X86Serialize = InstructionSet_X64.X86Serialize, - X64_EVEX = InstructionSet_X64.EVEX, - X64_AVX512F = InstructionSet_X64.AVX512F, - X64_AVX512F_VL = InstructionSet_X64.AVX512F_VL, - X64_AVX512BW = InstructionSet_X64.AVX512BW, - X64_AVX512BW_VL = InstructionSet_X64.AVX512BW_VL, - X64_AVX512CD = InstructionSet_X64.AVX512CD, - X64_AVX512CD_VL = InstructionSet_X64.AVX512CD_VL, - X64_AVX512DQ = InstructionSet_X64.AVX512DQ, - X64_AVX512DQ_VL = InstructionSet_X64.AVX512DQ_VL, + X64_AVX512 = InstructionSet_X64.AVX512, X64_AVX512VBMI = InstructionSet_X64.AVX512VBMI, - X64_AVX512VBMI_VL = InstructionSet_X64.AVX512VBMI_VL, X64_AVX10v1 = InstructionSet_X64.AVX10v1, - X64_AVX10v1_V512 = InstructionSet_X64.AVX10v1_V512, X64_VectorT128 = InstructionSet_X64.VectorT128, X64_VectorT256 = InstructionSet_X64.VectorT256, X64_VectorT512 = InstructionSet_X64.VectorT512, X64_APX = InstructionSet_X64.APX, X64_AVX10v2 = InstructionSet_X64.AVX10v2, - X64_AVX10v2_V512 = 
InstructionSet_X64.AVX10v2_V512, X64_GFNI = InstructionSet_X64.GFNI, X64_GFNI_V256 = InstructionSet_X64.GFNI_V256, X64_GFNI_V512 = InstructionSet_X64.GFNI_V512, @@ -110,15 +99,10 @@ public enum InstructionSet X64_POPCNT_X64 = InstructionSet_X64.POPCNT_X64, X64_AVXVNNI_X64 = InstructionSet_X64.AVXVNNI_X64, X64_X86Serialize_X64 = InstructionSet_X64.X86Serialize_X64, - X64_AVX512F_X64 = InstructionSet_X64.AVX512F_X64, - X64_AVX512BW_X64 = InstructionSet_X64.AVX512BW_X64, - X64_AVX512CD_X64 = InstructionSet_X64.AVX512CD_X64, - X64_AVX512DQ_X64 = InstructionSet_X64.AVX512DQ_X64, + X64_AVX512_X64 = InstructionSet_X64.AVX512_X64, X64_AVX512VBMI_X64 = InstructionSet_X64.AVX512VBMI_X64, X64_AVX10v1_X64 = InstructionSet_X64.AVX10v1_X64, - X64_AVX10v1_V512_X64 = InstructionSet_X64.AVX10v1_V512_X64, X64_AVX10v2_X64 = InstructionSet_X64.AVX10v2_X64, - X64_AVX10v2_V512_X64 = InstructionSet_X64.AVX10v2_V512_X64, X64_GFNI_X64 = InstructionSet_X64.GFNI_X64, X86_X86Base = InstructionSet_X86.X86Base, X86_SSE = InstructionSet_X86.SSE, @@ -144,25 +128,14 @@ public enum InstructionSet X86_AVXVNNI = InstructionSet_X86.AVXVNNI, X86_MOVBE = InstructionSet_X86.MOVBE, X86_X86Serialize = InstructionSet_X86.X86Serialize, - X86_EVEX = InstructionSet_X86.EVEX, - X86_AVX512F = InstructionSet_X86.AVX512F, - X86_AVX512F_VL = InstructionSet_X86.AVX512F_VL, - X86_AVX512BW = InstructionSet_X86.AVX512BW, - X86_AVX512BW_VL = InstructionSet_X86.AVX512BW_VL, - X86_AVX512CD = InstructionSet_X86.AVX512CD, - X86_AVX512CD_VL = InstructionSet_X86.AVX512CD_VL, - X86_AVX512DQ = InstructionSet_X86.AVX512DQ, - X86_AVX512DQ_VL = InstructionSet_X86.AVX512DQ_VL, + X86_AVX512 = InstructionSet_X86.AVX512, X86_AVX512VBMI = InstructionSet_X86.AVX512VBMI, - X86_AVX512VBMI_VL = InstructionSet_X86.AVX512VBMI_VL, X86_AVX10v1 = InstructionSet_X86.AVX10v1, - X86_AVX10v1_V512 = InstructionSet_X86.AVX10v1_V512, X86_VectorT128 = InstructionSet_X86.VectorT128, X86_VectorT256 = InstructionSet_X86.VectorT256, X86_VectorT512 = 
InstructionSet_X86.VectorT512, X86_APX = InstructionSet_X86.APX, X86_AVX10v2 = InstructionSet_X86.AVX10v2, - X86_AVX10v2_V512 = InstructionSet_X86.AVX10v2_V512, X86_GFNI = InstructionSet_X86.GFNI, X86_GFNI_V256 = InstructionSet_X86.GFNI_V256, X86_GFNI_V512 = InstructionSet_X86.GFNI_V512, @@ -184,15 +157,10 @@ public enum InstructionSet X86_POPCNT_X64 = InstructionSet_X86.POPCNT_X64, X86_AVXVNNI_X64 = InstructionSet_X86.AVXVNNI_X64, X86_X86Serialize_X64 = InstructionSet_X86.X86Serialize_X64, - X86_AVX512F_X64 = InstructionSet_X86.AVX512F_X64, - X86_AVX512BW_X64 = InstructionSet_X86.AVX512BW_X64, - X86_AVX512CD_X64 = InstructionSet_X86.AVX512CD_X64, - X86_AVX512DQ_X64 = InstructionSet_X86.AVX512DQ_X64, + X86_AVX512_X64 = InstructionSet_X86.AVX512_X64, X86_AVX512VBMI_X64 = InstructionSet_X86.AVX512VBMI_X64, X86_AVX10v1_X64 = InstructionSet_X86.AVX10v1_X64, - X86_AVX10v1_V512_X64 = InstructionSet_X86.AVX10v1_V512_X64, X86_AVX10v2_X64 = InstructionSet_X86.AVX10v2_X64, - X86_AVX10v2_V512_X64 = InstructionSet_X86.AVX10v2_V512_X64, X86_GFNI_X64 = InstructionSet_X86.GFNI_X64, } public enum InstructionSet_ARM64 @@ -265,56 +233,40 @@ public enum InstructionSet_X64 AVXVNNI = 22, MOVBE = 23, X86Serialize = 24, - EVEX = 25, - AVX512F = 26, - AVX512F_VL = 27, - AVX512BW = 28, - AVX512BW_VL = 29, - AVX512CD = 30, - AVX512CD_VL = 31, - AVX512DQ = 32, - AVX512DQ_VL = 33, - AVX512VBMI = 34, - AVX512VBMI_VL = 35, - AVX10v1 = 36, - AVX10v1_V512 = 37, - VectorT128 = 38, - VectorT256 = 39, - VectorT512 = 40, - APX = 41, - AVX10v2 = 42, - AVX10v2_V512 = 43, - GFNI = 44, - GFNI_V256 = 45, - GFNI_V512 = 46, - X86Base_X64 = 47, - SSE_X64 = 48, - SSE2_X64 = 49, - SSE3_X64 = 50, - SSSE3_X64 = 51, - SSE41_X64 = 52, - SSE42_X64 = 53, - AVX_X64 = 54, - AVX2_X64 = 55, - AES_X64 = 56, - BMI1_X64 = 57, - BMI2_X64 = 58, - FMA_X64 = 59, - LZCNT_X64 = 60, - PCLMULQDQ_X64 = 61, - POPCNT_X64 = 62, - AVXVNNI_X64 = 63, - X86Serialize_X64 = 64, - AVX512F_X64 = 65, - AVX512BW_X64 = 66, - AVX512CD_X64 = 67, - 
AVX512DQ_X64 = 68, - AVX512VBMI_X64 = 69, - AVX10v1_X64 = 70, - AVX10v1_V512_X64 = 71, - AVX10v2_X64 = 72, - AVX10v2_V512_X64 = 73, - GFNI_X64 = 74, + AVX512 = 25, + AVX512VBMI = 26, + AVX10v1 = 27, + VectorT128 = 28, + VectorT256 = 29, + VectorT512 = 30, + APX = 31, + AVX10v2 = 32, + GFNI = 33, + GFNI_V256 = 34, + GFNI_V512 = 35, + X86Base_X64 = 36, + SSE_X64 = 37, + SSE2_X64 = 38, + SSE3_X64 = 39, + SSSE3_X64 = 40, + SSE41_X64 = 41, + SSE42_X64 = 42, + AVX_X64 = 43, + AVX2_X64 = 44, + AES_X64 = 45, + BMI1_X64 = 46, + BMI2_X64 = 47, + FMA_X64 = 48, + LZCNT_X64 = 49, + PCLMULQDQ_X64 = 50, + POPCNT_X64 = 51, + AVXVNNI_X64 = 52, + X86Serialize_X64 = 53, + AVX512_X64 = 54, + AVX512VBMI_X64 = 55, + AVX10v1_X64 = 56, + AVX10v2_X64 = 57, + GFNI_X64 = 58, } public enum InstructionSet_X86 @@ -345,56 +297,40 @@ public enum InstructionSet_X86 AVXVNNI = 22, MOVBE = 23, X86Serialize = 24, - EVEX = 25, - AVX512F = 26, - AVX512F_VL = 27, - AVX512BW = 28, - AVX512BW_VL = 29, - AVX512CD = 30, - AVX512CD_VL = 31, - AVX512DQ = 32, - AVX512DQ_VL = 33, - AVX512VBMI = 34, - AVX512VBMI_VL = 35, - AVX10v1 = 36, - AVX10v1_V512 = 37, - VectorT128 = 38, - VectorT256 = 39, - VectorT512 = 40, - APX = 41, - AVX10v2 = 42, - AVX10v2_V512 = 43, - GFNI = 44, - GFNI_V256 = 45, - GFNI_V512 = 46, - X86Base_X64 = 47, - SSE_X64 = 48, - SSE2_X64 = 49, - SSE3_X64 = 50, - SSSE3_X64 = 51, - SSE41_X64 = 52, - SSE42_X64 = 53, - AVX_X64 = 54, - AVX2_X64 = 55, - AES_X64 = 56, - BMI1_X64 = 57, - BMI2_X64 = 58, - FMA_X64 = 59, - LZCNT_X64 = 60, - PCLMULQDQ_X64 = 61, - POPCNT_X64 = 62, - AVXVNNI_X64 = 63, - X86Serialize_X64 = 64, - AVX512F_X64 = 65, - AVX512BW_X64 = 66, - AVX512CD_X64 = 67, - AVX512DQ_X64 = 68, - AVX512VBMI_X64 = 69, - AVX10v1_X64 = 70, - AVX10v1_V512_X64 = 71, - AVX10v2_X64 = 72, - AVX10v2_V512_X64 = 73, - GFNI_X64 = 74, + AVX512 = 25, + AVX512VBMI = 26, + AVX10v1 = 27, + VectorT128 = 28, + VectorT256 = 29, + VectorT512 = 30, + APX = 31, + AVX10v2 = 32, + GFNI = 33, + GFNI_V256 = 34, + GFNI_V512 
= 35, + X86Base_X64 = 36, + SSE_X64 = 37, + SSE2_X64 = 38, + SSE3_X64 = 39, + SSSE3_X64 = 40, + SSE41_X64 = 41, + SSE42_X64 = 42, + AVX_X64 = 43, + AVX2_X64 = 44, + AES_X64 = 45, + BMI1_X64 = 46, + BMI2_X64 = 47, + FMA_X64 = 48, + LZCNT_X64 = 49, + PCLMULQDQ_X64 = 50, + POPCNT_X64 = 51, + AVXVNNI_X64 = 52, + X86Serialize_X64 = 53, + AVX512_X64 = 54, + AVX512VBMI_X64 = 55, + AVX10v1_X64 = 56, + AVX10v2_X64 = 57, + GFNI_X64 = 58, } public unsafe struct InstructionSetFlags : IEnumerable @@ -529,7 +465,7 @@ public static InstructionSet ConvertToImpliedInstructionSetForVectorInstructionS { case InstructionSet.X64_Vector128: return InstructionSet.X64_SSE; case InstructionSet.X64_Vector256: return InstructionSet.X64_AVX; - case InstructionSet.X64_Vector512: return InstructionSet.X64_AVX512F; + case InstructionSet.X64_Vector512: return InstructionSet.X64_AVX512; } break; case TargetArchitecture.X86: @@ -537,7 +473,7 @@ public static InstructionSet ConvertToImpliedInstructionSetForVectorInstructionS { case InstructionSet.X86_Vector128: return InstructionSet.X86_SSE; case InstructionSet.X86_Vector256: return InstructionSet.X86_AVX; - case InstructionSet.X86_Vector512: return InstructionSet.X86_AVX512F; + case InstructionSet.X86_Vector512: return InstructionSet.X86_AVX512; } break; } @@ -701,22 +637,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_X64); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_X64); - if 
(resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_X64); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_X64); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512_X64); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_X64)) @@ -725,18 +649,10 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2); - if 
(resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512_X64); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI)) resultflags.AddInstructionSet(InstructionSet.X64_GFNI_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_X64)) @@ -769,38 +685,12 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_MOVBE)) resultflags.AddInstructionSet(InstructionSet.X64_SSE42); - if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX)) + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512)) resultflags.AddInstructionSet(InstructionSet.X64_AVX2); - if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX)) + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512)) resultflags.AddInstructionSet(InstructionSet.X64_FMA); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X64_EVEX); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL)) - 
resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); + resultflags.AddInstructionSet(InstructionSet.X64_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X64_AES)) resultflags.AddInstructionSet(InstructionSet.X64_SSE2); if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ)) @@ -812,7 +702,7 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V512)) resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V256); if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + resultflags.AddInstructionSet(InstructionSet.X64_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI)) resultflags.AddInstructionSet(InstructionSet.X64_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize)) @@ -826,47 +716,25 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target if 
(resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V512)) resultflags.AddInstructionSet(InstructionSet.X64_GFNI); if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + resultflags.AddInstructionSet(InstructionSet.X64_AVX512); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1)) - resultflags.AddInstructionSet(InstructionSet.X64_EVEX); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) - 
resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_Vector128)) resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_Vector256)) resultflags.AddInstructionSet(InstructionSet.X64_AVX); if (resultflags.HasInstructionSet(InstructionSet.X64_Vector512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + resultflags.AddInstructionSet(InstructionSet.X64_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT128)) resultflags.AddInstructionSet(InstructionSet.X64_SSE2); if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT256)) resultflags.AddInstructionSet(InstructionSet.X64_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); + resultflags.AddInstructionSet(InstructionSet.X64_AVX512); break; case TargetArchitecture.X86: @@ -898,38 +766,12 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X86_MOVBE)) resultflags.AddInstructionSet(InstructionSet.X86_SSE42); - if (resultflags.HasInstructionSet(InstructionSet.X86_EVEX)) + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512)) resultflags.AddInstructionSet(InstructionSet.X86_AVX2); - if (resultflags.HasInstructionSet(InstructionSet.X86_EVEX)) + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512)) resultflags.AddInstructionSet(InstructionSet.X86_FMA); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X86_EVEX); - if 
(resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL); + resultflags.AddInstructionSet(InstructionSet.X86_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X86_AES)) resultflags.AddInstructionSet(InstructionSet.X86_SSE2); if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ)) @@ -941,7 +783,7 @@ public static InstructionSetFlags 
ExpandInstructionSetByImplicationHelper(Target if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V512)) resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V256); if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); + resultflags.AddInstructionSet(InstructionSet.X86_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNI)) resultflags.AddInstructionSet(InstructionSet.X86_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X86_X86Serialize)) @@ -955,47 +797,25 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V512)) resultflags.AddInstructionSet(InstructionSet.X86_GFNI); if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); + resultflags.AddInstructionSet(InstructionSet.X86_AVX512); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1)) - resultflags.AddInstructionSet(InstructionSet.X86_EVEX); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512)) - 
resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2)) resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_Vector128)) resultflags.AddInstructionSet(InstructionSet.X86_SSE); if (resultflags.HasInstructionSet(InstructionSet.X86_Vector256)) resultflags.AddInstructionSet(InstructionSet.X86_AVX); if (resultflags.HasInstructionSet(InstructionSet.X86_Vector512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); + resultflags.AddInstructionSet(InstructionSet.X86_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT128)) resultflags.AddInstructionSet(InstructionSet.X86_SSE2); if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT256)) resultflags.AddInstructionSet(InstructionSet.X86_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); + resultflags.AddInstructionSet(InstructionSet.X86_AVX512); break; } } while (!oldflags.Equals(resultflags)); @@ -1109,24 +929,14 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe 
resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize_X64)) resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512_X64)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_X64)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2_V512_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_X64)) resultflags.AddInstructionSet(InstructionSet.X64_GFNI); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) @@ -1158,37 +968,11 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42)) resultflags.AddInstructionSet(InstructionSet.X64_MOVBE); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) - 
resultflags.AddInstructionSet(InstructionSet.X64_EVEX); + resultflags.AddInstructionSet(InstructionSet.X64_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X64_FMA)) - resultflags.AddInstructionSet(InstructionSet.X64_EVEX); - if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512F_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI)) - 
resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) resultflags.AddInstructionSet(InstructionSet.X64_AES); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) @@ -1199,7 +983,7 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V256); if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V256)) resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V512); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512)) resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI); @@ -1213,47 +997,25 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V256); if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI)) resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V512); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512)) resultflags.AddInstructionSet(InstructionSet.X64_GFNI_V512); - if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX)) + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F_VL)) - 
resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512); + resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2_V512); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) resultflags.AddInstructionSet(InstructionSet.X64_Vector128); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX)) resultflags.AddInstructionSet(InstructionSet.X64_Vector256); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512)) resultflags.AddInstructionSet(InstructionSet.X64_Vector512); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) 
resultflags.AddInstructionSet(InstructionSet.X64_VectorT128); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) resultflags.AddInstructionSet(InstructionSet.X64_VectorT256); - if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512)) resultflags.AddInstructionSet(InstructionSet.X64_VectorT512); break; @@ -1287,37 +1049,11 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42)) resultflags.AddInstructionSet(InstructionSet.X86_MOVBE); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) - resultflags.AddInstructionSet(InstructionSet.X86_EVEX); + resultflags.AddInstructionSet(InstructionSet.X86_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X86_FMA)) - resultflags.AddInstructionSet(InstructionSet.X86_EVEX); - if (resultflags.HasInstructionSet(InstructionSet.X86_EVEX)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512F_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) 
- resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2)) resultflags.AddInstructionSet(InstructionSet.X86_AES); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2)) @@ -1328,7 +1064,7 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V256); if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V256)) resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V512); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512)) resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNI); @@ -1342,47 +1078,25 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V256); if (resultflags.HasInstructionSet(InstructionSet.X86_GFNI)) resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V512); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + if 
(resultflags.HasInstructionSet(InstructionSet.X86_AVX512)) resultflags.AddInstructionSet(InstructionSet.X86_GFNI_V512); - if (resultflags.HasInstructionSet(InstructionSet.X86_EVEX)) + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512)) resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512); + resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1)) resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2); - if 
(resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512)) - resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2_V512); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE)) resultflags.AddInstructionSet(InstructionSet.X86_Vector128); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX)) resultflags.AddInstructionSet(InstructionSet.X86_Vector256); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512)) resultflags.AddInstructionSet(InstructionSet.X86_Vector512); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2)) resultflags.AddInstructionSet(InstructionSet.X86_VectorT128); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) resultflags.AddInstructionSet(InstructionSet.X86_VectorT256); - if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512)) resultflags.AddInstructionSet(InstructionSet.X86_VectorT512); break; } @@ -1401,8 +1115,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe { ("x86-x64-v3", TargetArchitecture.X86), "x86-x64-v2 avx2 bmi bmi2 lzcnt movbe fma" }, { ("skylake", TargetArchitecture.X64), "x86-x64-v3" }, { ("skylake", TargetArchitecture.X86), "x86-x64-v3" }, - { ("x86-x64-v4", TargetArchitecture.X64), "x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl avx512dq avx512dq_vl" }, - { ("x86-x64-v4", TargetArchitecture.X86), "x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl avx512dq avx512dq_vl" }, + { ("x86-x64-v4", TargetArchitecture.X64), "x86-x64-v3 avx512" }, + { ("x86-x64-v4", TargetArchitecture.X86), "x86-x64-v3 avx512" }, { ("armv8-a", TargetArchitecture.ARM64), "neon" }, { ("armv8.1-a", TargetArchitecture.ARM64), "armv8-a lse crc rdma" }, { ("armv8.2-a", TargetArchitecture.ARM64), "armv8.1-a" }, @@ -1489,27 +1203,28 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return 
new InstructionSetInfo("Vector256", "", InstructionSet.X64_Vector256, false); yield return new InstructionSetInfo("Vector512", "", InstructionSet.X64_Vector512, false); yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X64_AVXVNNI, true); - yield return new InstructionSetInfo("movbe", "Movbe", InstructionSet.X64_MOVBE, true); + yield return new InstructionSetInfo("movbe", "", InstructionSet.X64_MOVBE, true); yield return new InstructionSetInfo("serialize", "X86Serialize", InstructionSet.X64_X86Serialize, true); - yield return new InstructionSetInfo("evex", "EVEX", InstructionSet.X64_EVEX, true); - yield return new InstructionSetInfo("avx512f", "Avx512F", InstructionSet.X64_AVX512F, true); - yield return new InstructionSetInfo("avx512f_vl", "Avx512F_VL", InstructionSet.X64_AVX512F_VL, true); - yield return new InstructionSetInfo("avx512bw", "Avx512BW", InstructionSet.X64_AVX512BW, true); - yield return new InstructionSetInfo("avx512bw_vl", "Avx512BW_VL", InstructionSet.X64_AVX512BW_VL, true); - yield return new InstructionSetInfo("avx512cd", "Avx512CD", InstructionSet.X64_AVX512CD, true); - yield return new InstructionSetInfo("avx512cd_vl", "Avx512CD_VL", InstructionSet.X64_AVX512CD_VL, true); - yield return new InstructionSetInfo("avx512dq", "Avx512DQ", InstructionSet.X64_AVX512DQ, true); - yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X64_AVX512DQ_VL, true); + yield return new InstructionSetInfo("avx512", "", InstructionSet.X64_AVX512, true); + yield return new InstructionSetInfo("evex", "", InstructionSet.X64_AVX512, true); + yield return new InstructionSetInfo("avx512f", "Avx512F", InstructionSet.X64_AVX512, true); + yield return new InstructionSetInfo("avx512f_vl", "Avx512F_VL", InstructionSet.X64_AVX512, true); + yield return new InstructionSetInfo("avx512bw", "Avx512BW", InstructionSet.X64_AVX512, true); + yield return new InstructionSetInfo("avx512bw_vl", "Avx512BW_VL", InstructionSet.X64_AVX512, 
true); + yield return new InstructionSetInfo("avx512cd", "Avx512CD", InstructionSet.X64_AVX512, true); + yield return new InstructionSetInfo("avx512cd_vl", "Avx512CD_VL", InstructionSet.X64_AVX512, true); + yield return new InstructionSetInfo("avx512dq", "Avx512DQ", InstructionSet.X64_AVX512, true); + yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X64_AVX512, true); yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X64_AVX512VBMI, true); - yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X64_AVX512VBMI_VL, true); + yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X64_AVX512VBMI, true); yield return new InstructionSetInfo("avx10v1", "Avx10v1", InstructionSet.X64_AVX10v1, true); - yield return new InstructionSetInfo("avx10v1_v512", "Avx10v1_V512", InstructionSet.X64_AVX10v1_V512, true); - yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X64_VectorT128, true); - yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X64_VectorT256, true); - yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X64_VectorT512, true); - yield return new InstructionSetInfo("apx", "Apx", InstructionSet.X64_APX, true); + yield return new InstructionSetInfo("avx10v1_v512", "Avx10v1_V512", InstructionSet.X64_AVX10v1, true); + yield return new InstructionSetInfo("vectort128", "", InstructionSet.X64_VectorT128, true); + yield return new InstructionSetInfo("vectort256", "", InstructionSet.X64_VectorT256, true); + yield return new InstructionSetInfo("vectort512", "", InstructionSet.X64_VectorT512, true); + yield return new InstructionSetInfo("apx", "", InstructionSet.X64_APX, true); yield return new InstructionSetInfo("avx10v2", "Avx10v2", InstructionSet.X64_AVX10v2, true); - yield return new InstructionSetInfo("avx10v2_v512", "Avx10v2_V512", InstructionSet.X64_AVX10v2_V512, true); + 
yield return new InstructionSetInfo("avx10v2_v512", "Avx10v2_V512", InstructionSet.X64_AVX10v2, true); yield return new InstructionSetInfo("gfni", "Gfni", InstructionSet.X64_GFNI, true); yield return new InstructionSetInfo("gfni_v256", "Gfni_V256", InstructionSet.X64_GFNI_V256, true); yield return new InstructionSetInfo("gfni_v512", "Gfni_V512", InstructionSet.X64_GFNI_V512, true); @@ -1538,27 +1253,28 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("Vector256", "", InstructionSet.X86_Vector256, false); yield return new InstructionSetInfo("Vector512", "", InstructionSet.X86_Vector512, false); yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X86_AVXVNNI, true); - yield return new InstructionSetInfo("movbe", "Movbe", InstructionSet.X86_MOVBE, true); + yield return new InstructionSetInfo("movbe", "", InstructionSet.X86_MOVBE, true); yield return new InstructionSetInfo("serialize", "X86Serialize", InstructionSet.X86_X86Serialize, true); - yield return new InstructionSetInfo("evex", "EVEX", InstructionSet.X86_EVEX, true); - yield return new InstructionSetInfo("avx512f", "Avx512F", InstructionSet.X86_AVX512F, true); - yield return new InstructionSetInfo("avx512f_vl", "Avx512F_VL", InstructionSet.X86_AVX512F_VL, true); - yield return new InstructionSetInfo("avx512bw", "Avx512BW", InstructionSet.X86_AVX512BW, true); - yield return new InstructionSetInfo("avx512bw_vl", "Avx512BW_VL", InstructionSet.X86_AVX512BW_VL, true); - yield return new InstructionSetInfo("avx512cd", "Avx512CD", InstructionSet.X86_AVX512CD, true); - yield return new InstructionSetInfo("avx512cd_vl", "Avx512CD_VL", InstructionSet.X86_AVX512CD_VL, true); - yield return new InstructionSetInfo("avx512dq", "Avx512DQ", InstructionSet.X86_AVX512DQ, true); - yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X86_AVX512DQ_VL, true); + yield return new InstructionSetInfo("avx512", "", 
InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("evex", "", InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("avx512f", "Avx512F", InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("avx512f_vl", "Avx512F_VL", InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("avx512bw", "Avx512BW", InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("avx512bw_vl", "Avx512BW_VL", InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("avx512cd", "Avx512CD", InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("avx512cd_vl", "Avx512CD_VL", InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("avx512dq", "Avx512DQ", InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X86_AVX512, true); yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X86_AVX512VBMI, true); - yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X86_AVX512VBMI_VL, true); + yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X86_AVX512VBMI, true); yield return new InstructionSetInfo("avx10v1", "Avx10v1", InstructionSet.X86_AVX10v1, true); - yield return new InstructionSetInfo("avx10v1_v512", "Avx10v1_V512", InstructionSet.X86_AVX10v1_V512, true); - yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X86_VectorT128, true); - yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X86_VectorT256, true); - yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X86_VectorT512, true); - yield return new InstructionSetInfo("apx", "Apx", InstructionSet.X86_APX, true); + yield return new InstructionSetInfo("avx10v1_v512", "Avx10v1_V512", InstructionSet.X86_AVX10v1, true); + yield return new InstructionSetInfo("vectort128", "", 
InstructionSet.X86_VectorT128, true); + yield return new InstructionSetInfo("vectort256", "", InstructionSet.X86_VectorT256, true); + yield return new InstructionSetInfo("vectort512", "", InstructionSet.X86_VectorT512, true); + yield return new InstructionSetInfo("apx", "", InstructionSet.X86_APX, true); yield return new InstructionSetInfo("avx10v2", "Avx10v2", InstructionSet.X86_AVX10v2, true); - yield return new InstructionSetInfo("avx10v2_v512", "Avx10v2_V512", InstructionSet.X86_AVX10v2_V512, true); + yield return new InstructionSetInfo("avx10v2_v512", "Avx10v2_V512", InstructionSet.X86_AVX10v2, true); yield return new InstructionSetInfo("gfni", "Gfni", InstructionSet.X86_GFNI, true); yield return new InstructionSetInfo("gfni_v256", "Gfni_V256", InstructionSet.X86_GFNI_V256, true); yield return new InstructionSetInfo("gfni_v512", "Gfni_V512", InstructionSet.X86_GFNI_V512, true); @@ -1634,24 +1350,14 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) AddInstructionSet(InstructionSet.X64_AVXVNNI_X64); if (HasInstructionSet(InstructionSet.X64_X86Serialize)) AddInstructionSet(InstructionSet.X64_X86Serialize_X64); - if (HasInstructionSet(InstructionSet.X64_AVX512F)) - AddInstructionSet(InstructionSet.X64_AVX512F_X64); - if (HasInstructionSet(InstructionSet.X64_AVX512BW)) - AddInstructionSet(InstructionSet.X64_AVX512BW_X64); - if (HasInstructionSet(InstructionSet.X64_AVX512CD)) - AddInstructionSet(InstructionSet.X64_AVX512CD_X64); - if (HasInstructionSet(InstructionSet.X64_AVX512DQ)) - AddInstructionSet(InstructionSet.X64_AVX512DQ_X64); + if (HasInstructionSet(InstructionSet.X64_AVX512)) + AddInstructionSet(InstructionSet.X64_AVX512_X64); if (HasInstructionSet(InstructionSet.X64_AVX512VBMI)) AddInstructionSet(InstructionSet.X64_AVX512VBMI_X64); if (HasInstructionSet(InstructionSet.X64_AVX10v1)) AddInstructionSet(InstructionSet.X64_AVX10v1_X64); - if (HasInstructionSet(InstructionSet.X64_AVX10v1_V512)) - 
AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); if (HasInstructionSet(InstructionSet.X64_AVX10v2)) AddInstructionSet(InstructionSet.X64_AVX10v2_X64); - if (HasInstructionSet(InstructionSet.X64_AVX10v2_V512)) - AddInstructionSet(InstructionSet.X64_AVX10v2_V512_X64); if (HasInstructionSet(InstructionSet.X64_GFNI)) AddInstructionSet(InstructionSet.X64_GFNI_X64); break; @@ -1701,15 +1407,10 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X64_POPCNT_X64); AddInstructionSet(InstructionSet.X64_AVXVNNI_X64); AddInstructionSet(InstructionSet.X64_X86Serialize_X64); - AddInstructionSet(InstructionSet.X64_AVX512F_X64); - AddInstructionSet(InstructionSet.X64_AVX512BW_X64); - AddInstructionSet(InstructionSet.X64_AVX512CD_X64); - AddInstructionSet(InstructionSet.X64_AVX512DQ_X64); + AddInstructionSet(InstructionSet.X64_AVX512_X64); AddInstructionSet(InstructionSet.X64_AVX512VBMI_X64); AddInstructionSet(InstructionSet.X64_AVX10v1_X64); - AddInstructionSet(InstructionSet.X64_AVX10v1_V512_X64); AddInstructionSet(InstructionSet.X64_AVX10v2_X64); - AddInstructionSet(InstructionSet.X64_AVX10v2_V512_X64); AddInstructionSet(InstructionSet.X64_GFNI_X64); break; @@ -1732,15 +1433,10 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc AddInstructionSet(InstructionSet.X86_POPCNT_X64); AddInstructionSet(InstructionSet.X86_AVXVNNI_X64); AddInstructionSet(InstructionSet.X86_X86Serialize_X64); - AddInstructionSet(InstructionSet.X86_AVX512F_X64); - AddInstructionSet(InstructionSet.X86_AVX512BW_X64); - AddInstructionSet(InstructionSet.X86_AVX512CD_X64); - AddInstructionSet(InstructionSet.X86_AVX512DQ_X64); + AddInstructionSet(InstructionSet.X86_AVX512_X64); AddInstructionSet(InstructionSet.X86_AVX512VBMI_X64); AddInstructionSet(InstructionSet.X86_AVX10v1_X64); - AddInstructionSet(InstructionSet.X86_AVX10v1_V512_X64); AddInstructionSet(InstructionSet.X86_AVX10v2_X64); - 
AddInstructionSet(InstructionSet.X86_AVX10v2_V512_X64); AddInstructionSet(InstructionSet.X86_GFNI_X64); break; } @@ -1987,60 +1683,69 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.X64_AVXVNNI; } - case "Movbe": - { return InstructionSet.X64_MOVBE; } - case "X86Serialize": if (nestedTypeName == "X64") { return InstructionSet.X64_X86Serialize_X64; } else { return InstructionSet.X64_X86Serialize; } - case "EVEX": - { return InstructionSet.X64_EVEX; } - case "Avx512F": if (nestedTypeName == "X64") - { return InstructionSet.X64_AVX512F_X64; } + { return InstructionSet.X64_AVX512_X64; } + else + if (nestedTypeName == "VL_X64") + { return InstructionSet.X64_AVX512_X64; } else if (nestedTypeName == "VL") - { return InstructionSet.X64_AVX512F_VL; } + { return InstructionSet.X64_AVX512; } else - { return InstructionSet.X64_AVX512F; } + { return InstructionSet.X64_AVX512; } case "Avx512BW": if (nestedTypeName == "X64") - { return InstructionSet.X64_AVX512BW_X64; } + { return InstructionSet.X64_AVX512_X64; } + else + if (nestedTypeName == "VL_X64") + { return InstructionSet.X64_AVX512_X64; } else if (nestedTypeName == "VL") - { return InstructionSet.X64_AVX512BW_VL; } + { return InstructionSet.X64_AVX512; } else - { return InstructionSet.X64_AVX512BW; } + { return InstructionSet.X64_AVX512; } case "Avx512CD": if (nestedTypeName == "X64") - { return InstructionSet.X64_AVX512CD_X64; } + { return InstructionSet.X64_AVX512_X64; } + else + if (nestedTypeName == "VL_X64") + { return InstructionSet.X64_AVX512_X64; } else if (nestedTypeName == "VL") - { return InstructionSet.X64_AVX512CD_VL; } + { return InstructionSet.X64_AVX512; } else - { return InstructionSet.X64_AVX512CD; } + { return InstructionSet.X64_AVX512; } case "Avx512DQ": if (nestedTypeName == "X64") - { return InstructionSet.X64_AVX512DQ_X64; } + { return InstructionSet.X64_AVX512_X64; } + else + if (nestedTypeName == "VL_X64") + { return 
InstructionSet.X64_AVX512_X64; } else if (nestedTypeName == "VL") - { return InstructionSet.X64_AVX512DQ_VL; } + { return InstructionSet.X64_AVX512; } else - { return InstructionSet.X64_AVX512DQ; } + { return InstructionSet.X64_AVX512; } case "Avx512Vbmi": if (nestedTypeName == "X64") { return InstructionSet.X64_AVX512VBMI_X64; } else + if (nestedTypeName == "VL_X64") + { return InstructionSet.X64_AVX512VBMI_X64; } + else if (nestedTypeName == "VL") - { return InstructionSet.X64_AVX512VBMI_VL; } + { return InstructionSet.X64_AVX512VBMI; } else { return InstructionSet.X64_AVX512VBMI; } @@ -2049,34 +1754,22 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite { return InstructionSet.X64_AVX10v1_X64; } else if (nestedTypeName == "V512_X64") - { return InstructionSet.X64_AVX10v1_V512_X64; } + { return InstructionSet.X64_AVX10v1_X64; } else if (nestedTypeName == "V512") - { return InstructionSet.X64_AVX10v1_V512; } + { return InstructionSet.X64_AVX10v1; } else { return InstructionSet.X64_AVX10v1; } - case "VectorT128": - { return InstructionSet.X64_VectorT128; } - - case "VectorT256": - { return InstructionSet.X64_VectorT256; } - - case "VectorT512": - { return InstructionSet.X64_VectorT512; } - - case "Apx": - { return InstructionSet.X64_APX; } - case "Avx10v2": if (nestedTypeName == "X64") { return InstructionSet.X64_AVX10v2_X64; } else if (nestedTypeName == "V512_X64") - { return InstructionSet.X64_AVX10v2_V512_X64; } + { return InstructionSet.X64_AVX10v2_X64; } else if (nestedTypeName == "V512") - { return InstructionSet.X64_AVX10v2_V512; } + { return InstructionSet.X64_AVX10v2; } else { return InstructionSet.X64_AVX10v2; } @@ -2156,66 +1849,48 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite case "AvxVnni": { return InstructionSet.X86_AVXVNNI; } - case "Movbe": - { return InstructionSet.X86_MOVBE; } - case "X86Serialize": { return InstructionSet.X86_X86Serialize; } - case "EVEX": - { return 
InstructionSet.X86_EVEX; } - case "Avx512F": if (nestedTypeName == "VL") - { return InstructionSet.X86_AVX512F_VL; } + { return InstructionSet.X86_AVX512; } else - { return InstructionSet.X86_AVX512F; } + { return InstructionSet.X86_AVX512; } case "Avx512BW": if (nestedTypeName == "VL") - { return InstructionSet.X86_AVX512BW_VL; } + { return InstructionSet.X86_AVX512; } else - { return InstructionSet.X86_AVX512BW; } + { return InstructionSet.X86_AVX512; } case "Avx512CD": if (nestedTypeName == "VL") - { return InstructionSet.X86_AVX512CD_VL; } + { return InstructionSet.X86_AVX512; } else - { return InstructionSet.X86_AVX512CD; } + { return InstructionSet.X86_AVX512; } case "Avx512DQ": if (nestedTypeName == "VL") - { return InstructionSet.X86_AVX512DQ_VL; } + { return InstructionSet.X86_AVX512; } else - { return InstructionSet.X86_AVX512DQ; } + { return InstructionSet.X86_AVX512; } case "Avx512Vbmi": if (nestedTypeName == "VL") - { return InstructionSet.X86_AVX512VBMI_VL; } + { return InstructionSet.X86_AVX512VBMI; } else { return InstructionSet.X86_AVX512VBMI; } case "Avx10v1": if (nestedTypeName == "V512") - { return InstructionSet.X86_AVX10v1_V512; } + { return InstructionSet.X86_AVX10v1; } else { return InstructionSet.X86_AVX10v1; } - case "VectorT128": - { return InstructionSet.X86_VectorT128; } - - case "VectorT256": - { return InstructionSet.X86_VectorT256; } - - case "VectorT512": - { return InstructionSet.X86_VectorT512; } - - case "Apx": - { return InstructionSet.X86_APX; } - case "Avx10v2": if (nestedTypeName == "V512") - { return InstructionSet.X86_AVX10v2_V512; } + { return InstructionSet.X86_AVX10v2; } else { return InstructionSet.X86_AVX10v2; } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 53ee7c3f0e8c3e..92faeab30d3f6d 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ 
b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -27,52 +27,53 @@ ; Definition of X86 instruction sets definearch ,X86 ,32Bit ,X64, X64 -instructionset ,X86 ,X86Base , ,22 ,X86Base ,base -instructionset ,X86 ,Sse , ,1 ,SSE ,sse -instructionset ,X86 ,Sse2 , ,2 ,SSE2 ,sse2 -instructionset ,X86 ,Sse3 , ,3 ,SSE3 ,sse3 -instructionset ,X86 ,Ssse3 , ,4 ,SSSE3 ,ssse3 -instructionset ,X86 ,Sse41 , ,5 ,SSE41 ,sse4.1 -instructionset ,X86 ,Sse42 , ,6 ,SSE42 ,sse4.2 -instructionset ,X86 ,Avx , ,7 ,AVX ,avx -instructionset ,X86 ,Avx2 , ,8 ,AVX2 ,avx2 -instructionset ,X86 ,Aes , ,9 ,AES ,aes -instructionset ,X86 ,Bmi1 , ,10 ,BMI1 ,bmi -instructionset ,X86 ,Bmi2 , ,11 ,BMI2 ,bmi2 -instructionset ,X86 ,Fma , ,12 ,FMA ,fma -instructionset ,X86 ,Lzcnt , ,13 ,LZCNT ,lzcnt -instructionset ,X86 ,Pclmulqdq , ,14 ,PCLMULQDQ ,pclmul -instructionset ,X86 ,Pclmulqdq_V256 , ,49 ,PCLMULQDQ_V256 ,vpclmul -instructionset ,X86 ,Pclmulqdq_V512 , ,50 ,PCLMULQDQ_V512 ,vpclmul_v512 -instructionset ,X86 ,Popcnt , ,15 ,POPCNT ,popcnt -instructionset ,X86 , , , ,Vector128 , -instructionset ,X86 , , , ,Vector256 , -instructionset ,X86 , , , ,Vector512 , -instructionset ,X86 ,AvxVnni , ,25 ,AVXVNNI ,avxvnni -instructionset ,X86 ,Movbe , ,27 ,MOVBE ,movbe -instructionset ,X86 ,X86Serialize , ,28 ,X86Serialize ,serialize -instructionset ,X86 ,EVEX , ,47 ,EVEX ,evex -instructionset ,X86 ,Avx512F , ,29 ,AVX512F ,avx512f -instructionset ,X86 ,Avx512F_VL , ,30 ,AVX512F_VL ,avx512f_vl -instructionset ,X86 ,Avx512BW , ,31 ,AVX512BW ,avx512bw -instructionset ,X86 ,Avx512BW_VL , ,32 ,AVX512BW_VL ,avx512bw_vl -instructionset ,X86 ,Avx512CD , ,33 ,AVX512CD ,avx512cd -instructionset ,X86 ,Avx512CD_VL , ,34 ,AVX512CD_VL ,avx512cd_vl -instructionset ,X86 ,Avx512DQ , ,35 ,AVX512DQ ,avx512dq -instructionset ,X86 ,Avx512DQ_VL , ,36 ,AVX512DQ_VL ,avx512dq_vl -instructionset ,X86 ,Avx512Vbmi , ,37 ,AVX512VBMI ,avx512vbmi -instructionset ,X86 ,Avx512Vbmi_VL , ,38 ,AVX512VBMI_VL ,avx512vbmi_vl 
-instructionset ,X86 ,Avx10v1 , ,44 ,AVX10v1 ,avx10v1 -instructionset ,X86 ,Avx10v1_V512 , ,46 ,AVX10v1_V512 ,avx10v1_v512 -instructionset ,X86 ,VectorT128 , ,39 ,VectorT128 ,vectort128 -instructionset ,X86 ,VectorT256 , ,40 ,VectorT256 ,vectort256 -instructionset ,X86 ,VectorT512 , ,41 ,VectorT512 ,vectort512 -instructionset ,X86 ,Apx , ,48 ,APX ,apx -instructionset ,X86 ,Avx10v2 , ,51 ,AVX10v2 ,avx10v2 -instructionset ,X86 ,Avx10v2_V512 , ,52 ,AVX10v2_V512 ,avx10v2_v512 -instructionset ,X86 ,Gfni , ,53 ,GFNI ,gfni -instructionset ,X86 ,Gfni_V256 , ,54 ,GFNI_V256 ,gfni_v256 -instructionset ,X86 ,Gfni_V512 , ,55 ,GFNI_V512 ,gfni_v512 +instructionset ,X86 ,X86Base , ,22 ,X86Base ,base +instructionset ,X86 ,Sse , ,1 ,SSE ,sse +instructionset ,X86 ,Sse2 , ,2 ,SSE2 ,sse2 +instructionset ,X86 ,Sse3 , ,3 ,SSE3 ,sse3 +instructionset ,X86 ,Ssse3 , ,4 ,SSSE3 ,ssse3 +instructionset ,X86 ,Sse41 , ,5 ,SSE41 ,sse4.1 +instructionset ,X86 ,Sse42 , ,6 ,SSE42 ,sse4.2 +instructionset ,X86 ,Avx , ,7 ,AVX ,avx +instructionset ,X86 ,Avx2 , ,8 ,AVX2 ,avx2 +instructionset ,X86 ,Aes , ,9 ,AES ,aes +instructionset ,X86 ,Bmi1 , ,10 ,BMI1 ,bmi +instructionset ,X86 ,Bmi2 , ,11 ,BMI2 ,bmi2 +instructionset ,X86 ,Fma , ,12 ,FMA ,fma +instructionset ,X86 ,Lzcnt , ,13 ,LZCNT ,lzcnt +instructionset ,X86 ,Pclmulqdq , ,14 ,PCLMULQDQ ,pclmul +instructionset ,X86 ,Pclmulqdq_V256 , ,49 ,PCLMULQDQ_V256 ,vpclmul +instructionset ,X86 ,Pclmulqdq_V512 , ,50 ,PCLMULQDQ_V512 ,vpclmul_v512 +instructionset ,X86 ,Popcnt , ,15 ,POPCNT ,popcnt +instructionset ,X86 , , , ,Vector128 , +instructionset ,X86 , , , ,Vector256 , +instructionset ,X86 , , , ,Vector512 , +instructionset ,X86 ,AvxVnni , ,25 ,AVXVNNI ,avxvnni +instructionset ,X86 , ,Movbe ,27 ,MOVBE ,movbe +instructionset ,X86 ,X86Serialize , ,28 ,X86Serialize ,serialize +instructionset ,X86 , ,Avx512 ,29 ,AVX512 ,avx512 +instructionset ,X86 , ,Avx512 ,29 ,AVX512 ,evex +instructionset ,X86 ,Avx512F ,Avx512 ,29 ,AVX512 ,avx512f +instructionset ,X86 ,Avx512F_VL 
,Avx512 ,29 ,AVX512 ,avx512f_vl +instructionset ,X86 ,Avx512BW ,Avx512 ,29 ,AVX512 ,avx512bw +instructionset ,X86 ,Avx512BW_VL ,Avx512 ,29 ,AVX512 ,avx512bw_vl +instructionset ,X86 ,Avx512CD ,Avx512 ,29 ,AVX512 ,avx512cd +instructionset ,X86 ,Avx512CD_VL ,Avx512 ,29 ,AVX512 ,avx512cd_vl +instructionset ,X86 ,Avx512DQ ,Avx512 ,29 ,AVX512 ,avx512dq +instructionset ,X86 ,Avx512DQ_VL ,Avx512 ,29 ,AVX512 ,avx512dq_vl +instructionset ,X86 ,Avx512Vbmi , ,37 ,AVX512VBMI ,avx512vbmi +instructionset ,X86 ,Avx512Vbmi_VL ,Avx512Vbmi ,37 ,AVX512VBMI ,avx512vbmi_vl +instructionset ,X86 ,Avx10v1 , ,44 ,AVX10v1 ,avx10v1 +instructionset ,X86 ,Avx10v1_V512 ,Avx10v1 ,44 ,AVX10v1 ,avx10v1_v512 +instructionset ,X86 , ,VectorT128 ,39 ,VectorT128 ,vectort128 +instructionset ,X86 , ,VectorT256 ,40 ,VectorT256 ,vectort256 +instructionset ,X86 , ,VectorT512 ,41 ,VectorT512 ,vectort512 +instructionset ,X86 , ,Apx ,48 ,APX ,apx +instructionset ,X86 ,Avx10v2 , ,51 ,AVX10v2 ,avx10v2 +instructionset ,X86 ,Avx10v2_V512 ,Avx10v2 ,51 ,AVX10v2 ,avx10v2_v512 +instructionset ,X86 ,Gfni , ,53 ,GFNI ,gfni +instructionset ,X86 ,Gfni_V256 , ,54 ,GFNI_V256 ,gfni_v256 +instructionset ,X86 ,Gfni_V512 , ,55 ,GFNI_V512 ,gfni_v512 instructionset64bit,X86 ,X86Base instructionset64bit,X86 ,SSE @@ -92,15 +93,10 @@ instructionset64bit,X86 ,PCLMULQDQ instructionset64bit,X86 ,POPCNT instructionset64bit,X86 ,AVXVNNI instructionset64bit,X86 ,X86Serialize -instructionset64bit,X86 ,AVX512F -instructionset64bit,X86 ,AVX512BW -instructionset64bit,X86 ,AVX512CD -instructionset64bit,X86 ,AVX512DQ +instructionset64bit,X86 ,AVX512 instructionset64bit,X86 ,AVX512VBMI instructionset64bit,X86 ,AVX10v1 -instructionset64bit,X86 ,AVX10v1_V512 instructionset64bit,X86 ,AVX10v2 -instructionset64bit,X86 ,AVX10v2_V512 instructionset64bit,X86 ,GFNI vectorinstructionset,X86 ,Vector128 @@ -132,22 +128,9 @@ implication ,X86 ,MOVBE ,SSE42 ; x86-64-v4 -implication ,X86 ,EVEX ,AVX2 -implication ,X86 ,EVEX ,FMA -implication ,X86 ,AVX512F ,EVEX 
-implication ,X86 ,AVX512F_VL ,AVX512F -implication ,X86 ,AVX512BW ,AVX512F -implication ,X86 ,AVX512BW_VL ,AVX512BW -implication ,X86 ,AVX512BW_VL ,AVX512F_VL -implication ,X86 ,AVX512CD ,AVX512F -implication ,X86 ,AVX512CD_VL ,AVX512CD -implication ,X86 ,AVX512CD_VL ,AVX512F_VL -implication ,X86 ,AVX512DQ ,AVX512F -implication ,X86 ,AVX512DQ_VL ,AVX512DQ -implication ,X86 ,AVX512DQ_VL ,AVX512F_VL -implication ,X86 ,AVX512VBMI ,AVX512BW -implication ,X86 ,AVX512VBMI_VL ,AVX512VBMI -implication ,X86 ,AVX512VBMI_VL ,AVX512BW_VL +implication ,X86 ,AVX512 ,AVX2 +implication ,X86 ,AVX512 ,FMA +implication ,X86 ,AVX512VBMI ,AVX512 ; Unversioned @@ -156,29 +139,18 @@ implication ,X86 ,PCLMULQDQ ,SSE2 implication ,X86 ,PCLMULQDQ_V256 ,PCLMULQDQ implication ,X86 ,PCLMULQDQ_V256 ,AVX implication ,X86 ,PCLMULQDQ_V512 ,PCLMULQDQ_V256 -implication ,X86 ,PCLMULQDQ_V512 ,AVX512F +implication ,X86 ,PCLMULQDQ_V512 ,AVX512 implication ,X86 ,AVXVNNI ,AVX2 implication ,X86 ,X86Serialize ,X86Base implication ,X86 ,GFNI ,SSE41 implication ,X86 ,GFNI_V256 ,GFNI implication ,X86 ,GFNI_V256 ,AVX implication ,X86 ,GFNI_V512 ,GFNI -implication ,X86 ,GFNI_V512 ,AVX512F - -implication ,X86 ,AVX10v1 ,EVEX -implication ,X86 ,AVX10v1_V512 ,AVX10v1 -implication ,X86 ,AVX10v1_V512 ,AVX512F -implication ,X86 ,AVX10v1_V512 ,AVX512F_VL -implication ,X86 ,AVX10v1_V512 ,AVX512CD -implication ,X86 ,AVX10v1_V512 ,AVX512CD_VL -implication ,X86 ,AVX10v1_V512 ,AVX512BW -implication ,X86 ,AVX10v1_V512 ,AVX512BW_VL -implication ,X86 ,AVX10v1_V512 ,AVX512DQ -implication ,X86 ,AVX10v1_V512 ,AVX512DQ_VL -implication ,X86 ,AVX10v1_V512 ,AVX512VBMI -implication ,X86 ,AVX10v1_V512 ,AVX512VBMI_VL +implication ,X86 ,GFNI_V512 ,AVX512 + +implication ,X86 ,AVX10v1 ,AVX512 +implication ,X86 ,AVX10v1 ,AVX512VBMI implication ,X86 ,AVX10v2 ,AVX10v1 -implication ,X86 ,AVX10v2_V512 ,AVX10v1_V512 ; These synthetic ISAs need to appear after the core ISAs ; as they depend on the other implications being correct first @@ -186,11 
+158,11 @@ implication ,X86 ,AVX10v2_V512 ,AVX10v1_V512 implication ,X86 ,Vector128 ,SSE implication ,X86 ,Vector256 ,AVX -implication ,X86 ,Vector512 ,AVX512F +implication ,X86 ,Vector512 ,AVX512 implication ,X86 ,VectorT128 ,SSE2 implication ,X86 ,VectorT256 ,AVX2 -implication ,X86 ,VectorT512 ,AVX512F +implication ,X86 ,VectorT512 ,AVX512 ; Definition of X64 instruction sets definearch ,X64 ,64Bit ,X64, X64 @@ -216,7 +188,7 @@ instructionset ,ARM64 , ,Rcpc ,26 ,Rcpc instructionset ,ARM64 ,VectorT128 , ,39 ,VectorT128 ,vectort128 instructionset ,ARM64 , ,Rcpc2 ,42 ,Rcpc2 ,rcpc2 instructionset ,ARM64 ,Sve , ,43 ,Sve ,sve -instructionset ,ARM64 ,Sve2 , ,59 ,Sve2 ,sve2 +instructionset ,ARM64 ,Sve2 , ,59 ,Sve2 ,sve2 instructionset64bit,ARM64 ,ArmBase instructionset64bit,ARM64 ,AdvSimd @@ -256,12 +228,11 @@ implication ,RiscV64 ,Zbb ,RiscV64Base implication ,RiscV64 ,Zba ,RiscV64Base ; ,name and aliases ,archs ,lower baselines included by implication -; instructionsetgroup ,x86-x64 ,X64 X86 ,sse2 instructionsetgroup ,x86-x64-v2 ,X64 X86 ,sse4.2 popcnt instructionsetgroup ,x86-x64-v3 ,X64 X86 ,x86-x64-v2 avx2 bmi bmi2 lzcnt movbe fma instructionsetgroup ,skylake ,X64 X86 ,x86-x64-v3 -instructionsetgroup ,x86-x64-v4 ,X64 X86 ,x86-x64-v3 avx512f avx512f_vl avx512bw avx512bw_vl avx512cd avx512cd_vl avx512dq avx512dq_vl +instructionsetgroup ,x86-x64-v4 ,X64 X86 ,x86-x64-v3 avx512 instructionsetgroup ,armv8-a ,ARM64 ,neon instructionsetgroup ,armv8.1-a ,ARM64 ,armv8-a lse crc rdma diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs index a4cb9ffce7ef8a..3d12f0c12efac2 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs @@ -88,8 +88,8 @@ public InstructionSetImplication(string architecture, InstructionSetImplication private 
SortedDictionary _r2rNamesByName = new SortedDictionary(); private SortedDictionary _r2rNamesByNumber = new SortedDictionary(); private SortedSet _architectures = new SortedSet(); - private Dictionary> _architectureJitNames = new Dictionary>(); - private Dictionary> _architectureVectorInstructionSetJitNames = new Dictionary>(); + private Dictionary> _architectureJitNames = new Dictionary>(); + private Dictionary> _architectureVectorInstructionSetJitNames = new Dictionary>(); private HashSet _64BitArchitectures = new HashSet(); private Dictionary _64BitVariantArchitectureJitNameSuffix = new Dictionary(); private Dictionary _64BitVariantArchitectureManagedNameSuffix = new Dictionary(); @@ -103,9 +103,9 @@ private void ArchitectureEncountered(string arch) _64bitVariants.Add(arch, new HashSet()); _architectures.Add(arch); if (!_architectureJitNames.ContainsKey(arch)) - _architectureJitNames.Add(arch, new List()); + _architectureJitNames.Add(arch, new HashSet()); if (!_architectureVectorInstructionSetJitNames.ContainsKey(arch)) - _architectureVectorInstructionSetJitNames.Add(arch, new List()); + _architectureVectorInstructionSetJitNames.Add(arch, new HashSet()); } private void ValidateArchitectureEncountered(string arch) @@ -335,9 +335,12 @@ public static class ReadyToRunInstructionSetHelper switch (instructionSet) {{ "); + HashSet handledJitNames = new HashSet(); + foreach (var instructionSet in _instructionSets) { if (instructionSet.Architecture != architecture) continue; + if (!handledJitNames.Add(instructionSet.JitName)) continue; string r2rEnumerationValue; if (!string.IsNullOrEmpty(instructionSet.R2rName)) @@ -602,11 +605,14 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target tr.Write($@" case TargetArchitecture.{architecture}: "); + HashSet handledJitNames = new HashSet(); + foreach (var instructionSet in _instructionSets) { if (instructionSet.Architecture != architecture) continue; if (_64BitArchitectures.Contains(architecture) && 
_64bitVariants[architecture].Contains(instructionSet.JitName)) { + if (!handledJitNames.Add(instructionSet.JitName)) continue; AddImplication(architecture, instructionSet.JitName, $"{instructionSet.JitName}_{ArchToInstructionSetSuffixArch(architecture)}"); AddImplication(architecture, $"{instructionSet.JitName}_{ArchToInstructionSetSuffixArch(architecture)}", instructionSet.JitName); } @@ -645,11 +651,16 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe tr.Write($@" case TargetArchitecture.{architecture}: "); + HashSet handledJitNames = new HashSet(); + foreach (var instructionSet in _instructionSets) { if (instructionSet.Architecture != architecture) continue; if (_64BitArchitectures.Contains(architecture) && _64bitVariants[architecture].Contains(instructionSet.JitName)) + { + if (!handledJitNames.Add(instructionSet.JitName)) continue; AddReverseImplication(architecture, instructionSet.JitName, $"{instructionSet.JitName}_{ArchToInstructionSetSuffixArch(architecture)}"); + } } foreach (var implication in _implications) { @@ -739,12 +750,14 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) tr.Write($@" case TargetArchitecture.{architecture}: "); + HashSet handledJitNames = new HashSet(); + foreach (var instructionSet in _instructionSets) { if (instructionSet.Architecture != architecture) continue; - if (_64BitArchitectures.Contains(architecture) && _64bitVariants[architecture].Contains(instructionSet.JitName)) { + if (!handledJitNames.Add(instructionSet.JitName)) continue; tr.WriteLine($" if (HasInstructionSet(InstructionSet.{architecture}_{instructionSet.JitName}))"); tr.WriteLine($" AddInstructionSet(InstructionSet.{architecture}_{instructionSet.JitName}_{ArchToInstructionSetSuffixArch(architecture)});"); } @@ -765,12 +778,16 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc tr.Write($@" case TargetArchitecture.{architecture}: "); + HashSet handledJitNames = new 
HashSet(); + foreach (var instructionSet in _instructionSets) { if (instructionSet.Architecture != architecture) continue; - if (_64bitVariants[architecture].Contains(instructionSet.JitName)) + { + if (!handledJitNames.Add(instructionSet.JitName)) continue; tr.WriteLine($" AddInstructionSet(InstructionSet.{architecture}_{instructionSet.JitName}_{ArchToInstructionSetSuffixArch(architecture)});"); + } } tr.WriteLine(" break;"); @@ -1036,12 +1053,14 @@ void Set64BitInstructionSetVariants() foreach (string architecture in _architectures) { tr.WriteLine($"#ifdef TARGET_{ArchToIfDefArch(architecture)}"); + HashSet handledJitNames = new HashSet(); + foreach (var instructionSet in _instructionSets) { if (instructionSet.Architecture != architecture) continue; - if (_64BitArchitectures.Contains(architecture) && _64bitVariants[architecture].Contains(instructionSet.JitName)) { + if (!handledJitNames.Add(instructionSet.JitName)) continue; tr.WriteLine($" if (HasInstructionSet(InstructionSet_{instructionSet.JitName}))"); tr.WriteLine($" AddInstructionSet(InstructionSet_{instructionSet.JitName}_{ArchToInstructionSetSuffixArch(architecture)});"); } @@ -1069,11 +1088,14 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins foreach (string architecture in _architectures) { tr.WriteLine($"#ifdef TARGET_{ArchToIfDefArch(architecture)}"); + HashSet handledJitNames = new HashSet(); + foreach (var instructionSet in _instructionSets) { if (instructionSet.Architecture != architecture) continue; if (_64BitArchitectures.Contains(architecture) && _64bitVariants[architecture].Contains(instructionSet.JitName)) { + if (!handledJitNames.Add(instructionSet.JitName)) continue; AddImplication(architecture, instructionSet.JitName, $"{instructionSet.JitName}_{ArchToInstructionSetSuffixArch(architecture)}"); AddImplication(architecture, $"{instructionSet.JitName}_{ArchToInstructionSetSuffixArch(architecture)}", instructionSet.JitName); } @@ -1103,9 +1125,12 @@ inline 
CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins foreach (string architecture in _architectures) { tr.WriteLine($"#ifdef TARGET_{ArchToIfDefArch(architecture)}"); + HashSet handledJitNames = new HashSet(); + foreach (var instructionSet in _instructionSets) { if (instructionSet.Architecture != architecture) continue; + if (!handledJitNames.Add(instructionSet.JitName)) continue; tr.WriteLine($" case InstructionSet_{instructionSet.JitName} :"); tr.WriteLine($" return \"{instructionSet.JitName}\";"); if (_64BitArchitectures.Contains(architecture) && _64bitVariants[architecture].Contains(instructionSet.JitName)) @@ -1138,15 +1163,14 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst foreach (string architecture in _architectures) { tr.WriteLine($"#ifdef TARGET_{ArchToIfDefArch(architecture)}"); + HashSet handledR2rNames = new HashSet(); + foreach (var instructionSet in _instructionSets) { if (instructionSet.Architecture != architecture) continue; - string r2rEnumerationValue; - if (string.IsNullOrEmpty(instructionSet.R2rName)) - continue; - - r2rEnumerationValue = $"READYTORUN_INSTRUCTION_{instructionSet.R2rName}"; - + if (string.IsNullOrEmpty(instructionSet.R2rName)) continue; + if (!handledR2rNames.Add(instructionSet.R2rName)) continue; + string r2rEnumerationValue = $"READYTORUN_INSTRUCTION_{instructionSet.R2rName}"; tr.WriteLine($" case {r2rEnumerationValue}: return InstructionSet_{instructionSet.JitName};"); } tr.WriteLine($"#endif // TARGET_{ArchToIfDefArch(architecture)}"); diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 0df03a93cb9aba..17105809564522 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1341,17 +1341,10 @@ void EEJitManager::SetCpuInfo() // x86-64-v4 - if (((cpuFeatures & XArchIntrinsicConstants_Evex) != 0) && - ((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0)) + if ((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) { - // While 
the AVX-512 ISAs can be individually lit-up, they really - // need F, BW, CD, DQ, and VL to be fully functional without adding - // significant complexity into the JIT. Additionally, unlike AVX/AVX2 - // there was never really any hardware that didn't provide all 5 at - // once, with the notable exception being Knight's Landing which - // provided a similar but not quite the same feature. - - if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F) && + if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512) && + CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F_VL) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW_VL) && @@ -1360,15 +1353,11 @@ void EEJitManager::SetCpuInfo() CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ_VL)) { - CPUCompileFlags.Set(InstructionSet_EVEX); - CPUCompileFlags.Set(InstructionSet_AVX512F); - CPUCompileFlags.Set(InstructionSet_AVX512F_VL); - CPUCompileFlags.Set(InstructionSet_AVX512BW); - CPUCompileFlags.Set(InstructionSet_AVX512BW_VL); - CPUCompileFlags.Set(InstructionSet_AVX512CD); - CPUCompileFlags.Set(InstructionSet_AVX512CD_VL); - CPUCompileFlags.Set(InstructionSet_AVX512DQ); - CPUCompileFlags.Set(InstructionSet_AVX512DQ_VL); + // These ISAs are grouped together and if any are disabled then + // you lose access to all of them. 
We recommend modern code just + // use EnableAVX512, but we continue checking the older knobs for + // back-compat + CPUCompileFlags.Set(InstructionSet_AVX512); } } @@ -1377,8 +1366,8 @@ void EEJitManager::SetCpuInfo() if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI_VL)) { + // These ISAs are likewise grouped together CPUCompileFlags.Set(InstructionSet_AVX512VBMI); - CPUCompileFlags.Set(InstructionSet_AVX512VBMI_VL); } } @@ -1417,14 +1406,11 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_GFNI_V512); } - if (((cpuFeatures & XArchIntrinsicConstants_Evex) != 0) && - ((cpuFeatures & XArchIntrinsicConstants_Avx10v1) != 0)) + if ((cpuFeatures & XArchIntrinsicConstants_Avx10v1) != 0) { if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX10v1)) { - CPUCompileFlags.Set(InstructionSet_EVEX); CPUCompileFlags.Set(InstructionSet_AVX10v1); - CPUCompileFlags.Set(InstructionSet_AVX10v1_V512); } } @@ -1433,7 +1419,6 @@ void EEJitManager::SetCpuInfo() if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX10v2)) { CPUCompileFlags.Set(InstructionSet_AVX10v2); - CPUCompileFlags.Set(InstructionSet_AVX10v2_V512); } } diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index 8d5874922a7bca..e5cabd11b60321 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -321,13 +321,34 @@ int minipal_getcpufeatures(void) // once, with the notable exception being Knight's Landing which // provided a similar but not quite the same feature. 
- result |= XArchIntrinsicConstants_Evex; result |= XArchIntrinsicConstants_Avx512; if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI { result |= XArchIntrinsicConstants_Avx512Vbmi; } + + if (((cpuidInfo[CPUID_EDX] & (1 << 19)) != 0) && // Avx10 + ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) && // Avx10/V256 + ((cpuidInfo[CPUID_EBX] & (1 << 18)) != 0)) // Avx10/V512 + { + // While AVX10 was originally spec'd to allow no V512 support + // this was later changed and all implementations must provide + // V512 support + + __cpuidex(cpuidInfo, 0x00000024, 0x00000000); + uint8_t avx10Version = (uint8_t)(cpuidInfo[CPUID_EBX] & 0xFF); + + if (avx10Version >= 1) // Avx10.1 + { + result |= XArchIntrinsicConstants_Avx10v1; + } + + if (avx10Version >= 2) // Avx10.2 + { + result |= XArchIntrinsicConstants_Avx10v2; + } + } } } @@ -345,29 +366,6 @@ int minipal_getcpufeatures(void) result |= XArchIntrinsicConstants_Apx; } } - - if ((cpuidInfo[CPUID_EDX] & (1 << 19)) != 0) // Avx10 - { - __cpuidex(cpuidInfo, 0x00000024, 0x00000000); - uint8_t avx10Version = (uint8_t)(cpuidInfo[CPUID_EBX] & 0xFF); - - if((avx10Version >= 1) && - ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0)) // Avx10/V256 - { - result |= XArchIntrinsicConstants_Evex; - result |= XArchIntrinsicConstants_Avx10v1; // Avx10.1 - - if (avx10Version >= 2) // Avx10.2 - { - result |= XArchIntrinsicConstants_Avx10v2; - } - - // We assume that the Avx10/V512 support can be inferred from - // both Avx10v1 and Avx512 being present. 
- assert(((cpuidInfo[CPUID_EBX] & (1 << 18)) != 0) == // Avx10/V512 - ((result & XArchIntrinsicConstants_Avx512) != 0)); - } - } } } } diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index 9def54fa001063..a0671b5fea3401 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -30,11 +30,10 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Avx512Vbmi = 0x10000, XArchIntrinsicConstants_Serialize = 0x20000, XArchIntrinsicConstants_Avx10v1 = 0x40000, - XArchIntrinsicConstants_Evex = 0x80000, - XArchIntrinsicConstants_Apx = 0x100000, - XArchIntrinsicConstants_Vpclmulqdq = 0x200000, - XArchIntrinsicConstants_Avx10v2 = 0x400000, - XArchIntrinsicConstants_Gfni = 0x800000, + XArchIntrinsicConstants_Apx = 0x80000, + XArchIntrinsicConstants_Vpclmulqdq = 0x100000, + XArchIntrinsicConstants_Avx10v2 = 0x200000, + XArchIntrinsicConstants_Gfni = 0x400000, }; #endif // HOST_X86 || HOST_AMD64 diff --git a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs index aeeb847495fd5a..fce083d7154f12 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs @@ -269,16 +269,24 @@ public unsafe static void CpuId() } } - if (IsBitIncorrect(ecx, 1, typeof(Avx512Vbmi), Avx512Vbmi.IsSupported, "AVX512VBMI", ref isHierarchyDisabled)) + for (int i = 0; i < 2; i++) { - testResult = Fail; - } + // AVX512VBMI and AVX512VBMI.VL are likewise provided together or not at all + // so we loop twice to ensure it all lines up as expected. 
- if (IsBitIncorrect(ecx, 1, typeof(Avx512Vbmi.VL), Avx512Vbmi.VL.IsSupported, "AVX512VBMI_VL", ref isHierarchyDisabled)) - { - testResult = Fail; + if (IsBitIncorrect(ecx, 1, typeof(Avx512Vbmi), Avx512Vbmi.IsSupported, "AVX512VBMI", ref isHierarchyDisabled)) + { + testResult = Fail; + } + + if (IsBitIncorrect(ecx, 1, typeof(Avx512Vbmi.VL), Avx512Vbmi.VL.IsSupported, "AVX512VBMI_VL", ref isHierarchyDisabled)) + { + testResult = Fail; + } } + bool isAvx10HierarchyDisabled = isHierarchyDisabled; + isHierarchyDisabled = isX86BaseDisabled; if (IsBitIncorrect(edx, 14, typeof(X86Serialize), X86Serialize.IsSupported, "SERIALIZE", ref isHierarchyDisabled)) @@ -330,11 +338,22 @@ public unsafe static void CpuId() testResult = Fail; } - isHierarchyDisabled = isAvxHierarchyDisabled | isFmaHierarchyDisabled; + isHierarchyDisabled = isAvx10HierarchyDisabled; - if (IsBitIncorrect(edx, 19, typeof(Avx10v1), Avx10v1.IsSupported, "AVX10V1", ref isHierarchyDisabled)) + for (int i = 0; i < 2; i++) { - testResult = Fail; + // AVX10v1 and AVX10v1.V512 are likewise provided together or not at all + // so we loop twice to ensure it all lines up as expected. 
+ + if (IsBitIncorrect(edx, 19, typeof(Avx10v1), Avx10v1.IsSupported, "AVX10V1", ref isHierarchyDisabled)) + { + testResult = Fail; + } + + if (IsBitIncorrect(edx, 19, typeof(Avx10v1.V512), Avx10v1.V512.IsSupported, "AVX10V1", ref isHierarchyDisabled)) + { + testResult = Fail; + } } (eax, ebx, ecx, edx) = X86Base.CpuId(unchecked((int)0x80000000), 0x00000000); diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx512.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx512.csproj index 331f0ed9bca7d2..8b51024bb0795d 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx512.csproj +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx512.csproj @@ -14,7 +14,7 @@ - + diff --git a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx512_VectorT512.csproj b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx512_VectorT512.csproj index 94084aebc41e6b..3e11069a32662f 100644 --- a/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx512_VectorT512.csproj +++ b/src/tests/nativeaot/SmokeTests/HardwareIntrinsics/X64Avx512_VectorT512.csproj @@ -14,7 +14,7 @@ - + diff --git a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx512.csproj b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx512.csproj index f0d2f45a2e120f..1da9fd4dd189f7 100644 --- a/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx512.csproj +++ b/src/tests/readytorun/HardwareIntrinsics/X86/CpuId_R2R_Avx512.csproj @@ -13,7 +13,7 @@ - $(CrossGen2TestExtraArguments) --instruction-set:avx512f,avx512f_vl,avx512bw,avx512bw_vl,avx512cd,avx512cd_vl,avx512dq,avx512dq_vl + $(CrossGen2TestExtraArguments) --instruction-set:avx512 From 64eb9321e93df84a4d9f3fc5a07baa06d4ccaa51 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 27 May 2025 01:20:45 -0700 Subject: [PATCH 2/7] Rework how X86Base+SSE+SSE2 are supported by the JIT to greatly simplify things --- src/coreclr/inc/clrconfigvalues.h | 6 +- src/coreclr/inc/corinfoinstructionset.h | 
276 ++++----- src/coreclr/inc/readytoruninstructionset.h | 2 - src/coreclr/jit/assertionprop.cpp | 38 +- src/coreclr/jit/codegen.h | 2 - src/coreclr/jit/codegenxarch.cpp | 12 +- src/coreclr/jit/compiler.cpp | 42 +- src/coreclr/jit/compiler.h | 6 +- src/coreclr/jit/decomposelongs.cpp | 2 +- src/coreclr/jit/fgdiagnostic.cpp | 14 +- src/coreclr/jit/gentree.cpp | 431 +++++--------- src/coreclr/jit/hwintrinsic.cpp | 6 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 159 ++--- src/coreclr/jit/hwintrinsiclistxarch.h | 377 +++++------- src/coreclr/jit/hwintrinsicxarch.cpp | 322 +++------- src/coreclr/jit/importercalls.cpp | 42 +- src/coreclr/jit/jitconfigvalues.h | 6 +- src/coreclr/jit/lowerxarch.cpp | 553 ++++++------------ src/coreclr/jit/lsrabuild.cpp | 2 +- src/coreclr/jit/lsraxarch.cpp | 2 +- src/coreclr/jit/valuenum.cpp | 2 +- .../Compiler/HardwareIntrinsicHelpers.cs | 5 - .../Common/Compiler/InstructionSetSupport.cs | 4 +- .../Runtime/ReadyToRunInstructionSet.cs | 2 - .../Runtime/ReadyToRunInstructionSetHelper.cs | 8 - .../tools/Common/JitInterface/CorInfoImpl.cs | 4 +- .../JitInterface/CorInfoInstructionSet.cs | 332 +++++------ .../ThunkGenerator/InstructionSetDesc.txt | 25 +- src/coreclr/vm/codeman.cpp | 29 +- .../HardwareIntrinsics/X86/X86Base/CpuId.cs | 34 +- 30 files changed, 965 insertions(+), 1780 deletions(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index dc7570512bd0fc..54726ea8d4b53c 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -696,8 +696,6 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePCLMULQDQ, W("EnablePCLMULQ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableVPCLMULQDQ, W("EnableVPCLMULQDQ"), 1, "Allows VPCLMULQDQ+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableMOVBE, W("EnableMOVBE"), 1, "Allows MOVBE+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePOPCNT, W("EnablePOPCNT"), 1, "Allows POPCNT+ hardware intrinsics to be 
disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE, W("EnableSSE"), 1, "Allows SSE+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE2, W("EnableSSE2"), 1, "Allows SSE2+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE3, W("EnableSSE3"), 1, "Allows SSE3+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE3_4, W("EnableSSE3_4"), 1, "Allows SSE3+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE41, W("EnableSSE41"), 1, "Allows SSE4.1+ hardware intrinsics to be disabled") @@ -740,6 +738,10 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F_VL, W("EnableAVX512F // These have been superceded by EnableAVX512VBMI as you get all of them or none of them RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VBMI_VL, W("EnableAVX512VBMI_VL"), 1, "Allows AVX512VBMI_VL+ hardware intrinsics to be disabled") + +// These have been superceded by EnableHWIntrinsic as they are part of the baseline +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE, W("EnableSSE"), 1, "Allows SSE+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE2, W("EnableSSE2"), 1, "Allows SSE2+ hardware intrinsics to be disabled") #endif /// diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 73676c6e84dfc3..819cde31f7e7a3 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -51,123 +51,115 @@ enum CORINFO_InstructionSet #endif // TARGET_RISCV64 #ifdef TARGET_AMD64 InstructionSet_X86Base=1, - InstructionSet_SSE=2, - InstructionSet_SSE2=3, - InstructionSet_SSE3=4, - InstructionSet_SSSE3=5, - InstructionSet_SSE41=6, - InstructionSet_SSE42=7, - InstructionSet_AVX=8, - InstructionSet_AVX2=9, - InstructionSet_AES=10, - InstructionSet_BMI1=11, - InstructionSet_BMI2=12, - InstructionSet_FMA=13, - InstructionSet_LZCNT=14, - InstructionSet_PCLMULQDQ=15, - 
InstructionSet_PCLMULQDQ_V256=16, - InstructionSet_PCLMULQDQ_V512=17, - InstructionSet_POPCNT=18, - InstructionSet_Vector128=19, - InstructionSet_Vector256=20, - InstructionSet_Vector512=21, - InstructionSet_AVXVNNI=22, - InstructionSet_MOVBE=23, - InstructionSet_X86Serialize=24, - InstructionSet_AVX512=25, - InstructionSet_AVX512VBMI=26, - InstructionSet_AVX10v1=27, - InstructionSet_VectorT128=28, - InstructionSet_VectorT256=29, - InstructionSet_VectorT512=30, - InstructionSet_APX=31, - InstructionSet_AVX10v2=32, - InstructionSet_GFNI=33, - InstructionSet_GFNI_V256=34, - InstructionSet_GFNI_V512=35, - InstructionSet_X86Base_X64=36, - InstructionSet_SSE_X64=37, - InstructionSet_SSE2_X64=38, - InstructionSet_SSE3_X64=39, - InstructionSet_SSSE3_X64=40, - InstructionSet_SSE41_X64=41, - InstructionSet_SSE42_X64=42, - InstructionSet_AVX_X64=43, - InstructionSet_AVX2_X64=44, - InstructionSet_AES_X64=45, - InstructionSet_BMI1_X64=46, - InstructionSet_BMI2_X64=47, - InstructionSet_FMA_X64=48, - InstructionSet_LZCNT_X64=49, - InstructionSet_PCLMULQDQ_X64=50, - InstructionSet_POPCNT_X64=51, - InstructionSet_AVXVNNI_X64=52, - InstructionSet_X86Serialize_X64=53, - InstructionSet_AVX512_X64=54, - InstructionSet_AVX512VBMI_X64=55, - InstructionSet_AVX10v1_X64=56, - InstructionSet_AVX10v2_X64=57, - InstructionSet_GFNI_X64=58, + InstructionSet_SSE3=2, + InstructionSet_SSSE3=3, + InstructionSet_SSE41=4, + InstructionSet_SSE42=5, + InstructionSet_AVX=6, + InstructionSet_AVX2=7, + InstructionSet_AES=8, + InstructionSet_BMI1=9, + InstructionSet_BMI2=10, + InstructionSet_FMA=11, + InstructionSet_LZCNT=12, + InstructionSet_PCLMULQDQ=13, + InstructionSet_PCLMULQDQ_V256=14, + InstructionSet_PCLMULQDQ_V512=15, + InstructionSet_POPCNT=16, + InstructionSet_Vector128=17, + InstructionSet_Vector256=18, + InstructionSet_Vector512=19, + InstructionSet_AVXVNNI=20, + InstructionSet_MOVBE=21, + InstructionSet_X86Serialize=22, + InstructionSet_AVX512=23, + InstructionSet_AVX512VBMI=24, + 
InstructionSet_AVX10v1=25, + InstructionSet_VectorT128=26, + InstructionSet_VectorT256=27, + InstructionSet_VectorT512=28, + InstructionSet_APX=29, + InstructionSet_AVX10v2=30, + InstructionSet_GFNI=31, + InstructionSet_GFNI_V256=32, + InstructionSet_GFNI_V512=33, + InstructionSet_X86Base_X64=34, + InstructionSet_SSE3_X64=35, + InstructionSet_SSSE3_X64=36, + InstructionSet_SSE41_X64=37, + InstructionSet_SSE42_X64=38, + InstructionSet_AVX_X64=39, + InstructionSet_AVX2_X64=40, + InstructionSet_AES_X64=41, + InstructionSet_BMI1_X64=42, + InstructionSet_BMI2_X64=43, + InstructionSet_FMA_X64=44, + InstructionSet_LZCNT_X64=45, + InstructionSet_PCLMULQDQ_X64=46, + InstructionSet_POPCNT_X64=47, + InstructionSet_AVXVNNI_X64=48, + InstructionSet_X86Serialize_X64=49, + InstructionSet_AVX512_X64=50, + InstructionSet_AVX512VBMI_X64=51, + InstructionSet_AVX10v1_X64=52, + InstructionSet_AVX10v2_X64=53, + InstructionSet_GFNI_X64=54, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, - InstructionSet_SSE=2, - InstructionSet_SSE2=3, - InstructionSet_SSE3=4, - InstructionSet_SSSE3=5, - InstructionSet_SSE41=6, - InstructionSet_SSE42=7, - InstructionSet_AVX=8, - InstructionSet_AVX2=9, - InstructionSet_AES=10, - InstructionSet_BMI1=11, - InstructionSet_BMI2=12, - InstructionSet_FMA=13, - InstructionSet_LZCNT=14, - InstructionSet_PCLMULQDQ=15, - InstructionSet_PCLMULQDQ_V256=16, - InstructionSet_PCLMULQDQ_V512=17, - InstructionSet_POPCNT=18, - InstructionSet_Vector128=19, - InstructionSet_Vector256=20, - InstructionSet_Vector512=21, - InstructionSet_AVXVNNI=22, - InstructionSet_MOVBE=23, - InstructionSet_X86Serialize=24, - InstructionSet_AVX512=25, - InstructionSet_AVX512VBMI=26, - InstructionSet_AVX10v1=27, - InstructionSet_VectorT128=28, - InstructionSet_VectorT256=29, - InstructionSet_VectorT512=30, - InstructionSet_APX=31, - InstructionSet_AVX10v2=32, - InstructionSet_GFNI=33, - InstructionSet_GFNI_V256=34, - InstructionSet_GFNI_V512=35, - 
InstructionSet_X86Base_X64=36, - InstructionSet_SSE_X64=37, - InstructionSet_SSE2_X64=38, - InstructionSet_SSE3_X64=39, - InstructionSet_SSSE3_X64=40, - InstructionSet_SSE41_X64=41, - InstructionSet_SSE42_X64=42, - InstructionSet_AVX_X64=43, - InstructionSet_AVX2_X64=44, - InstructionSet_AES_X64=45, - InstructionSet_BMI1_X64=46, - InstructionSet_BMI2_X64=47, - InstructionSet_FMA_X64=48, - InstructionSet_LZCNT_X64=49, - InstructionSet_PCLMULQDQ_X64=50, - InstructionSet_POPCNT_X64=51, - InstructionSet_AVXVNNI_X64=52, - InstructionSet_X86Serialize_X64=53, - InstructionSet_AVX512_X64=54, - InstructionSet_AVX512VBMI_X64=55, - InstructionSet_AVX10v1_X64=56, - InstructionSet_AVX10v2_X64=57, - InstructionSet_GFNI_X64=58, + InstructionSet_SSE3=2, + InstructionSet_SSSE3=3, + InstructionSet_SSE41=4, + InstructionSet_SSE42=5, + InstructionSet_AVX=6, + InstructionSet_AVX2=7, + InstructionSet_AES=8, + InstructionSet_BMI1=9, + InstructionSet_BMI2=10, + InstructionSet_FMA=11, + InstructionSet_LZCNT=12, + InstructionSet_PCLMULQDQ=13, + InstructionSet_PCLMULQDQ_V256=14, + InstructionSet_PCLMULQDQ_V512=15, + InstructionSet_POPCNT=16, + InstructionSet_Vector128=17, + InstructionSet_Vector256=18, + InstructionSet_Vector512=19, + InstructionSet_AVXVNNI=20, + InstructionSet_MOVBE=21, + InstructionSet_X86Serialize=22, + InstructionSet_AVX512=23, + InstructionSet_AVX512VBMI=24, + InstructionSet_AVX10v1=25, + InstructionSet_VectorT128=26, + InstructionSet_VectorT256=27, + InstructionSet_VectorT512=28, + InstructionSet_APX=29, + InstructionSet_AVX10v2=30, + InstructionSet_GFNI=31, + InstructionSet_GFNI_V256=32, + InstructionSet_GFNI_V512=33, + InstructionSet_X86Base_X64=34, + InstructionSet_SSE3_X64=35, + InstructionSet_SSSE3_X64=36, + InstructionSet_SSE41_X64=37, + InstructionSet_SSE42_X64=38, + InstructionSet_AVX_X64=39, + InstructionSet_AVX2_X64=40, + InstructionSet_AES_X64=41, + InstructionSet_BMI1_X64=42, + InstructionSet_BMI2_X64=43, + InstructionSet_FMA_X64=44, + 
InstructionSet_LZCNT_X64=45, + InstructionSet_PCLMULQDQ_X64=46, + InstructionSet_POPCNT_X64=47, + InstructionSet_AVXVNNI_X64=48, + InstructionSet_X86Serialize_X64=49, + InstructionSet_AVX512_X64=50, + InstructionSet_AVX512VBMI_X64=51, + InstructionSet_AVX10v1_X64=52, + InstructionSet_AVX10v2_X64=53, + InstructionSet_GFNI_X64=54, #endif // TARGET_X86 }; @@ -289,10 +281,6 @@ struct CORINFO_InstructionSetFlags #ifdef TARGET_AMD64 if (HasInstructionSet(InstructionSet_X86Base)) AddInstructionSet(InstructionSet_X86Base_X64); - if (HasInstructionSet(InstructionSet_SSE)) - AddInstructionSet(InstructionSet_SSE_X64); - if (HasInstructionSet(InstructionSet_SSE2)) - AddInstructionSet(InstructionSet_SSE2_X64); if (HasInstructionSet(InstructionSet_SSE3)) AddInstructionSet(InstructionSet_SSE3_X64); if (HasInstructionSet(InstructionSet_SSSE3)) @@ -429,14 +417,6 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_X86Base); if (resultflags.HasInstructionSet(InstructionSet_X86Base_X64) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_X86Base_X64); - if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_SSE_X64)) - resultflags.RemoveInstructionSet(InstructionSet_SSE); - if (resultflags.HasInstructionSet(InstructionSet_SSE_X64) && !resultflags.HasInstructionSet(InstructionSet_SSE)) - resultflags.RemoveInstructionSet(InstructionSet_SSE_X64); - if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE2_X64)) - resultflags.RemoveInstructionSet(InstructionSet_SSE2); - if (resultflags.HasInstructionSet(InstructionSet_SSE2_X64) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) - resultflags.RemoveInstructionSet(InstructionSet_SSE2_X64); if (resultflags.HasInstructionSet(InstructionSet_SSE3) && !resultflags.HasInstructionSet(InstructionSet_SSE3_X64)) 
resultflags.RemoveInstructionSet(InstructionSet_SSE3); if (resultflags.HasInstructionSet(InstructionSet_SSE3_X64) && !resultflags.HasInstructionSet(InstructionSet_SSE3)) @@ -517,11 +497,7 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_GFNI); if (resultflags.HasInstructionSet(InstructionSet_GFNI_X64) && !resultflags.HasInstructionSet(InstructionSet_GFNI)) resultflags.RemoveInstructionSet(InstructionSet_GFNI_X64); - if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) - resultflags.RemoveInstructionSet(InstructionSet_SSE); - if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) - resultflags.RemoveInstructionSet(InstructionSet_SSE2); - if (resultflags.HasInstructionSet(InstructionSet_SSE3) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet_SSE3) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE3); if (resultflags.HasInstructionSet(InstructionSet_SSSE3) && !resultflags.HasInstructionSet(InstructionSet_SSE3)) resultflags.RemoveInstructionSet(InstructionSet_SSSE3); @@ -551,9 +527,9 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX512); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet_AES) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet_AES) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_AES); - if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && 
!resultflags.HasInstructionSet(InstructionSet_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ); if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ)) resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V256); @@ -583,13 +559,13 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet_AVX10v2) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); - if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_SSE)) + if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_Vector128); if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) resultflags.RemoveInstructionSet(InstructionSet_Vector256); if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_Vector512); - if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_VectorT128); if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_VectorT256); @@ -597,11 +573,7 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins 
resultflags.RemoveInstructionSet(InstructionSet_VectorT512); #endif // TARGET_AMD64 #ifdef TARGET_X86 - if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) - resultflags.RemoveInstructionSet(InstructionSet_SSE); - if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE)) - resultflags.RemoveInstructionSet(InstructionSet_SSE2); - if (resultflags.HasInstructionSet(InstructionSet_SSE3) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet_SSE3) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_SSE3); if (resultflags.HasInstructionSet(InstructionSet_SSSE3) && !resultflags.HasInstructionSet(InstructionSet_SSE3)) resultflags.RemoveInstructionSet(InstructionSet_SSSE3); @@ -631,9 +603,9 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX512); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet_AES) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet_AES) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_AES); - if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ); if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ_V256) && !resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ)) 
resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ_V256); @@ -663,13 +635,13 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet_AVX10v2) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1)) resultflags.RemoveInstructionSet(InstructionSet_AVX10v2); - if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_SSE)) + if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_Vector128); if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX)) resultflags.RemoveInstructionSet(InstructionSet_Vector256); if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512)) resultflags.RemoveInstructionSet(InstructionSet_Vector512); - if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_X86Base)) resultflags.RemoveInstructionSet(InstructionSet_VectorT128); if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) resultflags.RemoveInstructionSet(InstructionSet_VectorT256); @@ -759,14 +731,6 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "X86Base"; case InstructionSet_X86Base_X64 : return "X86Base_X64"; - case InstructionSet_SSE : - return "SSE"; - case InstructionSet_SSE_X64 : - return "SSE_X64"; - case InstructionSet_SSE2 : - return "SSE2"; - case InstructionSet_SSE2_X64 : - return "SSE2_X64"; case InstructionSet_SSE3 : return "SSE3"; case InstructionSet_SSE3_X64 : @@ -875,10 +839,6 @@ 
inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) #ifdef TARGET_X86 case InstructionSet_X86Base : return "X86Base"; - case InstructionSet_SSE : - return "SSE"; - case InstructionSet_SSE2 : - return "SSE2"; case InstructionSet_SSE3 : return "SSE3"; case InstructionSet_SSSE3 : @@ -985,8 +945,6 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst #endif // TARGET_RISCV64 #ifdef TARGET_AMD64 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; - case READYTORUN_INSTRUCTION_Sse: return InstructionSet_SSE; - case READYTORUN_INSTRUCTION_Sse2: return InstructionSet_SSE2; case READYTORUN_INSTRUCTION_Sse3: return InstructionSet_SSE3; case READYTORUN_INSTRUCTION_Ssse3: return InstructionSet_SSSE3; case READYTORUN_INSTRUCTION_Sse41: return InstructionSet_SSE41; @@ -1019,8 +977,6 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst #endif // TARGET_AMD64 #ifdef TARGET_X86 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; - case READYTORUN_INSTRUCTION_Sse: return InstructionSet_SSE; - case READYTORUN_INSTRUCTION_Sse2: return InstructionSet_SSE2; case READYTORUN_INSTRUCTION_Sse3: return InstructionSet_SSE3; case READYTORUN_INSTRUCTION_Ssse3: return InstructionSet_SSSE3; case READYTORUN_INSTRUCTION_Sse41: return InstructionSet_SSE41; diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 592b602d1f762a..a2bbcccaca3721 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -9,8 +9,6 @@ #define READYTORUNINSTRUCTIONSET_H enum ReadyToRunInstructionSet { - READYTORUN_INSTRUCTION_Sse=1, - READYTORUN_INSTRUCTION_Sse2=2, READYTORUN_INSTRUCTION_Sse3=3, READYTORUN_INSTRUCTION_Ssse3=4, READYTORUN_INSTRUCTION_Sse41=5, diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index ed83b7d6384b33..dcd1eb64813c61 100644 --- 
a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -239,30 +239,18 @@ bool IntegralRange::Contains(int64_t value) const case NI_Vector256_op_Inequality: case NI_Vector512_op_Equality: case NI_Vector512_op_Inequality: - case NI_SSE_CompareScalarOrderedEqual: - case NI_SSE_CompareScalarOrderedNotEqual: - case NI_SSE_CompareScalarOrderedLessThan: - case NI_SSE_CompareScalarOrderedLessThanOrEqual: - case NI_SSE_CompareScalarOrderedGreaterThan: - case NI_SSE_CompareScalarOrderedGreaterThanOrEqual: - case NI_SSE_CompareScalarUnorderedEqual: - case NI_SSE_CompareScalarUnorderedNotEqual: - case NI_SSE_CompareScalarUnorderedLessThanOrEqual: - case NI_SSE_CompareScalarUnorderedLessThan: - case NI_SSE_CompareScalarUnorderedGreaterThanOrEqual: - case NI_SSE_CompareScalarUnorderedGreaterThan: - case NI_SSE2_CompareScalarOrderedEqual: - case NI_SSE2_CompareScalarOrderedNotEqual: - case NI_SSE2_CompareScalarOrderedLessThan: - case NI_SSE2_CompareScalarOrderedLessThanOrEqual: - case NI_SSE2_CompareScalarOrderedGreaterThan: - case NI_SSE2_CompareScalarOrderedGreaterThanOrEqual: - case NI_SSE2_CompareScalarUnorderedEqual: - case NI_SSE2_CompareScalarUnorderedNotEqual: - case NI_SSE2_CompareScalarUnorderedLessThanOrEqual: - case NI_SSE2_CompareScalarUnorderedLessThan: - case NI_SSE2_CompareScalarUnorderedGreaterThanOrEqual: - case NI_SSE2_CompareScalarUnorderedGreaterThan: + case NI_X86Base_CompareScalarOrderedEqual: + case NI_X86Base_CompareScalarOrderedNotEqual: + case NI_X86Base_CompareScalarOrderedLessThan: + case NI_X86Base_CompareScalarOrderedLessThanOrEqual: + case NI_X86Base_CompareScalarOrderedGreaterThan: + case NI_X86Base_CompareScalarOrderedGreaterThanOrEqual: + case NI_X86Base_CompareScalarUnorderedEqual: + case NI_X86Base_CompareScalarUnorderedNotEqual: + case NI_X86Base_CompareScalarUnorderedLessThanOrEqual: + case NI_X86Base_CompareScalarUnorderedLessThan: + case NI_X86Base_CompareScalarUnorderedGreaterThanOrEqual: + case 
NI_X86Base_CompareScalarUnorderedGreaterThan: case NI_SSE41_TestC: case NI_SSE41_TestZ: case NI_SSE41_TestNotZAndNotC: @@ -271,7 +259,7 @@ bool IntegralRange::Contains(int64_t value) const case NI_AVX_TestNotZAndNotC: return {SymbolicIntegerValue::Zero, SymbolicIntegerValue::One}; - case NI_SSE2_Extract: + case NI_X86Base_Extract: case NI_SSE41_Extract: case NI_SSE41_X64_Extract: case NI_Vector128_ToScalar: diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 7b6658b7867ddc..3e3ef0f7b92b00 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -940,8 +940,6 @@ class CodeGen final : public CodeGenInterface void genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); void genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); - void genSSEIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); - void genSSE2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); void genSSE41Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); void genSSE42Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); void genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 3b15589856c0ac..d0623b71e4ce77 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -5692,10 +5692,10 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) case NI_Vector128_ToScalar: case NI_Vector256_ToScalar: case NI_Vector512_ToScalar: - case NI_SSE2_ConvertToInt32: - case NI_SSE2_ConvertToUInt32: - case NI_SSE2_X64_ConvertToInt64: - case NI_SSE2_X64_ConvertToUInt64: + case NI_X86Base_ConvertToInt32: + case NI_X86Base_ConvertToUInt32: + case NI_X86Base_X64_ConvertToInt64: + case NI_X86Base_X64_ConvertToUInt64: case NI_AVX2_ConvertToInt32: case NI_AVX2_ConvertToUInt32: { @@ -5718,7 +5718,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) FALLTHROUGH; } - case NI_SSE2_Extract: + case 
NI_X86Base_Extract: case NI_SSE41_Extract: case NI_SSE41_X64_Extract: case NI_AVX_ExtractVector128: @@ -5730,7 +5730,7 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(hwintrinsic->GetSimdSize())); - if (intrinsicId == NI_SSE2_Extract) + if (intrinsicId == NI_X86Base_Extract) { // The encoding that supports containment is SSE4.1 only ins = INS_pextrw_sse41; diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 43c2fea00cd1f7..154a11ae964301 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2015,7 +2015,7 @@ void Compiler::compSetProcessor() !instructionSetFlags.HasInstructionSet(InstructionSet_Vector256) && !instructionSetFlags.HasInstructionSet(InstructionSet_Vector512)); - if (instructionSetFlags.HasInstructionSet(InstructionSet_SSE)) + if (instructionSetFlags.HasInstructionSet(InstructionSet_X86Base)) { instructionSetFlags.AddInstructionSet(InstructionSet_Vector128); } @@ -6088,21 +6088,15 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, } } - if (JitConfig.EnableHWIntrinsic() != 0) + if ((JitConfig.EnableHWIntrinsic() != 0) && (JitConfig.EnableSSE() != 0) && (JitConfig.EnableSSE2() != 0)) { + // These ISAs are grouped together and if any are disabled then + // you lose access to all of them. 
We recommend modern code just + // use EnableHWIntrinsic, but we continue checking the older knobs + // for back-compat instructionSetFlags.AddInstructionSet(InstructionSet_X86Base); } - if (JitConfig.EnableSSE() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_SSE); - } - - if (JitConfig.EnableSSE2() != 0) - { - instructionSetFlags.AddInstructionSet(InstructionSet_SSE2); - } - if ((JitConfig.EnableSSE3() != 0) && (JitConfig.EnableSSE3_4() != 0)) { instructionSetFlags.AddInstructionSet(InstructionSet_SSE3); @@ -6186,27 +6180,21 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, instructionSetFlags.AddInstructionSet(InstructionSet_AVXVNNI); } - if ((JitConfig.EnableAVX512() != 0) && - (JitConfig.EnableAVX512F() != 0) && - (JitConfig.EnableAVX512F_VL() != 0) && - (JitConfig.EnableAVX512BW() != 0) && - (JitConfig.EnableAVX512BW_VL() != 0) && - (JitConfig.EnableAVX512CD() != 0) && - (JitConfig.EnableAVX512CD_VL() != 0) && - (JitConfig.EnableAVX512DQ() != 0) && + if ((JitConfig.EnableAVX512() != 0) && (JitConfig.EnableAVX512F() != 0) && + (JitConfig.EnableAVX512F_VL() != 0) && (JitConfig.EnableAVX512BW() != 0) && + (JitConfig.EnableAVX512BW_VL() != 0) && (JitConfig.EnableAVX512CD() != 0) && + (JitConfig.EnableAVX512CD_VL() != 0) && (JitConfig.EnableAVX512DQ() != 0) && (JitConfig.EnableAVX512DQ_VL() != 0)) { - // These ISAs are grouped together and if any are disabled then - // you lose access to all of them. 
We recommend modern code just - // use EnableAVX512, but we continue checking the older knobs for - // back-compat + // These ISAs are likewise grouped together and should be checked + // via EnableAVX512 instructionSetFlags.AddInstructionSet(InstructionSet_AVX512); } - if ((JitConfig.EnableAVX512VBMI() != 0) && - (JitConfig.EnableAVX512VBMI_VL() != 0)) + if ((JitConfig.EnableAVX512VBMI() != 0) && (JitConfig.EnableAVX512VBMI_VL() != 0)) { - // These ISAs are likewise grouped together + // These ISAs are likewise grouped together and should be checked + // via EnableAVX512VBMI instructionSetFlags.AddInstructionSet(InstructionSet_AVX512VBMI); } diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 8b3f435562b415..417c20880359e9 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8918,7 +8918,7 @@ class Compiler { #ifdef FEATURE_SIMD #if defined(TARGET_XARCH) - CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2; + CORINFO_InstructionSet minimumIsa = InstructionSet_X86Base; #elif defined(TARGET_ARM64) CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd; #elif defined(TARGET_LOONGARCH64) @@ -8940,7 +8940,7 @@ class Compiler { #ifdef FEATURE_SIMD #if defined(TARGET_XARCH) - CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2; + CORINFO_InstructionSet minimumIsa = InstructionSet_X86Base; #elif defined(TARGET_ARM64) CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd; #else @@ -9234,7 +9234,7 @@ class Compiler { return YMM_REGSIZE_BYTES; } - else if (compOpportunisticallyDependsOn(InstructionSet_SSE)) + else if (compOpportunisticallyDependsOn(InstructionSet_X86Base)) { return XMM_REGSIZE_BYTES; } diff --git a/src/coreclr/jit/decomposelongs.cpp b/src/coreclr/jit/decomposelongs.cpp index 12466cc8cb390c..4cd474c9338d80 100644 --- a/src/coreclr/jit/decomposelongs.cpp +++ b/src/coreclr/jit/decomposelongs.cpp @@ -1970,7 +1970,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicToScalar(LIR::Use& use, GenTreeHWIn 
} else { - assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_SSE2)); + assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_X86Base)); GenTree* thirtyTwo = m_compiler->gtNewIconNode(32); GenTree* shift = m_compiler->gtNewSimdBinOpNode(GT_RSZ, op1->TypeGet(), simdTmpVar, thirtyTwo, diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index f32e14ad4b1e13..6998273e33d960 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -3424,9 +3424,9 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block) switch (intrinsicId) { #if defined(TARGET_XARCH) - case NI_SSE_StoreFence: - case NI_SSE2_LoadFence: - case NI_SSE2_MemoryFence: + case NI_X86Base_LoadFence: + case NI_X86Base_MemoryFence: + case NI_X86Base_StoreFence: case NI_X86Serialize_Serialize: { assert(tree->OperRequiresAsgFlag()); @@ -3435,10 +3435,10 @@ void Compiler::fgDebugCheckFlags(GenTree* tree, BasicBlock* block) } case NI_X86Base_Pause: - case NI_SSE_Prefetch0: - case NI_SSE_Prefetch1: - case NI_SSE_Prefetch2: - case NI_SSE_PrefetchNonTemporal: + case NI_X86Base_Prefetch0: + case NI_X86Base_Prefetch1: + case NI_X86Base_Prefetch2: + case NI_X86Base_PrefetchNonTemporal: { assert(tree->OperRequiresCallFlag(this)); expectedFlags |= GTF_GLOB_REF; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index faae11cfec630c..d33eaaa62ad4c5 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20292,14 +20292,8 @@ bool GenTree::isCommutativeHWIntrinsic() const switch (id) { #ifdef TARGET_XARCH - case NI_SSE_Max: - case NI_SSE_Min: - { - return false; - } - - case NI_SSE2_Max: - case NI_SSE2_Min: + case NI_X86Base_Max: + case NI_X86Base_Min: { return !varTypeIsFloating(node->GetSimdBaseType()); } @@ -20342,10 +20336,8 @@ bool GenTree::isContainableHWIntrinsic() const #ifdef TARGET_XARCH switch (AsHWIntrinsic()->GetHWIntrinsicId()) { - case NI_SSE_LoadAlignedVector128: - case 
NI_SSE_LoadScalarVector128: - case NI_SSE2_LoadAlignedVector128: - case NI_SSE2_LoadScalarVector128: + case NI_X86Base_LoadAlignedVector128: + case NI_X86Base_LoadScalarVector128: case NI_AVX_LoadAlignedVector256: case NI_AVX512_LoadAlignedVector512: { @@ -20369,11 +20361,11 @@ bool GenTree::isContainableHWIntrinsic() const case NI_Vector128_ToScalar: case NI_Vector256_ToScalar: case NI_Vector512_ToScalar: - case NI_SSE2_ConvertToInt32: - case NI_SSE2_ConvertToUInt32: - case NI_SSE2_X64_ConvertToInt64: - case NI_SSE2_X64_ConvertToUInt64: - case NI_SSE2_Extract: + case NI_X86Base_ConvertToInt32: + case NI_X86Base_ConvertToUInt32: + case NI_X86Base_X64_ConvertToInt64: + case NI_X86Base_X64_ConvertToUInt64: + case NI_X86Base_Extract: case NI_SSE41_Extract: case NI_SSE41_X64_Extract: case NI_AVX_ExtractVector128: @@ -21378,31 +21370,32 @@ GenTree* Compiler::gtNewSimdBinOpNode( // op1Dup = Sse2.ShiftRightLogical128BitLane(op1Dup, 4) op1Dup = gtNewSimdHWIntrinsicNode(type, op1Dup, gtNewIconNode(4, TYP_INT), - NI_SSE2_ShiftRightLogical128BitLane, simdBaseJitType, simdSize); + NI_X86Base_ShiftRightLogical128BitLane, simdBaseJitType, simdSize); // op2Dup = Sse2.ShiftRightLogical128BitLane(op2Dup, 4) op2Dup = gtNewSimdHWIntrinsicNode(type, op2Dup, gtNewIconNode(4, TYP_INT), - NI_SSE2_ShiftRightLogical128BitLane, simdBaseJitType, simdSize); + NI_X86Base_ShiftRightLogical128BitLane, simdBaseJitType, simdSize); // op2Dup = Sse2.Multiply(op1Dup.AsUInt32(), op2Dup.AsUInt32()).AsInt32() - op2Dup = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_SSE2_Multiply, CORINFO_TYPE_ULONG, simdSize); + op2Dup = + gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_X86Base_Multiply, CORINFO_TYPE_ULONG, simdSize); // op2Dup = Sse2.Shuffle(op2Dup, (0, 0, 2, 0)) - op2Dup = gtNewSimdHWIntrinsicNode(type, op2Dup, gtNewIconNode(SHUFFLE_XXZX, TYP_INT), NI_SSE2_Shuffle, - simdBaseJitType, simdSize); + op2Dup = gtNewSimdHWIntrinsicNode(type, op2Dup, gtNewIconNode(SHUFFLE_XXZX, TYP_INT), + 
NI_X86Base_Shuffle, simdBaseJitType, simdSize); // op1 = Sse2.Multiply(op1.AsUInt32(), op2.AsUInt32()).AsInt32() - op1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSE2_Multiply, CORINFO_TYPE_ULONG, simdSize); + op1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_X86Base_Multiply, CORINFO_TYPE_ULONG, simdSize); // op1 = Sse2.Shuffle(op1, (0, 0, 2, 0)) - op1 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode(SHUFFLE_XXZX, TYP_INT), NI_SSE2_Shuffle, + op1 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode(SHUFFLE_XXZX, TYP_INT), NI_X86Base_Shuffle, simdBaseJitType, simdSize); // op2 = op2Dup; op2 = op2Dup; // result = Sse2.UnpackLow(op1, op2) - return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSE2_UnpackLow, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, op2, NI_X86Base_UnpackLow, simdBaseJitType, simdSize); } else if (varTypeIsLong(simdBaseType)) { @@ -21415,7 +21408,7 @@ GenTree* Compiler::gtNewSimdBinOpNode( assert(!canUseEvexEncodingDebugOnly()); assert((simdSize == 16) || compIsaSupportedDebugOnly(InstructionSet_AVX2)); - NamedIntrinsic muludq = (simdSize == 16) ? NI_SSE2_Multiply : NI_AVX2_Multiply; + NamedIntrinsic muludq = (simdSize == 16) ? 
NI_X86Base_Multiply : NI_AVX2_Multiply; GenTree* op1Dup1 = fgMakeMultiUse(&op1); GenTree* op1Dup2 = gtCloneExpr(op1Dup1); @@ -21776,7 +21769,7 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, case 16: { - hwIntrinsicID = NI_SSE2_ConvertToVector128Int32WithTruncation; + hwIntrinsicID = NI_X86Base_ConvertToVector128Int32WithTruncation; break; } @@ -22046,7 +22039,7 @@ GenTree* Compiler::gtNewSimdCmpOpNode( GenTree* tmp = gtNewSimdCmpOpNode(op, type, op1, op2, CORINFO_TYPE_INT, simdSize); op1 = fgMakeMultiUse(&tmp); - op2 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode(SHUFFLE_ZWXY), NI_SSE2_Shuffle, CORINFO_TYPE_INT, + op2 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode(SHUFFLE_ZWXY), NI_X86Base_Shuffle, CORINFO_TYPE_INT, simdSize); return gtNewSimdBinOpNode(GT_AND, type, tmp, op2, simdBaseJitType, simdSize); @@ -22250,11 +22243,11 @@ GenTree* Compiler::gtNewSimdCmpOpNode( GenTree* u = gtNewSimdCmpOpNode(GT_EQ, type, op1Dup1, op2Dup1, CORINFO_TYPE_INT, simdSize); GenTree* v = gtNewSimdCmpOpNode(op, type, op1Dup2, op2Dup2, CORINFO_TYPE_UINT, simdSize); - op1 = gtNewSimdHWIntrinsicNode(type, t, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle, + op1 = gtNewSimdHWIntrinsicNode(type, t, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_X86Base_Shuffle, CORINFO_TYPE_INT, simdSize); - u = gtNewSimdHWIntrinsicNode(type, u, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_SSE2_Shuffle, + u = gtNewSimdHWIntrinsicNode(type, u, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), NI_X86Base_Shuffle, CORINFO_TYPE_INT, simdSize); - v = gtNewSimdHWIntrinsicNode(type, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), NI_SSE2_Shuffle, + v = gtNewSimdHWIntrinsicNode(type, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), NI_X86Base_Shuffle, CORINFO_TYPE_INT, simdSize); op2 = gtNewSimdBinOpNode(GT_AND, type, u, v, simdBaseJitType, simdSize); @@ -23337,7 +23330,7 @@ GenTree* Compiler::gtNewSimdGetElementNode( case TYP_SHORT: case TYP_USHORT: { - assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); + 
assert(compIsaSupportedDebugOnly(InstructionSet_X86Base)); break; } @@ -24165,13 +24158,9 @@ GenTree* Compiler::gtNewSimdLoadAlignedNode(var_types type, assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); intrinsic = NI_AVX_LoadAlignedVector256; } - else if (simdBaseType != TYP_FLOAT) - { - intrinsic = NI_SSE2_LoadAlignedVector128; - } else { - intrinsic = NI_SSE_LoadAlignedVector128; + intrinsic = NI_X86Base_LoadAlignedVector128; } assert(intrinsic != NI_Illegal); @@ -24246,13 +24235,9 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(var_types type, intrinsic = NI_SSE41_LoadAlignedVector128NonTemporal; isNonTemporal = true; } - else if (simdBaseType != TYP_FLOAT) - { - intrinsic = NI_SSE2_LoadAlignedVector128; - } else { - intrinsic = NI_SSE_LoadAlignedVector128; + intrinsic = NI_X86Base_LoadAlignedVector128; } if (isNonTemporal) @@ -24483,16 +24468,11 @@ GenTree* Compiler::gtNewSimdMaxNativeNode( } case TYP_FLOAT: - { - intrinsic = NI_SSE_Max; - break; - } - case TYP_UBYTE: case TYP_SHORT: case TYP_DOUBLE: { - intrinsic = NI_SSE2_Max; + intrinsic = NI_X86Base_Max; break; } @@ -24728,16 +24708,11 @@ GenTree* Compiler::gtNewSimdMinNativeNode( } case TYP_FLOAT: - { - intrinsic = NI_SSE_Min; - break; - } - case TYP_UBYTE: case TYP_SHORT: case TYP_DOUBLE: { - intrinsic = NI_SSE2_Min; + intrinsic = NI_X86Base_Min; break; } @@ -24916,7 +24891,7 @@ GenTree* Compiler::gtNewSimdNarrowNode( } else { - intrinsicId = NI_SSE2_ConvertToVector128Single; + intrinsicId = NI_X86Base_ConvertToVector128Single; } opBaseJitType = CORINFO_TYPE_DOUBLE; @@ -24934,7 +24909,7 @@ GenTree* Compiler::gtNewSimdNarrowNode( if (simdSize == 16) { - return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE_MoveLowToHigh, CORINFO_TYPE_FLOAT, simdSize); + return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_MoveLowToHigh, CORINFO_TYPE_FLOAT, simdSize); } intrinsicId = (simdSize == 64) ? 
NI_Vector256_ToVector512Unsafe : NI_Vector128_ToVector256Unsafe; @@ -25106,7 +25081,7 @@ GenTree* Compiler::gtNewSimdNarrowNode( tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize); tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize); - return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE2_PackUnsignedSaturate, CORINFO_TYPE_UBYTE, + return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_PackUnsignedSaturate, CORINFO_TYPE_UBYTE, simdSize); } @@ -25164,18 +25139,18 @@ GenTree* Compiler::gtNewSimdNarrowNode( GenTree* op1Dup = fgMakeMultiUse(&op1); GenTree* op2Dup = fgMakeMultiUse(&op2); - tmp1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSE2_UnpackLow, simdBaseJitType, simdSize); - tmp2 = - gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_SSE2_UnpackHigh, simdBaseJitType, simdSize); + tmp1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_X86Base_UnpackLow, simdBaseJitType, simdSize); + tmp2 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_X86Base_UnpackHigh, simdBaseJitType, + simdSize); GenTree* tmp1Dup = fgMakeMultiUse(&tmp1); GenTree* tmp2Dup = fgMakeMultiUse(&tmp2); - tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE2_UnpackLow, simdBaseJitType, simdSize); - tmp4 = - gtNewSimdHWIntrinsicNode(type, tmp1Dup, tmp2Dup, NI_SSE2_UnpackHigh, simdBaseJitType, simdSize); + tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_UnpackLow, simdBaseJitType, simdSize); + tmp4 = gtNewSimdHWIntrinsicNode(type, tmp1Dup, tmp2Dup, NI_X86Base_UnpackHigh, simdBaseJitType, + simdSize); - return gtNewSimdHWIntrinsicNode(type, tmp3, tmp4, NI_SSE2_UnpackLow, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, tmp3, tmp4, NI_X86Base_UnpackLow, simdBaseJitType, simdSize); } } @@ -25196,10 +25171,10 @@ GenTree* Compiler::gtNewSimdNarrowNode( GenTree* op1Dup = fgMakeMultiUse(&op1); GenTree* op2Dup = fgMakeMultiUse(&op2); - tmp1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSE2_UnpackLow, 
simdBaseJitType, simdSize); - tmp2 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_SSE2_UnpackHigh, simdBaseJitType, simdSize); + tmp1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_X86Base_UnpackLow, simdBaseJitType, simdSize); + tmp2 = gtNewSimdHWIntrinsicNode(type, op1Dup, op2Dup, NI_X86Base_UnpackHigh, simdBaseJitType, simdSize); - return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE2_UnpackLow, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_UnpackLow, simdBaseJitType, simdSize); } case TYP_FLOAT: @@ -25217,10 +25192,12 @@ GenTree* Compiler::gtNewSimdNarrowNode( CorInfoType opBaseJitType = CORINFO_TYPE_DOUBLE; - tmp1 = gtNewSimdHWIntrinsicNode(type, op1, NI_SSE2_ConvertToVector128Single, opBaseJitType, simdSize); - tmp2 = gtNewSimdHWIntrinsicNode(type, op2, NI_SSE2_ConvertToVector128Single, opBaseJitType, simdSize); + tmp1 = + gtNewSimdHWIntrinsicNode(type, op1, NI_X86Base_ConvertToVector128Single, opBaseJitType, simdSize); + tmp2 = + gtNewSimdHWIntrinsicNode(type, op2, NI_X86Base_ConvertToVector128Single, opBaseJitType, simdSize); - return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_SSE_MoveLowToHigh, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_X86Base_MoveLowToHigh, simdBaseJitType, simdSize); } default: @@ -25530,7 +25507,8 @@ GenTree* Compiler::gtNewSimdShuffleVariableNode( } else { - op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_SSE2_ShiftLeftLogical, simdBaseJitType, simdSize); + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_X86Base_ShiftLeftLogical, simdBaseJitType, + simdSize); } // the below are implemented with float/int/uint @@ -25561,12 +25539,12 @@ GenTree* Compiler::gtNewSimdShuffleVariableNode( if (varTypeIsFloating(simdBaseType)) { GenTree* op2Dup = fgMakeMultiUse(&op2); - op2 = - gtNewSimdHWIntrinsicNode(type, op2, op2Dup, cnsNode, NI_SSE_Shuffle, simdBaseJitType, simdSize); + op2 = gtNewSimdHWIntrinsicNode(type, op2, op2Dup, cnsNode, 
NI_X86Base_Shuffle, simdBaseJitType, + simdSize); } else { - op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_SSE2_Shuffle, simdBaseJitType, simdSize); + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_X86Base_Shuffle, simdBaseJitType, simdSize); } } @@ -25758,7 +25736,7 @@ GenTree* Compiler::gtNewSimdShuffleVariableNode( // shift all indices to the left by tzcnt(size) cnsNode = gtNewIconNode(BitOperations::TrailingZeroCount(static_cast(elementSize)), TYP_INT); - op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_SSE2_ShiftLeftLogical, simdBaseJitType, simdSize); + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_X86Base_ShiftLeftLogical, simdBaseJitType, simdSize); // the below are implemented with byte/sbyte simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; @@ -26481,7 +26459,7 @@ GenTree* Compiler::gtNewSimdShuffleNode( if (varTypeIsIntegral(simdBaseType)) { - retNode = gtNewSimdHWIntrinsicNode(type, op1, cnsNode, NI_SSE2_Shuffle, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(type, op1, cnsNode, NI_X86Base_Shuffle, simdBaseJitType, simdSize); } else if (compOpportunisticallyDependsOn(InstructionSet_AVX)) { @@ -26490,7 +26468,7 @@ GenTree* Compiler::gtNewSimdShuffleNode( else { // for double we need SSE2, but we can't use the integral path ^ because we still need op1Dup here - NamedIntrinsic ni = simdBaseType == TYP_DOUBLE ? 
NI_SSE2_Shuffle : NI_SSE_Shuffle; + NamedIntrinsic ni = NI_X86Base_Shuffle; GenTree* op1Dup = fgMakeMultiUse(&op1); retNode = gtNewSimdHWIntrinsicNode(type, op1, op1Dup, cnsNode, ni, simdBaseJitType, simdSize); } @@ -26588,13 +26566,9 @@ GenTree* Compiler::gtNewSimdSqrtNode(var_types type, GenTree* op1, CorInfoType s assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); intrinsic = NI_AVX512_Sqrt; } - else if (simdBaseType == TYP_FLOAT) - { - intrinsic = NI_SSE_Sqrt; - } else { - intrinsic = NI_SSE2_Sqrt; + intrinsic = NI_X86Base_Sqrt; } #elif defined(TARGET_ARM64) if ((simdSize == 8) && (simdBaseType == TYP_DOUBLE)) @@ -26677,13 +26651,9 @@ GenTree* Compiler::gtNewSimdStoreAlignedNode(GenTree* op1, GenTree* op2, CorInfo assert(compIsaSupportedDebugOnly(InstructionSet_AVX512)); intrinsic = NI_AVX512_StoreAligned; } - else if (simdBaseType != TYP_FLOAT) - { - intrinsic = NI_SSE2_StoreAligned; - } else { - intrinsic = NI_SSE_StoreAligned; + intrinsic = NI_X86Base_StoreAligned; } return gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, intrinsic, simdBaseJitType, simdSize); @@ -26740,13 +26710,9 @@ GenTree* Compiler::gtNewSimdStoreNonTemporalNode(GenTree* op1, assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); intrinsic = NI_AVX_StoreAlignedNonTemporal; } - else if (simdBaseType != TYP_FLOAT) - { - intrinsic = NI_SSE2_StoreAlignedNonTemporal; - } else { - intrinsic = NI_SSE_StoreAlignedNonTemporal; + intrinsic = NI_X86Base_StoreAlignedNonTemporal; } return gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, intrinsic, simdBaseJitType, simdSize); @@ -26834,7 +26800,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { if (simdBaseType == TYP_FLOAT) { - assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); + assert(compIsaSupportedDebugOnly(InstructionSet_X86Base)); GenTree* op1Shuffled = fgMakeMultiUse(&op1); if (compOpportunisticallyDependsOn(InstructionSet_AVX)) @@ -26852,17 +26818,17 @@ GenTree* Compiler::gtNewSimdSumNode(var_types 
type, GenTree* op1, CorInfoType si } else { - assert(compIsaSupportedDebugOnly(InstructionSet_SSE)); + assert(compIsaSupportedDebugOnly(InstructionSet_X86Base)); // The shuffle below gives us [0, 1, 2, 3] -> [1, 0, 3, 2] op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op1Shuffled, gtNewIconNode((int)0b10110001, TYP_INT), - NI_SSE_Shuffle, simdBaseJitType, simdSize); + NI_X86Base_Shuffle, simdBaseJitType, simdSize); op1Shuffled = fgMakeMultiUse(&op1Shuffled); // The add below now results in [0 + 1, 1 + 0, 2 + 3, 3 + 2] op1 = gtNewSimdBinOpNode(GT_ADD, TYP_SIMD16, op1, op1Shuffled, simdBaseJitType, simdSize); op1Shuffled = fgMakeMultiUse(&op1); // The shuffle below gives us [0 + 1, 1 + 0, 2 + 3, 3 + 2] -> [2 + 3, 3 + 2, 0 + 1, 1 + 0] op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op1Shuffled, gtNewIconNode((int)0b01001110, TYP_INT), - NI_SSE_Shuffle, simdBaseJitType, simdSize); + NI_X86Base_Shuffle, simdBaseJitType, simdSize); op1Shuffled = fgMakeMultiUse(&op1Shuffled); } // Finally adding the results gets us [(0 + 1) + (2 + 3), (1 + 0) + (3 + 2), (2 + 3) + (0 + 1), (3 + 2) + (1 @@ -26872,7 +26838,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si } else { - assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); + assert(compIsaSupportedDebugOnly(InstructionSet_X86Base)); GenTree* op1Shuffled = fgMakeMultiUse(&op1); if (compOpportunisticallyDependsOn(InstructionSet_AVX)) @@ -26886,7 +26852,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { // The shuffle below gives us [0, 1] -> [1, 0] op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op1Shuffled, gtNewIconNode((int)0b0001, TYP_INT), - NI_SSE2_Shuffle, simdBaseJitType, simdSize); + NI_X86Base_Shuffle, simdBaseJitType, simdSize); op1Shuffled = fgMakeMultiUse(&op1Shuffled); } // Finally adding the results gets us [0 + 1, 1 + 0] @@ -26909,7 +26875,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si { tmp = 
fgMakeMultiUse(&op1); opShifted = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, gtNewIconNode(shiftVal, TYP_INT), - NI_SSE2_ShiftRightLogical128BitLane, simdBaseJitType, simdSize); + NI_X86Base_ShiftRightLogical128BitLane, simdBaseJitType, simdSize); op1 = gtNewSimdBinOpNode(GT_ADD, TYP_SIMD16, opShifted, tmp, simdBaseJitType, simdSize); shiftVal = shiftVal / 2; } @@ -27401,7 +27367,7 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo case TYP_FLOAT: { - intrinsic = NI_SSE2_ConvertToVector128Double; + intrinsic = NI_X86Base_ConvertToVector128Double; break; } @@ -27422,10 +27388,10 @@ GenTree* Compiler::gtNewSimdWidenLowerNode(var_types type, GenTree* op1, CorInfo { GenTree* op1Dup = fgMakeMultiUse(&op1); - tmp1 = gtNewSimdHWIntrinsicNode(type, op1Dup, tmp1, NI_SSE2_CompareLessThan, simdBaseJitType, simdSize); + tmp1 = gtNewSimdHWIntrinsicNode(type, op1Dup, tmp1, NI_X86Base_CompareLessThan, simdBaseJitType, simdSize); } - return gtNewSimdHWIntrinsicNode(type, op1, tmp1, NI_SSE2_UnpackLow, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, tmp1, NI_X86Base_UnpackLow, simdBaseJitType, simdSize); } #elif defined(TARGET_ARM64) if (simdSize == 16) @@ -27594,12 +27560,12 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfo GenTree* op1Dup = fgMakeMultiUse(&op1); - tmp1 = gtNewSimdHWIntrinsicNode(type, op1, op1Dup, NI_SSE_MoveHighToLow, simdBaseJitType, simdSize); - return gtNewSimdHWIntrinsicNode(type, tmp1, NI_SSE2_ConvertToVector128Double, simdBaseJitType, simdSize); + tmp1 = gtNewSimdHWIntrinsicNode(type, op1, op1Dup, NI_X86Base_MoveHighToLow, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, tmp1, NI_X86Base_ConvertToVector128Double, simdBaseJitType, simdSize); } else if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) { - tmp1 = gtNewSimdHWIntrinsicNode(type, op1, gtNewIconNode(8), NI_SSE2_ShiftRightLogical128BitLane, + tmp1 = gtNewSimdHWIntrinsicNode(type, op1, 
gtNewIconNode(8), NI_X86Base_ShiftRightLogical128BitLane, simdBaseJitType, simdSize); switch (simdBaseType) @@ -27642,10 +27608,10 @@ GenTree* Compiler::gtNewSimdWidenUpperNode(var_types type, GenTree* op1, CorInfo { GenTree* op1Dup = fgMakeMultiUse(&op1); - tmp1 = gtNewSimdHWIntrinsicNode(type, op1Dup, tmp1, NI_SSE2_CompareLessThan, simdBaseJitType, simdSize); + tmp1 = gtNewSimdHWIntrinsicNode(type, op1Dup, tmp1, NI_X86Base_CompareLessThan, simdBaseJitType, simdSize); } - return gtNewSimdHWIntrinsicNode(type, op1, tmp1, NI_SSE2_UnpackHigh, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, op1, tmp1, NI_X86Base_UnpackHigh, simdBaseJitType, simdSize); } #elif defined(TARGET_ARM64) if (simdSize == 16) @@ -27725,7 +27691,7 @@ GenTree* Compiler::gtNewSimdWithElementNode( case TYP_FLOAT: case TYP_SHORT: case TYP_USHORT: - assert(compIsaSupportedDebugOnly(InstructionSet_SSE2)); + assert(compIsaSupportedDebugOnly(InstructionSet_X86Base)); break; default: @@ -28041,10 +28007,8 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const switch (intrinsicId) { #ifdef TARGET_XARCH - case NI_SSE_LoadLow: - case NI_SSE_LoadHigh: - case NI_SSE2_LoadLow: - case NI_SSE2_LoadHigh: + case NI_X86Base_LoadLow: + case NI_X86Base_LoadHigh: addr = Op(2); break; #endif // TARGET_XARCH @@ -28248,7 +28212,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore(GenTree** pAddr) const switch (intrinsicId) { #ifdef TARGET_XARCH - case NI_SSE2_MaskMove: + case NI_X86Base_MaskMove: addr = Op(3); break; @@ -28531,10 +28495,10 @@ bool GenTreeHWIntrinsic::OperRequiresCallFlag() const { #if defined(TARGET_XARCH) case NI_X86Base_Pause: - case NI_SSE_Prefetch0: - case NI_SSE_Prefetch1: - case NI_SSE_Prefetch2: - case NI_SSE_PrefetchNonTemporal: + case NI_X86Base_Prefetch0: + case NI_X86Base_Prefetch1: + case NI_X86Base_Prefetch2: + case NI_X86Base_PrefetchNonTemporal: { return true; } @@ -28725,9 +28689,9 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId) switch 
(intrinsicId) { #if defined(TARGET_XARCH) - case NI_SSE_StoreFence: - case NI_SSE2_LoadFence: - case NI_SSE2_MemoryFence: + case NI_X86Base_LoadFence: + case NI_X86Base_MemoryFence: + case NI_X86Base_StoreFence: case NI_X86Serialize_Serialize: { // Mark as a store and global reference, much as is done for GT_MEMORYBARRIER @@ -28736,10 +28700,10 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId) } case NI_X86Base_Pause: - case NI_SSE_Prefetch0: - case NI_SSE_Prefetch1: - case NI_SSE_Prefetch2: - case NI_SSE_PrefetchNonTemporal: + case NI_X86Base_Prefetch0: + case NI_X86Base_Prefetch1: + case NI_X86Base_Prefetch2: + case NI_X86Base_PrefetchNonTemporal: { // Mark as a call and global reference, much as is done for GT_KEEPALIVE gtFlags |= (GTF_CALL | GTF_GLOB_REF); @@ -28807,8 +28771,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty switch (id) { #if defined(TARGET_XARCH) - case NI_SSE_And: - case NI_SSE2_And: + case NI_X86Base_And: case NI_AVX_And: case NI_AVX2_And: case NI_AVX512_And: @@ -28830,8 +28793,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_Xor: - case NI_SSE2_Xor: + case NI_X86Base_Xor: case NI_AVX_Xor: case NI_AVX2_Xor: case NI_AVX512_Xor: @@ -28844,8 +28806,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_Or: - case NI_SSE2_Or: + case NI_X86Base_Or: case NI_AVX_Or: case NI_AVX2_Or: case NI_AVX512_Or: @@ -28858,8 +28819,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_AndNot: - case NI_SSE2_AndNot: + case NI_X86Base_AndNot: case NI_AVX_AndNot: case NI_AVX2_AndNot: case NI_AVX512_AndNot: @@ -28872,8 +28832,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_Add: - case NI_SSE2_Add: + case 
NI_X86Base_Add: case NI_AVX_Add: case NI_AVX2_Add: case NI_AVX512_Add: @@ -28886,8 +28845,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_AddScalar: - case NI_SSE2_AddScalar: + case NI_X86Base_AddScalar: case NI_AVX512_AddScalar: { *isScalar = true; @@ -28907,8 +28865,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_Divide: - case NI_SSE2_Divide: + case NI_X86Base_Divide: case NI_AVX_Divide: case NI_AVX512_Divide: #elif defined(TARGET_ARM64) @@ -28919,8 +28876,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_DivideScalar: - case NI_SSE2_DivideScalar: + case NI_X86Base_DivideScalar: case NI_AVX512_DivideScalar: { *isScalar = true; @@ -28940,8 +28896,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_Multiply: - case NI_SSE2_MultiplyLow: + case NI_X86Base_MultiplyLow: case NI_SSE41_MultiplyLow: case NI_AVX_Multiply: case NI_AVX2_MultiplyLow: @@ -28955,7 +28910,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE2_Multiply: + case NI_X86Base_Multiply: case NI_AVX512_Multiply: { if (varTypeIsFloating(simdBaseType)) @@ -28967,8 +28922,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_MultiplyScalar: - case NI_SSE2_MultiplyScalar: + case NI_X86Base_MultiplyScalar: case NI_AVX512_MultiplyScalar: { *isScalar = true; @@ -29020,7 +28974,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif // TARGET_XARCH #if defined(TARGET_XARCH) - case NI_SSE2_ShiftLeftLogical: + case NI_X86Base_ShiftLeftLogical: case NI_AVX2_ShiftLeftLogical: case 
NI_AVX2_ShiftLeftLogicalVariable: case NI_AVX512_ShiftLeftLogical: @@ -29044,7 +28998,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE2_ShiftRightArithmetic: + case NI_X86Base_ShiftRightArithmetic: case NI_AVX2_ShiftRightArithmetic: case NI_AVX2_ShiftRightArithmeticVariable: case NI_AVX512_ShiftRightArithmetic: @@ -29068,7 +29022,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE2_ShiftRightLogical: + case NI_X86Base_ShiftRightLogical: case NI_AVX2_ShiftRightLogical: case NI_AVX2_ShiftRightLogicalVariable: case NI_AVX512_ShiftRightLogical: @@ -29092,8 +29046,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_Subtract: - case NI_SSE2_Subtract: + case NI_X86Base_Subtract: case NI_AVX_Subtract: case NI_AVX2_Subtract: case NI_AVX512_Subtract: @@ -29106,8 +29059,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_SubtractScalar: - case NI_SSE2_SubtractScalar: + case NI_X86Base_SubtractScalar: case NI_AVX512_SubtractScalar: { *isScalar = true; @@ -29127,8 +29079,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_CompareEqual: - case NI_SSE2_CompareEqual: + case NI_X86Base_CompareEqual: case NI_SSE41_CompareEqual: case NI_AVX_CompareEqual: case NI_AVX2_CompareEqual: @@ -29142,8 +29093,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_CompareScalarEqual: - case NI_SSE2_CompareScalarEqual: + case NI_X86Base_CompareScalarEqual: { *isScalar = true; return GT_EQ; @@ -29162,8 +29112,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - 
case NI_SSE_CompareGreaterThan: - case NI_SSE2_CompareGreaterThan: + case NI_X86Base_CompareGreaterThan: case NI_SSE42_CompareGreaterThan: case NI_AVX_CompareGreaterThan: case NI_AVX2_CompareGreaterThan: @@ -29177,8 +29126,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_CompareScalarGreaterThan: - case NI_SSE2_CompareScalarGreaterThan: + case NI_X86Base_CompareScalarGreaterThan: { *isScalar = true; return GT_GT; @@ -29197,8 +29145,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_CompareGreaterThanOrEqual: - case NI_SSE2_CompareGreaterThanOrEqual: + case NI_X86Base_CompareGreaterThanOrEqual: case NI_AVX_CompareGreaterThanOrEqual: case NI_AVX512_CompareGreaterThanOrEqualMask: #elif defined(TARGET_ARM64) @@ -29210,8 +29157,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_CompareScalarGreaterThanOrEqual: - case NI_SSE2_CompareScalarGreaterThanOrEqual: + case NI_X86Base_CompareScalarGreaterThanOrEqual: { *isScalar = true; return GT_GE; @@ -29230,8 +29176,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_CompareLessThan: - case NI_SSE2_CompareLessThan: + case NI_X86Base_CompareLessThan: case NI_SSE42_CompareLessThan: case NI_AVX_CompareLessThan: case NI_AVX2_CompareLessThan: @@ -29245,8 +29190,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_CompareScalarLessThan: - case NI_SSE2_CompareScalarLessThan: + case NI_X86Base_CompareScalarLessThan: { *isScalar = true; return GT_LT; @@ -29265,8 +29209,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_CompareLessThanOrEqual: - case 
NI_SSE2_CompareLessThanOrEqual: + case NI_X86Base_CompareLessThanOrEqual: case NI_AVX_CompareLessThanOrEqual: case NI_AVX512_CompareLessThanOrEqualMask: #elif defined(TARGET_ARM64) @@ -29278,8 +29221,7 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty } #if defined(TARGET_XARCH) - case NI_SSE_CompareScalarLessThanOrEqual: - case NI_SSE2_CompareScalarLessThanOrEqual: + case NI_X86Base_CompareScalarLessThanOrEqual: { *isScalar = true; return GT_LE; @@ -29298,16 +29240,14 @@ genTreeOps GenTreeHWIntrinsic::GetOperForHWIntrinsicId(NamedIntrinsic id, var_ty #endif #if defined(TARGET_XARCH) - case NI_SSE_CompareNotEqual: - case NI_SSE2_CompareNotEqual: + case NI_X86Base_CompareNotEqual: case NI_AVX_CompareNotEqual: case NI_AVX512_CompareNotEqualMask: { return GT_NE; } - case NI_SSE_CompareScalarNotEqual: - case NI_SSE2_CompareScalarNotEqual: + case NI_X86Base_CompareScalarNotEqual: { *isScalar = true; return GT_NE; @@ -29502,14 +29442,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_AVX_Add; } } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_AddScalar : NI_SSE_Add; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_AddScalar : NI_SSE2_Add; + id = isScalar ? 
NI_X86Base_AddScalar : NI_X86Base_Add; } #elif defined(TARGET_ARM64) if ((simdSize == 8) && (isScalar || (genTypeSize(simdBaseType) == 8))) @@ -29557,14 +29492,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_AVX_And; } } - else if (simdBaseType == TYP_FLOAT) - { - id = NI_SSE_And; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_And; + id = NI_X86Base_And; } #elif defined(TARGET_ARM64) id = NI_AdvSimd_And; @@ -29609,14 +29539,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_AVX_AndNot; } } - else if (simdBaseType == TYP_FLOAT) - { - id = NI_SSE_AndNot; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_AndNot; + id = NI_X86Base_AndNot; } #elif defined(TARGET_ARM64) @@ -29645,14 +29570,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, { id = NI_AVX_Divide; } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_DivideScalar : NI_SSE_Divide; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_DivideScalar : NI_SSE2_Divide; + id = isScalar ? 
NI_X86Base_DivideScalar : NI_X86Base_Divide; } } #elif defined(TARGET_ARM64) @@ -29697,8 +29617,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_ShiftLeftLogical; + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); + id = NI_X86Base_ShiftLeftLogical; } } else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) @@ -29715,8 +29635,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else if (varTypeIsInt(op2)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_ShiftLeftLogical; + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); + id = NI_X86Base_ShiftLeftLogical; } else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX2)) { @@ -29779,14 +29699,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_AVX2_MultiplyLow; } } - else if (simdBaseType == TYP_FLOAT) + else if (varTypeIsFloating(simdBaseType)) { - id = isScalar ? NI_SSE_MultiplyScalar : NI_SSE_Multiply; - } - else if (simdBaseType == TYP_DOUBLE) - { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_MultiplyScalar : NI_SSE2_Multiply; + id = isScalar ? 
NI_X86Base_MultiplyScalar : NI_X86Base_Multiply; } else if (varTypeIsInt(simdBaseType)) { @@ -29797,8 +29712,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else if (varTypeIsShort(simdBaseType)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_MultiplyLow; + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); + id = NI_X86Base_MultiplyLow; } #elif defined(TARGET_ARM64) if ((simdSize == 8) && (isScalar || (simdBaseType == TYP_DOUBLE))) @@ -29846,14 +29761,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_AVX_Or; } } - else if (simdBaseType == TYP_FLOAT) - { - id = NI_SSE_Or; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_Or; + id = NI_X86Base_Or; } #elif defined(TARGET_ARM64) @@ -29948,8 +29858,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_ShiftRightArithmetic; + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); + id = NI_X86Base_ShiftRightArithmetic; } } else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) @@ -29966,8 +29876,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else if (varTypeIsInt(op2)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_ShiftRightArithmetic; + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); + id = NI_X86Base_ShiftRightArithmetic; } else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX2)) { @@ -30016,8 +29926,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_ShiftRightLogical; + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); + id = NI_X86Base_ShiftRightLogical; } } else if 
(comp->compOpportunisticallyDependsOn(InstructionSet_AVX512)) @@ -30034,8 +29944,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, } else if (varTypeIsInt(op2)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_ShiftRightLogical; + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); + id = NI_X86Base_ShiftRightLogical; } else if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX2)) { @@ -30083,14 +29993,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_AVX_Subtract; } } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_SubtractScalar : NI_SSE_Subtract; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_SubtractScalar : NI_SSE2_Subtract; + id = isScalar ? NI_X86Base_SubtractScalar : NI_X86Base_Subtract; } #elif defined(TARGET_ARM64) if ((simdSize == 8) && (isScalar || (genTypeSize(simdBaseType) == 8))) @@ -30138,14 +30043,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp, id = NI_AVX_Xor; } } - else if (simdBaseType == TYP_FLOAT) - { - id = NI_SSE_Xor; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_Xor; + id = NI_X86Base_Xor; } #elif defined(TARGET_ARM64) id = NI_AdvSimd_Xor; @@ -30246,10 +30146,6 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, id = NI_AVX_CompareEqual; } } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_CompareScalarEqual : NI_SSE_CompareEqual; - } else if (varTypeIsLong(simdBaseType)) { if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) @@ -30259,8 +30155,7 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_CompareScalarEqual : NI_SSE2_CompareEqual; + id = isScalar ? 
NI_X86Base_CompareScalarEqual : NI_X86Base_CompareEqual; } #elif defined(TARGET_ARM64) if (genTypeSize(simdBaseType) == 8) @@ -30293,14 +30188,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, { id = NI_AVX_CompareGreaterThanOrEqual; } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_CompareScalarGreaterThanOrEqual : NI_SSE_CompareGreaterThanOrEqual; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_CompareScalarGreaterThanOrEqual : NI_SSE2_CompareGreaterThanOrEqual; + id = isScalar ? NI_X86Base_CompareScalarGreaterThanOrEqual : NI_X86Base_CompareGreaterThanOrEqual; } #elif defined(TARGET_ARM64) if (genTypeSize(simdBaseType) == 8) @@ -30343,8 +30233,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_CompareGreaterThan; + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); + id = NI_X86Base_CompareGreaterThan; } } else @@ -30357,14 +30247,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, { id = NI_AVX_CompareGreaterThan; } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_CompareScalarGreaterThan : NI_SSE_CompareGreaterThan; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_CompareScalarGreaterThan : NI_SSE2_CompareGreaterThan; + id = isScalar ? NI_X86Base_CompareScalarGreaterThan : NI_X86Base_CompareGreaterThan; } #elif defined(TARGET_ARM64) if (genTypeSize(simdBaseType) == 8) @@ -30397,14 +30282,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, { id = NI_AVX_CompareLessThanOrEqual; } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_CompareScalarLessThanOrEqual : NI_SSE_CompareLessThanOrEqual; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? 
NI_SSE2_CompareScalarLessThanOrEqual : NI_SSE2_CompareLessThanOrEqual; + id = isScalar ? NI_X86Base_CompareScalarLessThanOrEqual : NI_X86Base_CompareLessThanOrEqual; } #elif defined(TARGET_ARM64) if (genTypeSize(simdBaseType) == 8) @@ -30447,8 +30327,8 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = NI_SSE2_CompareLessThan; + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); + id = NI_X86Base_CompareLessThan; } } else @@ -30461,14 +30341,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, { id = NI_AVX_CompareLessThan; } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_CompareScalarLessThan : NI_SSE_CompareLessThan; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_CompareScalarLessThan : NI_SSE2_CompareLessThan; + id = isScalar ? NI_X86Base_CompareScalarLessThan : NI_X86Base_CompareLessThan; } #elif defined(TARGET_ARM64) if (genTypeSize(simdBaseType) == 8) @@ -30501,14 +30376,9 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForCmpOp(Compiler* comp, { id = NI_AVX_CompareNotEqual; } - else if (simdBaseType == TYP_FLOAT) - { - id = isScalar ? NI_SSE_CompareScalarNotEqual : NI_SSE_CompareNotEqual; - } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - id = isScalar ? NI_SSE2_CompareScalarNotEqual : NI_SSE2_CompareNotEqual; + id = isScalar ? 
NI_X86Base_CompareScalarNotEqual : NI_X86Base_CompareNotEqual; } #endif // TARGET_XARCH break; @@ -30700,8 +30570,7 @@ bool GenTreeHWIntrinsic::ShouldConstantProp(GenTree* operand, GenTreeVecCon* vec } #if defined(TARGET_XARCH) - case NI_SSE_Xor: - case NI_SSE2_Xor: + case NI_X86Base_Xor: case NI_AVX_Xor: case NI_AVX2_Xor: case NI_AVX512_Xor: diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 00d15b6749eaaa..24c0d4e533d461 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -903,8 +903,6 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { // clang-format off #if defined(TARGET_XARCH) { FIRST_NI_X86Base, LAST_NI_X86Base }, - { FIRST_NI_SSE, LAST_NI_SSE }, - { FIRST_NI_SSE2, LAST_NI_SSE2 }, { FIRST_NI_SSE3, LAST_NI_SSE3 }, { FIRST_NI_SSSE3, LAST_NI_SSSE3 }, { FIRST_NI_SSE41, LAST_NI_SSE41 }, @@ -938,8 +936,6 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { FIRST_NI_GFNI_V256, LAST_NI_GFNI_V256 }, { FIRST_NI_GFNI_V512, LAST_NI_GFNI_V512 }, { FIRST_NI_X86Base_X64, LAST_NI_X86Base_X64 }, - { FIRST_NI_SSE_X64, LAST_NI_SSE_X64 }, - { FIRST_NI_SSE2_X64, LAST_NI_SSE2_X64 }, { NI_Illegal, NI_Illegal }, // SSE3_X64 { NI_Illegal, NI_Illegal }, // SSSE3_X64 { FIRST_NI_SSE41_X64, LAST_NI_SSE41_X64 }, @@ -1226,7 +1222,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, if (isa == InstructionSet_Vector128) { - isa = InstructionSet_SSE2; + isa = InstructionSet_X86Base; vectorByteLength = 16; } else if (isa == InstructionSet_Vector256) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 0d35a5178e5487..8005cfa8922097 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -844,7 +844,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else { - assert(intrinsicId == NI_SSE2_MaskMove); + assert(intrinsicId == NI_X86Base_MaskMove); assert(targetReg == 
REG_NA); // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI @@ -971,19 +971,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case InstructionSet_SSE: - case InstructionSet_SSE_X64: - { - genSSEIntrinsic(node, instOptions); - break; - } - case InstructionSet_SSE2: - case InstructionSet_SSE2_X64: - { - genSSE2Intrinsic(node, instOptions); - break; - } - case InstructionSet_SSE41: case InstructionSet_SSE41_X64: { @@ -1821,7 +1808,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) regNumber targetReg = node->GetRegNum(); var_types baseType = node->GetSimdBaseType(); - assert(compiler->compIsaSupportedDebugOnly(InstructionSet_SSE)); + assert(compiler->compIsaSupportedDebugOnly(InstructionSet_X86Base)); assert((baseType >= TYP_BYTE) && (baseType <= TYP_DOUBLE)); GenTree* op1 = (node->GetOperandCount() >= 1) ? node->Op(1) : nullptr; @@ -2374,6 +2361,10 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) { NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); + regNumber targetReg = node->GetRegNum(); + var_types targetType = node->TypeGet(); + var_types baseType = node->GetSimdBaseType(); + emitter* emit = GetEmitter(); genConsumeMultiOpOperands(node); @@ -2384,10 +2375,8 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) case NI_X86Base_X64_BitScanForward: case NI_X86Base_X64_BitScanReverse: { - GenTree* op1 = node->Op(1); - regNumber targetReg = node->GetRegNum(); - var_types targetType = node->TypeGet(); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType, compiler); + GenTree* op1 = node->Op(1); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType, compiler); genHWIntrinsic_R_RM(node, ins, emitTypeSize(targetType), targetReg, op1, instOptions); break; @@ -2396,7 +2385,7 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* 
node, insOpts instOptions) case NI_X86Base_Pause: { assert(node->GetSimdBaseType() == TYP_UNKNOWN); - GetEmitter()->emitIns(INS_pause); + emit->emitIns(INS_pause); break; } @@ -2407,18 +2396,19 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) assert(instOptions == INS_OPTS_NONE); // SIMD base type is from signature and can distinguish signed and unsigned - var_types targetType = node->GetSimdBaseType(); - GenTree* op1 = node->Op(1); - GenTree* op2 = node->Op(2); - GenTree* op3 = node->Op(3); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType, compiler); + targetType = node->GetSimdBaseType(); + + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); + GenTree* op3 = node->Op(3); + + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType, compiler); regNumber op1Reg = op1->GetRegNum(); regNumber op2Reg = op2->GetRegNum(); regNumber op3Reg = op3->GetRegNum(); emitAttr attr = emitTypeSize(targetType); - emitter* emit = GetEmitter(); // op1: EAX, op2: EDX, op3: free assert(op1Reg != REG_EDX); @@ -2438,54 +2428,19 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) break; } - default: - unreached(); - break; - } - - genProduceReg(node); -} - -//------------------------------------------------------------------------ -// genSSEIntrinsic: Generates the code for an SSE hardware intrinsic node -// -// Arguments: -// node - The hardware intrinsic node -// instOptions - The options used to when generating the instruction. 
-// -void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) -{ - NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); - regNumber targetReg = node->GetRegNum(); - var_types targetType = node->TypeGet(); - var_types baseType = node->GetSimdBaseType(); - emitter* emit = GetEmitter(); - - genConsumeMultiOpOperands(node); - - switch (intrinsicId) - { - case NI_SSE_X64_ConvertToInt64: - case NI_SSE_X64_ConvertToInt64WithTruncation: - { - assert(targetType == TYP_LONG); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); - genHWIntrinsic_R_RM(node, ins, EA_8BYTE, targetReg, node->Op(1), instOptions); - break; - } - - case NI_SSE_X64_ConvertScalarToVector128Single: + case NI_X86Base_X64_ConvertScalarToVector128Double: + case NI_X86Base_X64_ConvertScalarToVector128Single: { - assert(baseType == TYP_LONG); + assert(baseType == TYP_LONG || baseType == TYP_ULONG); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); genHWIntrinsic_R_R_RM(node, ins, EA_8BYTE, instOptions); break; } - case NI_SSE_Prefetch0: - case NI_SSE_Prefetch1: - case NI_SSE_Prefetch2: - case NI_SSE_PrefetchNonTemporal: + case NI_X86Base_Prefetch0: + case NI_X86Base_Prefetch1: + case NI_X86Base_Prefetch2: + case NI_X86Base_PrefetchNonTemporal: { assert(baseType == TYP_UBYTE); assert(instOptions == INS_OPTS_NONE); @@ -2497,63 +2452,21 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) break; } - case NI_SSE_StoreFence: + case NI_X86Base_StoreFence: { assert(baseType == TYP_UNKNOWN); emit->emitIns(INS_sfence); break; } - default: - unreached(); - break; - } - - genProduceReg(node); -} - -//------------------------------------------------------------------------ -// genSSE2Intrinsic: Generates the code for an SSE2 hardware intrinsic node -// -// Arguments: -// node - The hardware intrinsic node -// instOptions - The options used to when generating the instruction. 
-// -void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) -{ - NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); - regNumber targetReg = node->GetRegNum(); - var_types targetType = node->TypeGet(); - var_types baseType = node->GetSimdBaseType(); - emitter* emit = GetEmitter(); - - genConsumeMultiOpOperands(node); - - switch (intrinsicId) - { - case NI_SSE2_X64_ConvertScalarToVector128Double: - { - assert(baseType == TYP_LONG); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); - genHWIntrinsic_R_R_RM(node, ins, EA_8BYTE, instOptions); - break; - } - - case NI_SSE2_X64_ConvertScalarToVector128Int64: - case NI_SSE2_X64_ConvertScalarToVector128UInt64: - { - assert(baseType == TYP_LONG || baseType == TYP_ULONG); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); - genHWIntrinsic_R_RM(node, ins, emitTypeSize(baseType), targetReg, node->Op(1), instOptions); - break; - } - - case NI_SSE2_ConvertToInt32: - case NI_SSE2_ConvertToInt32WithTruncation: - case NI_SSE2_ConvertToUInt32: - case NI_SSE2_X64_ConvertToInt64: - case NI_SSE2_X64_ConvertToInt64WithTruncation: - case NI_SSE2_X64_ConvertToUInt64: + case NI_X86Base_X64_ConvertScalarToVector128Int64: + case NI_X86Base_X64_ConvertScalarToVector128UInt64: + case NI_X86Base_ConvertToInt32: + case NI_X86Base_ConvertToInt32WithTruncation: + case NI_X86Base_ConvertToUInt32: + case NI_X86Base_X64_ConvertToInt64: + case NI_X86Base_X64_ConvertToInt64WithTruncation: + case NI_X86Base_X64_ConvertToUInt64: { emitAttr attr; if (varTypeIsIntegral(baseType)) @@ -2572,22 +2485,22 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) break; } - case NI_SSE2_LoadFence: + case NI_X86Base_LoadFence: { assert(baseType == TYP_UNKNOWN); emit->emitIns(INS_lfence); break; } - case NI_SSE2_MemoryFence: + case NI_X86Base_MemoryFence: { assert(baseType == TYP_UNKNOWN); emit->emitIns(INS_mfence); break; } - case NI_SSE2_StoreNonTemporal: 
- case NI_SSE2_X64_StoreNonTemporal: + case NI_X86Base_StoreNonTemporal: + case NI_X86Base_X64_StoreNonTemporal: { assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler); diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index ce25ed1ad81c83..de4c3ac963c454 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -402,12 +402,126 @@ HARDWARE_INTRINSIC(Vector512, op_UnsignedRightShift, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // X86Base Intrinsics -#define FIRST_NI_X86Base NI_X86Base_BitScanForward -HARDWARE_INTRINSIC(X86Base, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_idiv, INS_div, INS_invalid, INS_invalid}, HW_Category_Scalar, 
HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(X86Base, Pause, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -#define LAST_NI_X86Base NI_X86Base_Pause +#define FIRST_NI_X86Base NI_X86Base_Add +HARDWARE_INTRINSIC(X86Base, Add, 16, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, AddSaturate, 16, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, AddScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, And, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_pandd, INS_pandd, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, AndNot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_pandnd, INS_pandnd, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, Average, 16, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareEqual, 16, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareGreaterThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, 
CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, 
HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarOrderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, 
HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareScalarUnorderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, 
HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, CompareUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, ConvertScalarToVector128UInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si32, INS_cvtsd2si32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si32, INS_cvttsd2si32}, HW_Category_SIMDScalar, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToVector128Int32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_idiv, INS_div, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(X86Base, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, 
HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, DivideScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, Extract, 16, 2, {INS_invalid, INS_invalid, INS_pextrw, INS_pextrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, Insert, 16, 3, {INS_invalid, INS_invalid, INS_pinsrw, INS_pinsrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(X86Base, LoadAlignedVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, LoadFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) +HARDWARE_INTRINSIC(X86Base, LoadHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_movhpd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, LoadLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_movlpd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, LoadScalarVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_movd32, INS_movq, INS_movq, INS_movss, INS_movsd_simd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) 
+HARDWARE_INTRINSIC(X86Base, LoadVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(X86Base, MaskMove, 16, 3, {INS_maskmovdqu, INS_maskmovdqu, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, Max, 16, 2, {INS_invalid, INS_pmaxub, INS_pmaxsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(X86Base, MaxScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxss, INS_maxsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, MemoryFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) +HARDWARE_INTRINSIC(X86Base, Min, 16, 2, {INS_invalid, INS_pminub, INS_pminsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(X86Base, MinScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minss, INS_minsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, MoveHighToLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhlps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(X86Base, MoveLowToHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(X86Base, MoveMask, 16, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_NoContainment) +HARDWARE_INTRINSIC(X86Base, Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuludq, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, MultiplyAddAdjacent, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, MultiplyHigh, 16, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, MultiplyLow, 16, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(X86Base, MultiplyScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, Or, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_pord, INS_pord, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, 
HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, PackSignedSaturate, 16, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, PackUnsignedSaturate, 16, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, Pause, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(X86Base, Prefetch0, 0, 1, {INS_invalid, INS_prefetcht0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(X86Base, Prefetch1, 0, 1, {INS_invalid, INS_prefetcht1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(X86Base, Prefetch2, 0, 1, {INS_invalid, INS_prefetcht2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(X86Base, PrefetchNonTemporal, 0, 1, {INS_invalid, INS_prefetchnta, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) +HARDWARE_INTRINSIC(X86Base, Reciprocal, 16, 1, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, ReciprocalScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, ReciprocalSqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, ReciprocalSqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(X86Base, ShiftLeftLogical, 16, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShiftLeftLogical128BitLane, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, ShiftRightArithmetic, 16, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShiftRightLogical, 16, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) 
+HARDWARE_INTRINSIC(X86Base, ShiftRightLogical128BitLane, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShuffleHigh, 16, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, ShuffleLow, 16, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(X86Base, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, Store, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) +HARDWARE_INTRINSIC(X86Base, StoreFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) +HARDWARE_INTRINSIC(X86Base, StoreHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_movhpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, StoreLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti32, INS_movnti32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_movd32, INS_movq, INS_movq, INS_movss, INS_movsd_simd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base, Subtract, 16, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, SubtractSaturate, 16, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) 
+HARDWARE_INTRINSIC(X86Base, SubtractScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(X86Base, SumAbsoluteDifferences, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, UnpackHigh, 16, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, UnpackLow, 16, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(X86Base, Xor, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_pxord, INS_pxord, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) +#define LAST_NI_X86Base NI_X86Base_Xor // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags @@ -415,239 +529,18 @@ HARDWARE_INTRINSIC(X86Base, Pause, // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // X86Base 64-bit-only Intrinsics #define FIRST_NI_X86Base_X64 NI_X86Base_X64_BitScanForward -HARDWARE_INTRINSIC(X86Base_X64, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base_X64, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(X86Base_X64, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic) -#define LAST_NI_X86Base_X64 NI_X86Base_X64_DivRem - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, 
TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// SSE Intrinsics -#define FIRST_NI_SSE NI_SSE_Add -HARDWARE_INTRINSIC(SSE, Add, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE, AddScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, And, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE, AndNot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(SSE, CompareEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, 
HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarOrderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarOrderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarOrderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarOrderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarOrderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarOrderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, 
HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si32, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, ConvertToInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si32, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) 
-HARDWARE_INTRINSIC(SSE, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE, DivideScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, LoadAlignedVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, LoadHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, LoadLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, LoadScalarVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, LoadVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(SSE, Max, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(SSE, MaxScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) 
-HARDWARE_INTRINSIC(SSE, Min, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(SSE, MinScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, MoveHighToLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhlps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(SSE, MoveLowToHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(SSE, MoveMask, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, MoveScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(SSE, Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE, MultiplyScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, Or, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_orps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE, Prefetch0, 0, 1, {INS_invalid, INS_prefetcht0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(SSE, Prefetch1, 0, 1, {INS_invalid, INS_prefetcht1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(SSE, Prefetch2, 0, 1, {INS_invalid, INS_prefetcht2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(SSE, PrefetchNonTemporal, 0, 1, {INS_invalid, INS_prefetchnta, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) -HARDWARE_INTRINSIC(SSE, Reciprocal, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, ReciprocalScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, ReciprocalSqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, ReciprocalSqrtScalar, 16, -1, 
{INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, Shuffle, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(SSE, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, Store, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE, StoreFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) -HARDWARE_INTRINSIC(SSE, StoreHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE, StoreLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE, Subtract, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE, SubtractScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, UnpackHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE, UnpackLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE, Xor, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -#define LAST_NI_SSE NI_SSE_Xor - -// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// SSE 64-bit-only Intrinsics -#define FIRST_NI_SSE_X64 NI_SSE_X64_ConvertScalarToVector128Single -HARDWARE_INTRINSIC(SSE_X64, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(SSE_X64, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si64, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(SSE_X64, ConvertToInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si64, INS_invalid}, HW_Category_SIMDScalar, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -#define LAST_NI_SSE_X64 NI_SSE_X64_ConvertToInt64WithTruncation - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// SSE2 Intrinsics -#define FIRST_NI_SSE2 NI_SSE2_Add -HARDWARE_INTRINSIC(SSE2, Add, 16, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE2, AddSaturate, 16, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE2, AddScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, And, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandd, INS_pandd, INS_pandd, 
INS_pandd, INS_invalid, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(SSE2, AndNot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pandnd, INS_pandnd, INS_pandnd, INS_pandnd, INS_invalid, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(SSE2, Average, 16, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE2, CompareEqual, 16, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareGreaterThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, 
HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarLessThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarNotGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarNotGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarNotLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, 
CompareScalarNotLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedLessThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedLessThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedNotEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128UInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2si32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttsd2si32}, HW_Category_SIMDScalar, 
HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToVector128Int32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, DivideScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, Extract, 16, 2, {INS_invalid, INS_invalid, INS_pextrw, INS_pextrw, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, Insert, 16, 3, {INS_invalid, INS_invalid, INS_pinsrw, INS_pinsrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(SSE2, LoadAlignedVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_invalid, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(SSE2, LoadFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) -HARDWARE_INTRINSIC(SSE2, LoadHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhpd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, LoadLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlpd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, LoadScalarVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_movd32, INS_movq, INS_movq, INS_invalid, INS_movsd_simd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, LoadVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) -HARDWARE_INTRINSIC(SSE2, MaskMove, 16, 3, {INS_maskmovdqu, INS_maskmovdqu, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, Max, 16, 2, {INS_invalid, INS_pmaxub, INS_pmaxsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(SSE2, MaxScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, MemoryFence, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) -HARDWARE_INTRINSIC(SSE2, Min, 16, 2, {INS_invalid, INS_pminub, INS_pminsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative) -HARDWARE_INTRINSIC(SSE2, MinScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, MoveMask, 16, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskpd}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, MoveScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_invalid, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(SSE2, Multiply, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuludq, INS_invalid, INS_mulpd}, HW_Category_SimpleSIMD, 
HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE2, MultiplyAddAdjacent, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE2, MultiplyHigh, 16, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE2, MultiplyLow, 16, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE2, MultiplyScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, Or, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pord, INS_pord, INS_pord, INS_pord, INS_invalid, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(SSE2, PackSignedSaturate, 16, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, PackUnsignedSaturate, 16, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, ShiftLeftLogical, 16, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(SSE2, ShiftLeftLogical128BitLane, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslldq, INS_pslldq, 
INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(SSE2, ShiftRightArithmetic, 16, 2, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(SSE2, ShiftRightLogical, 16, 2, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(SSE2, ShiftRightLogical128BitLane, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(SSE2, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_invalid, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(SSE2, ShuffleHigh, 16, 2, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(SSE2, ShuffleLow, 16, 2, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(SSE2, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) 
-HARDWARE_INTRINSIC(SSE2, Store, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_invalid, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(SSE2, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_invalid, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NormalizeSmallTypeToInt) -HARDWARE_INTRINSIC(SSE2, StoreHigh, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, StoreLow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti32, INS_movnti32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_movd32, INS_movq, INS_movq, INS_invalid, INS_movsd_simd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, Subtract, 16, 2, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, 
INS_psubq, INS_psubq, INS_invalid, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, SubtractSaturate, 16, 2, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, SubtractScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, SumAbsoluteDifferences, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, UnpackHigh, 16, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, UnpackLow, 16, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, Xor, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pxord, INS_pxord, INS_pxord, INS_pxord, INS_invalid, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt) -#define LAST_NI_SSE2 NI_SSE2_Xor - -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// ISA Function name SIMD 
size NumArg Instructions Category Flags -// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} -// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** -// SSE2 64-bit-only Intrinsics -#define FIRST_NI_SSE2_X64 NI_SSE2_X64_ConvertScalarToVector128Double -HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(SSE2_X64, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid, INS_cvtsd2si64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_X64, ConvertToInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttsd2si64}, 
HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_X64, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2_X64, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti64, INS_movnti64, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) -#define LAST_NI_SSE2_X64 NI_SSE2_X64_StoreNonTemporal +HARDWARE_INTRINSIC(X86Base_X64, BitScanForward, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsf, INS_bsf, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base_X64, BitScanReverse, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bsr, INS_bsr, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss64, INS_invalid, INS_invalid, INS_invalid}, 
HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertScalarToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_cvtss2si64, INS_cvtsd2si64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertToInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si64, INS_cvttsd2si64}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(X86Base_X64, DivRem, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_idiv, INS_div, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg|HW_Flag_MultiReg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(X86Base_X64, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti64, INS_movnti64, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) +#define LAST_NI_X86Base_X64 NI_X86Base_X64_StoreNonTemporal // 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags @@ -1359,10 +1252,8 @@ HARDWARE_INTRINSIC(GFNI_V512, GaloisFieldMultiply, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Special intrinsics that are generated during lowering -HARDWARE_INTRINSIC(SSE, COMISS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, UCOMISS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, COMISD, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, UCOMISD, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, 
HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, COMIS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(X86Base, UCOMIS, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41, PTEST, 16, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, PTEST, 0, 2, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX512, KORTEST, 0, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index b3f99b252e504c..e9c45aa8ed8546 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -20,10 +20,6 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa) { case InstructionSet_X86Base: return InstructionSet_X86Base_X64; - case InstructionSet_SSE: - return InstructionSet_SSE_X64; - case InstructionSet_SSE2: - return InstructionSet_SSE2_X64; case InstructionSet_SSE3: return InstructionSet_SSE3_X64; case InstructionSet_SSSE3: @@ -269,13 +265,9 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className) { if (strncmp(className, "Sse", 3) == 0) { - if (className[3] == '\0') + if ((className[3] == '\0') || (strcmp(className + 3, "2") == 0)) { - return InstructionSet_SSE; - } - 
else if (strcmp(className + 3, "2") == 0) - { - return InstructionSet_SSE2; + return InstructionSet_X86Base; } else if (strcmp(className + 3, "3") == 0) { @@ -597,7 +589,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarEqual : NI_SSE2_CompareScalarEqual; + return NI_X86Base_CompareScalarEqual; } assert(intrinsic == NI_AVX_Compare); @@ -606,7 +598,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareEqual; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareEqual : NI_SSE2_CompareEqual; + return NI_X86Base_CompareEqual; } case FloatComparisonMode::OrderedGreaterThanSignaling: @@ -617,7 +609,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarGreaterThan : NI_SSE2_CompareScalarGreaterThan; + return NI_X86Base_CompareScalarGreaterThan; } assert(intrinsic == NI_AVX_Compare); @@ -626,7 +618,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareGreaterThan; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareGreaterThan : NI_SSE2_CompareGreaterThan; + return NI_X86Base_CompareGreaterThan; } case FloatComparisonMode::OrderedGreaterThanOrEqualSignaling: @@ -637,8 +629,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarGreaterThanOrEqual - : NI_SSE2_CompareScalarGreaterThanOrEqual; + return NI_X86Base_CompareScalarGreaterThanOrEqual; } assert(intrinsic == NI_AVX_Compare); @@ -647,7 +638,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareGreaterThanOrEqual; } - return (simdBaseType == TYP_FLOAT) ? 
NI_SSE_CompareGreaterThanOrEqual : NI_SSE2_CompareGreaterThanOrEqual; + return NI_X86Base_CompareGreaterThanOrEqual; } case FloatComparisonMode::OrderedLessThanSignaling: @@ -658,7 +649,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarLessThan : NI_SSE2_CompareScalarLessThan; + return NI_X86Base_CompareScalarLessThan; } assert(intrinsic == NI_AVX_Compare); @@ -667,7 +658,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareLessThan; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareLessThan : NI_SSE2_CompareLessThan; + return NI_X86Base_CompareLessThan; } case FloatComparisonMode::OrderedLessThanOrEqualSignaling: @@ -678,8 +669,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarLessThanOrEqual - : NI_SSE2_CompareScalarLessThanOrEqual; + return NI_X86Base_CompareScalarLessThanOrEqual; } assert(intrinsic == NI_AVX_Compare); @@ -688,7 +678,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareLessThanOrEqual; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareLessThanOrEqual : NI_SSE2_CompareLessThanOrEqual; + return NI_X86Base_CompareLessThanOrEqual; } case FloatComparisonMode::UnorderedNotEqualNonSignaling: @@ -699,7 +689,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? 
NI_SSE_CompareScalarNotEqual : NI_SSE2_CompareScalarNotEqual; + return NI_X86Base_CompareScalarNotEqual; } assert(intrinsic == NI_AVX_Compare); @@ -708,7 +698,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareNotEqual; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareNotEqual : NI_SSE2_CompareNotEqual; + return NI_X86Base_CompareNotEqual; } case FloatComparisonMode::UnorderedNotGreaterThanSignaling: @@ -719,8 +709,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarNotGreaterThan - : NI_SSE2_CompareScalarNotGreaterThan; + return NI_X86Base_CompareScalarNotGreaterThan; } assert(intrinsic == NI_AVX_Compare); @@ -729,7 +718,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareNotGreaterThan; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareNotGreaterThan : NI_SSE2_CompareNotGreaterThan; + return NI_X86Base_CompareNotGreaterThan; } case FloatComparisonMode::UnorderedNotGreaterThanOrEqualSignaling: @@ -740,8 +729,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarNotGreaterThanOrEqual - : NI_SSE2_CompareScalarNotGreaterThanOrEqual; + return NI_X86Base_CompareScalarNotGreaterThanOrEqual; } assert(intrinsic == NI_AVX_Compare); @@ -750,8 +738,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareNotGreaterThanOrEqual; } - return (simdBaseType == TYP_FLOAT) ? 
NI_SSE_CompareNotGreaterThanOrEqual - : NI_SSE2_CompareNotGreaterThanOrEqual; + return NI_X86Base_CompareNotGreaterThanOrEqual; } case FloatComparisonMode::UnorderedNotLessThanSignaling: @@ -762,7 +749,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarNotLessThan : NI_SSE2_CompareScalarNotLessThan; + return NI_X86Base_CompareScalarNotLessThan; } assert(intrinsic == NI_AVX_Compare); @@ -771,7 +758,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareNotLessThan; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareNotLessThan : NI_SSE2_CompareNotLessThan; + return NI_X86Base_CompareNotLessThan; } case FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling: @@ -782,8 +769,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarNotLessThanOrEqual - : NI_SSE2_CompareScalarNotLessThanOrEqual; + return NI_X86Base_CompareScalarNotLessThanOrEqual; } assert(intrinsic == NI_AVX_Compare); @@ -792,7 +778,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareNotLessThanOrEqual; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareNotLessThanOrEqual : NI_SSE2_CompareNotLessThanOrEqual; + return NI_X86Base_CompareNotLessThanOrEqual; } case FloatComparisonMode::OrderedNonSignaling: @@ -803,7 +789,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? 
NI_SSE_CompareScalarOrdered : NI_SSE2_CompareScalarOrdered; + return NI_X86Base_CompareScalarOrdered; } assert(intrinsic == NI_AVX_Compare); @@ -812,7 +798,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareOrdered; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareOrdered : NI_SSE2_CompareOrdered; + return NI_X86Base_CompareOrdered; } case FloatComparisonMode::UnorderedNonSignaling: @@ -823,7 +809,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic } else if (intrinsic == NI_AVX_CompareScalar) { - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareScalarUnordered : NI_SSE2_CompareScalarUnordered; + return NI_X86Base_CompareScalarUnordered; } assert(intrinsic == NI_AVX_Compare); @@ -832,7 +818,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupIdForFloatComparisonMode(NamedIntrinsic { return NI_AVX_CompareUnordered; } - return (simdBaseType == TYP_FLOAT) ? NI_SSE_CompareUnordered : NI_SSE2_CompareUnordered; + return NI_X86Base_CompareUnordered; } default: @@ -881,10 +867,6 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) case InstructionSet_PCLMULQDQ_V512: case InstructionSet_POPCNT: case InstructionSet_POPCNT_X64: - case InstructionSet_SSE: - case InstructionSet_SSE_X64: - case InstructionSet_SSE2: - case InstructionSet_SSE2_X64: case InstructionSet_SSE3: case InstructionSet_SSE3_X64: case InstructionSet_SSSE3: @@ -967,10 +949,8 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim { switch (id) { - case NI_SSE_CompareEqual: - case NI_SSE_CompareScalarEqual: - case NI_SSE2_CompareEqual: - case NI_SSE2_CompareScalarEqual: + case NI_X86Base_CompareEqual: + case NI_X86Base_CompareScalarEqual: case NI_AVX_CompareEqual: case NI_AVX512_CompareEqualMask: { @@ -985,10 +965,8 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim break; } - case NI_SSE_CompareGreaterThan: - case 
NI_SSE_CompareScalarGreaterThan: - case NI_SSE2_CompareGreaterThan: - case NI_SSE2_CompareScalarGreaterThan: + case NI_X86Base_CompareGreaterThan: + case NI_X86Base_CompareScalarGreaterThan: case NI_AVX_CompareGreaterThan: case NI_AVX512_CompareGreaterThanMask: { @@ -1008,10 +986,8 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim break; } - case NI_SSE_CompareLessThan: - case NI_SSE_CompareScalarLessThan: - case NI_SSE2_CompareLessThan: - case NI_SSE2_CompareScalarLessThan: + case NI_X86Base_CompareLessThan: + case NI_X86Base_CompareScalarLessThan: case NI_AVX_CompareLessThan: case NI_AVX512_CompareLessThanMask: { @@ -1027,10 +1003,8 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim break; } - case NI_SSE_CompareGreaterThanOrEqual: - case NI_SSE_CompareScalarGreaterThanOrEqual: - case NI_SSE2_CompareGreaterThanOrEqual: - case NI_SSE2_CompareScalarGreaterThanOrEqual: + case NI_X86Base_CompareGreaterThanOrEqual: + case NI_X86Base_CompareScalarGreaterThanOrEqual: case NI_AVX_CompareGreaterThanOrEqual: case NI_AVX512_CompareGreaterThanOrEqualMask: { @@ -1050,10 +1024,8 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim break; } - case NI_SSE_CompareLessThanOrEqual: - case NI_SSE_CompareScalarLessThanOrEqual: - case NI_SSE2_CompareLessThanOrEqual: - case NI_SSE2_CompareScalarLessThanOrEqual: + case NI_X86Base_CompareLessThanOrEqual: + case NI_X86Base_CompareScalarLessThanOrEqual: case NI_AVX_CompareLessThanOrEqual: case NI_AVX512_CompareLessThanOrEqualMask: { @@ -1069,10 +1041,8 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim break; } - case NI_SSE_CompareNotEqual: - case NI_SSE_CompareScalarNotEqual: - case NI_SSE2_CompareNotEqual: - case NI_SSE2_CompareScalarNotEqual: + case NI_X86Base_CompareNotEqual: + case NI_X86Base_CompareScalarNotEqual: case NI_AVX_CompareNotEqual: case NI_AVX512_CompareNotEqualMask: { @@ -1088,10 +1058,8 @@ 
int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim break; } - case NI_SSE_CompareNotGreaterThan: - case NI_SSE_CompareScalarNotGreaterThan: - case NI_SSE2_CompareNotGreaterThan: - case NI_SSE2_CompareScalarNotGreaterThan: + case NI_X86Base_CompareNotGreaterThan: + case NI_X86Base_CompareScalarNotGreaterThan: case NI_AVX_CompareNotGreaterThan: case NI_AVX512_CompareNotGreaterThanMask: { @@ -1111,10 +1079,8 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim break; } - case NI_SSE_CompareNotLessThan: - case NI_SSE_CompareScalarNotLessThan: - case NI_SSE2_CompareNotLessThan: - case NI_SSE2_CompareScalarNotLessThan: + case NI_X86Base_CompareNotLessThan: + case NI_X86Base_CompareScalarNotLessThan: case NI_AVX_CompareNotLessThan: case NI_AVX512_CompareNotLessThanMask: { @@ -1130,10 +1096,8 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim break; } - case NI_SSE_CompareNotGreaterThanOrEqual: - case NI_SSE_CompareScalarNotGreaterThanOrEqual: - case NI_SSE2_CompareNotGreaterThanOrEqual: - case NI_SSE2_CompareScalarNotGreaterThanOrEqual: + case NI_X86Base_CompareNotGreaterThanOrEqual: + case NI_X86Base_CompareScalarNotGreaterThanOrEqual: case NI_AVX_CompareNotGreaterThanOrEqual: case NI_AVX512_CompareNotGreaterThanOrEqualMask: { @@ -1153,10 +1117,8 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim break; } - case NI_SSE_CompareNotLessThanOrEqual: - case NI_SSE_CompareScalarNotLessThanOrEqual: - case NI_SSE2_CompareNotLessThanOrEqual: - case NI_SSE2_CompareScalarNotLessThanOrEqual: + case NI_X86Base_CompareNotLessThanOrEqual: + case NI_X86Base_CompareScalarNotLessThanOrEqual: case NI_AVX_CompareNotLessThanOrEqual: case NI_AVX512_CompareNotLessThanOrEqualMask: { @@ -1172,10 +1134,8 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim break; } - case NI_SSE_CompareOrdered: - case NI_SSE_CompareScalarOrdered: - case 
NI_SSE2_CompareOrdered: - case NI_SSE2_CompareScalarOrdered: + case NI_X86Base_CompareOrdered: + case NI_X86Base_CompareScalarOrdered: case NI_AVX_CompareOrdered: case NI_AVX512_CompareOrderedMask: { @@ -1183,10 +1143,8 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim return static_cast(FloatComparisonMode::OrderedNonSignaling); } - case NI_SSE_CompareUnordered: - case NI_SSE_CompareScalarUnordered: - case NI_SSE2_CompareUnordered: - case NI_SSE2_CompareScalarUnordered: + case NI_X86Base_CompareUnordered: + case NI_X86Base_CompareScalarUnordered: case NI_AVX_CompareUnordered: case NI_AVX512_CompareUnorderedMask: { @@ -1274,9 +1232,9 @@ GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdT assert(HWIntrinsicInfo::NoJmpTableImm(intrinsic) || HWIntrinsicInfo::MaybeNoJmpTableImm(intrinsic)); switch (intrinsic) { - case NI_SSE2_ShiftLeftLogical: - case NI_SSE2_ShiftRightArithmetic: - case NI_SSE2_ShiftRightLogical: + case NI_X86Base_ShiftLeftLogical: + case NI_X86Base_ShiftRightArithmetic: + case NI_X86Base_ShiftRightLogical: case NI_AVX2_ShiftLeftLogical: case NI_AVX2_ShiftRightArithmetic: case NI_AVX2_ShiftRightLogical: @@ -1415,8 +1373,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_SSE_AndNot: - case NI_SSE2_AndNot: + case NI_X86Base_AndNot: case NI_AVX_AndNot: case NI_AVX2_AndNot: case NI_AVX512_AndNot: @@ -1432,23 +1389,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - if (IsBaselineSimdIsaSupported()) - { - op1 = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize)); - retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize); - } - else - { - // We need to ensure we import even if SSE2 is disabled - assert(intrinsic == NI_SSE_AndNot); - - op3 = gtNewAllBitsSetConNode(retType); - - op1 = gtNewSimdHWIntrinsicNode(retType, op1, op3, NI_SSE_Xor, 
simdBaseJitType, simdSize); - op1 = gtFoldExpr(op1); - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, NI_SSE_And, simdBaseJitType, simdSize); - } + op1 = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize)); + retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize); break; } @@ -1497,7 +1439,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, else { assert(simdSize == 16); - intrinsic = NI_SSE2_AddSaturate; + intrinsic = NI_X86Base_AddSaturate; } retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); @@ -2071,7 +2013,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, switch (simdSize) { case 16: - intrinsic = NI_SSE2_ConvertToVector128Single; + intrinsic = NI_X86Base_ConvertToVector128Single; break; case 32: intrinsic = NI_AVX_ConvertToVector256Single; @@ -2559,7 +2501,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case TYP_UBYTE: { op1 = impSIMDPopStack(); - moveMaskIntrinsic = (simdSize == 32) ? NI_AVX2_MoveMask : NI_SSE2_MoveMask; + moveMaskIntrinsic = (simdSize == 32) ? NI_AVX2_MoveMask : NI_X86Base_MoveMask; break; } @@ -2587,12 +2529,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, simdVal.u64[3] = 0x8080808080808080; shuffleIntrinsic = NI_AVX2_Shuffle; - moveMaskIntrinsic = NI_SSE2_MoveMask; + moveMaskIntrinsic = NI_X86Base_MoveMask; } else if (compOpportunisticallyDependsOn(InstructionSet_SSSE3)) { shuffleIntrinsic = NI_SSSE3_Shuffle; - moveMaskIntrinsic = NI_SSE2_MoveMask; + moveMaskIntrinsic = NI_X86Base_MoveMask; } else { @@ -2633,7 +2575,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { simdBaseJitType = CORINFO_TYPE_FLOAT; op1 = impSIMDPopStack(); - moveMaskIntrinsic = (simdSize == 32) ? NI_AVX_MoveMask : NI_SSE_MoveMask; + moveMaskIntrinsic = (simdSize == 32) ? 
NI_AVX_MoveMask : NI_X86Base_MoveMask; break; } @@ -2643,7 +2585,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { simdBaseJitType = CORINFO_TYPE_DOUBLE; op1 = impSIMDPopStack(); - moveMaskIntrinsic = (simdSize == 32) ? NI_AVX_MoveMask : NI_SSE2_MoveMask; + moveMaskIntrinsic = (simdSize == 32) ? NI_AVX_MoveMask : NI_X86Base_MoveMask; break; } @@ -3169,8 +3111,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_SSE_LoadVector128: - case NI_SSE2_LoadVector128: + case NI_X86Base_LoadVector128: case NI_AVX_LoadVector256: case NI_AVX512_LoadVector512: case NI_Vector128_LoadUnsafe: @@ -3420,7 +3361,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, // PackSignedSaturate uses the base type of the return for the simdBaseType simdBaseJitType = (simdBaseType == TYP_SHORT) ? CORINFO_TYPE_BYTE : CORINFO_TYPE_SHORT; - intrinsic = NI_SSE2_PackSignedSaturate; + intrinsic = NI_X86Base_PackSignedSaturate; retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); } else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) @@ -3871,8 +3812,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_SSE_Store: - case NI_SSE2_Store: + case NI_X86Base_Store: case NI_AVX_Store: case NI_AVX512_Store: { @@ -4009,7 +3949,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, else { assert(simdSize == 16); - intrinsic = NI_SSE2_SubtractSaturate; + intrinsic = NI_X86Base_SubtractSaturate; } retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); @@ -4311,10 +4251,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_SSE_CompareScalarGreaterThan: - case NI_SSE_CompareScalarGreaterThanOrEqual: - case NI_SSE_CompareScalarNotGreaterThan: - case NI_SSE_CompareScalarNotGreaterThanOrEqual: + case NI_X86Base_CompareScalarGreaterThan: + case 
NI_X86Base_CompareScalarGreaterThanOrEqual: + case NI_X86Base_CompareScalarNotGreaterThan: + case NI_X86Base_CompareScalarNotGreaterThanOrEqual: { assert(sig->numArgs == 2); @@ -4326,10 +4266,9 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, stackState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); } - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - simdBaseJitType = getBaseJitTypeOfSIMDType(sig->retTypeSigClass); - assert(JitType2PreciseVarType(simdBaseJitType) == TYP_FLOAT); + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + assert(varTypeIsFloating(JitType2PreciseVarType(simdBaseJitType))); if (supportsAvx) { @@ -4344,31 +4283,31 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, { GenTree* clonedOp1 = nullptr; op1 = impCloneExpr(op1, &clonedOp1, CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone op1 for Sse.CompareScalarGreaterThan")); + nullptr DEBUGARG("Clone op1 for CompareScalarGreaterThan")); switch (intrinsic) { - case NI_SSE_CompareScalarGreaterThan: + case NI_X86Base_CompareScalarGreaterThan: { - intrinsic = NI_SSE_CompareScalarLessThan; + intrinsic = NI_X86Base_CompareScalarLessThan; break; } - case NI_SSE_CompareScalarGreaterThanOrEqual: + case NI_X86Base_CompareScalarGreaterThanOrEqual: { - intrinsic = NI_SSE_CompareScalarLessThanOrEqual; + intrinsic = NI_X86Base_CompareScalarLessThanOrEqual; break; } - case NI_SSE_CompareScalarNotGreaterThan: + case NI_X86Base_CompareScalarNotGreaterThan: { - intrinsic = NI_SSE_CompareScalarNotLessThan; + intrinsic = NI_X86Base_CompareScalarNotLessThan; break; } - case NI_SSE_CompareScalarNotGreaterThanOrEqual: + case NI_X86Base_CompareScalarNotGreaterThanOrEqual: { - intrinsic = NI_SSE_CompareScalarNotLessThanOrEqual; + intrinsic = NI_X86Base_CompareScalarNotLessThanOrEqual; break; } @@ -4379,16 +4318,16 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, 
simdBaseJitType, simdSize); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_SSE_MoveScalar, simdBaseJitType, - simdSize); + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_X86Base_MoveScalar, + simdBaseJitType, simdSize); } break; } - case NI_SSE_Prefetch0: - case NI_SSE_Prefetch1: - case NI_SSE_Prefetch2: - case NI_SSE_PrefetchNonTemporal: + case NI_X86Base_Prefetch0: + case NI_X86Base_Prefetch1: + case NI_X86Base_Prefetch2: + case NI_X86Base_PrefetchNonTemporal: { assert(sig->numArgs == 1); assert(JITtype2varType(sig->retType) == TYP_VOID); @@ -4397,87 +4336,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_SSE_StoreFence: + case NI_X86Base_StoreFence: assert(sig->numArgs == 0); assert(JITtype2varType(sig->retType) == TYP_VOID); retNode = gtNewScalarHWIntrinsicNode(TYP_VOID, intrinsic); break; - case NI_SSE2_CompareScalarGreaterThan: - case NI_SSE2_CompareScalarGreaterThanOrEqual: - case NI_SSE2_CompareScalarNotGreaterThan: - case NI_SSE2_CompareScalarNotGreaterThanOrEqual: - { - assert(sig->numArgs == 2); - - bool supportsAvx = compOpportunisticallyDependsOn(InstructionSet_AVX); - - if (!supportsAvx) - { - impSpillSideEffect(true, - stackState.esStackDepth - 2 DEBUGARG("Spilling op1 side effects for HWIntrinsic")); - } - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); - assert(JitType2PreciseVarType(simdBaseJitType) == TYP_DOUBLE); - - if (supportsAvx) - { - // These intrinsics are "special import" because the non-AVX path isn't directly - // hardware supported. Instead, they start with "swapped operands" and we fix that here. 
- - int ival = HWIntrinsicInfo::lookupIval(this, intrinsic, simdBaseType); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(ival), NI_AVX_CompareScalar, - simdBaseJitType, simdSize); - } - else - { - GenTree* clonedOp1 = nullptr; - op1 = impCloneExpr(op1, &clonedOp1, CHECK_SPILL_ALL, - nullptr DEBUGARG("Clone op1 for Sse2.CompareScalarGreaterThan")); - - switch (intrinsic) - { - case NI_SSE2_CompareScalarGreaterThan: - { - intrinsic = NI_SSE2_CompareScalarLessThan; - break; - } - - case NI_SSE2_CompareScalarGreaterThanOrEqual: - { - intrinsic = NI_SSE2_CompareScalarLessThanOrEqual; - break; - } - - case NI_SSE2_CompareScalarNotGreaterThan: - { - intrinsic = NI_SSE2_CompareScalarNotLessThan; - break; - } - - case NI_SSE2_CompareScalarNotGreaterThanOrEqual: - { - intrinsic = NI_SSE2_CompareScalarNotLessThanOrEqual; - break; - } - - default: - { - unreached(); - } - } - - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, simdBaseJitType, simdSize); - retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_SSE2_MoveScalar, simdBaseJitType, - simdSize); - } - break; - } - - case NI_SSE2_LoadFence: - case NI_SSE2_MemoryFence: + case NI_X86Base_LoadFence: + case NI_X86Base_MemoryFence: { assert(sig->numArgs == 0); assert(JITtype2varType(sig->retType) == TYP_VOID); @@ -4487,7 +4353,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_SSE2_StoreNonTemporal: + case NI_X86Base_StoreNonTemporal: { assert(sig->numArgs == 2); assert(JITtype2varType(sig->retType) == TYP_VOID); @@ -4498,7 +4364,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impPopStack().val; op1 = impPopStack().val; - retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, NI_SSE2_StoreNonTemporal, argJitType, 0); + retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, op1, op2, NI_X86Base_StoreNonTemporal, argJitType, 0); break; } diff --git a/src/coreclr/jit/importercalls.cpp 
b/src/coreclr/jit/importercalls.cpp index 776856ceff7175..1f4438ede7e143 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -5581,7 +5581,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, { if (!uns) { - hwIntrinsicId = NI_SSE_ConvertToInt32WithTruncation; + hwIntrinsicId = NI_X86Base_ConvertToInt32WithTruncation; } else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { @@ -5594,7 +5594,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, if (!uns) { - hwIntrinsicId = NI_SSE2_ConvertToInt32WithTruncation; + hwIntrinsicId = NI_X86Base_ConvertToInt32WithTruncation; } else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { @@ -5611,7 +5611,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, { if (!uns) { - hwIntrinsicId = NI_SSE_X64_ConvertToInt64WithTruncation; + hwIntrinsicId = NI_X86Base_X64_ConvertToInt64WithTruncation; } else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { @@ -5624,7 +5624,7 @@ GenTree* Compiler::impPrimitiveNamedIntrinsic(NamedIntrinsic intrinsic, if (!uns) { - hwIntrinsicId = NI_SSE2_X64_ConvertToInt64WithTruncation; + hwIntrinsicId = NI_X86Base_X64_ConvertToInt64WithTruncation; } else if (compOpportunisticallyDependsOn(InstructionSet_AVX512)) { @@ -9482,20 +9482,10 @@ GenTree* Compiler::impEstimateIntrinsic(CORINFO_METHOD_HANDLE method, simdType = TYP_SIMD16; intrinsicId = NI_AVX512_Reciprocal14Scalar; } - else if ((callType == TYP_FLOAT) && compExactlyDependsOn(InstructionSet_SSE)) + else if ((callType == TYP_FLOAT) && compExactlyDependsOn(InstructionSet_X86Base)) { - if (!IsBaselineSimdIsaSupported()) - { - // While the actual intrinsic only requires SSE, the - // ToScalar intrinsic asserts that the BaselineSimdIsa - // (SSE2) is supported to help simplify the overall logic - // it has to maintain - assert(intrinsicId == NI_Illegal); - break; - } - simdType = TYP_SIMD16; - intrinsicId = 
NI_SSE_ReciprocalScalar; + intrinsicId = NI_X86Base_ReciprocalScalar; } #elif defined(TARGET_ARM64) if (compExactlyDependsOn(InstructionSet_AdvSimd_Arm64)) @@ -9517,20 +9507,10 @@ GenTree* Compiler::impEstimateIntrinsic(CORINFO_METHOD_HANDLE method, simdType = TYP_SIMD16; intrinsicId = NI_AVX512_ReciprocalSqrt14Scalar; } - else if ((callType == TYP_FLOAT) && compExactlyDependsOn(InstructionSet_SSE)) + else if ((callType == TYP_FLOAT) && compExactlyDependsOn(InstructionSet_X86Base)) { - if (!IsBaselineSimdIsaSupported()) - { - // While the actual intrinsic only requires SSE, the - // ToScalar intrinsic asserts that the BaselineSimdIsa - // (SSE2) is supported to help simplify the overall logic - // it has to maintain - assert(intrinsicId == NI_Illegal); - break; - } - simdType = TYP_SIMD16; - intrinsicId = NI_SSE_ReciprocalSqrtScalar; + intrinsicId = NI_X86Base_ReciprocalSqrtScalar; } #elif defined(TARGET_ARM64) if (compExactlyDependsOn(InstructionSet_AdvSimd_Arm64)) @@ -9920,7 +9900,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method, } #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) - if (!isMagnitude && compOpportunisticallyDependsOn(InstructionSet_SSE2)) + if (!isMagnitude && compOpportunisticallyDependsOn(InstructionSet_X86Base)) { bool needsFixup = false; bool canHandle = false; @@ -9956,7 +9936,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method, { // Given the checks, op1 can safely be the cns and op2 the other node - intrinsicName = (callType == TYP_DOUBLE) ? NI_SSE2_MaxScalar : NI_SSE_MaxScalar; + intrinsicName = NI_X86Base_MaxScalar; // one is constant and we know its something we can handle, so pop both peeked values @@ -9997,7 +9977,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method, { // Given the checks, op1 can safely be the cns and op2 the other node - intrinsicName = (callType == TYP_DOUBLE) ? 
NI_SSE2_MinScalar : NI_SSE_MinScalar; + intrinsicName = NI_X86Base_MinScalar; // one is constant and we know its something we can handle, so pop both peeked values diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index d5a47bab27ab39..6ee28ccca22344 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -412,8 +412,6 @@ RELEASE_CONFIG_INTEGER(EnableLZCNT, "EnableLZCNT", RELEASE_CONFIG_INTEGER(EnablePCLMULQDQ, "EnablePCLMULQDQ", 1) // Allows PCLMULQDQ+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableVPCLMULQDQ, "EnableVPCLMULQDQ", 1) // Allows VPCLMULQDQ+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnablePOPCNT, "EnablePOPCNT", 1) // Allows POPCNT+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableSSE, "EnableSSE", 1) // Allows SSE+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableSSE2, "EnableSSE2", 1) // Allows SSE2+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableSSE3, "EnableSSE3", 1) // Allows SSE3+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableSSE3_4, "EnableSSE3_4", 1) // Allows SSE3+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableSSE41, "EnableSSE41", 1) // Allows SSE4.1+ hardware intrinsics to be disabled @@ -458,6 +456,10 @@ RELEASE_CONFIG_INTEGER(EnableAVX512F_VL, "EnableAVX512F_VL", // These have been superceded by EnableAVX512VBMI as you get all of them or none of them RELEASE_CONFIG_INTEGER(EnableAVX512VBMI_VL, "EnableAVX512VBMI_VL", 1) // Allows AVX512VBMI_VL+ hardware intrinsics to be disabled + +// These have been superceded by EnableHWIntrinsic as they are part of the baseline +RELEASE_CONFIG_INTEGER(EnableSSE, "EnableSSE", 1) // Allows SSE+ hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableSSE2, "EnableSSE2", 1) // Allows SSE2+ hardware intrinsics to be disabled #endif // clang-format on diff --git a/src/coreclr/jit/lowerxarch.cpp 
b/src/coreclr/jit/lowerxarch.cpp index 3f60c0cbeb1b71..580fb83f5fbbdd 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -899,7 +899,7 @@ void Lowering::LowerCast(GenTree* tree) // This creates the equivalent of the following C# code: // castOp = Sse.MaxScalar(srcVec, Vector128.Zero).ToScalar(); - NamedIntrinsic maxScalarIntrinsic = (srcType == TYP_FLOAT) ? NI_SSE_MaxScalar : NI_SSE2_MaxScalar; + NamedIntrinsic maxScalarIntrinsic = NI_X86Base_MaxScalar; GenTree* zero = comp->gtNewZeroConNode(TYP_SIMD16); GenTree* fixupVal = @@ -916,7 +916,7 @@ void Lowering::LowerCast(GenTree* tree) else { assert(comp->IsBaselineSimdIsaSupportedDebugOnly()); - assert(!TargetArchitecture::Is64Bit || comp->compIsaSupportedDebugOnly(InstructionSet_SSE2_X64)); + assert(!TargetArchitecture::Is64Bit || comp->compIsaSupportedDebugOnly(InstructionSet_X86Base_X64)); // We need to fix up NaN as well as handle possible overflow. Signed conversions // return int/long.MinValue for any overflow, which is correct for saturation of @@ -937,12 +937,12 @@ void Lowering::LowerCast(GenTree* tree) if (srcType == TYP_FLOAT) { maxFloatSimdVal->f32[0] = 2147483648.0f; - convertIntrinsic = NI_SSE_ConvertToInt32WithTruncation; + convertIntrinsic = NI_X86Base_ConvertToInt32WithTruncation; } else { maxFloatSimdVal->f64[0] = 2147483648.0; - convertIntrinsic = NI_SSE2_ConvertToInt32WithTruncation; + convertIntrinsic = NI_X86Base_ConvertToInt32WithTruncation; } break; } @@ -953,16 +953,16 @@ void Lowering::LowerCast(GenTree* tree) if (srcType == TYP_FLOAT) { maxFloatSimdVal->f32[0] = 4294967296.0f; - convertIntrinsic = comp->compOpportunisticallyDependsOn(InstructionSet_SSE_X64) - ? NI_SSE_X64_ConvertToInt64WithTruncation - : NI_SSE2_ConvertToVector128Int32WithTruncation; + convertIntrinsic = comp->compOpportunisticallyDependsOn(InstructionSet_X86Base_X64) + ? 
NI_X86Base_X64_ConvertToInt64WithTruncation + : NI_X86Base_ConvertToVector128Int32WithTruncation; } else { maxFloatSimdVal->f64[0] = 4294967296.0; - convertIntrinsic = comp->compOpportunisticallyDependsOn(InstructionSet_SSE2_X64) - ? NI_SSE2_X64_ConvertToInt64WithTruncation - : NI_SSE2_ConvertToVector128Int32WithTruncation; + convertIntrinsic = comp->compOpportunisticallyDependsOn(InstructionSet_X86Base_X64) + ? NI_X86Base_X64_ConvertToInt64WithTruncation + : NI_X86Base_ConvertToVector128Int32WithTruncation; } break; } @@ -973,12 +973,12 @@ void Lowering::LowerCast(GenTree* tree) if (srcType == TYP_FLOAT) { maxFloatSimdVal->f32[0] = 9223372036854775808.0f; - convertIntrinsic = NI_SSE_X64_ConvertToInt64WithTruncation; + convertIntrinsic = NI_X86Base_X64_ConvertToInt64WithTruncation; } else { maxFloatSimdVal->f64[0] = 9223372036854775808.0; - convertIntrinsic = NI_SSE2_X64_ConvertToInt64WithTruncation; + convertIntrinsic = NI_X86Base_X64_ConvertToInt64WithTruncation; } break; } @@ -989,12 +989,12 @@ void Lowering::LowerCast(GenTree* tree) if (srcType == TYP_FLOAT) { maxFloatSimdVal->f32[0] = 18446744073709551616.0f; - convertIntrinsic = NI_SSE_X64_ConvertToInt64WithTruncation; + convertIntrinsic = NI_X86Base_X64_ConvertToInt64WithTruncation; } else { maxFloatSimdVal->f64[0] = 18446744073709551616.0; - convertIntrinsic = NI_SSE2_X64_ConvertToInt64WithTruncation; + convertIntrinsic = NI_X86Base_X64_ConvertToInt64WithTruncation; } break; } @@ -1023,8 +1023,7 @@ void Lowering::LowerCast(GenTree* tree) // var fixupVal = Sse.And(srcVec, nanMask); // convertResult = Sse.ConvertToInt32WithTruncation(fixupVal); - NamedIntrinsic compareNaNIntrinsic = - (srcType == TYP_FLOAT) ? 
NI_SSE_CompareScalarOrdered : NI_SSE2_CompareScalarOrdered; + NamedIntrinsic compareNaNIntrinsic = NI_X86Base_CompareScalarOrdered; srcClone = comp->gtClone(srcVector); GenTree* nanMask = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, srcVector, srcClone, compareNaNIntrinsic, @@ -1049,7 +1048,7 @@ void Lowering::LowerCast(GenTree* tree) // This creates the equivalent of the following C# code: // var fixupVal = Sse.MaxScalar(srcVec, Vector128.Zero); - NamedIntrinsic maxScalarIntrinsic = (srcType == TYP_FLOAT) ? NI_SSE_MaxScalar : NI_SSE2_MaxScalar; + NamedIntrinsic maxScalarIntrinsic = NI_X86Base_MaxScalar; GenTree* zero = comp->gtNewZeroConNode(TYP_SIMD16); GenTree* fixupVal = @@ -1058,8 +1057,7 @@ void Lowering::LowerCast(GenTree* tree) castRange.InsertAtEnd(zero); castRange.InsertAtEnd(fixupVal); - if ((dstType == TYP_UINT) && ((convertIntrinsic == NI_SSE_X64_ConvertToInt64WithTruncation) || - (convertIntrinsic == NI_SSE2_X64_ConvertToInt64WithTruncation))) + if ((dstType == TYP_UINT) && (convertIntrinsic == NI_X86Base_X64_ConvertToInt64WithTruncation)) { // On x64, we can use long conversion to handle uint directly. convertResult = @@ -1076,8 +1074,7 @@ void Lowering::LowerCast(GenTree* tree) // This creates the equivalent of the following C# code: // var wrapVal = Sse.SubtractScalar(srcVec, maxFloatingValue); - NamedIntrinsic subtractIntrinsic = - (srcType == TYP_FLOAT) ? NI_SSE_SubtractScalar : NI_SSE2_SubtractScalar; + NamedIntrinsic subtractIntrinsic = NI_X86Base_SubtractScalar; // We're going to use maxFloatingValue twice, so replace the constant with a lclVar. castRange.InsertAtEnd(maxFloatingValue); @@ -1255,8 +1252,7 @@ void Lowering::LowerCast(GenTree* tree) // bool isOverflow = Sse.CompareScalarUnorderedGreaterThanOrEqual(srcVec, maxFloatingValue); // return isOverflow ? maxIntegralValue : convertResult; - NamedIntrinsic compareIntrinsic = (srcType == TYP_FLOAT) ? 
NI_SSE_CompareScalarUnorderedGreaterThanOrEqual - : NI_SSE2_CompareScalarUnorderedGreaterThanOrEqual; + NamedIntrinsic compareIntrinsic = NI_X86Base_CompareScalarUnorderedGreaterThanOrEqual; // These nodes were all created above but not used until now. castRange.InsertAtEnd(maxFloatingValue); @@ -1316,10 +1312,8 @@ void Lowering::LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIn switch (newIntrinsicId) { - case NI_SSE_COMISS: - case NI_SSE_UCOMISS: - case NI_SSE2_COMISD: - case NI_SSE2_UCOMISD: + case NI_X86Base_COMIS: + case NI_X86Base_UCOMIS: // In some cases we can generate better code if we swap the operands: // - If the condition is not one of the "preferred" floating point conditions we can swap // the operands and change the condition to avoid generating an extra JP/JNP branch. @@ -1551,7 +1545,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) assert(simdBaseType == TYP_FLOAT); assert(simdSize <= 16); - intrinsic = NI_SSE_AndNot; + intrinsic = NI_X86Base_AndNot; } userIntrin->ResetHWIntrinsicId(intrinsic, comp, op1, op2); @@ -2338,27 +2332,17 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_SSE2_CompareGreaterThan: + case NI_X86Base_CompareGreaterThan: + case NI_X86Base_CompareGreaterThanOrEqual: + case NI_X86Base_CompareNotGreaterThan: + case NI_X86Base_CompareNotGreaterThanOrEqual: { - if (node->GetSimdBaseType() != TYP_DOUBLE) + if (!varTypeIsFloating(node->GetSimdBaseType())) { assert(varTypeIsIntegral(node->GetSimdBaseType())); break; } - FALLTHROUGH; - } - - case NI_SSE_CompareGreaterThan: - case NI_SSE_CompareGreaterThanOrEqual: - case NI_SSE_CompareNotGreaterThan: - case NI_SSE_CompareNotGreaterThanOrEqual: - case NI_SSE2_CompareGreaterThanOrEqual: - case NI_SSE2_CompareNotGreaterThan: - case NI_SSE2_CompareNotGreaterThanOrEqual: - { - assert((node->GetSimdBaseType() == TYP_FLOAT) || (node->GetSimdBaseType() == TYP_DOUBLE)); - if 
(comp->compOpportunisticallyDependsOn(InstructionSet_AVX)) { break; @@ -2369,51 +2353,27 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { - case NI_SSE_CompareGreaterThan: + case NI_X86Base_CompareGreaterThan: { - newIntrinsicId = NI_SSE_CompareLessThan; + newIntrinsicId = NI_X86Base_CompareLessThan; break; } - case NI_SSE_CompareGreaterThanOrEqual: + case NI_X86Base_CompareGreaterThanOrEqual: { - newIntrinsicId = NI_SSE_CompareLessThanOrEqual; + newIntrinsicId = NI_X86Base_CompareLessThanOrEqual; break; } - case NI_SSE_CompareNotGreaterThan: + case NI_X86Base_CompareNotGreaterThan: { - newIntrinsicId = NI_SSE_CompareNotLessThan; + newIntrinsicId = NI_X86Base_CompareNotLessThan; break; } - case NI_SSE_CompareNotGreaterThanOrEqual: + case NI_X86Base_CompareNotGreaterThanOrEqual: { - newIntrinsicId = NI_SSE_CompareNotLessThanOrEqual; - break; - } - - case NI_SSE2_CompareGreaterThan: - { - newIntrinsicId = NI_SSE2_CompareLessThan; - break; - } - - case NI_SSE2_CompareGreaterThanOrEqual: - { - newIntrinsicId = NI_SSE2_CompareLessThanOrEqual; - break; - } - - case NI_SSE2_CompareNotGreaterThan: - { - newIntrinsicId = NI_SSE2_CompareNotLessThan; - break; - } - - case NI_SSE2_CompareNotGreaterThanOrEqual: - { - newIntrinsicId = NI_SSE2_CompareNotLessThanOrEqual; + newIntrinsicId = NI_X86Base_CompareNotLessThanOrEqual; break; } @@ -2431,11 +2391,11 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_SSE2_CompareLessThan: + case NI_X86Base_CompareLessThan: case NI_SSE42_CompareLessThan: case NI_AVX2_CompareLessThan: { - if (node->GetSimdBaseType() == TYP_DOUBLE) + if (varTypeIsFloating(node->GetSimdBaseType())) { break; } @@ -2446,9 +2406,9 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { - case NI_SSE2_CompareLessThan: + case NI_X86Base_CompareLessThan: { - newIntrinsicId = NI_SSE2_CompareGreaterThan; + newIntrinsicId = NI_X86Base_CompareGreaterThan; break; } @@ 
-2478,80 +2438,42 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_SSE_CompareScalarOrderedEqual: - LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FEQ); + case NI_X86Base_CompareScalarOrderedEqual: + LowerHWIntrinsicCC(node, NI_X86Base_COMIS, GenCondition::FEQ); break; - case NI_SSE_CompareScalarOrderedNotEqual: - LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FNEU); + case NI_X86Base_CompareScalarOrderedNotEqual: + LowerHWIntrinsicCC(node, NI_X86Base_COMIS, GenCondition::FNEU); break; - case NI_SSE_CompareScalarOrderedLessThan: - LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FLT); + case NI_X86Base_CompareScalarOrderedLessThan: + LowerHWIntrinsicCC(node, NI_X86Base_COMIS, GenCondition::FLT); break; - case NI_SSE_CompareScalarOrderedLessThanOrEqual: - LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FLE); + case NI_X86Base_CompareScalarOrderedLessThanOrEqual: + LowerHWIntrinsicCC(node, NI_X86Base_COMIS, GenCondition::FLE); break; - case NI_SSE_CompareScalarOrderedGreaterThan: - LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FGT); + case NI_X86Base_CompareScalarOrderedGreaterThan: + LowerHWIntrinsicCC(node, NI_X86Base_COMIS, GenCondition::FGT); break; - case NI_SSE_CompareScalarOrderedGreaterThanOrEqual: - LowerHWIntrinsicCC(node, NI_SSE_COMISS, GenCondition::FGE); + case NI_X86Base_CompareScalarOrderedGreaterThanOrEqual: + LowerHWIntrinsicCC(node, NI_X86Base_COMIS, GenCondition::FGE); break; - case NI_SSE_CompareScalarUnorderedEqual: - LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FEQ); + case NI_X86Base_CompareScalarUnorderedEqual: + LowerHWIntrinsicCC(node, NI_X86Base_UCOMIS, GenCondition::FEQ); break; - case NI_SSE_CompareScalarUnorderedNotEqual: - LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FNEU); + case NI_X86Base_CompareScalarUnorderedNotEqual: + LowerHWIntrinsicCC(node, NI_X86Base_UCOMIS, GenCondition::FNEU); break; - case NI_SSE_CompareScalarUnorderedLessThanOrEqual: - 
LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FLE); + case NI_X86Base_CompareScalarUnorderedLessThanOrEqual: + LowerHWIntrinsicCC(node, NI_X86Base_UCOMIS, GenCondition::FLE); break; - case NI_SSE_CompareScalarUnorderedLessThan: - LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FLT); + case NI_X86Base_CompareScalarUnorderedLessThan: + LowerHWIntrinsicCC(node, NI_X86Base_UCOMIS, GenCondition::FLT); break; - case NI_SSE_CompareScalarUnorderedGreaterThanOrEqual: - LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FGE); + case NI_X86Base_CompareScalarUnorderedGreaterThanOrEqual: + LowerHWIntrinsicCC(node, NI_X86Base_UCOMIS, GenCondition::FGE); break; - case NI_SSE_CompareScalarUnorderedGreaterThan: - LowerHWIntrinsicCC(node, NI_SSE_UCOMISS, GenCondition::FGT); - break; - - case NI_SSE2_CompareScalarOrderedEqual: - LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FEQ); - break; - case NI_SSE2_CompareScalarOrderedNotEqual: - LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FNEU); - break; - case NI_SSE2_CompareScalarOrderedLessThan: - LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FLT); - break; - case NI_SSE2_CompareScalarOrderedLessThanOrEqual: - LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FLE); - break; - case NI_SSE2_CompareScalarOrderedGreaterThan: - LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FGT); - break; - case NI_SSE2_CompareScalarOrderedGreaterThanOrEqual: - LowerHWIntrinsicCC(node, NI_SSE2_COMISD, GenCondition::FGE); - break; - - case NI_SSE2_CompareScalarUnorderedEqual: - LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FEQ); - break; - case NI_SSE2_CompareScalarUnorderedNotEqual: - LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FNEU); - break; - case NI_SSE2_CompareScalarUnorderedLessThanOrEqual: - LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FLE); - break; - case NI_SSE2_CompareScalarUnorderedLessThan: - LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FLT); - 
break; - case NI_SSE2_CompareScalarUnorderedGreaterThanOrEqual: - LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FGE); - break; - case NI_SSE2_CompareScalarUnorderedGreaterThan: - LowerHWIntrinsicCC(node, NI_SSE2_UCOMISD, GenCondition::FGT); + case NI_X86Base_CompareScalarUnorderedGreaterThan: + LowerHWIntrinsicCC(node, NI_X86Base_UCOMIS, GenCondition::FGT); break; case NI_SSE41_TestC: @@ -3294,8 +3216,8 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm { assert(simdSize == 16); - cmpIntrinsic = NI_SSE2_CompareEqual; - mskIntrinsic = NI_SSE2_MoveMask; + cmpIntrinsic = NI_X86Base_CompareEqual; + mskIntrinsic = NI_X86Base_MoveMask; mskConstant = 0xFFFF; } break; @@ -3323,10 +3245,10 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm } else { - cmpIntrinsic = NI_SSE2_CompareEqual; + cmpIntrinsic = NI_X86Base_CompareEqual; cmpJitType = CORINFO_TYPE_UINT; } - mskIntrinsic = NI_SSE2_MoveMask; + mskIntrinsic = NI_X86Base_MoveMask; mskConstant = 0xFFFF; } break; @@ -3345,8 +3267,8 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm } else { - cmpIntrinsic = NI_SSE_CompareEqual; - mskIntrinsic = NI_SSE_MoveMask; + cmpIntrinsic = NI_X86Base_CompareEqual; + mskIntrinsic = NI_X86Base_MoveMask; if (simdSize == 16) { @@ -3380,8 +3302,8 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm { assert(simdSize == 16); - cmpIntrinsic = NI_SSE2_CompareEqual; - mskIntrinsic = NI_SSE2_MoveMask; + cmpIntrinsic = NI_X86Base_CompareEqual; + mskIntrinsic = NI_X86Base_MoveMask; mskConstant = 0x3; } break; @@ -3773,8 +3695,7 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) break; } - case NI_SSE_CompareEqual: - case NI_SSE2_CompareEqual: + case NI_X86Base_CompareEqual: case NI_SSE41_CompareEqual: case NI_AVX_CompareEqual: case NI_AVX2_CompareEqual: @@ -3783,8 +3704,7 @@ GenTree* 
Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) break; } - case NI_SSE_CompareGreaterThan: - case NI_SSE2_CompareGreaterThan: + case NI_X86Base_CompareGreaterThan: case NI_SSE42_CompareGreaterThan: case NI_AVX_CompareGreaterThan: case NI_AVX2_CompareGreaterThan: @@ -3793,16 +3713,14 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) break; } - case NI_SSE_CompareGreaterThanOrEqual: - case NI_SSE2_CompareGreaterThanOrEqual: + case NI_X86Base_CompareGreaterThanOrEqual: case NI_AVX_CompareGreaterThanOrEqual: { cndId = NI_AVX512_CompareGreaterThanOrEqualMask; break; } - case NI_SSE_CompareLessThan: - case NI_SSE2_CompareLessThan: + case NI_X86Base_CompareLessThan: case NI_SSE42_CompareLessThan: case NI_AVX_CompareLessThan: case NI_AVX2_CompareLessThan: @@ -3811,64 +3729,56 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) break; } - case NI_SSE_CompareLessThanOrEqual: - case NI_SSE2_CompareLessThanOrEqual: + case NI_X86Base_CompareLessThanOrEqual: case NI_AVX_CompareLessThanOrEqual: { cndId = NI_AVX512_CompareLessThanOrEqualMask; break; } - case NI_SSE_CompareNotEqual: - case NI_SSE2_CompareNotEqual: + case NI_X86Base_CompareNotEqual: case NI_AVX_CompareNotEqual: { cndId = NI_AVX512_CompareNotEqualMask; break; } - case NI_SSE_CompareNotGreaterThan: - case NI_SSE2_CompareNotGreaterThan: + case NI_X86Base_CompareNotGreaterThan: case NI_AVX_CompareNotGreaterThan: { cndId = NI_AVX512_CompareGreaterThanMask; break; } - case NI_SSE_CompareNotGreaterThanOrEqual: - case NI_SSE2_CompareNotGreaterThanOrEqual: + case NI_X86Base_CompareNotGreaterThanOrEqual: case NI_AVX_CompareNotGreaterThanOrEqual: { cndId = NI_AVX512_CompareNotGreaterThanOrEqualMask; break; } - case NI_SSE_CompareNotLessThan: - case NI_SSE2_CompareNotLessThan: + case NI_X86Base_CompareNotLessThan: case NI_AVX_CompareNotLessThan: { cndId = NI_AVX512_CompareNotLessThanMask; break; } - case NI_SSE_CompareNotLessThanOrEqual: - case 
NI_SSE2_CompareNotLessThanOrEqual: + case NI_X86Base_CompareNotLessThanOrEqual: case NI_AVX_CompareNotLessThanOrEqual: { cndId = NI_AVX512_CompareNotLessThanOrEqualMask; break; } - case NI_SSE_CompareOrdered: - case NI_SSE2_CompareOrdered: + case NI_X86Base_CompareOrdered: case NI_AVX_CompareOrdered: { cndId = NI_AVX512_CompareOrderedMask; break; } - case NI_SSE_CompareUnordered: - case NI_SSE2_CompareUnordered: + case NI_X86Base_CompareUnordered: case NI_AVX_CompareUnordered: { cndId = NI_AVX512_CompareUnorderedMask; @@ -4379,7 +4289,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) break; } - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); // We will be constructing the following parts: // ... @@ -4406,7 +4316,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); - tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, NI_SSE2_UnpackLow, CORINFO_TYPE_UBYTE, + tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, NI_X86Base_UnpackLow, CORINFO_TYPE_UBYTE, simdSize); BlockRange().InsertAfter(tmp2, tmp1); LowerNode(tmp1); @@ -4434,7 +4344,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp1 = Sse2.UnpackLow(tmp1, tmp2); // ... 
- assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); node->Op(1) = tmp1; LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); @@ -4444,7 +4354,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); - tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, NI_SSE2_UnpackLow, CORINFO_TYPE_USHORT, + tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, NI_X86Base_UnpackLow, CORINFO_TYPE_USHORT, simdSize); BlockRange().InsertAfter(tmp2, tmp1); LowerNode(tmp1); @@ -4466,12 +4376,12 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // ... // return Sse2.Shuffle(tmp1, 0x00); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); idx = comp->gtNewIconNode(0x00, TYP_INT); BlockRange().InsertAfter(tmp1, idx); - node->ResetHWIntrinsicId(NI_SSE2_Shuffle, tmp1, idx); + node->ResetHWIntrinsicId(NI_X86Base_Shuffle, tmp1, idx); node->SetSimdBaseJitType(CORINFO_TYPE_UINT); break; } @@ -4515,7 +4425,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // var tmp2 = tmp1; // return Sse.Shuffle(tmp1, tmp2, 0x00); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE)); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); node->Op(1) = tmp1; LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); @@ -4528,7 +4438,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) idx = comp->gtNewIconNode(0x00, TYP_INT); BlockRange().InsertAfter(tmp2, idx); - node->ResetHWIntrinsicId(NI_SSE_Shuffle, comp, tmp1, tmp2, idx); + node->ResetHWIntrinsicId(NI_X86Base_Shuffle, comp, tmp1, tmp2, idx); break; } @@ -4553,7 +4463,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) break; } - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); + 
assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); // We will be constructing the following parts: // ... @@ -4578,7 +4488,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); - node->ResetHWIntrinsicId(NI_SSE2_UnpackLow, tmp1, tmp2); + node->ResetHWIntrinsicId(NI_X86Base_UnpackLow, tmp1, tmp2); break; } @@ -4684,8 +4594,8 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) if ((simdBaseType == TYP_SHORT) || (simdBaseType == TYP_USHORT)) { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - insIntrinsic = NI_SSE2_Insert; + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); + insIntrinsic = NI_X86Base_Insert; } else if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) { @@ -4744,7 +4654,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) } assert((simdBaseType != TYP_SHORT) && (simdBaseType != TYP_USHORT)); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); GenTree* op[16]; op[0] = tmp1; @@ -4793,18 +4703,18 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) size_t P = N + 2; size_t Q = N + 3; - tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[N], op[O], NI_SSE2_UnpackLow, CORINFO_TYPE_UBYTE, - simdSize); + tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[N], op[O], NI_X86Base_UnpackLow, + CORINFO_TYPE_UBYTE, simdSize); BlockRange().InsertAfter(LIR::LastNode(op[N], op[O]), tmp1); LowerNode(tmp1); - tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[P], op[Q], NI_SSE2_UnpackLow, CORINFO_TYPE_UBYTE, - simdSize); + tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[P], op[Q], NI_X86Base_UnpackLow, + CORINFO_TYPE_UBYTE, simdSize); BlockRange().InsertAfter(LIR::LastNode(op[P], op[Q]), tmp2); LowerNode(tmp2); - tmp3 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, NI_SSE2_UnpackLow, 
CORINFO_TYPE_USHORT, - simdSize); + tmp3 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, tmp2, NI_X86Base_UnpackLow, + CORINFO_TYPE_USHORT, simdSize); BlockRange().InsertAfter(LIR::LastNode(tmp1, tmp2), tmp3); LowerNode(tmp3); @@ -4842,17 +4752,17 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp2 = Sse2.UnpackLow(opP, opQ); // return Sse2.UnpackLow(tmp1, tmp2); - tmp1 = - comp->gtNewSimdHWIntrinsicNode(simdType, op[0], op[1], NI_SSE2_UnpackLow, CORINFO_TYPE_UINT, simdSize); + tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[0], op[1], NI_X86Base_UnpackLow, CORINFO_TYPE_UINT, + simdSize); BlockRange().InsertAfter(LIR::LastNode(op[0], op[1]), tmp1); LowerNode(tmp1); - tmp2 = - comp->gtNewSimdHWIntrinsicNode(simdType, op[2], op[3], NI_SSE2_UnpackLow, CORINFO_TYPE_UINT, simdSize); + tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[2], op[3], NI_X86Base_UnpackLow, CORINFO_TYPE_UINT, + simdSize); BlockRange().InsertAfter(LIR::LastNode(op[2], op[3]), tmp2); LowerNode(tmp2); - node->ResetHWIntrinsicId(NI_SSE2_UnpackLow, tmp1, tmp2); + node->ResetHWIntrinsicId(NI_X86Base_UnpackLow, tmp1, tmp2); node->SetSimdBaseJitType(CORINFO_TYPE_ULONG); break; } @@ -4971,7 +4881,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp2 = Sse.UnpackLow(opP, opQ); // return Sse.MoveLowToHigh(tmp1, tmp2); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE)); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); GenTree* op[4]; op[0] = tmp1; @@ -4984,15 +4894,17 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) LowerNode(op[N]); } - tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[0], op[1], NI_SSE_UnpackLow, simdBaseJitType, simdSize); + tmp1 = + comp->gtNewSimdHWIntrinsicNode(simdType, op[0], op[1], NI_X86Base_UnpackLow, simdBaseJitType, simdSize); BlockRange().InsertAfter(LIR::LastNode(op[0], op[1]), tmp1); LowerNode(tmp1); - tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, op[2], op[3], 
NI_SSE_UnpackLow, simdBaseJitType, simdSize); + tmp2 = + comp->gtNewSimdHWIntrinsicNode(simdType, op[2], op[3], NI_X86Base_UnpackLow, simdBaseJitType, simdSize); BlockRange().InsertAfter(LIR::LastNode(op[2], op[3]), tmp2); LowerNode(tmp2); - node->ResetHWIntrinsicId(NI_SSE_MoveLowToHigh, tmp1, tmp2); + node->ResetHWIntrinsicId(NI_X86Base_MoveLowToHigh, tmp1, tmp2); break; } @@ -5036,12 +4948,12 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // var tmp2 = Vector128.CreateScalarUnsafe(op2); // return Sse.UnpackLow(tmp1, tmp2); - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); tmp2 = InsertNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, simdBaseJitType, 16); LowerNode(tmp2); - node->ResetHWIntrinsicId(NI_SSE2_UnpackLow, tmp1, tmp2); + node->ResetHWIntrinsicId(NI_X86Base_UnpackLow, tmp1, tmp2); break; } @@ -5350,7 +5262,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) case TYP_SHORT: case TYP_USHORT: { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); break; } @@ -5506,7 +5418,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) case TYP_SHORT: case TYP_USHORT: { - resIntrinsic = NI_SSE2_Extract; + resIntrinsic = NI_X86Base_Extract; break; } @@ -5617,7 +5529,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) case TYP_FLOAT: case TYP_SHORT: case TYP_USHORT: - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); break; default: @@ -5837,7 +5749,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) // ... 
// node = Sse.MoveScalar(op1, op2); - result->ResetHWIntrinsicId(NI_SSE_MoveScalar, op1, tmp1); + result->ResetHWIntrinsicId(NI_X86Base_MoveScalar, op1, tmp1); } else { @@ -5919,7 +5831,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) std::swap(tmp1, tmp2); } - op1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp1, tmp2, idx, NI_SSE_Shuffle, + op1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp1, tmp2, idx, NI_X86Base_Shuffle, CORINFO_TYPE_FLOAT, 16); BlockRange().InsertAfter(idx, op1); LowerNode(op1); @@ -5932,7 +5844,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) std::swap(op1, op2); } - result->ChangeHWIntrinsicId(NI_SSE_Shuffle, op1, op2, idx); + result->ChangeHWIntrinsicId(NI_X86Base_Shuffle, op1, op2, idx); } break; } @@ -5960,7 +5872,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) { idx = comp->gtNewIconNode(imm8); BlockRange().InsertBefore(result, idx); - result->ChangeHWIntrinsicId(NI_SSE2_Insert, op1, op3, idx); + result->ChangeHWIntrinsicId(NI_X86Base_Insert, op1, op3, idx); break; } @@ -5978,7 +5890,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) tmp1 = InsertNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op3, CORINFO_TYPE_DOUBLE, 16); LowerNode(tmp1); - result->ResetHWIntrinsicId((imm8 == 0) ? NI_SSE2_MoveScalar : NI_SSE2_UnpackLow, op1, tmp1); + result->ResetHWIntrinsicId((imm8 == 0) ? 
NI_X86Base_MoveScalar : NI_X86Base_UnpackLow, op1, tmp1); break; } @@ -6153,7 +6065,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) } else { - assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_X86Base)); switch (simdBaseType) { @@ -6164,7 +6076,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSSE3)) { - shuffle = NI_SSE2_ShuffleLow; + shuffle = NI_X86Base_ShuffleLow; } break; } @@ -6237,7 +6149,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) if ((simdSize == 8) || !comp->compOpportunisticallyDependsOn(InstructionSet_SSE3)) { // We also do this for simdSize == 8 to ensure we broadcast the result as expected - shuffle = NI_SSE_Shuffle; + shuffle = NI_X86Base_Shuffle; } break; } @@ -6288,7 +6200,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) horizontalAdd = NI_SSE3_HorizontalAdd; // We need to ensure we broadcast the result as expected - shuffle = NI_SSE2_Shuffle; + shuffle = NI_X86Base_Shuffle; break; } @@ -6551,7 +6463,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // tmp2 = Isa.Shuffle(tmp1, shuffleConst); // ... 
- tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp2, idx, NI_SSE2_ShuffleLow, simdBaseJitType, + tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp2, idx, NI_X86Base_ShuffleLow, simdBaseJitType, simdSize); BlockRange().InsertAfter(idx, tmp2); LowerNode(tmp2); @@ -6559,7 +6471,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) idx = comp->gtNewIconNode(shuffleConst, TYP_INT); BlockRange().InsertAfter(tmp2, idx); - tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp2, idx, NI_SSE2_ShuffleHigh, simdBaseJitType, + tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp2, idx, NI_X86Base_ShuffleHigh, simdBaseJitType, simdSize); } else @@ -6579,7 +6491,7 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // tmp2 = Isa.Shuffle(tmp1, shuffleConst); // ... - tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp2, idx, NI_SSE2_Shuffle, CORINFO_TYPE_INT, + tmp2 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp2, idx, NI_X86Base_Shuffle, CORINFO_TYPE_INT, simdSize); } } @@ -7821,7 +7733,7 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) LowerNode(op1); } - intrinsicId = varTypeIsByte(node) ? NI_SSE41_Extract : NI_SSE2_Extract; + intrinsicId = varTypeIsByte(node) ? 
NI_SSE41_Extract : NI_X86Base_Extract; GenTree* zero = comp->gtNewZeroConNode(TYP_INT); BlockRange().InsertBefore(hwintrinsic, zero); @@ -7835,10 +7747,10 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) break; } - case NI_SSE2_ConvertToInt32: - case NI_SSE2_ConvertToUInt32: - case NI_SSE2_X64_ConvertToInt64: - case NI_SSE2_X64_ConvertToUInt64: + case NI_X86Base_ConvertToInt32: + case NI_X86Base_ConvertToUInt32: + case NI_X86Base_X64_ConvertToInt64: + case NI_X86Base_X64_ConvertToUInt64: case NI_AVX2_ConvertToInt32: case NI_AVX2_ConvertToUInt32: { @@ -7885,14 +7797,14 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) else { // TODO-XArch-CQ: We really should specially handle TYP_DOUBLE here but - // it requires handling GetElement(1) and GT_STOREIND as NI_SSE2_StoreHigh + // it requires handling GetElement(1) and GT_STOREIND as NI_X86Base_StoreHigh assert(!isContainable); } } break; } - case NI_SSE2_Extract: + case NI_X86Base_Extract: case NI_SSE41_Extract: case NI_SSE41_X64_Extract: case NI_AVX_ExtractVector128: @@ -7908,7 +7820,7 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) isContainable = HWIntrinsicInfo::isImmOp(intrinsicId, lastOp) && lastOp->IsCnsIntOrI() && (genTypeSize(simdBaseType) == genTypeSize(node)); - if (isContainable && (intrinsicId == NI_SSE2_Extract)) + if (isContainable && (intrinsicId == NI_X86Base_Extract)) { // The encoding that supports containment is SSE4.1 only isContainable = comp->compOpportunisticallyDependsOn(InstructionSet_SSE41); @@ -8911,8 +8823,8 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre goto SIZE_FROM_TUPLE_TYPE; } - case NI_SSE2_ShiftLeftLogical128BitLane: - case NI_SSE2_ShiftRightLogical128BitLane: + case NI_X86Base_ShiftLeftLogical128BitLane: + case NI_X86Base_ShiftRightLogical128BitLane: case NI_AVX2_ShiftLeftLogical128BitLane: case NI_AVX2_ShiftRightLogical128BitLane: { @@ -8925,9 +8837,9 @@ bool 
Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre goto SIZE_FROM_TUPLE_TYPE; } - case NI_SSE2_ShiftLeftLogical: - case NI_SSE2_ShiftRightArithmetic: - case NI_SSE2_ShiftRightLogical: + case NI_X86Base_ShiftLeftLogical: + case NI_X86Base_ShiftRightArithmetic: + case NI_X86Base_ShiftRightLogical: case NI_AVX2_ShiftLeftLogical: case NI_AVX2_ShiftRightArithmetic: case NI_AVX2_ShiftRightLogical: @@ -8962,7 +8874,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre goto SIZE_FROM_TUPLE_TYPE; } - case NI_SSE2_Insert: + case NI_X86Base_Insert: case NI_SSE41_Insert: case NI_SSE41_X64_Insert: { @@ -9228,8 +9140,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre return false; } - case NI_SSE_LoadAlignedVector128: - case NI_SSE2_LoadAlignedVector128: + case NI_X86Base_LoadAlignedVector128: case NI_AVX_LoadAlignedVector256: case NI_AVX512_LoadAlignedVector512: { @@ -9244,8 +9155,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre ((!comp->canUseVexEncoding() && expectedSize == genTypeSize(TYP_SIMD16)) || !comp->opts.MinOpts())); } - case NI_SSE_LoadScalarVector128: - case NI_SSE2_LoadScalarVector128: + case NI_X86Base_LoadScalarVector128: { // These take only pointer operands. 
assert(hwintrinsic->OperIsMemoryLoad()); @@ -9593,10 +9503,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { switch (intrinsicId) { - case NI_SSE_ReciprocalScalar: - case NI_SSE_ReciprocalSqrtScalar: - case NI_SSE_SqrtScalar: - case NI_SSE2_SqrtScalar: + case NI_X86Base_ReciprocalScalar: + case NI_X86Base_ReciprocalSqrtScalar: + case NI_X86Base_SqrtScalar: case NI_SSE41_CeilingScalar: case NI_SSE41_FloorScalar: case NI_SSE41_RoundCurrentDirectionScalar: @@ -9617,10 +9526,10 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) return; } - case NI_SSE2_ConvertToInt32: - case NI_SSE2_X64_ConvertToInt64: - case NI_SSE2_ConvertToUInt32: - case NI_SSE2_X64_ConvertToUInt64: + case NI_X86Base_ConvertToInt32: + case NI_X86Base_X64_ConvertToInt64: + case NI_X86Base_ConvertToUInt32: + case NI_X86Base_X64_ConvertToUInt64: case NI_AVX2_ConvertToInt32: case NI_AVX2_ConvertToUInt32: { @@ -9906,7 +9815,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { - case NI_SSE2_Extract: + case NI_X86Base_Extract: case NI_AVX_ExtractVector128: case NI_AVX2_ExtractVector128: case NI_AVX512_ExtractVector128: @@ -9930,9 +9839,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) FALLTHROUGH; } - case NI_SSE2_Shuffle: - case NI_SSE2_ShuffleHigh: - case NI_SSE2_ShuffleLow: + case NI_X86Base_Shuffle: + case NI_X86Base_ShuffleHigh: + case NI_X86Base_ShuffleLow: case NI_AVX2_Permute4x64: case NI_AVX2_ShuffleHigh: case NI_AVX2_ShuffleLow: @@ -9968,9 +9877,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } case NI_AVX_Permute: - case NI_SSE2_ShiftLeftLogical: - case NI_SSE2_ShiftRightArithmetic: - case NI_SSE2_ShiftRightLogical: + case NI_X86Base_ShiftLeftLogical: + case NI_X86Base_ShiftRightArithmetic: + case NI_X86Base_ShiftRightLogical: case NI_AVX2_ShiftLeftLogical: case NI_AVX2_ShiftRightArithmetic: case NI_AVX2_ShiftRightLogical: @@ -10013,8 +9922,8 @@ void 
Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_SSE2_ShiftLeftLogical128BitLane: - case NI_SSE2_ShiftRightLogical128BitLane: + case NI_X86Base_ShiftLeftLogical128BitLane: + case NI_X86Base_ShiftRightLogical128BitLane: case NI_AVX2_ShiftLeftLogical128BitLane: case NI_AVX2_ShiftRightLogical128BitLane: case NI_AVX512_ShiftLeftLogical128BitLane: @@ -10477,8 +10386,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) unsigned actualMaskBaseSize = actualMaskSize / (genTypeSize(node->TypeGet()) / 16); - NamedIntrinsic op2AdjustedIntrinsicId = NI_Illegal; - CorInfoType op2AdjustedSimdBaseJitType = CORINFO_TYPE_UNDEF; + CorInfoType op2AdjustedSimdBaseJitType = CORINFO_TYPE_UNDEF; if (actualMaskBaseSize != expectedMaskBaseSize) { @@ -10492,14 +10400,10 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Vector256_ToVector512Unsafe: case NI_Vector512_GetLower: case NI_Vector512_GetLower128: - case NI_SSE_And: - case NI_SSE_AndNot: - case NI_SSE_Or: - case NI_SSE_Xor: - case NI_SSE2_And: - case NI_SSE2_AndNot: - case NI_SSE2_Or: - case NI_SSE2_Xor: + case NI_X86Base_And: + case NI_X86Base_AndNot: + case NI_X86Base_Or: + case NI_X86Base_Xor: case NI_AVX_And: case NI_AVX_AndNot: case NI_AVX_BroadcastVector128ToVector256: @@ -10514,38 +10418,19 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX2_InsertVector128: case NI_AVX2_Or: case NI_AVX2_Xor: - case NI_AVX512F_And: - case NI_AVX512F_AndNot: - case NI_AVX512F_BroadcastVector128ToVector512: - case NI_AVX512F_BroadcastVector256ToVector512: - case NI_AVX512F_ExtractVector128: - case NI_AVX512F_ExtractVector256: - case NI_AVX512F_InsertVector128: - case NI_AVX512F_InsertVector256: - case NI_AVX512F_Or: - case NI_AVX512F_Shuffle4x128: - case NI_AVX512F_TernaryLogic: - case NI_AVX512F_Xor: - case NI_AVX512F_VL_Shuffle2x128: - case NI_AVX512F_VL_TernaryLogic: - case NI_AVX512DQ_And: - case NI_AVX512DQ_AndNot: - case 
NI_AVX512DQ_BroadcastVector128ToVector512: - case NI_AVX512DQ_BroadcastVector256ToVector512: - case NI_AVX512DQ_ExtractVector128: - case NI_AVX512DQ_ExtractVector256: - case NI_AVX512DQ_InsertVector128: - case NI_AVX512DQ_InsertVector256: - case NI_AVX512DQ_Or: - case NI_AVX512DQ_Xor: - case NI_AVX10v1_Shuffle2x128: - case NI_AVX10v1_TernaryLogic: - case NI_AVX10v1_V512_BroadcastVector128ToVector512: - case NI_AVX10v1_V512_BroadcastVector256ToVector512: - case NI_AVX10v1_V512_ExtractVector128: - case NI_AVX10v1_V512_ExtractVector256: - case NI_AVX10v1_V512_InsertVector128: - case NI_AVX10v1_V512_InsertVector256: + case NI_AVX512_And: + case NI_AVX512_AndNot: + case NI_AVX512_BroadcastVector128ToVector512: + case NI_AVX512_BroadcastVector256ToVector512: + case NI_AVX512_ExtractVector128: + case NI_AVX512_ExtractVector256: + case NI_AVX512_InsertVector128: + case NI_AVX512_InsertVector256: + case NI_AVX512_Or: + case NI_AVX512_Shuffle2x128: + case NI_AVX512_Shuffle4x128: + case NI_AVX512_TernaryLogic: + case NI_AVX512_Xor: { // Some intrinsics are effectively bitwise operations and so we // can freely update them to match the size of the actual mask @@ -10557,38 +10442,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if (op2SimdBaseType == TYP_FLOAT) { op2AdjustedSimdBaseJitType = CORINFO_TYPE_DOUBLE; - - switch (op2IntrinsicId) - { - case NI_SSE_And: - { - op2AdjustedIntrinsicId = NI_SSE2_And; - break; - } - - case NI_SSE_AndNot: - { - op2AdjustedIntrinsicId = NI_SSE2_AndNot; - break; - } - - case NI_SSE_Or: - { - op2AdjustedIntrinsicId = NI_SSE2_Or; - break; - } - - case NI_SSE_Xor: - { - op2AdjustedIntrinsicId = NI_SSE2_Xor; - break; - } - - default: - { - break; - } - } } else if (op2SimdBaseType == TYP_INT) { @@ -10608,38 +10461,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if (op2SimdBaseType == TYP_DOUBLE) { op2AdjustedSimdBaseJitType = CORINFO_TYPE_FLOAT; - - switch (op2IntrinsicId) - { - case NI_SSE2_And: - { - 
op2AdjustedIntrinsicId = NI_SSE_And; - break; - } - - case NI_SSE2_AndNot: - { - op2AdjustedIntrinsicId = NI_SSE_AndNot; - break; - } - - case NI_SSE2_Or: - { - op2AdjustedIntrinsicId = NI_SSE_Or; - break; - } - - case NI_SSE2_Xor: - { - op2AdjustedIntrinsicId = NI_SSE_Xor; - break; - } - - default: - { - break; - } - } } else if (op2SimdBaseType == TYP_LONG) { @@ -10667,10 +10488,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) ins = HWIntrinsicInfo::lookupIns(op2IntrinsicId, op2SimdBaseType, comp); unsigned expectedMaskBaseSize = CodeGenInterface::instKMaskBaseSize(ins); } - else - { - assert(op2AdjustedIntrinsicId == NI_Illegal); - } unsigned expectedMaskSize = expectedMaskBaseSize * (genTypeSize(op2->TypeGet()) / 16); @@ -10683,11 +10500,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) else if (op2AdjustedSimdBaseJitType != CORINFO_TYPE_UNDEF) { op2->AsHWIntrinsic()->SetSimdBaseJitType(op2AdjustedSimdBaseJitType); - - if (op2AdjustedIntrinsicId != NI_Illegal) - { - op2->AsHWIntrinsic()->ChangeHWIntrinsicId(op2AdjustedIntrinsicId); - } } } @@ -10796,9 +10608,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { switch (intrinsicId) { - case NI_SSE_Shuffle: - case NI_SSE2_Insert: - case NI_SSE2_Shuffle: + case NI_X86Base_Shuffle: + case NI_X86Base_Insert: case NI_SSSE3_AlignRight: case NI_SSE41_Blend: case NI_SSE41_DotProduct: diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index dbf8f0978b11db..9dab10a123a5b4 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -959,7 +959,7 @@ regMaskTP LinearScan::getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node) #ifdef TARGET_XARCH switch (node->GetHWIntrinsicId()) { - case NI_SSE2_MaskMove: + case NI_X86Base_MaskMove: // maskmovdqu uses edi as the implicit address register. 
// Although it is set as the srcCandidate on the address, if there is also a fixed // assignment for the definition of the address, resolveConflictingDefAndUse() may diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index b93051c5d7b2e1..81a302bc36d7e8 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2378,7 +2378,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou break; } - case NI_SSE2_MaskMove: + case NI_X86Base_MaskMove: { assert(numArgs == 3); assert(!isRMW); diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index d0a86c8cdf4e28..a6792d4eb8b778 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -13223,7 +13223,7 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree) switch (intrinsicId) { #ifdef TARGET_XARCH - case NI_SSE2_MaskMove: + case NI_X86Base_MaskMove: case NI_AVX_MaskStore: case NI_AVX2_MaskStore: case NI_AVX_MaskLoad: diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index 9cf0feec10d2b2..1b9a1dd5d4df35 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -202,11 +202,6 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_GFNI_V512 => (Gfni | Avx512), // Baseline ISAs - they're always available - InstructionSet.X64_SSE => 0, - InstructionSet.X64_SSE_X64 => 0, - InstructionSet.X64_SSE2 => 0, - InstructionSet.X64_SSE2_X64 => 0, - InstructionSet.X64_X86Base => 0, InstructionSet.X64_X86Base_X64 => 0, diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index 2ead9f342c1678..121826f6e12753 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ 
b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -391,7 +391,7 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, case TargetArchitecture.X64: case TargetArchitecture.X86: { - Debug.Assert(InstructionSet.X86_SSE2 == InstructionSet.X64_SSE2); + Debug.Assert(InstructionSet.X86_X86Base == InstructionSet.X64_X86Base); Debug.Assert(InstructionSet.X86_AVX2 == InstructionSet.X64_AVX2); Debug.Assert(InstructionSet.X86_AVX512 == InstructionSet.X64_AVX512); @@ -402,7 +402,7 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, // We only want one size supported for Vector and we want the other sizes explicitly // unsupported to ensure we throw away the given methods if runtime picks a larger size - Debug.Assert(supportedInstructionSets.HasInstructionSet(InstructionSet.X86_SSE2)); + Debug.Assert(supportedInstructionSets.HasInstructionSet(InstructionSet.X86_X86Base)); Debug.Assert((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 128)); supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT128); diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index adc572ce81a754..83550669bd5c38 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -12,8 +12,6 @@ namespace Internal.ReadyToRunConstants { public enum ReadyToRunInstructionSet { - Sse=1, - Sse2=2, Sse3=3, Ssse3=4, Sse41=5, diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 0ec9eca25e89e6..485aa9b7cbe627 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -73,10 +73,6 @@ public static class ReadyToRunInstructionSetHelper { 
case InstructionSet.X64_X86Base: return ReadyToRunInstructionSet.X86Base; case InstructionSet.X64_X86Base_X64: return ReadyToRunInstructionSet.X86Base; - case InstructionSet.X64_SSE: return ReadyToRunInstructionSet.Sse; - case InstructionSet.X64_SSE_X64: return ReadyToRunInstructionSet.Sse; - case InstructionSet.X64_SSE2: return ReadyToRunInstructionSet.Sse2; - case InstructionSet.X64_SSE2_X64: return ReadyToRunInstructionSet.Sse2; case InstructionSet.X64_SSE3: return ReadyToRunInstructionSet.Sse3; case InstructionSet.X64_SSE3_X64: return ReadyToRunInstructionSet.Sse3; case InstructionSet.X64_SSSE3: return ReadyToRunInstructionSet.Ssse3; @@ -140,10 +136,6 @@ public static class ReadyToRunInstructionSetHelper { case InstructionSet.X86_X86Base: return ReadyToRunInstructionSet.X86Base; case InstructionSet.X86_X86Base_X64: return null; - case InstructionSet.X86_SSE: return ReadyToRunInstructionSet.Sse; - case InstructionSet.X86_SSE_X64: return null; - case InstructionSet.X86_SSE2: return ReadyToRunInstructionSet.Sse2; - case InstructionSet.X86_SSE2_X64: return null; case InstructionSet.X86_SSE3: return ReadyToRunInstructionSet.Sse3; case InstructionSet.X86_SSE3_X64: return null; case InstructionSet.X86_SSSE3: return ReadyToRunInstructionSet.Ssse3; diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index d5974dd014f03a..1716361c38c64c 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -4216,8 +4216,8 @@ private uint getJitFlags(ref CORJIT_FLAGS flags, uint sizeInBytes) { case TargetArchitecture.X64: case TargetArchitecture.X86: - Debug.Assert(InstructionSet.X86_SSE2 == InstructionSet.X64_SSE2); - Debug.Assert(_compilation.InstructionSetSupport.IsInstructionSetSupported(InstructionSet.X86_SSE2)); + Debug.Assert(InstructionSet.X86_X86Base == InstructionSet.X64_X86Base); + 
Debug.Assert(_compilation.InstructionSetSupport.IsInstructionSetSupported(InstructionSet.X86_X86Base)); break; case TargetArchitecture.ARM64: diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index b9059768c55984..c9e6295f763082 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -47,8 +47,6 @@ public enum InstructionSet RiscV64_Zba = InstructionSet_RiscV64.Zba, RiscV64_Zbb = InstructionSet_RiscV64.Zbb, X64_X86Base = InstructionSet_X64.X86Base, - X64_SSE = InstructionSet_X64.SSE, - X64_SSE2 = InstructionSet_X64.SSE2, X64_SSE3 = InstructionSet_X64.SSE3, X64_SSSE3 = InstructionSet_X64.SSSE3, X64_SSE41 = InstructionSet_X64.SSE41, @@ -82,8 +80,6 @@ public enum InstructionSet X64_GFNI_V256 = InstructionSet_X64.GFNI_V256, X64_GFNI_V512 = InstructionSet_X64.GFNI_V512, X64_X86Base_X64 = InstructionSet_X64.X86Base_X64, - X64_SSE_X64 = InstructionSet_X64.SSE_X64, - X64_SSE2_X64 = InstructionSet_X64.SSE2_X64, X64_SSE3_X64 = InstructionSet_X64.SSE3_X64, X64_SSSE3_X64 = InstructionSet_X64.SSSE3_X64, X64_SSE41_X64 = InstructionSet_X64.SSE41_X64, @@ -105,8 +101,6 @@ public enum InstructionSet X64_AVX10v2_X64 = InstructionSet_X64.AVX10v2_X64, X64_GFNI_X64 = InstructionSet_X64.GFNI_X64, X86_X86Base = InstructionSet_X86.X86Base, - X86_SSE = InstructionSet_X86.SSE, - X86_SSE2 = InstructionSet_X86.SSE2, X86_SSE3 = InstructionSet_X86.SSE3, X86_SSSE3 = InstructionSet_X86.SSSE3, X86_SSE41 = InstructionSet_X86.SSE41, @@ -140,8 +134,6 @@ public enum InstructionSet X86_GFNI_V256 = InstructionSet_X86.GFNI_V256, X86_GFNI_V512 = InstructionSet_X86.GFNI_V512, X86_X86Base_X64 = InstructionSet_X86.X86Base_X64, - X86_SSE_X64 = InstructionSet_X86.SSE_X64, - X86_SSE2_X64 = InstructionSet_X86.SSE2_X64, X86_SSE3_X64 = InstructionSet_X86.SSE3_X64, X86_SSSE3_X64 = InstructionSet_X86.SSSE3_X64, X86_SSE41_X64 = 
InstructionSet_X86.SSE41_X64, @@ -210,63 +202,59 @@ public enum InstructionSet_X64 ILLEGAL = InstructionSet.ILLEGAL, NONE = InstructionSet.NONE, X86Base = 1, - SSE = 2, - SSE2 = 3, - SSE3 = 4, - SSSE3 = 5, - SSE41 = 6, - SSE42 = 7, - AVX = 8, - AVX2 = 9, - AES = 10, - BMI1 = 11, - BMI2 = 12, - FMA = 13, - LZCNT = 14, - PCLMULQDQ = 15, - PCLMULQDQ_V256 = 16, - PCLMULQDQ_V512 = 17, - POPCNT = 18, - Vector128 = 19, - Vector256 = 20, - Vector512 = 21, - AVXVNNI = 22, - MOVBE = 23, - X86Serialize = 24, - AVX512 = 25, - AVX512VBMI = 26, - AVX10v1 = 27, - VectorT128 = 28, - VectorT256 = 29, - VectorT512 = 30, - APX = 31, - AVX10v2 = 32, - GFNI = 33, - GFNI_V256 = 34, - GFNI_V512 = 35, - X86Base_X64 = 36, - SSE_X64 = 37, - SSE2_X64 = 38, - SSE3_X64 = 39, - SSSE3_X64 = 40, - SSE41_X64 = 41, - SSE42_X64 = 42, - AVX_X64 = 43, - AVX2_X64 = 44, - AES_X64 = 45, - BMI1_X64 = 46, - BMI2_X64 = 47, - FMA_X64 = 48, - LZCNT_X64 = 49, - PCLMULQDQ_X64 = 50, - POPCNT_X64 = 51, - AVXVNNI_X64 = 52, - X86Serialize_X64 = 53, - AVX512_X64 = 54, - AVX512VBMI_X64 = 55, - AVX10v1_X64 = 56, - AVX10v2_X64 = 57, - GFNI_X64 = 58, + SSE3 = 2, + SSSE3 = 3, + SSE41 = 4, + SSE42 = 5, + AVX = 6, + AVX2 = 7, + AES = 8, + BMI1 = 9, + BMI2 = 10, + FMA = 11, + LZCNT = 12, + PCLMULQDQ = 13, + PCLMULQDQ_V256 = 14, + PCLMULQDQ_V512 = 15, + POPCNT = 16, + Vector128 = 17, + Vector256 = 18, + Vector512 = 19, + AVXVNNI = 20, + MOVBE = 21, + X86Serialize = 22, + AVX512 = 23, + AVX512VBMI = 24, + AVX10v1 = 25, + VectorT128 = 26, + VectorT256 = 27, + VectorT512 = 28, + APX = 29, + AVX10v2 = 30, + GFNI = 31, + GFNI_V256 = 32, + GFNI_V512 = 33, + X86Base_X64 = 34, + SSE3_X64 = 35, + SSSE3_X64 = 36, + SSE41_X64 = 37, + SSE42_X64 = 38, + AVX_X64 = 39, + AVX2_X64 = 40, + AES_X64 = 41, + BMI1_X64 = 42, + BMI2_X64 = 43, + FMA_X64 = 44, + LZCNT_X64 = 45, + PCLMULQDQ_X64 = 46, + POPCNT_X64 = 47, + AVXVNNI_X64 = 48, + X86Serialize_X64 = 49, + AVX512_X64 = 50, + AVX512VBMI_X64 = 51, + AVX10v1_X64 = 52, + AVX10v2_X64 = 53, + 
GFNI_X64 = 54, } public enum InstructionSet_X86 @@ -274,63 +262,59 @@ public enum InstructionSet_X86 ILLEGAL = InstructionSet.ILLEGAL, NONE = InstructionSet.NONE, X86Base = 1, - SSE = 2, - SSE2 = 3, - SSE3 = 4, - SSSE3 = 5, - SSE41 = 6, - SSE42 = 7, - AVX = 8, - AVX2 = 9, - AES = 10, - BMI1 = 11, - BMI2 = 12, - FMA = 13, - LZCNT = 14, - PCLMULQDQ = 15, - PCLMULQDQ_V256 = 16, - PCLMULQDQ_V512 = 17, - POPCNT = 18, - Vector128 = 19, - Vector256 = 20, - Vector512 = 21, - AVXVNNI = 22, - MOVBE = 23, - X86Serialize = 24, - AVX512 = 25, - AVX512VBMI = 26, - AVX10v1 = 27, - VectorT128 = 28, - VectorT256 = 29, - VectorT512 = 30, - APX = 31, - AVX10v2 = 32, - GFNI = 33, - GFNI_V256 = 34, - GFNI_V512 = 35, - X86Base_X64 = 36, - SSE_X64 = 37, - SSE2_X64 = 38, - SSE3_X64 = 39, - SSSE3_X64 = 40, - SSE41_X64 = 41, - SSE42_X64 = 42, - AVX_X64 = 43, - AVX2_X64 = 44, - AES_X64 = 45, - BMI1_X64 = 46, - BMI2_X64 = 47, - FMA_X64 = 48, - LZCNT_X64 = 49, - PCLMULQDQ_X64 = 50, - POPCNT_X64 = 51, - AVXVNNI_X64 = 52, - X86Serialize_X64 = 53, - AVX512_X64 = 54, - AVX512VBMI_X64 = 55, - AVX10v1_X64 = 56, - AVX10v2_X64 = 57, - GFNI_X64 = 58, + SSE3 = 2, + SSSE3 = 3, + SSE41 = 4, + SSE42 = 5, + AVX = 6, + AVX2 = 7, + AES = 8, + BMI1 = 9, + BMI2 = 10, + FMA = 11, + LZCNT = 12, + PCLMULQDQ = 13, + PCLMULQDQ_V256 = 14, + PCLMULQDQ_V512 = 15, + POPCNT = 16, + Vector128 = 17, + Vector256 = 18, + Vector512 = 19, + AVXVNNI = 20, + MOVBE = 21, + X86Serialize = 22, + AVX512 = 23, + AVX512VBMI = 24, + AVX10v1 = 25, + VectorT128 = 26, + VectorT256 = 27, + VectorT512 = 28, + APX = 29, + AVX10v2 = 30, + GFNI = 31, + GFNI_V256 = 32, + GFNI_V512 = 33, + X86Base_X64 = 34, + SSE3_X64 = 35, + SSSE3_X64 = 36, + SSE41_X64 = 37, + SSE42_X64 = 38, + AVX_X64 = 39, + AVX2_X64 = 40, + AES_X64 = 41, + BMI1_X64 = 42, + BMI2_X64 = 43, + FMA_X64 = 44, + LZCNT_X64 = 45, + PCLMULQDQ_X64 = 46, + POPCNT_X64 = 47, + AVXVNNI_X64 = 48, + X86Serialize_X64 = 49, + AVX512_X64 = 50, + AVX512VBMI_X64 = 51, + AVX10v1_X64 = 52, + 
AVX10v2_X64 = 53, + GFNI_X64 = 54, } public unsafe struct InstructionSetFlags : IEnumerable @@ -463,7 +447,7 @@ public static InstructionSet ConvertToImpliedInstructionSetForVectorInstructionS case TargetArchitecture.X64: switch (input) { - case InstructionSet.X64_Vector128: return InstructionSet.X64_SSE; + case InstructionSet.X64_Vector128: return InstructionSet.X64_X86Base; case InstructionSet.X64_Vector256: return InstructionSet.X64_AVX; case InstructionSet.X64_Vector512: return InstructionSet.X64_AVX512; } @@ -471,7 +455,7 @@ public static InstructionSet ConvertToImpliedInstructionSetForVectorInstructionS case TargetArchitecture.X86: switch (input) { - case InstructionSet.X86_Vector128: return InstructionSet.X86_SSE; + case InstructionSet.X86_Vector128: return InstructionSet.X86_X86Base; case InstructionSet.X86_Vector256: return InstructionSet.X86_AVX; case InstructionSet.X86_Vector512: return InstructionSet.X86_AVX512; } @@ -569,14 +553,6 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_X86Base_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base_X64)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); - if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) - resultflags.AddInstructionSet(InstructionSet.X64_SSE_X64); - if (resultflags.HasInstructionSet(InstructionSet.X64_SSE_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_SSE); - if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) - resultflags.AddInstructionSet(InstructionSet.X64_SSE2_X64); - if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_SSE2); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE3)) resultflags.AddInstructionSet(InstructionSet.X64_SSE3_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE3_X64)) @@ -657,12 +633,8 @@ public static InstructionSetFlags 
ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_GFNI_X64); if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_X64)) resultflags.AddInstructionSet(InstructionSet.X64_GFNI); - if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) - resultflags.AddInstructionSet(InstructionSet.X64_X86Base); - if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) - resultflags.AddInstructionSet(InstructionSet.X64_SSE); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE3)) - resultflags.AddInstructionSet(InstructionSet.X64_SSE2); + resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_SSSE3)) resultflags.AddInstructionSet(InstructionSet.X64_SSE3); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE41)) @@ -692,9 +664,9 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X64_AES)) - resultflags.AddInstructionSet(InstructionSet.X64_SSE2); + resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ)) - resultflags.AddInstructionSet(InstructionSet.X64_SSE2); + resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V256)) resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ); if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ_V256)) @@ -724,13 +696,13 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v2)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet.X64_Vector128)) - resultflags.AddInstructionSet(InstructionSet.X64_SSE); + 
resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_Vector256)) resultflags.AddInstructionSet(InstructionSet.X64_AVX); if (resultflags.HasInstructionSet(InstructionSet.X64_Vector512)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT128)) - resultflags.AddInstructionSet(InstructionSet.X64_SSE2); + resultflags.AddInstructionSet(InstructionSet.X64_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT256)) resultflags.AddInstructionSet(InstructionSet.X64_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT512)) @@ -738,12 +710,8 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target break; case TargetArchitecture.X86: - if (resultflags.HasInstructionSet(InstructionSet.X86_SSE)) - resultflags.AddInstructionSet(InstructionSet.X86_X86Base); - if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2)) - resultflags.AddInstructionSet(InstructionSet.X86_SSE); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE3)) - resultflags.AddInstructionSet(InstructionSet.X86_SSE2); + resultflags.AddInstructionSet(InstructionSet.X86_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X86_SSSE3)) resultflags.AddInstructionSet(InstructionSet.X86_SSE3); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE41)) @@ -773,9 +741,9 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X86_AES)) - resultflags.AddInstructionSet(InstructionSet.X86_SSE2); + resultflags.AddInstructionSet(InstructionSet.X86_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ)) - resultflags.AddInstructionSet(InstructionSet.X86_SSE2); + resultflags.AddInstructionSet(InstructionSet.X86_X86Base); 
if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V256)) resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ); if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ_V256)) @@ -805,13 +773,13 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v2)) resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet.X86_Vector128)) - resultflags.AddInstructionSet(InstructionSet.X86_SSE); + resultflags.AddInstructionSet(InstructionSet.X86_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X86_Vector256)) resultflags.AddInstructionSet(InstructionSet.X86_AVX); if (resultflags.HasInstructionSet(InstructionSet.X86_Vector512)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT128)) - resultflags.AddInstructionSet(InstructionSet.X86_SSE2); + resultflags.AddInstructionSet(InstructionSet.X86_X86Base); if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT256)) resultflags.AddInstructionSet(InstructionSet.X86_AVX2); if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT512)) @@ -895,10 +863,6 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe case TargetArchitecture.X64: if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base_X64)) resultflags.AddInstructionSet(InstructionSet.X64_X86Base); - if (resultflags.HasInstructionSet(InstructionSet.X64_SSE_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_SSE); - if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2_X64)) - resultflags.AddInstructionSet(InstructionSet.X64_SSE2); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE3_X64)) resultflags.AddInstructionSet(InstructionSet.X64_SSE3); if (resultflags.HasInstructionSet(InstructionSet.X64_SSSE3_X64)) @@ -940,10 +904,6 @@ private static InstructionSetFlags 
ExpandInstructionSetByReverseImplicationHelpe if (resultflags.HasInstructionSet(InstructionSet.X64_GFNI_X64)) resultflags.AddInstructionSet(InstructionSet.X64_GFNI); if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) - resultflags.AddInstructionSet(InstructionSet.X64_SSE); - if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) - resultflags.AddInstructionSet(InstructionSet.X64_SSE2); - if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) resultflags.AddInstructionSet(InstructionSet.X64_SSE3); if (resultflags.HasInstructionSet(InstructionSet.X64_SSE3)) resultflags.AddInstructionSet(InstructionSet.X64_SSSE3); @@ -973,9 +933,9 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_AES); - if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ); if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ)) resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ_V256); @@ -1005,13 +965,13 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1)) resultflags.AddInstructionSet(InstructionSet.X64_AVX10v2); - if (resultflags.HasInstructionSet(InstructionSet.X64_SSE)) + if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_Vector128); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX)) 
resultflags.AddInstructionSet(InstructionSet.X64_Vector256); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512)) resultflags.AddInstructionSet(InstructionSet.X64_Vector512); - if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base)) resultflags.AddInstructionSet(InstructionSet.X64_VectorT128); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) resultflags.AddInstructionSet(InstructionSet.X64_VectorT256); @@ -1021,10 +981,6 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe case TargetArchitecture.X86: if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) - resultflags.AddInstructionSet(InstructionSet.X86_SSE); - if (resultflags.HasInstructionSet(InstructionSet.X86_SSE)) - resultflags.AddInstructionSet(InstructionSet.X86_SSE2); - if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2)) resultflags.AddInstructionSet(InstructionSet.X86_SSE3); if (resultflags.HasInstructionSet(InstructionSet.X86_SSE3)) resultflags.AddInstructionSet(InstructionSet.X86_SSSE3); @@ -1054,9 +1010,9 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_AVX512); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI); - if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) resultflags.AddInstructionSet(InstructionSet.X86_AES); - if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ); if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ)) resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ_V256); @@ -1086,13 +1042,13 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe 
resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1)) resultflags.AddInstructionSet(InstructionSet.X86_AVX10v2); - if (resultflags.HasInstructionSet(InstructionSet.X86_SSE)) + if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) resultflags.AddInstructionSet(InstructionSet.X86_Vector128); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX)) resultflags.AddInstructionSet(InstructionSet.X86_Vector256); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512)) resultflags.AddInstructionSet(InstructionSet.X86_Vector512); - if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2)) + if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base)) resultflags.AddInstructionSet(InstructionSet.X86_VectorT128); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) resultflags.AddInstructionSet(InstructionSet.X86_VectorT256); @@ -1107,10 +1063,10 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe private static Dictionary<(string, TargetArchitecture), string> AllInstructionSetGroups { get; } = new() { - { ("x86-x64", TargetArchitecture.X64), "sse2" }, - { ("x86-x64", TargetArchitecture.X86), "sse2" }, - { ("x86-x64-v2", TargetArchitecture.X64), "sse4.2 popcnt" }, - { ("x86-x64-v2", TargetArchitecture.X86), "sse4.2 popcnt" }, + { ("x86-x64", TargetArchitecture.X64), "base" }, + { ("x86-x64", TargetArchitecture.X86), "base" }, + { ("x86-x64-v2", TargetArchitecture.X64), "x86-x64 sse4.2 popcnt" }, + { ("x86-x64-v2", TargetArchitecture.X86), "x86-x64 sse4.2 popcnt" }, { ("x86-x64-v3", TargetArchitecture.X64), "x86-x64-v2 avx2 bmi bmi2 lzcnt movbe fma" }, { ("x86-x64-v3", TargetArchitecture.X86), "x86-x64-v2 avx2 bmi bmi2 lzcnt movbe fma" }, { ("skylake", TargetArchitecture.X64), "x86-x64-v3" }, @@ -1182,8 +1138,8 @@ public static IEnumerable ArchitectureToValidInstructionSets case TargetArchitecture.X64: yield return new InstructionSetInfo("base", 
"X86Base", InstructionSet.X64_X86Base, true); - yield return new InstructionSetInfo("sse", "Sse", InstructionSet.X64_SSE, true); - yield return new InstructionSetInfo("sse2", "Sse2", InstructionSet.X64_SSE2, true); + yield return new InstructionSetInfo("sse", "Sse", InstructionSet.X64_X86Base, true); + yield return new InstructionSetInfo("sse2", "Sse2", InstructionSet.X64_X86Base, true); yield return new InstructionSetInfo("sse3", "Sse3", InstructionSet.X64_SSE3, true); yield return new InstructionSetInfo("ssse3", "Ssse3", InstructionSet.X64_SSSE3, true); yield return new InstructionSetInfo("sse4.1", "Sse41", InstructionSet.X64_SSE41, true); @@ -1232,8 +1188,8 @@ public static IEnumerable ArchitectureToValidInstructionSets case TargetArchitecture.X86: yield return new InstructionSetInfo("base", "X86Base", InstructionSet.X86_X86Base, true); - yield return new InstructionSetInfo("sse", "Sse", InstructionSet.X86_SSE, true); - yield return new InstructionSetInfo("sse2", "Sse2", InstructionSet.X86_SSE2, true); + yield return new InstructionSetInfo("sse", "Sse", InstructionSet.X86_X86Base, true); + yield return new InstructionSetInfo("sse2", "Sse2", InstructionSet.X86_X86Base, true); yield return new InstructionSetInfo("sse3", "Sse3", InstructionSet.X86_SSE3, true); yield return new InstructionSetInfo("ssse3", "Ssse3", InstructionSet.X86_SSSE3, true); yield return new InstructionSetInfo("sse4.1", "Sse41", InstructionSet.X86_SSE41, true); @@ -1316,10 +1272,6 @@ public void Set64BitInstructionSetVariants(TargetArchitecture architecture) case TargetArchitecture.X64: if (HasInstructionSet(InstructionSet.X64_X86Base)) AddInstructionSet(InstructionSet.X64_X86Base_X64); - if (HasInstructionSet(InstructionSet.X64_SSE)) - AddInstructionSet(InstructionSet.X64_SSE_X64); - if (HasInstructionSet(InstructionSet.X64_SSE2)) - AddInstructionSet(InstructionSet.X64_SSE2_X64); if (HasInstructionSet(InstructionSet.X64_SSE3)) AddInstructionSet(InstructionSet.X64_SSE3_X64); if 
(HasInstructionSet(InstructionSet.X64_SSSE3)) @@ -1390,8 +1342,6 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc case TargetArchitecture.X64: AddInstructionSet(InstructionSet.X64_X86Base_X64); - AddInstructionSet(InstructionSet.X64_SSE_X64); - AddInstructionSet(InstructionSet.X64_SSE2_X64); AddInstructionSet(InstructionSet.X64_SSE3_X64); AddInstructionSet(InstructionSet.X64_SSSE3_X64); AddInstructionSet(InstructionSet.X64_SSE41_X64); @@ -1416,8 +1366,6 @@ public void Set64BitInstructionSetVariantsUnconditionally(TargetArchitecture arc case TargetArchitecture.X86: AddInstructionSet(InstructionSet.X86_X86Base_X64); - AddInstructionSet(InstructionSet.X86_SSE_X64); - AddInstructionSet(InstructionSet.X86_SSE2_X64); AddInstructionSet(InstructionSet.X86_SSE3_X64); AddInstructionSet(InstructionSet.X86_SSSE3_X64); AddInstructionSet(InstructionSet.X86_SSE41_X64); @@ -1583,15 +1531,15 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite case "Sse": if (nestedTypeName == "X64") - { return InstructionSet.X64_SSE_X64; } + { return InstructionSet.X64_X86Base_X64; } else - { return InstructionSet.X64_SSE; } + { return InstructionSet.X64_X86Base; } case "Sse2": if (nestedTypeName == "X64") - { return InstructionSet.X64_SSE2_X64; } + { return InstructionSet.X64_X86Base_X64; } else - { return InstructionSet.X64_SSE2; } + { return InstructionSet.X64_X86Base; } case "Sse3": if (nestedTypeName == "X64") @@ -1796,10 +1744,10 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite { return InstructionSet.X86_X86Base; } case "Sse": - { return InstructionSet.X86_SSE; } + { return InstructionSet.X86_X86Base; } case "Sse2": - { return InstructionSet.X86_SSE2; } + { return InstructionSet.X86_X86Base; } case "Sse3": { return InstructionSet.X86_SSE3; } diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt 
b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 92faeab30d3f6d..fbd27e6267169c 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -28,8 +28,8 @@ definearch ,X86 ,32Bit ,X64, X64 instructionset ,X86 ,X86Base , ,22 ,X86Base ,base -instructionset ,X86 ,Sse , ,1 ,SSE ,sse -instructionset ,X86 ,Sse2 , ,2 ,SSE2 ,sse2 +instructionset ,X86 ,Sse ,X86Base ,22 ,X86Base ,sse +instructionset ,X86 ,Sse2 ,X86Base ,22 ,X86Base ,sse2 instructionset ,X86 ,Sse3 , ,3 ,SSE3 ,sse3 instructionset ,X86 ,Ssse3 , ,4 ,SSSE3 ,ssse3 instructionset ,X86 ,Sse41 , ,5 ,SSE41 ,sse4.1 @@ -76,8 +76,6 @@ instructionset ,X86 ,Gfni_V256 , ,54 ,GFNI_V256 instructionset ,X86 ,Gfni_V512 , ,55 ,GFNI_V512 ,gfni_v512 instructionset64bit,X86 ,X86Base -instructionset64bit,X86 ,SSE -instructionset64bit,X86 ,SSE2 instructionset64bit,X86 ,SSE3 instructionset64bit,X86 ,SSSE3 instructionset64bit,X86 ,SSE41 @@ -103,14 +101,9 @@ vectorinstructionset,X86 ,Vector128 vectorinstructionset,X86 ,Vector256 vectorinstructionset,X86 ,Vector512 -; x86-64-v1 - -implication ,X86 ,SSE ,X86Base -implication ,X86 ,SSE2 ,SSE - ; x86-64-v2 -implication ,X86 ,SSE3 ,SSE2 +implication ,X86 ,SSE3 ,X86Base implication ,X86 ,SSSE3 ,SSE3 implication ,X86 ,SSE41 ,SSSE3 implication ,X86 ,SSE42 ,SSE41 @@ -134,8 +127,8 @@ implication ,X86 ,AVX512VBMI ,AVX512 ; Unversioned -implication ,X86 ,AES ,SSE2 -implication ,X86 ,PCLMULQDQ ,SSE2 +implication ,X86 ,AES ,X86Base +implication ,X86 ,PCLMULQDQ ,X86Base implication ,X86 ,PCLMULQDQ_V256 ,PCLMULQDQ implication ,X86 ,PCLMULQDQ_V256 ,AVX implication ,X86 ,PCLMULQDQ_V512 ,PCLMULQDQ_V256 @@ -156,11 +149,11 @@ implication ,X86 ,AVX10v2 ,AVX10v1 ; as they depend on the other implications being correct first ; otherwise they may not be disabled if the required isa is disabled -implication ,X86 ,Vector128 ,SSE +implication ,X86 ,Vector128 ,X86Base 
implication ,X86 ,Vector256 ,AVX implication ,X86 ,Vector512 ,AVX512 -implication ,X86 ,VectorT128 ,SSE2 +implication ,X86 ,VectorT128 ,X86Base implication ,X86 ,VectorT256 ,AVX2 implication ,X86 ,VectorT512 ,AVX512 @@ -228,8 +221,8 @@ implication ,RiscV64 ,Zbb ,RiscV64Base implication ,RiscV64 ,Zba ,RiscV64Base ; ,name and aliases ,archs ,lower baselines included by implication -instructionsetgroup ,x86-x64 ,X64 X86 ,sse2 -instructionsetgroup ,x86-x64-v2 ,X64 X86 ,sse4.2 popcnt +instructionsetgroup ,x86-x64 ,X64 X86 ,base +instructionsetgroup ,x86-x64-v2 ,X64 X86 ,x86-x64 sse4.2 popcnt instructionsetgroup ,x86-x64-v3 ,X64 X86 ,x86-x64-v2 avx2 bmi bmi2 lzcnt movbe fma instructionsetgroup ,skylake ,X64 X86 ,x86-x64-v3 instructionsetgroup ,x86-x64-v4 ,X64 X86 ,x86-x64-v3 avx512 diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 17105809564522..ccdd5ce42d226e 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1256,21 +1256,17 @@ void EEJitManager::SetCpuInfo() // x86-64-v1 - if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic)) - { + if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic) && + CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE) && + CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE2)) + { + // These ISAs are grouped together and if any are disabled then + // you lose access to all of them. 
We recommend modern code just + // use EnableHWIntrinsic, but we continue checking the older knobs + // for back-compat CPUCompileFlags.Set(InstructionSet_X86Base); } - if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE)) - { - CPUCompileFlags.Set(InstructionSet_SSE); - } - - if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE2)) - { - CPUCompileFlags.Set(InstructionSet_SSE2); - } - // x86-64-v2 if (((cpuFeatures & XArchIntrinsicConstants_Sse3) != 0) && @@ -1353,10 +1349,8 @@ void EEJitManager::SetCpuInfo() CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ_VL)) { - // These ISAs are grouped together and if any are disabled then - // you lose access to all of them. We recommend modern code just - // use EnableAVX512, but we continue checking the older knobs for - // back-compat + // These ISAs are likewise grouped together and should be checked + // via EnableAVX512 CPUCompileFlags.Set(InstructionSet_AVX512); } } @@ -1366,7 +1360,8 @@ void EEJitManager::SetCpuInfo() if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI_VL)) { - // These ISAs are likewise grouped together + // These ISAs are likewise grouped together and should be checked + // via EnableAVX512VBMI CPUCompileFlags.Set(InstructionSet_AVX512VBMI); } } diff --git a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs index fce083d7154f12..e956c3abbbfad1 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs +++ b/src/tests/JIT/HardwareIntrinsics/X86/X86Base/CpuId.cs @@ -67,31 +67,38 @@ public unsafe static void CpuId() int xarchCpuInfo = eax; - if (IsBitIncorrect(edx, 25, typeof(Sse), Sse.IsSupported, "SSE", ref isHierarchyDisabled)) + for (int i = 0; i < 2; i++) { - testResult = Fail; - } + // The runtime currently requires that all of SSE and SSE2 be supported 
together or none + // are supported. To handle this we simple check them all twice so that if any of them are disabled + // the first time around, we'll then assert that they are all actually disabled the second time around - if (IsBitIncorrect(edx, 26, typeof(Sse2), Sse2.IsSupported, "SSE2", ref isHierarchyDisabled)) - { - testResult = Fail; + if (IsBitIncorrect(edx, 25, typeof(Sse), Sse.IsSupported, "SSE", ref isHierarchyDisabled)) + { + testResult = Fail; + } + + if (IsBitIncorrect(edx, 26, typeof(Sse2), Sse2.IsSupported, "SSE2", ref isHierarchyDisabled)) + { + testResult = Fail; + } } - bool isSse2HierarchyDisabled = isHierarchyDisabled; + bool isBaselineHierarchyDisabled = isHierarchyDisabled; if (IsBitIncorrect(ecx, 25, typeof(Aes), Aes.IsSupported, "AES", ref isHierarchyDisabled)) { testResult = Fail; } - isHierarchyDisabled = isSse2HierarchyDisabled; + isHierarchyDisabled = isBaselineHierarchyDisabled; if (IsBitIncorrect(ecx, 1, typeof(Pclmulqdq), Pclmulqdq.IsSupported, "PCLMULQDQ", ref isHierarchyDisabled)) { testResult = Fail; } - isHierarchyDisabled = isSse2HierarchyDisabled | !GetDotnetEnable("SSE3_4"); + isHierarchyDisabled = isBaselineHierarchyDisabled | !GetDotnetEnable("SSE3_4"); if (IsBitIncorrect(ecx, 0, typeof(Sse3), Sse3.IsSupported, "SSE3", ref isHierarchyDisabled)) { @@ -173,9 +180,8 @@ public unsafe static void CpuId() for (int i = 0; i < 2; i++) { - // The runtime currently requires that all of F + BW + CD + DQ + VL be supported together or none - // are supported. To handle this we simple check them all twice so that if any of them are disabled - // the first time around, we'll then assert that they are all actually disabled the second time around + // AVX512F + BW + CD + DQ + VL are likewise provided together or not at all + // so we loop twice to ensure it all lines up as expected. 
if (IsBitIncorrect(ebx, 16, typeof(Avx512F), Avx512F.IsSupported, "AVX512F", ref isHierarchyDisabled)) { @@ -380,7 +386,7 @@ public unsafe static void CpuId() testResult = Fail; } - if (IsIncorrect(typeof(Vector128), Vector128.IsHardwareAccelerated, isSse2HierarchyDisabled)) + if (IsIncorrect(typeof(Vector128), Vector128.IsHardwareAccelerated, isBaselineHierarchyDisabled)) { testResult = Fail; } @@ -395,7 +401,7 @@ public unsafe static void CpuId() testResult = Fail; } - if (IsIncorrect(typeof(Vector), Vector.IsHardwareAccelerated, isSse2HierarchyDisabled)) + if (IsIncorrect(typeof(Vector), Vector.IsHardwareAccelerated, isBaselineHierarchyDisabled)) { testResult = Fail; } From 50b6be41bd1554ca261e2565a16016f77177fd2f Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 27 May 2025 16:41:05 -0700 Subject: [PATCH 3/7] Fix the containment check for NI_AVX512_Shuffle byte --- src/coreclr/jit/lowerxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 580fb83f5fbbdd..ea115c4cc20d63 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -9827,6 +9827,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } case NI_AVX2_Shuffle: + case NI_AVX512_Shuffle: { if (varTypeIsByte(simdBaseType)) { @@ -9848,7 +9849,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX512_Permute2x64: case NI_AVX512_Permute4x32: case NI_AVX512_Permute4x64: - case NI_AVX512_Shuffle: case NI_AVX512_ShuffleHigh: case NI_AVX512_ShuffleLow: case NI_AVX512_RotateLeft: From 549f8dab01982b8f32af1fa1f641b86e8b0f5229 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 27 May 2025 17:23:23 -0700 Subject: [PATCH 4/7] Remove unnecessary config knobs and instruction-set switches --- src/coreclr/inc/clrconfigvalues.h | 24 ------ src/coreclr/inc/corinfoinstructionset.h | 1 - src/coreclr/inc/readytoruninstructionset.h | 1 - 
src/coreclr/jit/compiler.cpp | 29 +++---- src/coreclr/jit/jitconfigvalues.h | 24 ------ .../Compiler/HardwareIntrinsicHelpers.cs | 29 +++---- .../Common/Compiler/InstructionSetSupport.cs | 14 +--- .../tools/Common/InstructionSetHelpers.cs | 5 +- .../Runtime/ReadyToRunInstructionSet.cs | 1 - .../Runtime/ReadyToRunInstructionSetHelper.cs | 4 +- .../JitInterface/CorInfoInstructionSet.cs | 56 ++++++------- .../ThunkGenerator/InstructionSetDesc.txt | 30 ++++--- src/coreclr/vm/codeman.cpp | 76 ++++-------------- src/tests/Common/testenvironment.proj | 79 ++++--------------- .../JitBlue/GitHub_65988/GitHub_65988.csproj | 4 +- .../JitBlue/Runtime_64764/Runtime_64764.cs | 1 - .../JittedMethodsCountingTest.cs | 18 +---- 17 files changed, 108 insertions(+), 288 deletions(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 54726ea8d4b53c..7ef1d07e35e210 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -697,14 +697,12 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableVPCLMULQDQ, W("EnableVPCLMUL RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableMOVBE, W("EnableMOVBE"), 1, "Allows MOVBE+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePOPCNT, W("EnablePOPCNT"), 1, "Allows POPCNT+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE3, W("EnableSSE3"), 1, "Allows SSE3+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE3_4, W("EnableSSE3_4"), 1, "Allows SSE3+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE41, W("EnableSSE41"), 1, "Allows SSE4.1+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE42, W("EnableSSE42"), 1, "Allows SSE4.2+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSSE3, W("EnableSSSE3"), 1, "Allows SSSE3+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableX86Serialize, W("EnableX86Serialize"), 1, 
"Allows X86Serialize+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAPX, W("EnableAPX"), 0, "Allows APX+ features to be disabled") #elif defined(TARGET_ARM64) -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64AdvSimd, W("EnableArm64AdvSimd"), 1, "Allows Arm64 AdvSimd+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Aes, W("EnableArm64Aes"), 1, "Allows Arm64 Aes+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Atomics, W("EnableArm64Atomics"), 1, "Allows Arm64 Atomics+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Crc32, W("EnableArm64Crc32"), 1, "Allows Arm64 Crc32+ hardware intrinsics to be disabled") @@ -722,28 +720,6 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableRiscV64Zba, W("EnableRiscV64 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableRiscV64Zbb, W("EnableRiscV64Zbb"), 1, "Allows RiscV64 Zbb hardware intrinsics to be disabled") #endif -// -// These are "legacy" ISA enablement knobs that aren't recommended for use anymore -// -#if defined(TARGET_AMD64) || defined(TARGET_X86) -// These have been superceded by EnableAVX512 as you get all of them or none of them -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512BW, W("EnableAVX512BW"), 1, "Allows AVX512BW+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512BW_VL, W("EnableAVX512BW_VL"), 1, "Allows AVX512BW_VL+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512CD, W("EnableAVX512CD"), 1, "Allows AVX512CD+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512CD_VL, W("EnableAVX512CD_VL"), 1, "Allows AVX512CD_VL+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512DQ, W("EnableAVX512DQ"), 1, "Allows AVX512DQ+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512DQ_VL, W("EnableAVX512DQ_VL"), 1, "Allows AVX512DQ_VL+ hardware intrinsics to be 
disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F, W("EnableAVX512F"), 1, "Allows AVX512F+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F_VL, W("EnableAVX512F_VL"), 1, "Allows AVX512F_VL+ hardware intrinsics to be disabled") - -// These have been superceded by EnableAVX512VBMI as you get all of them or none of them -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512VBMI_VL, W("EnableAVX512VBMI_VL"), 1, "Allows AVX512VBMI_VL+ hardware intrinsics to be disabled") - -// These have been superceded by EnableHWIntrinsic as they are part of the baseline -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE, W("EnableSSE"), 1, "Allows SSE+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE2, W("EnableSSE2"), 1, "Allows SSE2+ hardware intrinsics to be disabled") -#endif - /// /// Uncategorized /// diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 819cde31f7e7a3..90d0574efc1a8a 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -924,7 +924,6 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst { #ifdef TARGET_ARM64 case READYTORUN_INSTRUCTION_ArmBase: return InstructionSet_ArmBase; - case READYTORUN_INSTRUCTION_AdvSimd: return InstructionSet_AdvSimd; case READYTORUN_INSTRUCTION_Aes: return InstructionSet_Aes; case READYTORUN_INSTRUCTION_Crc32: return InstructionSet_Crc32; case READYTORUN_INSTRUCTION_Dp: return InstructionSet_Dp; diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index a2bbcccaca3721..26f0653b6d7096 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -23,7 +23,6 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Pclmulqdq=14, READYTORUN_INSTRUCTION_Popcnt=15, READYTORUN_INSTRUCTION_ArmBase=16, - READYTORUN_INSTRUCTION_AdvSimd=17, 
READYTORUN_INSTRUCTION_Crc32=18, READYTORUN_INSTRUCTION_Sha1=19, READYTORUN_INSTRUCTION_Sha256=20, diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 154a11ae964301..6ac4ca679fa611 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -6014,10 +6014,6 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, if (JitConfig.EnableHWIntrinsic() != 0) { instructionSetFlags.AddInstructionSet(InstructionSet_ArmBase); - } - - if (JitConfig.EnableArm64AdvSimd() != 0) - { instructionSetFlags.AddInstructionSet(InstructionSet_AdvSimd); } @@ -6088,16 +6084,12 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, } } - if ((JitConfig.EnableHWIntrinsic() != 0) && (JitConfig.EnableSSE() != 0) && (JitConfig.EnableSSE2() != 0)) + if (JitConfig.EnableHWIntrinsic() != 0) { - // These ISAs are grouped together and if any are disabled then - // you lose access to all of them. We recommend modern code just - // use EnableHWIntrinsic, but we continue checking the older knobs - // for back-compat instructionSetFlags.AddInstructionSet(InstructionSet_X86Base); } - if ((JitConfig.EnableSSE3() != 0) && (JitConfig.EnableSSE3_4() != 0)) + if (JitConfig.EnableSSE3() != 0) { instructionSetFlags.AddInstructionSet(InstructionSet_SSE3); } @@ -6180,21 +6172,13 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, instructionSetFlags.AddInstructionSet(InstructionSet_AVXVNNI); } - if ((JitConfig.EnableAVX512() != 0) && (JitConfig.EnableAVX512F() != 0) && - (JitConfig.EnableAVX512F_VL() != 0) && (JitConfig.EnableAVX512BW() != 0) && - (JitConfig.EnableAVX512BW_VL() != 0) && (JitConfig.EnableAVX512CD() != 0) && - (JitConfig.EnableAVX512CD_VL() != 0) && (JitConfig.EnableAVX512DQ() != 0) && - (JitConfig.EnableAVX512DQ_VL() != 0)) + if (JitConfig.EnableAVX512() != 0) { - // These ISAs are likewise grouped together and should be checked - // via EnableAVX512 instructionSetFlags.AddInstructionSet(InstructionSet_AVX512); } - if 
((JitConfig.EnableAVX512VBMI() != 0) && (JitConfig.EnableAVX512VBMI_VL() != 0)) + if (JitConfig.EnableAVX512VBMI() != 0) { - // These ISAs are likewise grouped together and should be checked - // via EnableAVX512VBMI instructionSetFlags.AddInstructionSet(InstructionSet_AVX512VBMI); } @@ -6203,6 +6187,11 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, instructionSetFlags.AddInstructionSet(InstructionSet_AVX10v1); } + if (JitConfig.EnableAVX10v2() != 0) + { + instructionSetFlags.AddInstructionSet(InstructionSet_AVX10v2); + } + if (JitConfig.EnableAPX() != 0) { instructionSetFlags.AddInstructionSet(InstructionSet_APX); diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 6ee28ccca22344..31d6f0c3013e85 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -413,13 +413,11 @@ RELEASE_CONFIG_INTEGER(EnablePCLMULQDQ, "EnablePCLMULQDQ", RELEASE_CONFIG_INTEGER(EnableVPCLMULQDQ, "EnableVPCLMULQDQ", 1) // Allows VPCLMULQDQ+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnablePOPCNT, "EnablePOPCNT", 1) // Allows POPCNT+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableSSE3, "EnableSSE3", 1) // Allows SSE3+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableSSE3_4, "EnableSSE3_4", 1) // Allows SSE3+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableSSE41, "EnableSSE41", 1) // Allows SSE4.1+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableSSE42, "EnableSSE42", 1) // Allows SSE4.2+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableSSSE3, "EnableSSSE3", 1) // Allows SSSE3+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAPX, "EnableAPX", 0) // Allows APX+ features to be disabled #elif defined(TARGET_ARM64) -RELEASE_CONFIG_INTEGER(EnableArm64AdvSimd, "EnableArm64AdvSimd", 1) // Allows Arm64 AdvSimd+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableArm64Aes, "EnableArm64Aes", 1) // 
Allows Arm64 Aes+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableArm64Atomics, "EnableArm64Atomics", 1) // Allows Arm64 Atomics+ hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableArm64Crc32, "EnableArm64Crc32", 1) // Allows Arm64 Crc32+ hardware intrinsics to be disabled @@ -440,28 +438,6 @@ RELEASE_CONFIG_INTEGER(EnableEmbeddedMasking, "EnableEmbeddedMasking", RELEASE_CONFIG_INTEGER(EnableApxNDD, "EnableApxNDD", 0) // Allows APX NDD feature to be disabled RELEASE_CONFIG_INTEGER(EnableApxConditionalChaining, "EnableApxConditionalChaining", 0) // Allows APX conditional compare chaining -// -// These are "legacy" ISA enablement knobs that aren't recommended for use anymore -// -#if defined(TARGET_AMD64) || defined(TARGET_X86) -// These have been superceded by EnableAVX512 as you get all of them or none of them -RELEASE_CONFIG_INTEGER(EnableAVX512BW, "EnableAVX512BW", 1) // Allows AVX512BW+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512BW_VL, "EnableAVX512BW_VL", 1) // Allows AVX512BW+ AVX512VL+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512CD, "EnableAVX512CD", 1) // Allows AVX512CD+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512CD_VL, "EnableAVX512CD_VL", 1) // Allows AVX512CD+ AVX512VL+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512DQ, "EnableAVX512DQ", 1) // Allows AVX512DQ+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512DQ_VL, "EnableAVX512DQ_VL", 1) // Allows AVX512DQ+ AVX512VL+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512F, "EnableAVX512F", 1) // Allows AVX512F+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableAVX512F_VL, "EnableAVX512F_VL", 1) // Allows AVX512F+ AVX512VL+ hardware intrinsics to be disabled - -// These have been superceded by EnableAVX512VBMI as you get all of them or none of them -RELEASE_CONFIG_INTEGER(EnableAVX512VBMI_VL, "EnableAVX512VBMI_VL", 1) 
// Allows AVX512VBMI_VL+ hardware intrinsics to be disabled - -// These have been superceded by EnableHWIntrinsic as they are part of the baseline -RELEASE_CONFIG_INTEGER(EnableSSE, "EnableSSE", 1) // Allows SSE+ hardware intrinsics to be disabled -RELEASE_CONFIG_INTEGER(EnableSSE2, "EnableSSE2", 1) // Allows SSE2+ hardware intrinsics to be disabled -#endif - // clang-format on #ifdef FEATURE_SIMD diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index 1b9a1dd5d4df35..665261f4030464 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -218,22 +218,19 @@ public static int FromInstructionSet(InstructionSet instructionSet) // Keep these enumerations in sync with cpufeatures.h in the minipal. private static class Arm64IntrinsicConstants { - public const int AdvSimd = 0x0001; - public const int Aes = 0x0002; - public const int Crc32 = 0x0004; - public const int Dp = 0x0008; - public const int Rdm = 0x0010; - public const int Sha1 = 0x0020; - public const int Sha256 = 0x0040; - public const int Atomics = 0x0080; - public const int Rcpc = 0x0100; - public const int Rcpc2 = 0x0200; - public const int Sve = 0x0400; + public const int Aes = 0x0001; + public const int Crc32 = 0x0002; + public const int Dp = 0x0004; + public const int Rdm = 0x0008; + public const int Sha1 = 0x0010; + public const int Sha256 = 0x0020; + public const int Atomics = 0x0040; + public const int Rcpc = 0x0080; + public const int Rcpc2 = 0x0100; + public const int Sve = 0x0200; public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { - if ((flags & AdvSimd) != 0) - builder.AddSupportedInstructionSet("neon"); if ((flags & Aes) != 0) builder.AddSupportedInstructionSet("aes"); if ((flags & Crc32) != 0) @@ -264,8 +261,8 @@ 
ISAs - they're always available InstructionSet.ARM64_ArmBase => 0, InstructionSet.ARM64_ArmBase_Arm64 => 0, - InstructionSet.ARM64_AdvSimd => AdvSimd, - InstructionSet.ARM64_AdvSimd_Arm64 => AdvSimd, + InstructionSet.ARM64_AdvSimd => 0, + InstructionSet.ARM64_AdvSimd_Arm64 => 0, // Optional ISAs - only available via opt-in or opportunistic light-up InstructionSet.ARM64_Aes => Aes, @@ -287,7 +284,7 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.ARM64_Sve_Arm64 => Sve, // Vector Sizes - InstructionSet.ARM64_VectorT128 => AdvSimd, + InstructionSet.ARM64_VectorT128 => 0, _ => throw new NotSupportedException(((InstructionSet_ARM64)instructionSet).ToString()) }; diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index 121826f6e12753..0c41654a3bdad5 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -196,7 +196,10 @@ private static Dictionary ComputeInstructSetSupportForAr { // Only instruction sets with associated R2R enum values are specifiable if (instructionSet.Specifiable) - support.Add(instructionSet.Name, instructionSet.InstructionSet); + { + _ = support.TryAdd(instructionSet.Name, instructionSet.InstructionSet); + Debug.Assert(support[instructionSet.Name] == instructionSet.InstructionSet); + } } return support; @@ -321,15 +324,6 @@ public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, if ((_architecture == TargetArchitecture.X86) || (_architecture == TargetArchitecture.ARM)) unsupportedInstructionSets.Set64BitInstructionSetVariantsUnconditionally(_architecture); - // While it's possible to enable individual AVX-512 ISA's, it is not - // optimal to do so, since they aren't totally functional this way, - // plus it is extremely rare to encounter hardware that doesn't support - // all of them. 
So, here we ensure that we are enabling all the ISA's - // if one is specified in the Crossgen2 or ILC command-lines. - // - // For more information, check this Github comment: - // https://github.com/dotnet/runtime/issues/106450#issuecomment-2299504035 - if (_supportedInstructionSets.Any(iSet => iSet.Contains("avx512"))) { // These ISAs should automatically extend to 512-bit if diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index e7994faeebff0b..f556d637949d35 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -24,7 +24,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru // Ready to run images are built with certain instruction set baselines if ((targetArchitecture == TargetArchitecture.X86) || (targetArchitecture == TargetArchitecture.X64)) { - instructionSetSupportBuilder.AddSupportedInstructionSet("sse2"); // Lower baselines included by implication + instructionSetSupportBuilder.AddSupportedInstructionSet("base"); } else if (targetArchitecture == TargetArchitecture.ARM64) { @@ -222,12 +222,9 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512)) { optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vbmi"); - optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx512vbmi_vl"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v1"); - optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v1_v512"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("vpclmul_v512"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v2"); - optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("avx10v2_v512"); optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("gfni_v512"); 
} } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index 83550669bd5c38..bf8005472bab35 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -26,7 +26,6 @@ public enum ReadyToRunInstructionSet Pclmulqdq=14, Popcnt=15, ArmBase=16, - AdvSimd=17, Crc32=18, Sha1=19, Sha256=20, diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index 485aa9b7cbe627..c0283cb09eff10 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -25,8 +25,8 @@ public static class ReadyToRunInstructionSetHelper { case InstructionSet.ARM64_ArmBase: return ReadyToRunInstructionSet.ArmBase; case InstructionSet.ARM64_ArmBase_Arm64: return ReadyToRunInstructionSet.ArmBase; - case InstructionSet.ARM64_AdvSimd: return ReadyToRunInstructionSet.AdvSimd; - case InstructionSet.ARM64_AdvSimd_Arm64: return ReadyToRunInstructionSet.AdvSimd; + case InstructionSet.ARM64_AdvSimd: return ReadyToRunInstructionSet.ArmBase; + case InstructionSet.ARM64_AdvSimd_Arm64: return ReadyToRunInstructionSet.ArmBase; case InstructionSet.ARM64_Aes: return ReadyToRunInstructionSet.Aes; case InstructionSet.ARM64_Aes_Arm64: return ReadyToRunInstructionSet.Aes; case InstructionSet.ARM64_Crc32: return ReadyToRunInstructionSet.Crc32; diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index c9e6295f763082..7989e4d0cf2008 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -1138,8 +1138,8 @@ public static 
IEnumerable ArchitectureToValidInstructionSets case TargetArchitecture.X64: yield return new InstructionSetInfo("base", "X86Base", InstructionSet.X64_X86Base, true); - yield return new InstructionSetInfo("sse", "Sse", InstructionSet.X64_X86Base, true); - yield return new InstructionSetInfo("sse2", "Sse2", InstructionSet.X64_X86Base, true); + yield return new InstructionSetInfo("base", "Sse", InstructionSet.X64_X86Base, true); + yield return new InstructionSetInfo("base", "Sse2", InstructionSet.X64_X86Base, true); yield return new InstructionSetInfo("sse3", "Sse3", InstructionSet.X64_SSE3, true); yield return new InstructionSetInfo("ssse3", "Ssse3", InstructionSet.X64_SSSE3, true); yield return new InstructionSetInfo("sse4.1", "Sse41", InstructionSet.X64_SSE41, true); @@ -1161,26 +1161,24 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X64_AVXVNNI, true); yield return new InstructionSetInfo("movbe", "", InstructionSet.X64_MOVBE, true); yield return new InstructionSetInfo("serialize", "X86Serialize", InstructionSet.X64_X86Serialize, true); - yield return new InstructionSetInfo("avx512", "", InstructionSet.X64_AVX512, true); - yield return new InstructionSetInfo("evex", "", InstructionSet.X64_AVX512, true); - yield return new InstructionSetInfo("avx512f", "Avx512F", InstructionSet.X64_AVX512, true); - yield return new InstructionSetInfo("avx512f_vl", "Avx512F_VL", InstructionSet.X64_AVX512, true); - yield return new InstructionSetInfo("avx512bw", "Avx512BW", InstructionSet.X64_AVX512, true); - yield return new InstructionSetInfo("avx512bw_vl", "Avx512BW_VL", InstructionSet.X64_AVX512, true); - yield return new InstructionSetInfo("avx512cd", "Avx512CD", InstructionSet.X64_AVX512, true); - yield return new InstructionSetInfo("avx512cd_vl", "Avx512CD_VL", InstructionSet.X64_AVX512, true); - yield return new InstructionSetInfo("avx512dq", "Avx512DQ", InstructionSet.X64_AVX512, true); 
- yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X64_AVX512, true); + yield return new InstructionSetInfo("avx512", "Avx512F", InstructionSet.X64_AVX512, true); + yield return new InstructionSetInfo("avx512", "Avx512F_VL", InstructionSet.X64_AVX512, true); + yield return new InstructionSetInfo("avx512", "Avx512BW", InstructionSet.X64_AVX512, true); + yield return new InstructionSetInfo("avx512", "Avx512BW_VL", InstructionSet.X64_AVX512, true); + yield return new InstructionSetInfo("avx512", "Avx512CD", InstructionSet.X64_AVX512, true); + yield return new InstructionSetInfo("avx512", "Avx512CD_VL", InstructionSet.X64_AVX512, true); + yield return new InstructionSetInfo("avx512", "Avx512DQ", InstructionSet.X64_AVX512, true); + yield return new InstructionSetInfo("avx512", "Avx512DQ_VL", InstructionSet.X64_AVX512, true); yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X64_AVX512VBMI, true); - yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X64_AVX512VBMI, true); + yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi_VL", InstructionSet.X64_AVX512VBMI, true); yield return new InstructionSetInfo("avx10v1", "Avx10v1", InstructionSet.X64_AVX10v1, true); - yield return new InstructionSetInfo("avx10v1_v512", "Avx10v1_V512", InstructionSet.X64_AVX10v1, true); + yield return new InstructionSetInfo("avx10v1", "Avx10v1_V512", InstructionSet.X64_AVX10v1, true); yield return new InstructionSetInfo("vectort128", "", InstructionSet.X64_VectorT128, true); yield return new InstructionSetInfo("vectort256", "", InstructionSet.X64_VectorT256, true); yield return new InstructionSetInfo("vectort512", "", InstructionSet.X64_VectorT512, true); yield return new InstructionSetInfo("apx", "", InstructionSet.X64_APX, true); yield return new InstructionSetInfo("avx10v2", "Avx10v2", InstructionSet.X64_AVX10v2, true); - yield return new InstructionSetInfo("avx10v2_v512", "Avx10v2_V512", 
InstructionSet.X64_AVX10v2, true); + yield return new InstructionSetInfo("avx10v2", "Avx10v2_V512", InstructionSet.X64_AVX10v2, true); yield return new InstructionSetInfo("gfni", "Gfni", InstructionSet.X64_GFNI, true); yield return new InstructionSetInfo("gfni_v256", "Gfni_V256", InstructionSet.X64_GFNI_V256, true); yield return new InstructionSetInfo("gfni_v512", "Gfni_V512", InstructionSet.X64_GFNI_V512, true); @@ -1188,8 +1186,8 @@ public static IEnumerable ArchitectureToValidInstructionSets case TargetArchitecture.X86: yield return new InstructionSetInfo("base", "X86Base", InstructionSet.X86_X86Base, true); - yield return new InstructionSetInfo("sse", "Sse", InstructionSet.X86_X86Base, true); - yield return new InstructionSetInfo("sse2", "Sse2", InstructionSet.X86_X86Base, true); + yield return new InstructionSetInfo("base", "Sse", InstructionSet.X86_X86Base, true); + yield return new InstructionSetInfo("base", "Sse2", InstructionSet.X86_X86Base, true); yield return new InstructionSetInfo("sse3", "Sse3", InstructionSet.X86_SSE3, true); yield return new InstructionSetInfo("ssse3", "Ssse3", InstructionSet.X86_SSSE3, true); yield return new InstructionSetInfo("sse4.1", "Sse41", InstructionSet.X86_SSE41, true); @@ -1211,26 +1209,24 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X86_AVXVNNI, true); yield return new InstructionSetInfo("movbe", "", InstructionSet.X86_MOVBE, true); yield return new InstructionSetInfo("serialize", "X86Serialize", InstructionSet.X86_X86Serialize, true); - yield return new InstructionSetInfo("avx512", "", InstructionSet.X86_AVX512, true); - yield return new InstructionSetInfo("evex", "", InstructionSet.X86_AVX512, true); - yield return new InstructionSetInfo("avx512f", "Avx512F", InstructionSet.X86_AVX512, true); - yield return new InstructionSetInfo("avx512f_vl", "Avx512F_VL", InstructionSet.X86_AVX512, true); - yield return new 
InstructionSetInfo("avx512bw", "Avx512BW", InstructionSet.X86_AVX512, true); - yield return new InstructionSetInfo("avx512bw_vl", "Avx512BW_VL", InstructionSet.X86_AVX512, true); - yield return new InstructionSetInfo("avx512cd", "Avx512CD", InstructionSet.X86_AVX512, true); - yield return new InstructionSetInfo("avx512cd_vl", "Avx512CD_VL", InstructionSet.X86_AVX512, true); - yield return new InstructionSetInfo("avx512dq", "Avx512DQ", InstructionSet.X86_AVX512, true); - yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("avx512", "Avx512F", InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("avx512", "Avx512F_VL", InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("avx512", "Avx512BW", InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("avx512", "Avx512BW_VL", InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("avx512", "Avx512CD", InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("avx512", "Avx512CD_VL", InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("avx512", "Avx512DQ", InstructionSet.X86_AVX512, true); + yield return new InstructionSetInfo("avx512", "Avx512DQ_VL", InstructionSet.X86_AVX512, true); yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X86_AVX512VBMI, true); - yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X86_AVX512VBMI, true); + yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi_VL", InstructionSet.X86_AVX512VBMI, true); yield return new InstructionSetInfo("avx10v1", "Avx10v1", InstructionSet.X86_AVX10v1, true); - yield return new InstructionSetInfo("avx10v1_v512", "Avx10v1_V512", InstructionSet.X86_AVX10v1, true); + yield return new InstructionSetInfo("avx10v1", "Avx10v1_V512", InstructionSet.X86_AVX10v1, true); yield return new 
InstructionSetInfo("vectort128", "", InstructionSet.X86_VectorT128, true); yield return new InstructionSetInfo("vectort256", "", InstructionSet.X86_VectorT256, true); yield return new InstructionSetInfo("vectort512", "", InstructionSet.X86_VectorT512, true); yield return new InstructionSetInfo("apx", "", InstructionSet.X86_APX, true); yield return new InstructionSetInfo("avx10v2", "Avx10v2", InstructionSet.X86_AVX10v2, true); - yield return new InstructionSetInfo("avx10v2_v512", "Avx10v2_V512", InstructionSet.X86_AVX10v2, true); + yield return new InstructionSetInfo("avx10v2", "Avx10v2_V512", InstructionSet.X86_AVX10v2, true); yield return new InstructionSetInfo("gfni", "Gfni", InstructionSet.X86_GFNI, true); yield return new InstructionSetInfo("gfni_v256", "Gfni_V256", InstructionSet.X86_GFNI_V256, true); yield return new InstructionSetInfo("gfni_v512", "Gfni_V512", InstructionSet.X86_GFNI_V512, true); diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index fbd27e6267169c..b70b323ae570b6 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -28,8 +28,8 @@ definearch ,X86 ,32Bit ,X64, X64 instructionset ,X86 ,X86Base , ,22 ,X86Base ,base -instructionset ,X86 ,Sse ,X86Base ,22 ,X86Base ,sse -instructionset ,X86 ,Sse2 ,X86Base ,22 ,X86Base ,sse2 +instructionset ,X86 ,Sse ,X86Base ,22 ,X86Base ,base +instructionset ,X86 ,Sse2 ,X86Base ,22 ,X86Base ,base instructionset ,X86 ,Sse3 , ,3 ,SSE3 ,sse3 instructionset ,X86 ,Ssse3 , ,4 ,SSSE3 ,ssse3 instructionset ,X86 ,Sse41 , ,5 ,SSE41 ,sse4.1 @@ -51,26 +51,24 @@ instructionset ,X86 , , , ,Vector512 instructionset ,X86 ,AvxVnni , ,25 ,AVXVNNI ,avxvnni instructionset ,X86 , ,Movbe ,27 ,MOVBE ,movbe instructionset ,X86 ,X86Serialize , ,28 ,X86Serialize ,serialize -instructionset ,X86 , ,Avx512 
,29 ,AVX512 ,avx512 -instructionset ,X86 , ,Avx512 ,29 ,AVX512 ,evex -instructionset ,X86 ,Avx512F ,Avx512 ,29 ,AVX512 ,avx512f -instructionset ,X86 ,Avx512F_VL ,Avx512 ,29 ,AVX512 ,avx512f_vl -instructionset ,X86 ,Avx512BW ,Avx512 ,29 ,AVX512 ,avx512bw -instructionset ,X86 ,Avx512BW_VL ,Avx512 ,29 ,AVX512 ,avx512bw_vl -instructionset ,X86 ,Avx512CD ,Avx512 ,29 ,AVX512 ,avx512cd -instructionset ,X86 ,Avx512CD_VL ,Avx512 ,29 ,AVX512 ,avx512cd_vl -instructionset ,X86 ,Avx512DQ ,Avx512 ,29 ,AVX512 ,avx512dq -instructionset ,X86 ,Avx512DQ_VL ,Avx512 ,29 ,AVX512 ,avx512dq_vl +instructionset ,X86 ,Avx512F ,Avx512 ,29 ,AVX512 ,avx512 +instructionset ,X86 ,Avx512F_VL ,Avx512 ,29 ,AVX512 ,avx512 +instructionset ,X86 ,Avx512BW ,Avx512 ,29 ,AVX512 ,avx512 +instructionset ,X86 ,Avx512BW_VL ,Avx512 ,29 ,AVX512 ,avx512 +instructionset ,X86 ,Avx512CD ,Avx512 ,29 ,AVX512 ,avx512 +instructionset ,X86 ,Avx512CD_VL ,Avx512 ,29 ,AVX512 ,avx512 +instructionset ,X86 ,Avx512DQ ,Avx512 ,29 ,AVX512 ,avx512 +instructionset ,X86 ,Avx512DQ_VL ,Avx512 ,29 ,AVX512 ,avx512 instructionset ,X86 ,Avx512Vbmi , ,37 ,AVX512VBMI ,avx512vbmi -instructionset ,X86 ,Avx512Vbmi_VL ,Avx512Vbmi ,37 ,AVX512VBMI ,avx512vbmi_vl +instructionset ,X86 ,Avx512Vbmi_VL ,Avx512Vbmi ,37 ,AVX512VBMI ,avx512vbmi instructionset ,X86 ,Avx10v1 , ,44 ,AVX10v1 ,avx10v1 -instructionset ,X86 ,Avx10v1_V512 ,Avx10v1 ,44 ,AVX10v1 ,avx10v1_v512 +instructionset ,X86 ,Avx10v1_V512 ,Avx10v1 ,44 ,AVX10v1 ,avx10v1 instructionset ,X86 , ,VectorT128 ,39 ,VectorT128 ,vectort128 instructionset ,X86 , ,VectorT256 ,40 ,VectorT256 ,vectort256 instructionset ,X86 , ,VectorT512 ,41 ,VectorT512 ,vectort512 instructionset ,X86 , ,Apx ,48 ,APX ,apx instructionset ,X86 ,Avx10v2 , ,51 ,AVX10v2 ,avx10v2 -instructionset ,X86 ,Avx10v2_V512 ,Avx10v2 ,51 ,AVX10v2 ,avx10v2_v512 +instructionset ,X86 ,Avx10v2_V512 ,Avx10v2 ,51 ,AVX10v2 ,avx10v2 instructionset ,X86 ,Gfni , ,53 ,GFNI ,gfni instructionset ,X86 ,Gfni_V256 , ,54 ,GFNI_V256 ,gfni_v256 
instructionset ,X86 ,Gfni_V512 , ,55 ,GFNI_V512 ,gfni_v512 @@ -166,7 +164,7 @@ copyinstructionsets,X86 ,X64 definearch ,ARM64 ,64Bit ,Arm64, Arm64 instructionset ,ARM64 ,ArmBase , ,16 ,ArmBase ,base -instructionset ,ARM64 ,AdvSimd , ,17 ,AdvSimd ,neon +instructionset ,ARM64 ,AdvSimd ,ArmBase ,16 ,AdvSimd ,neon instructionset ,ARM64 ,Aes , ,9 ,Aes ,aes instructionset ,ARM64 ,Crc32 , ,18 ,Crc32 ,crc instructionset ,ARM64 ,Dp , ,23 ,Dp ,dotprod diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index ccdd5ce42d226e..5ed0617846d8b1 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1256,25 +1256,15 @@ void EEJitManager::SetCpuInfo() // x86-64-v1 - if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic) && - CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE) && - CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE2)) - { - // These ISAs are grouped together and if any are disabled then - // you lose access to all of them. We recommend modern code just - // use EnableHWIntrinsic, but we continue checking the older knobs - // for back-compat + if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic)) + { CPUCompileFlags.Set(InstructionSet_X86Base); } // x86-64-v2 - if (((cpuFeatures & XArchIntrinsicConstants_Sse3) != 0) && - CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3) && - CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3_4)) + if (((cpuFeatures & XArchIntrinsicConstants_Sse3) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3)) { - // We need to additionally check that EXTERNAL_EnableSSE3_4 is set, as that - // is a prexisting config flag that controls the SSE3+ ISAs CPUCompileFlags.Set(InstructionSet_SSE3); } @@ -1337,33 +1327,14 @@ void EEJitManager::SetCpuInfo() // x86-64-v4 - if ((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) - { - if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512) && - 
CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F) && - CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F_VL) && - CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW) && - CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW_VL) && - CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD) && - CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD_VL) && - CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ) && - CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ_VL)) - { - // These ISAs are likewise grouped together and should be checked - // via EnableAVX512 - CPUCompileFlags.Set(InstructionSet_AVX512); - } + if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512)) + { + CPUCompileFlags.Set(InstructionSet_AVX512); } - if ((cpuFeatures & XArchIntrinsicConstants_Avx512Vbmi) != 0) + if (((cpuFeatures & XArchIntrinsicConstants_Avx512Vbmi) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI)) { - if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI) && - CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI_VL)) - { - // These ISAs are likewise grouped together and should be checked - // via EnableAVX512VBMI - CPUCompileFlags.Set(InstructionSet_AVX512VBMI); - } + CPUCompileFlags.Set(InstructionSet_AVX512VBMI); } // Unversioned @@ -1401,31 +1372,22 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_GFNI_V512); } - if ((cpuFeatures & XArchIntrinsicConstants_Avx10v1) != 0) + if (((cpuFeatures & XArchIntrinsicConstants_Avx10v1) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX10v1)) { - if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX10v1)) - { - CPUCompileFlags.Set(InstructionSet_AVX10v1); - } + CPUCompileFlags.Set(InstructionSet_AVX10v1); } - if ((cpuFeatures & XArchIntrinsicConstants_Avx10v2) != 0) + if (((cpuFeatures & 
XArchIntrinsicConstants_Avx10v2) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX10v2)) { - if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX10v2)) - { - CPUCompileFlags.Set(InstructionSet_AVX10v2); - } + CPUCompileFlags.Set(InstructionSet_AVX10v2); } - #if defined(TARGET_AMD64) - if ((cpuFeatures & XArchIntrinsicConstants_Apx) != 0) +#if defined(TARGET_AMD64) + if (((cpuFeatures & XArchIntrinsicConstants_Apx) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAPX)) { - if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAPX)) - { - CPUCompileFlags.Set(InstructionSet_APX); - } + CPUCompileFlags.Set(InstructionSet_APX); } - #endif // TARGET_AMD64 +#endif // TARGET_AMD64 #elif defined(TARGET_ARM64) #if !defined(TARGET_WINDOWS) @@ -1443,10 +1405,6 @@ void EEJitManager::SetCpuInfo() if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic)) { CPUCompileFlags.Set(InstructionSet_ArmBase); - } - - if (((cpuFeatures & ARM64IntrinsicConstants_AdvSimd) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64AdvSimd)) - { CPUCompileFlags.Set(InstructionSet_AdvSimd); } @@ -3737,7 +3695,7 @@ BOOL InterpreterJitManager::LoadInterpreter() m_interpreter = NULL; // If both JIT and interpret are available, statically link the JIT. Interpreter can be loaded dynamically -// via config switch for testing purposes. +// via config switch for testing purposes. 
#if defined(FEATURE_STATICALLY_LINKED) && !defined(FEATURE_JIT) newInterpreter = InitializeStaticJIT(); #else // FEATURE_STATICALLY_LINKED && !FEATURE_JIT diff --git a/src/tests/Common/testenvironment.proj b/src/tests/Common/testenvironment.proj index 209ca36fe2bdcf..fbd55abbe6d063 100644 --- a/src/tests/Common/testenvironment.proj +++ b/src/tests/Common/testenvironment.proj @@ -18,25 +18,12 @@ DOTNET_EnableCrashReport; DOTNET_DbgEnableElfDumpOnMacOS; DOTNET_DbgMiniDumpName; - DOTNET_EnableAES; DOTNET_EnableAVX; DOTNET_EnableAVX2; - DOTNET_EnableAVX512F; - DOTNET_EnableBMI1; - DOTNET_EnableBMI2; - DOTNET_EnableFMA; + DOTNET_EnableAVX512; DOTNET_EnableHWIntrinsic; DOTNET_EnableIncompleteISAClass; - DOTNET_EnableLZCNT; - DOTNET_EnablePCLMULQDQ; - DOTNET_EnablePOPCNT; - DOTNET_EnableSSE; - DOTNET_EnableSSE2; DOTNET_EnableSSE3; - DOTNET_EnableSSE3_4; - DOTNET_EnableSSE41; - DOTNET_EnableSSE42; - DOTNET_EnableSSSE3; DOTNET_EnableAPX; DOTNET_JitStressEvexEncoding; DOTNET_PreferredVectorBitWidth; @@ -118,63 +105,29 @@ - - - - - - - - - - - - - - - - + + - - + + - - - - - - - - - - - - - - - - + + + + + + - - - - - - - - - - - - - - - + + + + + diff --git a/src/tests/JIT/Regression/JitBlue/GitHub_65988/GitHub_65988.csproj b/src/tests/JIT/Regression/JitBlue/GitHub_65988/GitHub_65988.csproj index 08a672be37d856..b902a49be2a3c7 100644 --- a/src/tests/JIT/Regression/JitBlue/GitHub_65988/GitHub_65988.csproj +++ b/src/tests/JIT/Regression/JitBlue/GitHub_65988/GitHub_65988.csproj @@ -11,10 +11,10 @@ - + - \ No newline at end of file + diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_64764/Runtime_64764.cs b/src/tests/JIT/Regression/JitBlue/Runtime_64764/Runtime_64764.cs index f4b976980b351c..43af6404c42c81 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_64764/Runtime_64764.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_64764/Runtime_64764.cs @@ -136,7 +136,6 @@ public static void TestEntryPoint() /* set DOTNET_TieredCompilation=0 set DOTNET_JitDoCopyProp=1 -set 
DOTNET_EnableSSE41=1 set DOTNET_JitStress=2 set DOTNET_GCStress=0xC set DOTNET_AltJitName=clrjit_win_x86_x64.dll diff --git a/src/tests/readytorun/JittedMethodsCountingTest/JittedMethodsCountingTest.cs b/src/tests/readytorun/JittedMethodsCountingTest/JittedMethodsCountingTest.cs index f4ac4837c00a6c..87c11e7e31a40f 100644 --- a/src/tests/readytorun/JittedMethodsCountingTest/JittedMethodsCountingTest.cs +++ b/src/tests/readytorun/JittedMethodsCountingTest/JittedMethodsCountingTest.cs @@ -14,13 +14,12 @@ public class JittedMethodsCountingTest [Fact] public static int TestEntryPoint() { - // If either of DOTNET_ReadyToRun, DOTNET_EnableHWIntrinsics, or - // DOTNET_EnableSSE(2) are disabled (i.e. set to "0"), then this test - // ought to be skipped. - if (!IsReadyToRunEnabled() || !IsHardwareIntrinsicsEnabled() || !IsSSEEnabled()) + // If either of DOTNET_ReadyToRun or DOTNET_EnableHWIntrinsics + // are disabled (i.e. set to "0"), then this test ought to be skipped. + if (!IsReadyToRunEnabled() || !IsHardwareIntrinsicsEnabled()) { Console.WriteLine("\nThis test is only supported in ReadyToRun scenarios" - + " with Hardware Intrinsics and SSE(2) enabled." + + " with Hardware Intrinsics enabled." + " Skipping...\n"); return 100; } @@ -50,13 +49,4 @@ private static bool IsHardwareIntrinsicsEnabled() return (string.IsNullOrEmpty(dotnetEnableHWIntrinsics) || dotnetEnableHWIntrinsics != "0"); } - - private static bool IsSSEEnabled() - { - string? dotnetSSE = Environment.GetEnvironmentVariable("DOTNET_EnableSSE"); - string? 
dotnetSSE2 = Environment.GetEnvironmentVariable("DOTNET_EnableSSE2"); - - return ((string.IsNullOrEmpty(dotnetSSE) || dotnetSSE != "0") - && (string.IsNullOrEmpty(dotnetSSE2) || dotnetSSE2 != "0")); - } } From 1128c31479d128d81e7bbb5c62e6d9c11858cd54 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 27 May 2025 19:51:07 -0700 Subject: [PATCH 5/7] Ensure that AdvSimd is still tracked to ensure Linux works --- .../Compiler/HardwareIntrinsicHelpers.cs | 38 ++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index 665261f4030464..b8239a06e257ce 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -218,19 +218,27 @@ public static int FromInstructionSet(InstructionSet instructionSet) // Keep these enumerations in sync with cpufeatures.h in the minipal. 
private static class Arm64IntrinsicConstants { - public const int Aes = 0x0001; - public const int Crc32 = 0x0002; - public const int Dp = 0x0004; - public const int Rdm = 0x0008; - public const int Sha1 = 0x0010; - public const int Sha256 = 0x0020; - public const int Atomics = 0x0040; - public const int Rcpc = 0x0800; - public const int Rcpc2 = 0x0100; - public const int Sve = 0x0200; + public const int AdvSimd = 0x0001; + public const int Aes = 0x0002; + public const int Crc32 = 0x0004; + public const int Dp = 0x0008; + public const int Rdm = 0x0010; + public const int Sha1 = 0x0020; + public const int Sha256 = 0x0040; + public const int Atomics = 0x0080; + public const int Rcpc = 0x0100; + public const int Rcpc2 = 0x0200; + public const int Sve = 0x0400; + public const int Sve2 = 0x0800; public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { + if ((flags & AdvSimd) != 0) + { + // We need to explicitly track AdvSimd as some Linux machines can + // still be encountered without it and we need to fail to launch + builder.AddSupportedInstructionSet("neon"); + } if ((flags & Aes) != 0) builder.AddSupportedInstructionSet("aes"); if ((flags & Crc32) != 0) @@ -251,6 +259,8 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) builder.AddSupportedInstructionSet("rcpc2"); if ((flags & Sve) != 0) builder.AddSupportedInstructionSet("sve"); + if ((flags & Sve2) != 0) + builder.AddSupportedInstructionSet("sve2"); } public static int FromInstructionSet(InstructionSet instructionSet) @@ -261,8 +271,8 @@ public static int FromInstructionSet(InstructionSet instructionSet) // Baseline ISAs - they're always available InstructionSet.ARM64_ArmBase => 0, InstructionSet.ARM64_ArmBase_Arm64 => 0, - InstructionSet.ARM64_AdvSimd => 0, - InstructionSet.ARM64_AdvSimd_Arm64 => 0, + InstructionSet.ARM64_AdvSimd => AdvSimd, + InstructionSet.ARM64_AdvSimd_Arm64 => AdvSimd, // Optional ISAs - only available via opt-in or opportunistic 
light-up InstructionSet.ARM64_Aes => Aes, @@ -282,9 +292,11 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.ARM64_Rcpc2 => Rcpc2, InstructionSet.ARM64_Sve => Sve, InstructionSet.ARM64_Sve_Arm64 => Sve, + InstructionSet.ARM64_Sve2 => Sve2, + InstructionSet.ARM64_Sve2_Arm64 => Sve2, // Vector Sizes - InstructionSet.ARM64_VectorT128 => 0, + InstructionSet.ARM64_VectorT128 => AdvSimd, _ => throw new NotSupportedException(((InstructionSet_ARM64)instructionSet).ToString()) }; From 50c85ee9baba8ce7ae934b6247902271b4f207df Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 28 May 2025 09:40:43 -0700 Subject: [PATCH 6/7] Add back the R2R values for back-compat --- src/coreclr/inc/corinfoinstructionset.h | 31 ++++++++++++++-- src/coreclr/inc/readytoruninstructionset.h | 16 ++++++++- .../Compiler/HardwareIntrinsicHelpers.cs | 35 ++++++++----------- .../tools/Common/InstructionSetHelpers.cs | 2 +- .../Runtime/ReadyToRunInstructionSet.cs | 16 ++++++++- .../Runtime/ReadyToRunInstructionSetHelper.cs | 10 +++--- .../JitInterface/CorInfoInstructionSet.cs | 2 ++ .../ThunkGenerator/InstructionSetDesc.txt | 29 +++++++-------- src/coreclr/vm/codeman.cpp | 11 ------ src/native/minipal/cpufeatures.c | 13 ++----- src/native/minipal/cpufeatures.h | 23 ++++++------ 11 files changed, 109 insertions(+), 79 deletions(-) diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index 90d0574efc1a8a..ba3f47eae1d81b 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -924,6 +924,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst { #ifdef TARGET_ARM64 case READYTORUN_INSTRUCTION_ArmBase: return InstructionSet_ArmBase; + case READYTORUN_INSTRUCTION_AdvSimd: return InstructionSet_AdvSimd; case READYTORUN_INSTRUCTION_Aes: return InstructionSet_Aes; case READYTORUN_INSTRUCTION_Crc32: return InstructionSet_Crc32; case 
READYTORUN_INSTRUCTION_Dp: return InstructionSet_Dp; @@ -944,6 +945,8 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst #endif // TARGET_RISCV64 #ifdef TARGET_AMD64 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; + case READYTORUN_INSTRUCTION_Sse: return InstructionSet_X86Base; + case READYTORUN_INSTRUCTION_Sse2: return InstructionSet_X86Base; case READYTORUN_INSTRUCTION_Sse3: return InstructionSet_SSE3; case READYTORUN_INSTRUCTION_Ssse3: return InstructionSet_SSSE3; case READYTORUN_INSTRUCTION_Sse41: return InstructionSet_SSE41; @@ -962,20 +965,33 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE; case READYTORUN_INSTRUCTION_X86Serialize: return InstructionSet_X86Serialize; - case READYTORUN_INSTRUCTION_Avx512: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Evex: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Avx512F: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Avx512F_VL: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Avx512BW: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Avx512BW_VL: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Avx512CD: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Avx512CD_VL: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Avx512DQ: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512; case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI; + case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI; case READYTORUN_INSTRUCTION_Avx10v1: return InstructionSet_AVX10v1; + case READYTORUN_INSTRUCTION_Avx10v1_V512: return InstructionSet_AVX10v1; case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case 
READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX; case READYTORUN_INSTRUCTION_Avx10v2: return InstructionSet_AVX10v2; + case READYTORUN_INSTRUCTION_Avx10v2_V512: return InstructionSet_AVX10v2; case READYTORUN_INSTRUCTION_Gfni: return InstructionSet_GFNI; case READYTORUN_INSTRUCTION_Gfni_V256: return InstructionSet_GFNI_V256; case READYTORUN_INSTRUCTION_Gfni_V512: return InstructionSet_GFNI_V512; #endif // TARGET_AMD64 #ifdef TARGET_X86 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; + case READYTORUN_INSTRUCTION_Sse: return InstructionSet_X86Base; + case READYTORUN_INSTRUCTION_Sse2: return InstructionSet_X86Base; case READYTORUN_INSTRUCTION_Sse3: return InstructionSet_SSE3; case READYTORUN_INSTRUCTION_Ssse3: return InstructionSet_SSSE3; case READYTORUN_INSTRUCTION_Sse41: return InstructionSet_SSE41; @@ -994,14 +1010,25 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI; case READYTORUN_INSTRUCTION_Movbe: return InstructionSet_MOVBE; case READYTORUN_INSTRUCTION_X86Serialize: return InstructionSet_X86Serialize; - case READYTORUN_INSTRUCTION_Avx512: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Evex: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Avx512F: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Avx512F_VL: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Avx512BW: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Avx512BW_VL: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Avx512CD: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Avx512CD_VL: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Avx512DQ: return InstructionSet_AVX512; + case READYTORUN_INSTRUCTION_Avx512DQ_VL: return 
InstructionSet_AVX512; case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI; + case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI; case READYTORUN_INSTRUCTION_Avx10v1: return InstructionSet_AVX10v1; + case READYTORUN_INSTRUCTION_Avx10v1_V512: return InstructionSet_AVX10v1; case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; case READYTORUN_INSTRUCTION_Apx: return InstructionSet_APX; case READYTORUN_INSTRUCTION_Avx10v2: return InstructionSet_AVX10v2; + case READYTORUN_INSTRUCTION_Avx10v2_V512: return InstructionSet_AVX10v2; case READYTORUN_INSTRUCTION_Gfni: return InstructionSet_GFNI; case READYTORUN_INSTRUCTION_Gfni_V256: return InstructionSet_GFNI_V256; case READYTORUN_INSTRUCTION_Gfni_V512: return InstructionSet_GFNI_V512; diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index 26f0653b6d7096..90738664b7b042 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -9,6 +9,8 @@ #define READYTORUNINSTRUCTIONSET_H enum ReadyToRunInstructionSet { + READYTORUN_INSTRUCTION_Sse=1, + READYTORUN_INSTRUCTION_Sse2=2, READYTORUN_INSTRUCTION_Sse3=3, READYTORUN_INSTRUCTION_Ssse3=4, READYTORUN_INSTRUCTION_Sse41=5, @@ -23,6 +25,7 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Pclmulqdq=14, READYTORUN_INSTRUCTION_Popcnt=15, READYTORUN_INSTRUCTION_ArmBase=16, + READYTORUN_INSTRUCTION_AdvSimd=17, READYTORUN_INSTRUCTION_Crc32=18, READYTORUN_INSTRUCTION_Sha1=19, READYTORUN_INSTRUCTION_Sha256=20, @@ -34,18 +37,29 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Rcpc=26, READYTORUN_INSTRUCTION_Movbe=27, READYTORUN_INSTRUCTION_X86Serialize=28, - READYTORUN_INSTRUCTION_Avx512=29, + READYTORUN_INSTRUCTION_Avx512F=29, + READYTORUN_INSTRUCTION_Avx512F_VL=30, + 
READYTORUN_INSTRUCTION_Avx512BW=31, + READYTORUN_INSTRUCTION_Avx512BW_VL=32, + READYTORUN_INSTRUCTION_Avx512CD=33, + READYTORUN_INSTRUCTION_Avx512CD_VL=34, + READYTORUN_INSTRUCTION_Avx512DQ=35, + READYTORUN_INSTRUCTION_Avx512DQ_VL=36, READYTORUN_INSTRUCTION_Avx512Vbmi=37, + READYTORUN_INSTRUCTION_Avx512Vbmi_VL=38, READYTORUN_INSTRUCTION_VectorT128=39, READYTORUN_INSTRUCTION_VectorT256=40, READYTORUN_INSTRUCTION_VectorT512=41, READYTORUN_INSTRUCTION_Rcpc2=42, READYTORUN_INSTRUCTION_Sve=43, READYTORUN_INSTRUCTION_Avx10v1=44, + READYTORUN_INSTRUCTION_Avx10v1_V512=46, + READYTORUN_INSTRUCTION_Evex=47, READYTORUN_INSTRUCTION_Apx=48, READYTORUN_INSTRUCTION_Pclmulqdq_V256=49, READYTORUN_INSTRUCTION_Pclmulqdq_V512=50, READYTORUN_INSTRUCTION_Avx10v2=51, + READYTORUN_INSTRUCTION_Avx10v2_V512=52, READYTORUN_INSTRUCTION_Gfni=53, READYTORUN_INSTRUCTION_Gfni_V256=54, READYTORUN_INSTRUCTION_Gfni_V512=55, diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs index b8239a06e257ce..023295f97f4b93 100644 --- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs +++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs @@ -218,27 +218,20 @@ public static int FromInstructionSet(InstructionSet instructionSet) // Keep these enumerations in sync with cpufeatures.h in the minipal. 
private static class Arm64IntrinsicConstants { - public const int AdvSimd = 0x0001; - public const int Aes = 0x0002; - public const int Crc32 = 0x0004; - public const int Dp = 0x0008; - public const int Rdm = 0x0010; - public const int Sha1 = 0x0020; - public const int Sha256 = 0x0040; - public const int Atomics = 0x0080; - public const int Rcpc = 0x0100; - public const int Rcpc2 = 0x0200; - public const int Sve = 0x0400; - public const int Sve2 = 0x0800; + public const int Aes = 0x0001; + public const int Crc32 = 0x0002; + public const int Dp = 0x0004; + public const int Rdm = 0x0008; + public const int Sha1 = 0x0010; + public const int Sha256 = 0x0020; + public const int Atomics = 0x0040; + public const int Rcpc = 0x0080; + public const int Rcpc2 = 0x0100; + public const int Sve = 0x0200; + public const int Sve2 = 0x0400; public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags) { - if ((flags & AdvSimd) != 0) - { - // We need to explicitly track AdvSimd as some Linux machines can - // still be encountered without it and we need to fail to launch - builder.AddSupportedInstructionSet("neon"); - } if ((flags & Aes) != 0) builder.AddSupportedInstructionSet("aes"); if ((flags & Crc32) != 0) @@ -271,8 +264,8 @@ public static int FromInstructionSet(InstructionSet instructionSet) // Baseline ISAs - they're always available InstructionSet.ARM64_ArmBase => 0, InstructionSet.ARM64_ArmBase_Arm64 => 0, - InstructionSet.ARM64_AdvSimd => AdvSimd, - InstructionSet.ARM64_AdvSimd_Arm64 => AdvSimd, + InstructionSet.ARM64_AdvSimd => 0, + InstructionSet.ARM64_AdvSimd_Arm64 => 0, // Optional ISAs - only available via opt-in or opportunistic light-up InstructionSet.ARM64_Aes => Aes, @@ -296,7 +289,7 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.ARM64_Sve2_Arm64 => Sve2, // Vector Sizes - InstructionSet.ARM64_VectorT128 => AdvSimd, + InstructionSet.ARM64_VectorT128 => 0, _ => throw new 
NotSupportedException(((InstructionSet_ARM64)instructionSet).ToString()) }; diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index f556d637949d35..dbb043551cf071 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -35,7 +35,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru } else { - instructionSetSupportBuilder.AddSupportedInstructionSet("neon"); // Lower baselines included by implication + instructionSetSupportBuilder.AddSupportedInstructionSet("neon"); } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index bf8005472bab35..ac9681df52cb4a 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -12,6 +12,8 @@ namespace Internal.ReadyToRunConstants { public enum ReadyToRunInstructionSet { + Sse=1, + Sse2=2, Sse3=3, Ssse3=4, Sse41=5, @@ -26,6 +28,7 @@ public enum ReadyToRunInstructionSet Pclmulqdq=14, Popcnt=15, ArmBase=16, + AdvSimd=17, Crc32=18, Sha1=19, Sha256=20, @@ -37,18 +40,29 @@ public enum ReadyToRunInstructionSet Rcpc=26, Movbe=27, X86Serialize=28, - Avx512=29, + Avx512F=29, + Avx512F_VL=30, + Avx512BW=31, + Avx512BW_VL=32, + Avx512CD=33, + Avx512CD_VL=34, + Avx512DQ=35, + Avx512DQ_VL=36, Avx512Vbmi=37, + Avx512Vbmi_VL=38, VectorT128=39, VectorT256=40, VectorT512=41, Rcpc2=42, Sve=43, Avx10v1=44, + Avx10v1_V512=46, + Evex=47, Apx=48, Pclmulqdq_V256=49, Pclmulqdq_V512=50, Avx10v2=51, + Avx10v2_V512=52, Gfni=53, Gfni_V256=54, Gfni_V512=55, diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index c0283cb09eff10..ca9c93ee99ab8d 100644 --- 
a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -25,8 +25,8 @@ public static class ReadyToRunInstructionSetHelper { case InstructionSet.ARM64_ArmBase: return ReadyToRunInstructionSet.ArmBase; case InstructionSet.ARM64_ArmBase_Arm64: return ReadyToRunInstructionSet.ArmBase; - case InstructionSet.ARM64_AdvSimd: return ReadyToRunInstructionSet.ArmBase; - case InstructionSet.ARM64_AdvSimd_Arm64: return ReadyToRunInstructionSet.ArmBase; + case InstructionSet.ARM64_AdvSimd: return ReadyToRunInstructionSet.AdvSimd; + case InstructionSet.ARM64_AdvSimd_Arm64: return ReadyToRunInstructionSet.AdvSimd; case InstructionSet.ARM64_Aes: return ReadyToRunInstructionSet.Aes; case InstructionSet.ARM64_Aes_Arm64: return ReadyToRunInstructionSet.Aes; case InstructionSet.ARM64_Crc32: return ReadyToRunInstructionSet.Crc32; @@ -109,8 +109,8 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_MOVBE: return ReadyToRunInstructionSet.Movbe; case InstructionSet.X64_X86Serialize: return ReadyToRunInstructionSet.X86Serialize; case InstructionSet.X64_X86Serialize_X64: return ReadyToRunInstructionSet.X86Serialize; - case InstructionSet.X64_AVX512: return ReadyToRunInstructionSet.Avx512; - case InstructionSet.X64_AVX512_X64: return ReadyToRunInstructionSet.Avx512; + case InstructionSet.X64_AVX512: return ReadyToRunInstructionSet.Evex; + case InstructionSet.X64_AVX512_X64: return ReadyToRunInstructionSet.Evex; case InstructionSet.X64_AVX512VBMI: return ReadyToRunInstructionSet.Avx512Vbmi; case InstructionSet.X64_AVX512VBMI_X64: return ReadyToRunInstructionSet.Avx512Vbmi; case InstructionSet.X64_AVX10v1: return ReadyToRunInstructionSet.Avx10v1; @@ -172,7 +172,7 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X86_MOVBE: return ReadyToRunInstructionSet.Movbe; case InstructionSet.X86_X86Serialize: return 
ReadyToRunInstructionSet.X86Serialize; case InstructionSet.X86_X86Serialize_X64: return null; - case InstructionSet.X86_AVX512: return ReadyToRunInstructionSet.Avx512; + case InstructionSet.X86_AVX512: return ReadyToRunInstructionSet.Evex; case InstructionSet.X86_AVX512_X64: return null; case InstructionSet.X86_AVX512VBMI: return ReadyToRunInstructionSet.Avx512Vbmi; case InstructionSet.X86_AVX512VBMI_X64: return null; diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 7989e4d0cf2008..4a222d3662d726 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -1161,6 +1161,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X64_AVXVNNI, true); yield return new InstructionSetInfo("movbe", "", InstructionSet.X64_MOVBE, true); yield return new InstructionSetInfo("serialize", "X86Serialize", InstructionSet.X64_X86Serialize, true); + yield return new InstructionSetInfo("avx512", "", InstructionSet.X64_AVX512, true); yield return new InstructionSetInfo("avx512", "Avx512F", InstructionSet.X64_AVX512, true); yield return new InstructionSetInfo("avx512", "Avx512F_VL", InstructionSet.X64_AVX512, true); yield return new InstructionSetInfo("avx512", "Avx512BW", InstructionSet.X64_AVX512, true); @@ -1209,6 +1210,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X86_AVXVNNI, true); yield return new InstructionSetInfo("movbe", "", InstructionSet.X86_MOVBE, true); yield return new InstructionSetInfo("serialize", "X86Serialize", InstructionSet.X86_X86Serialize, true); + yield return new InstructionSetInfo("avx512", "", InstructionSet.X86_AVX512, true); yield return new InstructionSetInfo("avx512", "Avx512F", 
InstructionSet.X86_AVX512, true); yield return new InstructionSetInfo("avx512", "Avx512F_VL", InstructionSet.X86_AVX512, true); yield return new InstructionSetInfo("avx512", "Avx512BW", InstructionSet.X86_AVX512, true); diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index b70b323ae570b6..a0c5aebdb8a980 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -28,8 +28,8 @@ definearch ,X86 ,32Bit ,X64, X64 instructionset ,X86 ,X86Base , ,22 ,X86Base ,base -instructionset ,X86 ,Sse ,X86Base ,22 ,X86Base ,base -instructionset ,X86 ,Sse2 ,X86Base ,22 ,X86Base ,base +instructionset ,X86 ,Sse , ,1 ,X86Base ,base +instructionset ,X86 ,Sse2 , ,2 ,X86Base ,base instructionset ,X86 ,Sse3 , ,3 ,SSE3 ,sse3 instructionset ,X86 ,Ssse3 , ,4 ,SSSE3 ,ssse3 instructionset ,X86 ,Sse41 , ,5 ,SSE41 ,sse4.1 @@ -51,24 +51,25 @@ instructionset ,X86 , , , ,Vector512 instructionset ,X86 ,AvxVnni , ,25 ,AVXVNNI ,avxvnni instructionset ,X86 , ,Movbe ,27 ,MOVBE ,movbe instructionset ,X86 ,X86Serialize , ,28 ,X86Serialize ,serialize -instructionset ,X86 ,Avx512F ,Avx512 ,29 ,AVX512 ,avx512 -instructionset ,X86 ,Avx512F_VL ,Avx512 ,29 ,AVX512 ,avx512 -instructionset ,X86 ,Avx512BW ,Avx512 ,29 ,AVX512 ,avx512 -instructionset ,X86 ,Avx512BW_VL ,Avx512 ,29 ,AVX512 ,avx512 -instructionset ,X86 ,Avx512CD ,Avx512 ,29 ,AVX512 ,avx512 -instructionset ,X86 ,Avx512CD_VL ,Avx512 ,29 ,AVX512 ,avx512 -instructionset ,X86 ,Avx512DQ ,Avx512 ,29 ,AVX512 ,avx512 -instructionset ,X86 ,Avx512DQ_VL ,Avx512 ,29 ,AVX512 ,avx512 +instructionset ,X86 , ,Evex ,47 ,AVX512 ,avx512 +instructionset ,X86 ,Avx512F , ,29 ,AVX512 ,avx512 +instructionset ,X86 ,Avx512F_VL , ,30 ,AVX512 ,avx512 +instructionset ,X86 ,Avx512BW , ,31 ,AVX512 ,avx512 +instructionset ,X86 ,Avx512BW_VL , ,32 ,AVX512 
,avx512 +instructionset ,X86 ,Avx512CD , ,33 ,AVX512 ,avx512 +instructionset ,X86 ,Avx512CD_VL , ,34 ,AVX512 ,avx512 +instructionset ,X86 ,Avx512DQ , ,35 ,AVX512 ,avx512 +instructionset ,X86 ,Avx512DQ_VL , ,36 ,AVX512 ,avx512 instructionset ,X86 ,Avx512Vbmi , ,37 ,AVX512VBMI ,avx512vbmi -instructionset ,X86 ,Avx512Vbmi_VL ,Avx512Vbmi ,37 ,AVX512VBMI ,avx512vbmi +instructionset ,X86 ,Avx512Vbmi_VL , ,38 ,AVX512VBMI ,avx512vbmi instructionset ,X86 ,Avx10v1 , ,44 ,AVX10v1 ,avx10v1 -instructionset ,X86 ,Avx10v1_V512 ,Avx10v1 ,44 ,AVX10v1 ,avx10v1 +instructionset ,X86 ,Avx10v1_V512 , ,46 ,AVX10v1 ,avx10v1 instructionset ,X86 , ,VectorT128 ,39 ,VectorT128 ,vectort128 instructionset ,X86 , ,VectorT256 ,40 ,VectorT256 ,vectort256 instructionset ,X86 , ,VectorT512 ,41 ,VectorT512 ,vectort512 instructionset ,X86 , ,Apx ,48 ,APX ,apx instructionset ,X86 ,Avx10v2 , ,51 ,AVX10v2 ,avx10v2 -instructionset ,X86 ,Avx10v2_V512 ,Avx10v2 ,51 ,AVX10v2 ,avx10v2 +instructionset ,X86 ,Avx10v2_V512 , ,52 ,AVX10v2 ,avx10v2 instructionset ,X86 ,Gfni , ,53 ,GFNI ,gfni instructionset ,X86 ,Gfni_V256 , ,54 ,GFNI_V256 ,gfni_v256 instructionset ,X86 ,Gfni_V512 , ,55 ,GFNI_V512 ,gfni_v512 @@ -164,7 +165,7 @@ copyinstructionsets,X86 ,X64 definearch ,ARM64 ,64Bit ,Arm64, Arm64 instructionset ,ARM64 ,ArmBase , ,16 ,ArmBase ,base -instructionset ,ARM64 ,AdvSimd ,ArmBase ,16 ,AdvSimd ,neon +instructionset ,ARM64 ,AdvSimd , ,17 ,AdvSimd ,neon instructionset ,ARM64 ,Aes , ,9 ,Aes ,aes instructionset ,ARM64 ,Crc32 , ,18 ,Crc32 ,crc instructionset ,ARM64 ,Dp , ,23 ,Dp ,dotprod diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 5ed0617846d8b1..51a11dbd4ee36a 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1389,17 +1389,6 @@ void EEJitManager::SetCpuInfo() } #endif // TARGET_AMD64 #elif defined(TARGET_ARM64) - -#if !defined(TARGET_WINDOWS) - // Linux may still support no AdvSimd - if ((cpuFeatures & ARM64IntrinsicConstants_AdvSimd) == 0) - { - 
EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("AdvSimd processor support required.")); - } -#else - _ASSERTE((cpuFeatures & ARM64IntrinsicConstants_AdvSimd) != 0); -#endif - CPUCompileFlags.Set(InstructionSet_VectorT128); if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic)) diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index e5cabd11b60321..70a866c9bbb8b2 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -416,6 +416,8 @@ int minipal_getcpufeatures(void) #if HAVE_AUXV_HWCAP_H unsigned long hwCap = getauxval(AT_HWCAP); + assert(hwCap & HWCAP_ASIMD); + if (hwCap & HWCAP_AES) result |= ARM64IntrinsicConstants_Aes; @@ -440,9 +442,6 @@ int minipal_getcpufeatures(void) if (hwCap & HWCAP_SHA2) result |= ARM64IntrinsicConstants_Sha256; - if (hwCap & HWCAP_ASIMD) - result |= ARM64IntrinsicConstants_AdvSimd; - if (hwCap & HWCAP_ASIMDRDM) result |= ARM64IntrinsicConstants_Rdm; @@ -487,18 +486,10 @@ int minipal_getcpufeatures(void) if ((sysctlbyname("hw.optional.arm.FEAT_LRCPC2", &valueFromSysctl, &sz, NULL, 0) == 0) && (valueFromSysctl != 0)) result |= ARM64IntrinsicConstants_Rcpc2; #endif // HAVE_SYSCTLBYNAME - - // Every ARM64 CPU should support SIMD and FP - // If the OS have no function to query for CPU capabilities we set just these - - result |= ARM64IntrinsicConstants_AdvSimd; #endif // HAVE_AUXV_HWCAP_H #endif // HOST_UNIX #if defined(HOST_WINDOWS) - // FP and SIMD support are enabled by default - result |= ARM64IntrinsicConstants_AdvSimd; - if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) { result |= ARM64IntrinsicConstants_Aes; diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index a0671b5fea3401..0076a0b03b0c47 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -40,18 +40,17 @@ enum XArchIntrinsicConstants #if defined(HOST_ARM64) enum ARM64IntrinsicConstants { - 
ARM64IntrinsicConstants_AdvSimd = 0x0001, - ARM64IntrinsicConstants_Aes = 0x0002, - ARM64IntrinsicConstants_Crc32 = 0x0004, - ARM64IntrinsicConstants_Dp = 0x0008, - ARM64IntrinsicConstants_Rdm = 0x0010, - ARM64IntrinsicConstants_Sha1 = 0x0020, - ARM64IntrinsicConstants_Sha256 = 0x0040, - ARM64IntrinsicConstants_Atomics = 0x0080, - ARM64IntrinsicConstants_Rcpc = 0x0100, - ARM64IntrinsicConstants_Rcpc2 = 0x0200, - ARM64IntrinsicConstants_Sve = 0x0400, - ARM64IntrinsicConstants_Sve2 = 0x0800, + ARM64IntrinsicConstants_Aes = 0x0001, + ARM64IntrinsicConstants_Crc32 = 0x0002, + ARM64IntrinsicConstants_Dp = 0x0004, + ARM64IntrinsicConstants_Rdm = 0x0008, + ARM64IntrinsicConstants_Sha1 = 0x0010, + ARM64IntrinsicConstants_Sha256 = 0x0020, + ARM64IntrinsicConstants_Atomics = 0x0040, + ARM64IntrinsicConstants_Rcpc = 0x0080, + ARM64IntrinsicConstants_Rcpc2 = 0x0100, + ARM64IntrinsicConstants_Sve = 0x0200, + ARM64IntrinsicConstants_Sve2 = 0x0400, }; #include From ec9d4fb45b880415ab4e68388f39550432164212 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 28 May 2025 11:43:30 -0700 Subject: [PATCH 7/7] Adjust ARM64_ATOMICS_FEATURE_FLAG_BIT to match the new bit position --- src/coreclr/nativeaot/Runtime/AsmOffsets.h | 4 ++-- src/native/minipal/cpufeatures.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/AsmOffsets.h b/src/coreclr/nativeaot/Runtime/AsmOffsets.h index 0284daba94d541..1bf3a0c3e06338 100644 --- a/src/coreclr/nativeaot/Runtime/AsmOffsets.h +++ b/src/coreclr/nativeaot/Runtime/AsmOffsets.h @@ -37,8 +37,8 @@ ASM_CONST(3FFFFFDF,3FFFFFDF,MAX_STRING_LENGTH) #if defined(HOST_ARM64) // Bit position for the ARM64IntrinsicConstants_Atomics flags, to be used with tbz / tbnz instructions -// ARM64IntrinsicConstants_Atomics = 0x0080 -ASM_CONST( 7, 7, ARM64_ATOMICS_FEATURE_FLAG_BIT) +// ARM64IntrinsicConstants_Atomics = 0x0040 +ASM_CONST( 6, 6, ARM64_ATOMICS_FEATURE_FLAG_BIT) #endif ASM_OFFSET( 0, 0, MethodTable, 
m_usComponentSize) diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index 0076a0b03b0c47..5f3df31debfdbb 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -56,7 +56,7 @@ enum ARM64IntrinsicConstants #include // Bit position for the ARM64IntrinsicConstants_Atomics flags, to be used with tbz / tbnz instructions -#define ARM64_ATOMICS_FEATURE_FLAG_BIT 7 +#define ARM64_ATOMICS_FEATURE_FLAG_BIT 6 static_assert((1 << ARM64_ATOMICS_FEATURE_FLAG_BIT) == ARM64IntrinsicConstants_Atomics, "ARM64_ATOMICS_FEATURE_FLAG_BIT must match with ARM64IntrinsicConstants_Atomics"); #endif // HOST_ARM64