From e2cd8bc7ee4b6b05d293047b30cc1accf31c73cd Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 17 Apr 2026 12:47:04 -0700 Subject: [PATCH 1/2] Add support for utilizing F16C instructions on xarch --- src/coreclr/jit/compiler.h | 18 +++- src/coreclr/jit/emitxarch.cpp | 2 + src/coreclr/jit/hwintrinsiclistxarch.h | 4 + src/coreclr/jit/importer.cpp | 53 ++++++++++ src/coreclr/jit/importercalls.cpp | 96 +++++++++++++++++++ src/coreclr/jit/lowerxarch.cpp | 24 +++++ src/coreclr/jit/namedintrinsiclist.h | 2 + .../System.Private.CoreLib/src/System/Half.cs | 3 + 8 files changed, 201 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 10e36182e61692..853beb5c06a7e8 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1722,7 +1722,7 @@ struct FuncInfoDsc }; jitstd::vector* funWasmLocalDecls; -#endif // defined(TARGET_WASM) +#endif // defined(TARGET_WASM) EHblkDsc* GetEHDesc(Compiler* comp) const; BasicBlock* GetStartBlock(Compiler* comp) const; @@ -4778,6 +4778,11 @@ class Compiler GenTree* impImportLdvirtftn(GenTree* thisPtr, CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo); +#if defined(FEATURE_HW_INTRINSICS) + GenTree* impSimdCreateScalarHalf(GenTree* op1); + GenTree* impSimdToScalarHalf(GenTree* op1, CORINFO_CLASS_HANDLE halfClsHnd); +#endif // FEATURE_HW_INTRINSICS + enum class BoxPatterns { None = 0, @@ -9705,6 +9710,17 @@ class Compiler return isOpaqueSIMDType(varDsc->GetLayout()); } + bool isSystemHalfClass(CORINFO_CLASS_HANDLE clsHnd) + { + if (isIntrinsicType(clsHnd)) + { + const char* namespaceName = nullptr; + const char* className = getClassNameFromMetadata(clsHnd, &namespaceName); + return (strcmp(className, "Half") == 0) && (strcmp(namespaceName, "System") == 0); + } + return false; + } + bool isSIMDClass(CORINFO_CLASS_HANDLE clsHnd) { if (isIntrinsicType(clsHnd)) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index ff65e2d883a25f..e4d89e60d1c921 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -21249,6 +21249,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_cvtpd2ps: case INS_cvttpd2dq: case INS_vcvtpd2udq: + case INS_vcvtph2ps: + case INS_vcvtps2ph: case INS_vcvtps2qq: case INS_vcvtps2uqq: case INS_vcvtqq2ps: diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 9cccc717fd1351..67df50e186762d 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -732,9 +732,13 @@ HARDWARE_INTRINSIC(AVX2, CompareGreaterThan, HARDWARE_INTRINSIC(AVX2, CompareLessThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics|HW_Flag_SpecialImport) HARDWARE_INTRINSIC(AVX2, ConvertToInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX2, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX2, ConvertToVector128Half, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ph, INS_invalid}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vcvtph2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX2, ConvertToVector256Half, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2ph, INS_invalid}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg|HW_Flag_FullRangeIMM) HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int16, 32, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int32, 32, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int64, 32, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) +HARDWARE_INTRINSIC(AVX2, ConvertToVector256Single, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_vcvtph2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX2, ExtractLowestSetBit, 0, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, ExtractVector128, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti32x4, INS_vextracti32x4, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NormalizeSmallTypeToInt) HARDWARE_INTRINSIC(AVX2, GatherMaskVector128, 16, 5, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpgatherdd, INS_vpgatherdd, INS_vpgatherdq, INS_vpgatherdq, INS_vgatherdps, INS_vgatherdpd}, HW_Category_IMM, HW_Flag_MaybeMemoryLoad|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoEvexSemantics) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index bf542265421835..24a84b1f2eed60 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -2803,6 +2803,59 @@ GenTree* Compiler::impImportLdvirtftn(GenTree* thisPtr, return call; } +#if defined(FEATURE_HW_INTRINSICS) +//---------------------------------------------------------------------------------------------- +// Compiler::impSimdCreateScalarHalf: Creates a new Vector128.CreateScalar node for a System.Half value +// +// Arguments:d +// op1 - The System.Half value +// +// Returns: +// The Vector128.CreateScalar node that contains op1 +// +GenTree* Compiler::impSimdCreateScalarHalf(GenTree* op1) +{ + unsigned op1Tmp; + + if (!op1->OperIs(GT_LCL_VAR)) + { + op1Tmp = lvaGrabTemp(true DEBUGARG("System.Half tmp")); + impStoreToTemp(op1Tmp, op1, CHECK_SPILL_ALL); + } + else + { + op1Tmp = op1->AsLclVarCommon()->GetLclNum(); + } + + op1 = gtNewLclFldNode(op1Tmp, TYP_USHORT, 0); + return gtNewSimdCreateScalarNode(TYP_SIMD16, op1, TYP_USHORT, 16); +} + +//---------------------------------------------------------------------------------------------- +// Compiler::impSimdToScalarHalf: Creates a new Vector128.ToScalar node for a System.Half value +// +// Arguments: +// op1 - The Vector128 from which to extract the System.Half value +// halfClsHnd - The class handle for System.Half +// +// Returns: +// The System.Half value extracted from op1 +// +GenTree* Compiler::impSimdToScalarHalf(GenTree* op1, CORINFO_CLASS_HANDLE halfClsHnd) +{ + assert(isSystemHalfClass(halfClsHnd)); + + unsigned resTmp = lvaGrabTemp(true DEBUGARG("System.Half tmp")); + lvaSetStruct(resTmp, halfClsHnd, false); + + op1 = gtNewSimdToScalarNode(TYP_INT, op1, TYP_USHORT, 16); + op1 = gtNewStoreLclFldNode(resTmp, TYP_USHORT, 0, op1); + + impAppendTree(op1, CHECK_SPILL_ALL, impCurStmtDI); + return gtNewLclvNode(resTmp, TYP_STRUCT); +} +#endif // FEATURE_HW_INTRINSICS + //------------------------------------------------------------------------ // impInlineUnboxNullable: Generate code for unboxing Nullable from an object (obj) // We either inline the unbox operation (if profitable) or call the helper. diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index ee5a882cd9bcf6..427e795da7c8eb 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -4355,6 +4355,79 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, } #ifdef FEATURE_HW_INTRINSICS + case NI_System_Half_op_Explicit: + { + assert(sig->numArgs == 1); + + CORINFO_CLASS_HANDLE retClsHnd = sig->retTypeSigClass; + CorInfoType retJitType = sig->retType; + var_types retType = JitType2PreciseVarType(retJitType); + + CORINFO_CLASS_HANDLE op1ClsHnd; + CorInfoType op1JitType = strip(info.compCompHnd->getArgType(sig, sig->args, &op1ClsHnd)); + var_types op1Type = JitType2PreciseVarType(op1JitType); + + if (retType == TYP_STRUCT) + { + assert(isSystemHalfClass(retClsHnd)); + assert(varTypeIsArithmetic(op1Type)); + + switch (op1Type) + { + case TYP_FLOAT: + { +#if defined(TARGET_XARCH) + if (compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + GenTree* op1 = impPopStack().val; + op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, TYP_FLOAT, 16); + + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, gtNewIconNode(0), + NI_AVX2_ConvertToVector128Half, TYP_FLOAT, 16); + retNode = impSimdToScalarHalf(retNode, retClsHnd); + } +#endif + break; + } + + default: + { + unreached(); + } + } + } + else + { + assert(varTypeIsArithmetic(retType)); + assert((op1Type == TYP_STRUCT) && isSystemHalfClass(op1ClsHnd)); + + switch (retType) + { + case TYP_FLOAT: + { +#if defined(TARGET_XARCH) + if (compOpportunisticallyDependsOn(InstructionSet_AVX2)) + { + GenTree* op1 = impPopStack().val; + op1 = impSimdCreateScalarHalf(op1); + + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_AVX2_ConvertToVector128Single, + TYP_USHORT, 16); + retNode = gtNewSimdToScalarNode(TYP_FLOAT, retNode, TYP_FLOAT, 16); + } +#endif + break; + } + + default: + { + unreached(); + } + } + } + break; + } + case NI_System_Math_FusedMultiplyAdd: { assert(varTypeIsFloating(callType)); @@ -10454,6 +10527,15 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) break; } + case 'H': + { + if (strcmp(className, "Half") == 0) + { + result = lookupPrimitiveFloatNamedIntrinsic(method, methodName); + } + break; + } + case 'I': { if ((strcmp(className, "Int32") == 0) || (strcmp(className, "Int64") == 0) || @@ -11695,6 +11777,20 @@ NamedIntrinsic Compiler::lookupPrimitiveFloatNamedIntrinsic(CORINFO_METHOD_HANDL break; } + case 'o': + { + if (strncmp(methodName, "op_", 3) == 0) + { + methodName += 3; + + if (strcmp(methodName, "Explicit") == 0) + { + result = NI_System_Half_op_Explicit; + } + } + break; + } + default: { break; diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 8fbed3c79d41d3..61fef20d231ccc 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -7647,6 +7647,22 @@ void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node) break; } + case NI_AVX2_ConvertToVector128Half: + case NI_AVX2_ConvertToVector256Half: + { + // These intrinsics are "ins xmm/mem, xmm, imm8" + // and store half the width of the input vector + + size_t numArgs = hwintrinsic->GetOperandCount(); + GenTree* lastOp = hwintrinsic->Op(numArgs); + unsigned simdSize = hwintrinsic->GetSimdSize(); + unsigned memSize = (simdSize / 2); + + isContainable = HWIntrinsicInfo::isImmOp(intrinsicId, lastOp) && lastOp->IsCnsIntOrI() && + (genTypeSize(node) == memSize); + break; + } + default: { break; @@ -9815,6 +9831,14 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) return; } + case NI_AVX2_ConvertToVector128Half: + case NI_AVX2_ConvertToVector256Half: + { + // These intrinsics are "ins xmm/mem, xmm, imm8" and get + // contained by the relevant store operation instead. + break; + } + default: { assert(!"Unhandled containment for binary hardware intrinsic with immediate operand"); diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h index 5856f644e20e89..b7e75a2c4a39b7 100644 --- a/src/coreclr/jit/namedintrinsiclist.h +++ b/src/coreclr/jit/namedintrinsiclist.h @@ -25,6 +25,8 @@ enum NamedIntrinsic : unsigned short NI_System_SpanHelpers_Memmove, + NI_System_Half_op_Explicit, + NI_SYSTEM_MATH_START, NI_System_Math_Abs, NI_System_Math_Acos, diff --git a/src/libraries/System.Private.CoreLib/src/System/Half.cs b/src/libraries/System.Private.CoreLib/src/System/Half.cs index de4b6b925a544d..8fe7b7be916ddd 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Half.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Half.cs @@ -16,6 +16,7 @@ namespace System /// /// Represents a half-precision floating-point number. /// + [Intrinsic] [StructLayout(LayoutKind.Sequential)] public readonly struct Half : IComparable, @@ -617,6 +618,7 @@ public static explicit operator Half(double value) /// Explicitly converts a value to its nearest representable half-precision floating-point value. /// The value to convert. /// converted to its nearest representable half-precision floating-point value. + [Intrinsic] public static explicit operator Half(float value) { #region Explanation of this algorithm @@ -1021,6 +1023,7 @@ public static explicit operator double(Half value) /// Explicitly converts a half-precision floating-point value to its nearest representable value. /// The value to convert. /// converted to its nearest representable value. + [Intrinsic] public static explicit operator float(Half value) { #region Explanation of this algorithm From c3ad72cafb4874601e76c618f36c993b644b021b Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 17 Apr 2026 14:51:45 -0700 Subject: [PATCH 2/2] Remove a typo --- src/coreclr/jit/importer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 24a84b1f2eed60..6bcad0f2f418d2 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -2807,7 +2807,7 @@ GenTree* Compiler::impImportLdvirtftn(GenTree* thisPtr, //---------------------------------------------------------------------------------------------- // Compiler::impSimdCreateScalarHalf: Creates a new Vector128.CreateScalar node for a System.Half value // -// Arguments:d +// Arguments: // op1 - The System.Half value // // Returns: