From 7bae37a4307ce4ccd0b3d67fb54218460f62b2ee Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 9 May 2023 16:22:00 -0700 Subject: [PATCH 01/13] fixing the JITDbl2Ulng helper function. The new AVX512 instruction vcvtsd2usi uses ulong.max_value to show FPE for negative, NAN and ulong_max + 1 values. --- src/coreclr/vm/jithelpers.cpp | 5 +++++ .../out_of_range_fp_to_int_conversions.cpp | 15 ++++----------- .../out_of_range_fp_to_int_conversions.cs | 16 ++-------------- 3 files changed, 11 insertions(+), 25 deletions(-) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 084281763c0107..1969132b41f922 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -589,7 +589,11 @@ HCIMPLEND HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) { FCALL_CONTRACT; +#if defined(TARGET_X86) || defined(TARGET_AMD64) + const double uint64_max_plus_1 = -2.0 * (double)INT64_MIN; + return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? UINT64_MAX : (UINT64)val; +#else const double two63 = 2147483648.0 * 4294967296.0; UINT64 ret; if (val < two63) { @@ -600,6 +604,7 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) ret = FastDbl2Lng(val - two63) + I64(0x8000000000000000); } return ret; +#endif } HCIMPLEND diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp index eaf7f2fa1a9daa..db690e1160f809 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp @@ -124,6 +124,7 @@ extern "C" DLLEXPORT uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver if (t == CONVERT_NATIVECOMPILERBEHAVIOR) return (uint64_t)x; + double input_val = x; x = trunc(x); // truncate (round toward zero) // (double)UINT64_MAX cannot be represented exactly as double @@ -153,18 +154,10 @@ extern "C" DLLEXPORT uint64_t ConvertDoubleToUInt64(double x, 
FPtoIntegerConver return (uint64_t)ConvertDoubleToInt64(x - int64_max_plus_1, CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000); } } - + case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: - if (x < int64_max_plus_1) - { - return (x < INT64_MIN) ? (uint64_t)INT64_MIN : (uint64_t)(int64_t)x; - } - else - { - x -= int64_max_plus_1; - x = trunc(x); - return (uint64_t)(((x != x) || (x >= int64_max_plus_1)) ? INT64_MIN : (int64_t)x) + (0x8000000000000000); - } + return ((input_val != input_val) || (input_val < 0) || (input_val >= uint64_max_plus_1)) ? UINT64_MAX : (uint64_t)input_val; + case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning return 0; } diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs index 5b78783c09e4ca..49197e7965febd 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs @@ -171,6 +171,7 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t) if (t == FPtoIntegerConversionType.CONVERT_NATIVECOMPILERBEHAVIOR) return (ulong)x; + double input_val = x; x = Truncate(x); // truncate (round toward zero) // (double)ULLONG_MAX cannot be represented exactly as double @@ -199,21 +200,8 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t) return (ulong)ConvertDoubleToInt64(x - two63, FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000); } } - case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: - - if (x < two63) - { - return (x < long.MinValue) ? 
unchecked((ulong)long.MinValue) : (ulong)(long)x; - } - else - { - // (double)LLONG_MAX cannot be represented exactly as double - const double llong_max_plus_1 = (double)((ulong)long.MaxValue + 1); - x -= two63; - x = Math.Truncate(x); - return (ulong)((Double.IsNaN(x) || (x >= llong_max_plus_1)) ? long.MinValue : (long)x) + (0x8000000000000000); - } + return (Double.IsNaN(input_val) || (input_val < 0) || (input_val >= ullong_max_plus_1)) ? ulong.MaxValue : (ulong)input_val; } return 0; From 73dc4c467b22bfa5e8b601bf8023c10b25e293e8 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 10 May 2023 03:20:29 -0700 Subject: [PATCH 02/13] Making changes to the library test case expected output based on the architecture. This is because we have changed the JITDbl2Ulng helper function to mimic the new IEEE compliant AVX512 instruction vcvtsd2usi. In the process, we needed to update the library test case because the default Floating Point Error (FPE) value for the new instruction is different from the default MSVC FPE value i.e. 0. 
--- .../tests/System/UIntPtrTests.GenericMath.cs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs b/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs index 2e752a91af21f4..117c87db6ce9eb 100644 --- a/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs +++ b/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs @@ -12,6 +12,7 @@ public class UIntPtrTests_GenericMath // // IAdditionOperators // + public static Architecture arch = RuntimeInformation.ProcessArchitecture; [Fact] public static void op_AdditionTest() @@ -2223,7 +2224,7 @@ public static void CreateSaturatingFromDoubleTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateSaturating(double.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(double.MinValue)); - Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(double.NaN)); + Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateSaturating(double.NaN)); } [Fact] @@ -2244,7 +2245,7 @@ public static void CreateSaturatingFromHalfTest() Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(Half.NegativeInfinity)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(Half.MinValue)); - Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(Half.NaN)); + Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateSaturating(Half.NaN)); } [Fact] @@ -2351,7 +2352,7 @@ public static void CreateSaturatingFromNFloatTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateSaturating(NFloat.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(NFloat.MinValue)); - Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(NFloat.NaN)); + Assert.Equal((arch == Architecture.X86 || arch == 
Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateSaturating(NFloat.NaN)); } [Fact] @@ -2396,7 +2397,7 @@ public static void CreateSaturatingFromSingleTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateSaturating(float.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(float.MinValue)); - Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(float.NaN)); + Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateSaturating(float.NaN)); } [Fact] @@ -2535,7 +2536,7 @@ public static void CreateTruncatingFromDoubleTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateTruncating(double.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(double.MinValue)); - Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(double.NaN)); + Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateTruncating(double.NaN)); } [Fact] @@ -2556,7 +2557,7 @@ public static void CreateTruncatingFromHalfTest() Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(Half.NegativeInfinity)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(Half.MinValue)); - Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(Half.NaN)); + Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateTruncating(Half.NaN)); } [Fact] @@ -2685,7 +2686,7 @@ public static void CreateTruncatingFromNFloatTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateTruncating(NFloat.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(NFloat.MinValue)); - Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(NFloat.NaN)); + Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateTruncating(NFloat.NaN)); } [Fact] 
@@ -2741,7 +2742,7 @@ public static void CreateTruncatingFromSingleTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateTruncating(float.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(float.MinValue)); - Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(float.NaN)); + Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateTruncating(float.NaN)); } [Fact] From fbc134db9c7c7df88bb4ce79dc1b093cda645522 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 12 May 2023 13:32:23 -0700 Subject: [PATCH 03/13] Fixing the JITDbl2Ulng helper function. Also making sure that we are not changing the library test case but the API to make sure NaN cases are handled. --- src/coreclr/scripts/jitformat.py | 12 +++++++----- src/coreclr/vm/jithelpers.cpp | 3 ++- .../System.Private.CoreLib/src/System/Double.cs | 2 +- .../System.Private.CoreLib/src/System/Half.cs | 2 +- .../src/System/Runtime/InteropServices/NFloat.cs | 2 +- .../System.Private.CoreLib/src/System/Single.cs | 2 +- .../tests/System/UIntPtrTests.GenericMath.cs | 16 ++++++++-------- .../out_of_range_fp_to_int_conversions.cpp | 6 ++---- .../out_of_range_fp_to_int_conversions.cs | 5 ++--- 9 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/coreclr/scripts/jitformat.py b/src/coreclr/scripts/jitformat.py index 51a096c59cd3cf..497a5a12290e3f 100644 --- a/src/coreclr/scripts/jitformat.py +++ b/src/coreclr/scripts/jitformat.py @@ -21,6 +21,7 @@ import tarfile import tempfile import zipfile +import time class ChangeDir: def __init__(self, dir): @@ -81,7 +82,7 @@ def main(argv): args, unknown = parser.parse_known_args(argv) if unknown: - logging.warning('Ignoring argument(s): {}'.format(','.join(unknown))) + logging.warn('Ignoring argument(s): {}'.format(','.join(unknown))) if args.coreclr is None: logging.error('Specify --coreclr') @@ -140,10 +141,11 @@ def main(argv): bootstrapPath = 
os.path.join(temp_location, bootstrapFilename) assert len(os.listdir(os.path.dirname(bootstrapPath))) == 0 - - if not jitutil.download_one_url(bootstrapUrl, bootstrapPath): - logging.error("Did not download bootstrap!") - return -1 + print(bootstrapPath) + time.sleep(60) + # if not jitutil.download_one_url(bootstrapUrl, bootstrapPath): + # logging.error("Did not download bootstrap!") + # return -1 if platform == 'windows': # Need to ensure we have Windows line endings on the downloaded script file, diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 1969132b41f922..b8dc01a69114bd 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -590,8 +590,9 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) { FCALL_CONTRACT; #if defined(TARGET_X86) || defined(TARGET_AMD64) + const double uint64_max_plus_1 = -2.0 * (double)INT64_MIN; - return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? UINT64_MAX : (UINT64)val; + return ((val != val) || ((val < 0) && (val + 1 <= 0)) || (val >= uint64_max_plus_1)) ? UINT64_MAX : ((val < 0) && (val + 1 > 0)) ? 0 : (UINT64)val; #else const double two63 = 2147483648.0 * 4294967296.0; diff --git a/src/libraries/System.Private.CoreLib/src/System/Double.cs b/src/libraries/System.Private.CoreLib/src/System/Double.cs index 523ad54c060472..b0143ad159711f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Double.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Double.cs @@ -1400,7 +1400,7 @@ private static bool TryConvertTo(double value, [MaybeNullWhen(false)] ou { #if TARGET_64BIT nuint actualResult = (value >= ulong.MaxValue) ? unchecked((nuint)ulong.MaxValue) : - (value <= ulong.MinValue) ? unchecked((nuint)ulong.MinValue) : (nuint)value; + (value <= ulong.MinValue || IsNaN(value)) ? 
unchecked((nuint)ulong.MinValue) : (nuint)value; result = (TOther)(object)actualResult; return true; #else diff --git a/src/libraries/System.Private.CoreLib/src/System/Half.cs b/src/libraries/System.Private.CoreLib/src/System/Half.cs index b462c88b6e3908..54d2437f39722c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Half.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Half.cs @@ -1883,7 +1883,7 @@ private static bool TryConvertTo(Half value, [MaybeNullWhen(false)] out else if (typeof(TOther) == typeof(nuint)) { nuint actualResult = (value == PositiveInfinity) ? nuint.MaxValue : - (value <= Zero) ? nuint.MinValue : (nuint)value; + (value <= Zero || IsNaN(value)) ? nuint.MinValue : (nuint)value; result = (TOther)(object)actualResult; return true; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs index 7987c7eb89f159..12a0809373b144 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NFloat.cs @@ -1754,7 +1754,7 @@ private static bool TryConvertTo(NFloat value, [MaybeNullWhen(false)] ou return true; #else nuint actualResult = (value >= ulong.MaxValue) ? unchecked((nuint)ulong.MaxValue) : - (value <= ulong.MinValue) ? unchecked((nuint)ulong.MinValue) : (nuint)value; + (value <= ulong.MinValue || IsNaN(value)) ? 
unchecked((nuint)ulong.MinValue) : (nuint)value; result = (TOther)(object)actualResult; return true; #endif diff --git a/src/libraries/System.Private.CoreLib/src/System/Single.cs b/src/libraries/System.Private.CoreLib/src/System/Single.cs index 188534808e3a73..8f89dc39dadcb4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Single.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Single.cs @@ -1380,7 +1380,7 @@ private static bool TryConvertTo(float value, [MaybeNullWhen(false)] out { #if TARGET_64BIT nuint actualResult = (value >= ulong.MaxValue) ? unchecked((nuint)ulong.MaxValue) : - (value <= ulong.MinValue) ? unchecked((nuint)ulong.MinValue) : (nuint)value; + (value <= ulong.MinValue || IsNaN(value)) ? unchecked((nuint)ulong.MinValue) : (nuint)value; result = (TOther)(object)actualResult; return true; #else diff --git a/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs b/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs index 117c87db6ce9eb..414788a4c47420 100644 --- a/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs +++ b/src/libraries/System.Runtime/tests/System/UIntPtrTests.GenericMath.cs @@ -2224,7 +2224,7 @@ public static void CreateSaturatingFromDoubleTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateSaturating(double.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(double.MinValue)); - Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateSaturating(double.NaN)); + Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(double.NaN)); } [Fact] @@ -2245,7 +2245,7 @@ public static void CreateSaturatingFromHalfTest() Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(Half.NegativeInfinity)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(Half.MinValue)); - Assert.Equal((arch == Architecture.X86 || arch == 
Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateSaturating(Half.NaN)); + Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(Half.NaN)); } [Fact] @@ -2352,7 +2352,7 @@ public static void CreateSaturatingFromNFloatTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateSaturating(NFloat.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(NFloat.MinValue)); - Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateSaturating(NFloat.NaN)); + Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(NFloat.NaN)); } [Fact] @@ -2397,7 +2397,7 @@ public static void CreateSaturatingFromSingleTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateSaturating(float.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(float.MinValue)); - Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateSaturating(float.NaN)); + Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateSaturating(float.NaN)); } [Fact] @@ -2536,7 +2536,7 @@ public static void CreateTruncatingFromDoubleTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateTruncating(double.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(double.MinValue)); - Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateTruncating(double.NaN)); + Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(double.NaN)); } [Fact] @@ -2557,7 +2557,7 @@ public static void CreateTruncatingFromHalfTest() Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(Half.NegativeInfinity)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(Half.MinValue)); - Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateTruncating(Half.NaN)); + 
Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(Half.NaN)); } [Fact] @@ -2686,7 +2686,7 @@ public static void CreateTruncatingFromNFloatTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateTruncating(NFloat.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(NFloat.MinValue)); - Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateTruncating(NFloat.NaN)); + Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(NFloat.NaN)); } [Fact] @@ -2742,7 +2742,7 @@ public static void CreateTruncatingFromSingleTest() Assert.Equal(nuint.MaxValue, NumberBaseHelper.CreateTruncating(float.MaxValue)); Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(float.MinValue)); - Assert.Equal((arch == Architecture.X86 || arch == Architecture.X64)?nuint.MaxValue:nuint.MinValue, NumberBaseHelper.CreateTruncating(float.NaN)); + Assert.Equal(nuint.MinValue, NumberBaseHelper.CreateTruncating(float.NaN)); } [Fact] diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp index db690e1160f809..3890fcac11a3dd 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cpp @@ -124,7 +124,6 @@ extern "C" DLLEXPORT uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver if (t == CONVERT_NATIVECOMPILERBEHAVIOR) return (uint64_t)x; - double input_val = x; x = trunc(x); // truncate (round toward zero) // (double)UINT64_MAX cannot be represented exactly as double @@ -138,6 +137,7 @@ extern "C" DLLEXPORT uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver return ((x != x) || (x < INT64_MIN) || (x >= uint64_max_plus_1)) ? (uint64_t)INT64_MIN : (x < 0) ? 
(uint64_t)(int64_t)x : (uint64_t)x; case CONVERT_SENTINEL: + case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: return ((x != x) || (x < 0) || (x >= uint64_max_plus_1)) ? UINT64_MAX : (uint64_t)x; case CONVERT_SATURATING: @@ -155,9 +155,7 @@ extern "C" DLLEXPORT uint64_t ConvertDoubleToUInt64(double x, FPtoIntegerConver } } - case CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: - return ((input_val != input_val) || (input_val < 0) || (input_val >= uint64_max_plus_1)) ? UINT64_MAX : (uint64_t)input_val; - + case CONVERT_NATIVECOMPILERBEHAVIOR: // handled above, but add case to silence warning return 0; } diff --git a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs index 49197e7965febd..e2be91c974fec3 100644 --- a/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs +++ b/src/tests/JIT/Directed/Convert/out_of_range_fp_to_int_conversions.cs @@ -171,7 +171,6 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t) if (t == FPtoIntegerConversionType.CONVERT_NATIVECOMPILERBEHAVIOR) return (ulong)x; - double input_val = x; x = Truncate(x); // truncate (round toward zero) // (double)ULLONG_MAX cannot be represented exactly as double @@ -184,6 +183,7 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t) return (Double.IsNaN(x) || (x < long.MinValue) || (x >= ullong_max_plus_1)) ? unchecked((ulong)long.MinValue): (x < 0) ? (ulong)(long)x: (ulong)x; case FPtoIntegerConversionType.CONVERT_SENTINEL: + case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: return (Double.IsNaN(x) || (x < 0) || (x >= ullong_max_plus_1)) ? 
ulong.MaxValue : (ulong)x; case FPtoIntegerConversionType.CONVERT_SATURATING: @@ -200,8 +200,7 @@ public static ulong ConvertDoubleToUInt64(double x, FPtoIntegerConversionType t) return (ulong)ConvertDoubleToInt64(x - two63, FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_ARM32) + (0x8000000000000000); } } - case FPtoIntegerConversionType.CONVERT_MANAGED_BACKWARD_COMPATIBLE_X86_X64: - return (Double.IsNaN(input_val) || (input_val < 0) || (input_val >= ullong_max_plus_1)) ? ulong.MaxValue : (ulong)input_val; + } return 0; From 293e84d21d0253695f17bbc89a80303a83193d2b Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Fri, 12 May 2023 13:33:26 -0700 Subject: [PATCH 04/13] reverting jitformat --- src/coreclr/scripts/jitformat.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/coreclr/scripts/jitformat.py b/src/coreclr/scripts/jitformat.py index 497a5a12290e3f..ad63529fa75803 100644 --- a/src/coreclr/scripts/jitformat.py +++ b/src/coreclr/scripts/jitformat.py @@ -21,7 +21,6 @@ import tarfile import tempfile import zipfile -import time class ChangeDir: def __init__(self, dir): @@ -141,11 +140,10 @@ def main(argv): bootstrapPath = os.path.join(temp_location, bootstrapFilename) assert len(os.listdir(os.path.dirname(bootstrapPath))) == 0 - print(bootstrapPath) - time.sleep(60) - # if not jitutil.download_one_url(bootstrapUrl, bootstrapPath): - # logging.error("Did not download bootstrap!") - # return -1 + + if not jitutil.download_one_url(bootstrapUrl, bootstrapPath): + logging.error("Did not download bootstrap!") + return -1 if platform == 'windows': # Need to ensure we have Windows line endings on the downloaded script file, From 6d14c222fd89ba77dda20f5ffe074816e003e35f Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Mon, 15 May 2023 16:31:05 -0700 Subject: [PATCH 05/13] Adding a truncate function to the Dbl2Ulng helper to make sure we avoid handling edge cases (-1,0) separately inside the helper. 
--- src/coreclr/vm/jithelpers.cpp | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index b8dc01a69114bd..1a4f247d8ce698 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -572,6 +572,30 @@ FORCEINLINE INT64 FastDbl2Lng(double val) #endif } +/*********************************************************************/ +// helper function to truncate a double to an integral value (round towards zero) +double TrucateDouble(double val) +{ + FCALL_CONTRACT; + int64_t *dintVal = (int64_t *)&val; + + uint64_t uintVal = (uint64_t)*dintVal; + int exponent = (int)((uintVal >> 52) & 0x7FF); + if (exponent < 1023) + { + uintVal = uintVal & 0x8000000000000000ull; + } + else if (exponent < 1075) + { + uintVal = uintVal & (unsigned long long)(~(0xFFFFFFFFFFFFF >> (exponent - 1023))); + } + int64_t intVal = (int64_t)uintVal; + double *doubleVal = (double *)&intVal; + double retVal = *doubleVal; + + return retVal; +} + /*********************************************************************/ HCIMPL1_V(UINT32, JIT_Dbl2UIntOvf, double val) { @@ -592,7 +616,9 @@ HCIMPL1_V(UINT64, JIT_Dbl2ULng, double val) #if defined(TARGET_X86) || defined(TARGET_AMD64) const double uint64_max_plus_1 = -2.0 * (double)INT64_MIN; - return ((val != val) || ((val < 0) && (val + 1 <= 0)) || (val >= uint64_max_plus_1)) ? UINT64_MAX : ((val < 0) && (val + 1 > 0)) ? 0 : (UINT64)val; + val = TrucateDouble(val); + //return ((val != val) || ((val < 0) && (val + 1 < 0)) || (val >= uint64_max_plus_1)) ? UINT64_MAX : ((val < 0) && (val + 1 > 0)) ? 0 : (UINT64)val; + return ((val != val) || (val < 0) || (val >= uint64_max_plus_1)) ? 
UINT64_MAX : (UINT64)val; #else const double two63 = 2147483648.0 * 4294967296.0; From d9774473617e7f7ce9d6b73350eff2ce79600e22 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 16 May 2023 00:24:42 -0700 Subject: [PATCH 06/13] Adding code to handle vectorized conversion for float/double to/from ulong/uint --- src/coreclr/jit/codegenxarch.cpp | 20 +++++++++++++++++--- src/coreclr/jit/emitxarch.cpp | 12 ++++++++++-- src/coreclr/jit/instr.cpp | 16 ++++++++++++++++ src/coreclr/jit/lowerxarch.cpp | 8 ++++---- src/coreclr/jit/morph.cpp | 4 ++++ 5 files changed, 51 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 1988aa632cf35b..cc7b0939e45d2b 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -7451,6 +7451,18 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) noway_assert(srcType != TYP_UINT); noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT)); + if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + { + if (srcType == TYP_ULONG && (dstType == TYP_DOUBLE || dstType == TYP_FLOAT)) + { + genConsumeOperands(treeNode->AsOp()); + instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType)); + GetEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1); + genProduceReg(treeNode); + return; + } + } + // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used // which does a partial write to lower 4/8 bytes of xmm register keeping the other // upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop, @@ -7562,8 +7574,10 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG)))); // We shouldn't be seeing uint64 here as it should have been converted - // into a helper call by either front-end or lowering phase. 
- noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG)))); + // into a helper call by either front-end or lowering phase, unless we have AVX512F + // accelerated conversions. + noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) || + compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); // If the dstType is TYP_UINT, we have 32-bits to encode the // float number. Any of 33rd or above bits can be the sign bit. @@ -7576,7 +7590,7 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode) // Note that we need to specify dstType here so that it will determine // the size of destination integer register and also the rex.w prefix. genConsumeOperands(treeNode->AsOp()); - instruction ins = ins_FloatConv(TYP_INT, srcType, emitTypeSize(srcType)); + instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType)); GetEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1); genProduceReg(treeNode); } diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 83cdb1c6fe72b7..321d8a08159a81 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -18157,15 +18157,23 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_cvtsi2sd64: case INS_cvtsi2ss64: case INS_vcvtsd2usi: - case INS_vcvttsd2usi: case INS_vcvtusi2sd32: - case INS_vcvtusi2sd64: case INS_vcvtusi2ss32: case INS_vcvtusi2ss64: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += PERFSCORE_LATENCY_7C; break; + case INS_vcvttsd2usi: + result.insLatency += PERFSCORE_LATENCY_6C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + + case INS_vcvtusi2sd64: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency += PERFSCORE_LATENCY_5C; + break; + case INS_cvttss2si: case INS_cvtss2si: case INS_vcvtss2usi: diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 
b942ddd6d878d9..92f0078444c3af 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -2165,6 +2165,9 @@ instruction CodeGen::ins_MathOp(genTreeOps oper, var_types type) instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) { // AVX: For now we support only conversion from Int/Long -> float + // AVX512: Supports following conversions + // srcType = float/double castToType = ulong + // srcType = ulong castToType = double switch (from) { @@ -2213,6 +2216,8 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) return ins_Move_Extend(TYP_FLOAT, false); case TYP_DOUBLE: return INS_cvtss2sd; + case TYP_ULONG: + return INS_vcvttss2usi; default: unreached(); } @@ -2225,6 +2230,8 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) return INS_cvttsd2si; case TYP_LONG: return INS_cvttsd2si; + case TYP_ULONG: + return INS_vcvttsd2usi; case TYP_FLOAT: return INS_cvtsd2ss; case TYP_DOUBLE: @@ -2234,6 +2241,15 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) } break; + case TYP_ULONG: + switch (to) + { + case TYP_DOUBLE: + return INS_vcvtusi2sd64; + default: + unreached(); + } + default: unreached(); } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index d780773abba69a..fa522dad83bb9d 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -795,15 +795,15 @@ void Lowering::LowerCast(GenTree* tree) // srcType = float/double castToType = * and overflow detecting cast // Reason: must be converted to a helper call // srcType = float/double, castToType = ulong - // Reason: must be converted to a helper call + // Reason: must be converted to a helper call unless we have AVX512F // srcType = uint castToType = float/double // Reason: uint -> float/double = uint -> long -> float/double // srcType = ulong castToType = float // Reason: ulong -> float = ulong -> double -> float - if 
(varTypeIsFloating(srcType)) + if (srcType == TYP_FLOAT) { - noway_assert(!tree->gtOverflow()); - noway_assert(castToType != TYP_ULONG); + noway_assert(!tree->gtOverflow() || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); + noway_assert(castToType != TYP_ULONG || comp->compOpportunisticallyDependsOn(InstructionSet_AVX512F)); } else if (srcType == TYP_UINT) { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index a583c739816904..3d9653f72e71f1 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -358,6 +358,10 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) #endif // !TARGET_AMD64 case TYP_ULONG: +#ifdef TARGET_AMD64 + if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + return nullptr; +#endif return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper); default: unreached(); From 0845905db72a433d7e793366c4c44faaa61ee055 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 16 May 2023 11:45:15 -0700 Subject: [PATCH 07/13] reverting changes for float to ulong --- src/coreclr/jit/morph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 3d9653f72e71f1..88be9ce4afb3f1 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -359,7 +359,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) case TYP_ULONG: #ifdef TARGET_AMD64 - if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512F) && srcType != TYP_FLOAT) return nullptr; #endif return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper); From 451780efd53dd3c4d5cf44b20bc563cd776507d3 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 16 May 2023 14:28:24 -0700 Subject: [PATCH 08/13] enabling float to ulong conversion --- src/coreclr/jit/morph.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp 
index 88be9ce4afb3f1..3d9653f72e71f1 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -359,7 +359,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree) case TYP_ULONG: #ifdef TARGET_AMD64 - if (compOpportunisticallyDependsOn(InstructionSet_AVX512F) && srcType != TYP_FLOAT) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) return nullptr; #endif return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper); From 06ecf6a3926ea0c1f426fcef645239db4d7a92fd Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 17 May 2023 00:35:51 -0700 Subject: [PATCH 09/13] Making change to set w1 bit for evex --- src/coreclr/jit/instrsxarch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 589df2cd7bdeb6..5b9e797aec5bfd 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -622,7 +622,7 @@ INST3(vcmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_ INST3(vcvtpd2udq, "cvtpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt packed doubles to unsigned DWORDs INST3(vcvtps2udq, "cvtps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt packed singles to unsigned DWORDs INST3(vcvtsd2usi, "cvtsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x79), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_EVEX) // cvt scalar double to unsigned DWORD/QWORD -INST3(vcvtss2usi, "cvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_WX | Encoding_EVEX) // cvt scalar single to unsigned DWORD/QWORD +INST3(vcvtss2usi, "cvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt scalar single to unsigned DWORD/QWORD INST3(vcvttpd2udq, "cvttpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation 
packed doubles to unsigned DWORDs INST3(vcvttps2udq, "cvttps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned DWORDs INST3(vcvttsd2usi, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned DWORD/QWORD From 6b963e64a5a02dc4ca39f5ecde55939a463e6656 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 18 May 2023 00:25:58 -0700 Subject: [PATCH 10/13] trying to return EA_4BYTE for INS_vcvttss2usi to make sure that we read dword and not qword for float to ulong --- src/coreclr/jit/emit.h | 6 ++++++ src/coreclr/jit/emitxarch.cpp | 27 +++++++++++++------------- src/coreclr/jit/hwintrinsiclistxarch.h | 4 ++-- src/coreclr/jit/instr.cpp | 2 +- src/coreclr/jit/instrsxarch.h | 3 ++- 5 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 1d49eb69070061..f778186f7cd521 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -3857,6 +3857,12 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id) const return EA_32BYTE; } + case INS_vcvttss2usi64: + case INS_vcvttss2usi32: + { + return EA_4BYTE; + } + case INS_movddup: { if (defaultSize == 64) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 321d8a08159a81..1d199c4dcb207c 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1294,17 +1294,6 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const case INS_vcvtsd2usi: case INS_vcvtss2usi: case INS_vcvttsd2usi: - case INS_vcvttss2usi: - { - if (attr == EA_8BYTE) - { - return true; - } - - // TODO-Cleanup: This should really only ever be EA_4BYTE - assert((attr == EA_4BYTE) || (attr == EA_16BYTE)); - return false; - } case INS_vbroadcastsd: case INS_vpbroadcastq: @@ -2518,7 +2507,8 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id) 
case INS_vcvtsd2usi: case INS_vcvtss2usi: case INS_vcvttsd2usi: - case INS_vcvttss2usi: + case INS_vcvttss2usi32: + case INS_vcvttss2usi64: { // These SSE instructions write to a general purpose integer register. return false; @@ -11234,7 +11224,7 @@ void emitter::emitDispIns( case INS_vcvtsd2usi: case INS_vcvtss2usi: case INS_vcvttsd2usi: - case INS_vcvttss2usi: + //case INS_vcvttss2usi: { printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); break; @@ -18177,10 +18167,19 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case INS_cvttss2si: case INS_cvtss2si: case INS_vcvtss2usi: - case INS_vcvttss2usi: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += opSize == EA_8BYTE ? PERFSCORE_LATENCY_8C : PERFSCORE_LATENCY_7C; break; + + case INS_vcvttss2usi32: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency += PERFSCORE_LATENCY_7C; + break; + + case INS_vcvttss2usi64: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency += PERFSCORE_LATENCY_8C; + break; case INS_cvtss2sd: result.insThroughput = PERFSCORE_THROUGHPUT_1C; diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 0c677837b8cf26..afbd92cfa534ac 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -836,7 +836,7 @@ HARDWARE_INTRINSIC(AVX512F, Ceiling, HARDWARE_INTRINSIC(AVX512F, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtusi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(AVX512F, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtusi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, 
HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi, INS_vcvtsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi, INS_vcvttsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512F, ConvertToUInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi32, INS_vcvttsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Byte, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F, ConvertToVector128ByteWithSaturation, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F, ConvertToVector128Int16, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) @@ -948,7 +948,7 @@ HARDWARE_INTRINSIC(AVX512F_VL, ShiftRightArithmeticVariable, HARDWARE_INTRINSIC(AVX512F_X64, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtusi2sd64, INS_invalid, 
INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F_X64, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtusi2ss64, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtss2usi, INS_vcvtsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi, INS_vcvttsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttss2usi64, INS_vcvttsd2usi}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 92f0078444c3af..19fa2049dc9752 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -2217,7 +2217,7 @@ instruction 
CodeGen::ins_FloatConv(var_types to, var_types from, emitAttr attr) case TYP_DOUBLE: return INS_cvtss2sd; case TYP_ULONG: - return INS_vcvttss2usi; + return INS_vcvttss2usi64; default: unreached(); } diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 5b9e797aec5bfd..658157161516d5 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -626,7 +626,8 @@ INST3(vcvtss2usi, "cvtss2usi", IUM_WR, BAD_CODE, BAD_ INST3(vcvttpd2udq, "cvttpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned DWORDs INST3(vcvttps2udq, "cvttps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned DWORDs INST3(vcvttsd2usi, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned DWORD/QWORD -INST3(vcvttss2usi, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_WX | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD +INST3(vcvttss2usi32, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD +INST3(vcvttss2usi64, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD INST3(vcvtudq2pd, "cvtudq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt packed unsigned DWORDs to doubles INST3(vcvtudq2ps, "cvtudq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt packed unsigned DWORDs to singles INST3(vcvtusi2sd32, "cvtusi2sd", IUM_WR, BAD_CODE, BAD_CODE, 
SSEDBL(0x7B), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned DWORD to double From e8f06d9a66999e35cdb7478fda4c9e4a2d555576 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 18 May 2023 01:16:12 -0700 Subject: [PATCH 11/13] jit format --- src/coreclr/jit/emit.h | 2 +- src/coreclr/jit/emitxarch.cpp | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index f778186f7cd521..038313e6d1c217 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -3862,7 +3862,7 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id) const { return EA_4BYTE; } - + case INS_movddup: { if (defaultSize == 64) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 1d199c4dcb207c..0408f347913cf8 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -11224,11 +11224,11 @@ void emitter::emitDispIns( case INS_vcvtsd2usi: case INS_vcvtss2usi: case INS_vcvttsd2usi: - //case INS_vcvttss2usi: - { - printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); - break; - } + // case INS_vcvttss2usi: + { + printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); + break; + } #ifdef TARGET_AMD64 case INS_movsxd: @@ -18170,12 +18170,12 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += opSize == EA_8BYTE ? 
PERFSCORE_LATENCY_8C : PERFSCORE_LATENCY_7C; break; - + case INS_vcvttss2usi32: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += PERFSCORE_LATENCY_7C; break; - + case INS_vcvttss2usi64: result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency += PERFSCORE_LATENCY_8C; From c703c1bf8ee1f31b76111c5144b5fe0863330ab8 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 18 May 2023 14:00:03 -0700 Subject: [PATCH 12/13] Splitting vcvttss2usi to vcvttss2usi32 and vcvttss2usi64. Also adding a special handling for vcvttss2usi64 to make sure we read only dword instead of qword for float to ulong conversion --- src/coreclr/jit/emit.h | 7 +++++-- src/coreclr/jit/emitxarch.cpp | 26 +++++++++++++++++++++----- src/coreclr/jit/instrsxarch.h | 2 +- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 038313e6d1c217..ef401b7d115a2b 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -3858,9 +3858,12 @@ emitAttr emitter::emitGetMemOpSize(instrDesc* id) const } case INS_vcvttss2usi64: - case INS_vcvttss2usi32: { - return EA_4BYTE; + if (defaultSize == 8) + { + return EA_4BYTE; + } + return defaultSize; } case INS_movddup: diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 0408f347913cf8..c79e2993f25a8e 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1294,6 +1294,16 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const case INS_vcvtsd2usi: case INS_vcvtss2usi: case INS_vcvttsd2usi: + { + if (attr == EA_8BYTE) + { + return true; + } + + // TODO-Cleanup: This should really only ever be EA_4BYTE + assert((attr == EA_4BYTE) || (attr == EA_16BYTE)); + return false; + } case INS_vbroadcastsd: case INS_vpbroadcastq: @@ -11224,11 +11234,17 @@ void emitter::emitDispIns( case INS_vcvtsd2usi: case INS_vcvtss2usi: case INS_vcvttsd2usi: - // case INS_vcvttss2usi: - { - printf(" %s, %s", emitRegName(id->idReg1(), attr), 
emitRegName(id->idReg2(), EA_16BYTE)); - break; - } + { + printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); + break; + } + + case INS_vcvttss2usi32: + case INS_vcvttss2usi64: + { + printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_4BYTE)); + break; + } #ifdef TARGET_AMD64 case INS_movsxd: diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 658157161516d5..17b8ac164d4056 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -622,7 +622,7 @@ INST3(vcmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_ INST3(vcvtpd2udq, "cvtpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt packed doubles to unsigned DWORDs INST3(vcvtps2udq, "cvtps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt packed singles to unsigned DWORDs INST3(vcvtsd2usi, "cvtsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x79), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_EVEX) // cvt scalar double to unsigned DWORD/QWORD -INST3(vcvtss2usi, "cvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt scalar single to unsigned DWORD/QWORD +INST3(vcvtss2usi, "cvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_WX | Encoding_EVEX) // cvt scalar single to unsigned DWORD/QWORD INST3(vcvttpd2udq, "cvttpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned DWORDs INST3(vcvttps2udq, "cvttps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned DWORDs INST3(vcvttsd2usi, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_EVEX) // cvt w/ truncation 
scalar double to unsigned DWORD/QWORD From bb4a91ef23b2f0a262660622006205185cd98fc1 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 18 May 2023 16:42:40 -0700 Subject: [PATCH 13/13] undoing jitformat changes due to merge error --- src/coreclr/scripts/jitformat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/scripts/jitformat.py b/src/coreclr/scripts/jitformat.py index ad63529fa75803..51a096c59cd3cf 100644 --- a/src/coreclr/scripts/jitformat.py +++ b/src/coreclr/scripts/jitformat.py @@ -81,7 +81,7 @@ def main(argv): args, unknown = parser.parse_known_args(argv) if unknown: - logging.warn('Ignoring argument(s): {}'.format(','.join(unknown))) + logging.warning('Ignoring argument(s): {}'.format(','.join(unknown))) if args.coreclr is None: logging.error('Specify --coreclr')