From f2ea0d0453426bff210d9d034eee7effc757d9e1 Mon Sep 17 00:00:00 2001 From: Timur Mustafin Date: Mon, 15 Jan 2024 21:12:59 +0300 Subject: [PATCH 1/5] [x86 Unix] Fix memory waste on X86EmitPopRegs --- src/coreclr/vm/i386/stublinkerx86.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/vm/i386/stublinkerx86.cpp b/src/coreclr/vm/i386/stublinkerx86.cpp index 76d888c0c52756..4e2f89421cf58f 100644 --- a/src/coreclr/vm/i386/stublinkerx86.cpp +++ b/src/coreclr/vm/i386/stublinkerx86.cpp @@ -1289,7 +1289,8 @@ VOID StubLinkerCPU::X86EmitPopRegs(unsigned regSet) { STANDARD_VM_CONTRACT; - for (X86Reg r = NumX86Regs; r >= kEAX; r = (X86Reg)(r-1)) + /* Cmp with regs num cause r is UCHAR */ + for (X86Reg r = NumX86Regs; r <= NumX86Regs; r = (X86Reg)(r-1)) if (regSet & (1U< Date: Wed, 17 Jan 2024 12:07:39 +0300 Subject: [PATCH 2/5] [x86/x64] Disable BlockInit if SIMD disabled The BlockInit algorithm for x86 and x64 uses SIMD instructions, so it should be disabled if FEATURE_SIMD is turned off. --- src/coreclr/jit/codegencommon.cpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 09a28cc373bf49..ba408b86ef8457 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4467,22 +4467,18 @@ void CodeGen::genCheckUseBlockInit() // to be modified. 
CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef TARGET_64BIT -#if defined(TARGET_AMD64) - - // We can clear using aligned SIMD so the threshold is lower, +#if defined(TARGET_XARCH) && defined(FEATURE_SIMD) + // For AMD64 we can clear using aligned SIMD so the threshold is lower, // and clears in order which is better for auto-prefetching genUseBlockInit = (genInitStkLclCnt > 4); - -#else // !defined(TARGET_AMD64) - +#elif defined(TARGET_XARCH) && !defined(FEATURE_SIMD) + // Block init algorithm needs SIMD instructions + genUseBlockInit = 0; +#elif defined(TARGET_64BIT) genUseBlockInit = (genInitStkLclCnt > 8); -#endif -#else - +#else // !TARGET_64BIT && !TARGET_XARCH genUseBlockInit = (genInitStkLclCnt > 4); - -#endif // TARGET_64BIT +#endif // !defined(TARGET_64BIT) if (genUseBlockInit) { From 14c9ea60cece5c340cf25b5289f8b5fc513dcd9e Mon Sep 17 00:00:00 2001 From: Timur Mustafin Date: Tue, 16 Jan 2024 18:51:52 +0300 Subject: [PATCH 3/5] [x86 Unix] Enable FEATURE_SIMD FEATURE_SIMD for x86/Unix was turned off by https://github.com/dotnet/coreclr/pull/8335 as a temporary workaround for https://github.com/dotnet/runtime/issues/7061, which was fixed by https://github.com/dotnet/coreclr/pull/8382. So FEATURE_SIMD can be enabled for x86/Unix. 
--- src/coreclr/jit/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 480d9d50350ddc..1b0a87bae4d4e6 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -70,13 +70,13 @@ function(create_standalone_jit) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_NO_HOST) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE SELF_NO_HOST) - if ((TARGETDETAILS_ARCH STREQUAL "x64") OR (TARGETDETAILS_ARCH STREQUAL "arm64") OR ((TARGETDETAILS_ARCH STREQUAL "x86") AND NOT (TARGETDETAILS_OS STREQUAL "unix"))) + if ((TARGETDETAILS_ARCH STREQUAL "x64") OR (TARGETDETAILS_ARCH STREQUAL "arm64") OR (TARGETDETAILS_ARCH STREQUAL "x86")) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_SIMD) target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_HW_INTRINSICS) endif () endfunction() -if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR (CLR_CMAKE_TARGET_ARCH_I386 AND NOT CLR_CMAKE_HOST_UNIX)) +if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_I386) add_compile_definitions($<$>>:FEATURE_SIMD>) add_compile_definitions($<$>>:FEATURE_HW_INTRINSICS>) endif () From fc1d9beb304520ef89dcd178336957aea39c3f34 Mon Sep 17 00:00:00 2001 From: Timur Mustafin Date: Thu, 18 Jan 2024 18:22:08 +0300 Subject: [PATCH 4/5] Feedback --- src/coreclr/vm/i386/stublinkerx86.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/coreclr/vm/i386/stublinkerx86.cpp b/src/coreclr/vm/i386/stublinkerx86.cpp index 4e2f89421cf58f..821471405d598a 100644 --- a/src/coreclr/vm/i386/stublinkerx86.cpp +++ b/src/coreclr/vm/i386/stublinkerx86.cpp @@ -1277,7 +1277,7 @@ VOID StubLinkerCPU::X86EmitPushRegs(unsigned regSet) { STANDARD_VM_CONTRACT; - for (X86Reg r = kEAX; r <= NumX86Regs; r = (X86Reg)(r+1)) + for (X86Reg r = kEAX; r < NumX86Regs; r = (X86Reg)(r+1)) if 
(regSet & (1U< Date: Sat, 20 Jan 2024 00:36:04 +0300 Subject: [PATCH 5/5] Formatting patch --- src/coreclr/jit/codegencommon.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index ba408b86ef8457..d47d4ad61184bc 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4475,8 +4475,8 @@ void CodeGen::genCheckUseBlockInit() // Block init algorithm needs SIMD instructions genUseBlockInit = 0; #elif defined(TARGET_64BIT) - genUseBlockInit = (genInitStkLclCnt > 8); -#else // !TARGET_64BIT && !TARGET_XARCH + genUseBlockInit = (genInitStkLclCnt > 8); +#else // !TARGET_64BIT && !TARGET_XARCH genUseBlockInit = (genInitStkLclCnt > 4); #endif // !defined(TARGET_64BIT)