From 3b0d6c487ba49f5624bc8bdc5fd9154d535163ac Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 29 Feb 2024 04:12:09 +0100 Subject: [PATCH 01/14] Batched wb --- src/coreclr/inc/corinfo.h | 1 + src/coreclr/inc/jithelpers.h | 1 + src/coreclr/jit/codegencommon.cpp | 1 + src/coreclr/jit/codegenxarch.cpp | 23 ++ src/coreclr/jit/emit.cpp | 2 + src/coreclr/jit/utils.cpp | 1 + .../Common/JitInterface/CorInfoHelpFunc.cs | 1 + src/coreclr/vm/amd64/JitHelpers_Fast.asm | 221 ++++++++++++++++++ src/coreclr/vm/excep.cpp | 2 + src/coreclr/vm/jitinterface.h | 1 + 10 files changed, 254 insertions(+) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 5fad5e4b2429e4..6ee7ab3cbb0fb3 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -496,6 +496,7 @@ enum CorInfoHelpFunc CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP, // Do the store, and ensure that the target was not in the heap. CORINFO_HELP_ASSIGN_BYREF, + CORINFO_HELP_ASSIGN_BYREF_BATCH, CORINFO_HELP_ASSIGN_STRUCT, diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index a0982f3ac6520f..95d1aaf3c60af8 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -153,6 +153,7 @@ JITHELPER(CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP, JIT_WriteBarrierEnsureNonHeapTarget,CORINFO_HELP_SIG_REG_ONLY) DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF, JIT_ByRefWriteBarrier,CORINFO_HELP_SIG_NO_ALIGN_STUB) + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF_BATCH, JIT_ByRefWriteBarrierBatch,CORINFO_HELP_SIG_NO_ALIGN_STUB) JITHELPER(CORINFO_HELP_ASSIGN_STRUCT, JIT_StructWriteBarrier,CORINFO_HELP_SIG_4_STACK) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 9d7992ea4efe27..3e6b7232dd1f6f 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -632,6 +632,7 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper) return RBM_CALLEE_TRASH_WRITEBARRIER; case CORINFO_HELP_ASSIGN_BYREF: + case CORINFO_HELP_ASSIGN_BYREF_BATCH: return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF; case CORINFO_HELP_PROF_FCN_ENTER: diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 584cb3aab19bb3..cf3e4556a0e901 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -4265,6 +4265,29 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) } else { +#if defined(TARGET_AMD64) + if (!compiler->opts.IsReadyToRun()) + { + unsigned gcSlotCount = 0; + do + { + gcSlotCount++; + i++; + } while ((i < slots) && layout->IsGCPtr(i)); + + if (gcSlotCount > 1) + { + instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_R8, gcSlotCount); + genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF_BATCH, 0, EA_PTRSIZE); + } + else + { + genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); + } + gcPtrCount -= gcSlotCount; + continue; + } +#endif genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); gcPtrCount--; i++; diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index ecbfe659be1034..20b8710cc63b86 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -2850,6 +2850,7 @@ bool emitter::emitNoGChelper(CorInfoHelpFunc helpFunc) case CORINFO_HELP_ASSIGN_REF: case CORINFO_HELP_CHECKED_ASSIGN_REF: case CORINFO_HELP_ASSIGN_BYREF: + case CORINFO_HELP_ASSIGN_BYREF_BATCH: case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR: case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR: @@ -10422,6 +10423,7 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper) break; case CORINFO_HELP_ASSIGN_BYREF: + case CORINFO_HELP_ASSIGN_BYREF_BATCH: result = RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF; break; diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index aed8cda7c24df9..9370a53be1a9d4 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1747,6 +1747,7 @@ void HelperCallProperties::init() case CORINFO_HELP_CHECKED_ASSIGN_REF: case CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP: case CORINFO_HELP_ASSIGN_BYREF: + case CORINFO_HELP_ASSIGN_BYREF_BATCH: case CORINFO_HELP_ASSIGN_STRUCT: mutatesHeap = true; diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index 5346806c1aff60..179cba4af26e2f 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -138,6 +138,7 @@ which is the right helper to use to allocate an object of a given type. */ CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP, // Do the store, and ensure that the target was not in the heap. CORINFO_HELP_ASSIGN_BYREF, + CORINFO_HELP_ASSIGN_BYREF_BATCH, CORINFO_HELP_ASSIGN_STRUCT, diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm index 0f1b71b5ee93b3..cfb8a56aab6517 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm @@ -256,6 +256,227 @@ endif ret LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT + +; JIT_ByRefWriteBarrierBatch has weird semantics, see usage in StubLinkerX86.cpp +; +; Entry: +; RDI - address of ref-field (assigned to) +; RSI - address of the data (source) +; R8D - number of byrefs to write +; RCX is trashed +; RAX is trashed when FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP is defined +; Exit: +; RDI, RSI are incremented by SIZEOF(LPVOID) +; R8D is zeroed +LEAF_ENTRY JIT_ByRefWriteBarrierBatch, _TEXT + NextByref: + mov rcx, [rsi] + +; If !WRITE_BARRIER_CHECK do the write first, otherwise we might have to do some ShadowGC stuff +ifndef WRITE_BARRIER_CHECK + ; rcx is [rsi] + mov [rdi], rcx +endif + + ; When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference + ; but if it isn't then it will just return. + ; + ; See if this is in GCHeap + cmp rdi, [g_lowest_address] + jb NotInHeap + cmp rdi, [g_highest_address] + jnb NotInHeap + +ifdef WRITE_BARRIER_CHECK + ; we can only trash rcx in this function so in _DEBUG we need to save + ; some scratch registers. + push r10 + push r11 + push rax + + ; **ALSO update the shadow GC heap if that is enabled** + ; Do not perform the work if g_GCShadow is 0 + cmp g_GCShadow, 0 + je NoShadow + + ; If we end up outside of the heap don't corrupt random memory + mov r10, rdi + sub r10, [g_lowest_address] + jb NoShadow + + ; Check that our adjusted destination is somewhere in the shadow gc + add r10, [g_GCShadow] + cmp r10, [g_GCShadowEnd] + jnb NoShadow + + ; Write ref into real GC + mov [rdi], rcx + ; Write ref into shadow GC + mov [r10], rcx + + ; Ensure that the write to the shadow heap occurs before the read from + ; the GC heap so that race conditions are caught by INVALIDGCVALUE + mfence + + ; Check that GC/ShadowGC values match + mov r11, [rdi] + mov rax, [r10] + cmp rax, r11 + je DoneShadow + mov r11, INVALIDGCVALUE + mov [r10], r11 + + jmp DoneShadow + + ; If we don't have a shadow GC we won't have done the write yet + NoShadow: + mov [rdi], rcx + + ; If we had a shadow GC then we already wrote to the real GC at the same time + ; as the shadow GC so we want to jump over the real write immediately above. + ; Additionally we know for sure that we are inside the heap and therefore don't + ; need to replicate the above checks. + DoneShadow: + pop rax + pop r11 + pop r10 +endif + +ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + ; Update the write watch table if necessary + cmp byte ptr [g_sw_ww_enabled_for_gc_heap], 0h + je CheckCardTable + mov rax, rdi + shr rax, 0Ch ; SoftwareWriteWatch::AddressToTableByteIndexShift + add rax, qword ptr [g_sw_ww_table] + cmp byte ptr [rax], 0h + jne CheckCardTable + mov byte ptr [rax], 0FFh +endif + + ; See if we can just quick out + CheckCardTable: + cmp rcx, [g_ephemeral_low] + jb Exit + cmp rcx, [g_ephemeral_high] + jnb Exit + + ; do the following checks only if we are allowed to trash rax + ; otherwise we don't have enough registers +ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + mov rax, rcx + + mov cl, [g_region_shr] + test cl, cl + je SkipCheck + + ; check if the source is in gen 2 - then it's not an ephemeral pointer + shr rax, cl + add rax, [g_region_to_generation_table] + cmp byte ptr [rax], 82h + je Exit + + ; check if the destination happens to be in gen 0 + mov rax, rdi + shr rax, cl + add rax, [g_region_to_generation_table] + cmp byte ptr [rax], 0 + je Exit + SkipCheck: + + cmp [g_region_use_bitwise_write_barrier], 0 + je CheckCardTableByte + + ; compute card table bit + mov rcx, rdi + mov al, 1 + shr rcx, 8 + and cl, 7 + shl al, cl + + ; move current rdi value into rcx and then increment the pointers + mov rcx, rdi + add rsi, 8h + add rdi, 8h + + ; Check if we need to update the card table + ; Calc pCardByte + shr rcx, 0Bh + add rcx, [g_card_table] + + ; Check if this card table bit is already set + test byte ptr [rcx], al + je SetCardTableBit + dec r8d + jne NextByref + REPRET + + SetCardTableBit: + lock or byte ptr [rcx], al + jmp CheckCardBundle +endif +CheckCardTableByte: + + ; move current rdi value into rcx and then increment the pointers + mov rcx, rdi + add rsi, 8h + add rdi, 8h + + ; Check if we need to update the card table + ; Calc pCardByte + shr rcx, 0Bh + add rcx, [g_card_table] + + ; Check if this card is dirty + cmp byte ptr [rcx], 0FFh + jne UpdateCardTable + dec r8d + jne NextByref + REPRET + + UpdateCardTable: + mov byte ptr [rcx], 0FFh + + CheckCardBundle: + +ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + ; check if we need to update the card bundle table + ; restore destination address from rdi - rdi has been incremented by 8 already + lea rcx, [rdi-8] + shr rcx, 15h + add rcx, [g_card_bundle_table] + cmp byte ptr [rcx], 0FFh + jne UpdateCardBundleTable + dec r8d + jne NextByref + REPRET + + UpdateCardBundleTable: + mov byte ptr [rcx], 0FFh +endif + dec r8d + jne NextByref + ret + + align 16 + NotInHeap: +; If WRITE_BARRIER_CHECK then we won't have already done the mov and should do it here +; If !WRITE_BARRIER_CHECK we want _NotInHeap and _Leave to be the same and have both +; 16 byte aligned. +ifdef WRITE_BARRIER_CHECK + ; rcx is [rsi] + mov [rdi], rcx +endif + Exit: + ; Increment the pointers before leaving + add rdi, 8h + add rsi, 8h + dec r8d + jne NextByref + ret +LEAF_END_MARKED JIT_ByRefWriteBarrierBatch, _TEXT + + Section segment para 'DATA' align 16 diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 6e82507cfb7bfb..fb0480acc8b510 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6295,6 +6295,7 @@ EXTERN_C void JIT_StackProbe_End(); EXTERN_C void JIT_WriteBarrier_End(); EXTERN_C void JIT_CheckedWriteBarrier_End(); EXTERN_C void JIT_ByRefWriteBarrier_End(); +EXTERN_C void JIT_ByRefWriteBarrierBatch_End(); #endif // TARGET_X86 #if defined(TARGET_AMD64) && defined(_DEBUG) @@ -6345,6 +6346,7 @@ bool IsIPInMarkedJitHelper(UINT_PTR uControlPc) CHECK_RANGE(JIT_WriteBarrier) CHECK_RANGE(JIT_CheckedWriteBarrier) CHECK_RANGE(JIT_ByRefWriteBarrier) + CHECK_RANGE(JIT_ByRefWriteBarrierBatch) #if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) CHECK_RANGE(JIT_StackProbe) #endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index bbca5c355fbb97..2358839af1e039 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -389,6 +389,7 @@ extern "C" #endif // TARGET_X86 void STDCALL JIT_ByRefWriteBarrier(); // JIThelp.asm/JIThelp.s + void STDCALL JIT_ByRefWriteBarrierBatch(); // JIThelp.asm/JIThelp.s #if defined(TARGET_AMD64) || defined(TARGET_ARM) From 2565a74f7621b3dd461f2db26b0867ab9103d69c Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 29 Feb 2024 04:25:14 +0100 Subject: [PATCH 02/14] only for windows --- src/coreclr/jit/codegenxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index cf3e4556a0e901..e30641b814f9bd 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -4266,7 +4266,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) else { #if defined(TARGET_AMD64) - if (!compiler->opts.IsReadyToRun()) + if (!compiler->opts.IsReadyToRun() && TargetOS::IsWindows) { unsigned gcSlotCount = 0; do From 8bc4ee39e6e057d6ad0e5912c460625c11383f66 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 29 Feb 2024 04:45:27 +0100 Subject: [PATCH 03/14] fix build --- src/coreclr/vm/excep.cpp | 4 ++++ src/coreclr/vm/jitinterface.h | 3 +++ 2 files changed, 7 insertions(+) diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index fb0480acc8b510..b0ee75743345b3 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6295,7 +6295,9 @@ EXTERN_C void JIT_StackProbe_End(); EXTERN_C void JIT_WriteBarrier_End(); EXTERN_C void JIT_CheckedWriteBarrier_End(); EXTERN_C void JIT_ByRefWriteBarrier_End(); +#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) EXTERN_C void JIT_ByRefWriteBarrierBatch_End(); +#endif // TARGET_AMD64 && TARGET_WINDOWS #endif // TARGET_X86 #if defined(TARGET_AMD64) && defined(_DEBUG) @@ -6346,7 +6348,9 @@ bool IsIPInMarkedJitHelper(UINT_PTR uControlPc) CHECK_RANGE(JIT_WriteBarrier) CHECK_RANGE(JIT_CheckedWriteBarrier) CHECK_RANGE(JIT_ByRefWriteBarrier) +#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) CHECK_RANGE(JIT_ByRefWriteBarrierBatch) +#endif // TARGET_AMD64 && TARGET_WINDOWS #if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) CHECK_RANGE(JIT_StackProbe) #endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64 diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index 2358839af1e039..594dde587b29fd 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -389,7 +389,10 @@ extern "C" #endif // TARGET_X86 void STDCALL JIT_ByRefWriteBarrier(); // JIThelp.asm/JIThelp.s + +#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) void STDCALL JIT_ByRefWriteBarrierBatch(); // JIThelp.asm/JIThelp.s +#endif // TARGET_AMD64 && TARGET_WINDOWS #if defined(TARGET_AMD64) || defined(TARGET_ARM) From 8c0dd225734d9b62d1f55e684d12c3659a69797f Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 29 Feb 2024 04:57:50 +0100 Subject: [PATCH 04/14] Fix build again --- src/coreclr/inc/jithelpers.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index 95d1aaf3c60af8..f918f2709f03f8 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -153,7 +153,9 @@ JITHELPER(CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP, JIT_WriteBarrierEnsureNonHeapTarget,CORINFO_HELP_SIG_REG_ONLY) DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF, JIT_ByRefWriteBarrier,CORINFO_HELP_SIG_NO_ALIGN_STUB) +#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF_BATCH, JIT_ByRefWriteBarrierBatch,CORINFO_HELP_SIG_NO_ALIGN_STUB) +#endif JITHELPER(CORINFO_HELP_ASSIGN_STRUCT, JIT_StructWriteBarrier,CORINFO_HELP_SIG_4_STACK) From 03da66259251f5628ebe49e4dc6eb54dd770358d Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Thu, 29 Feb 2024 05:15:29 +0100 Subject: [PATCH 05/14] FIX BUILD --- src/coreclr/inc/jithelpers.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index f918f2709f03f8..e7d0998c773024 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -155,6 +155,8 @@ DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF, JIT_ByRefWriteBarrier,CORINFO_HELP_SIG_NO_ALIGN_STUB) #if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF_BATCH, JIT_ByRefWriteBarrierBatch,CORINFO_HELP_SIG_NO_ALIGN_STUB) +#else + DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF_BATCH, NULL,CORINFO_HELP_SIG_NO_ALIGN_STUB) #endif JITHELPER(CORINFO_HELP_ASSIGN_STRUCT, JIT_StructWriteBarrier,CORINFO_HELP_SIG_4_STACK) From b909b4900a49de42f4e859918a0e33b895fc632d Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 29 Feb 2024 13:09:21 +0100 Subject: [PATCH 06/14] optimize nonheap case, bump guid --- src/coreclr/inc/jiteeversionguid.h | 10 +++++----- src/coreclr/vm/amd64/JitHelpers_Fast.asm | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 11675936acfa37..c1799ef65fde00 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 86eab154-5d93-4fad-bc07-e94fd9268b70 */ - 0x86eab154, - 0x5d93, - 0x4fad, - {0xbc, 0x07, 0xe9, 0x4f, 0xd9, 0x26, 0x8b, 0x70} +constexpr GUID JITEEVersionIdentifier = { /* 121a4c9e-57c9-4e20-a02b-713cd8fd1ecc */ + 0x121a4c9e, + 0x57c9, + 0x4e20, + {0xa0, 0x2b, 0x71, 0x3c, 0xd8, 0xfd, 0x1e, 0xcc} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm index cfb8a56aab6517..d2e83b9c1a4b0c 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm @@ -467,6 +467,22 @@ ifdef WRITE_BARRIER_CHECK ; rcx is [rsi] mov [rdi], rcx endif + ; At least one write is already done, increment the pointers + add rdi, 8h + add rsi, 8h + dec r8d + je NotInHeapExit + ; Now we can do the rest of the writes without checking the heap + NextByrefUnchecked: + mov rcx, [rsi] + mov [rdi], rcx + add rdi, 8h + add rsi, 8h + dec r8d + jne NextByrefUnchecked + NotInHeapExit: + ret + Exit: ; Increment the pointers before leaving add rdi, 8h From 2c875557ee6651d5e9aa73e5f7da0a9ae83d7872 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 29 Feb 2024 13:16:52 +0100 Subject: [PATCH 07/14] Clean up jit part --- src/coreclr/jit/codegenxarch.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index e30641b814f9bd..d3270d02cbdd54 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -4268,24 +4268,25 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) #if defined(TARGET_AMD64) if (!compiler->opts.IsReadyToRun() && TargetOS::IsWindows) { + // How many continuous GC slots we have? unsigned gcSlotCount = 0; + unsigned j = i; do { gcSlotCount++; - i++; - } while ((i < slots) && layout->IsGCPtr(i)); + j++; + } while ((j < slots) && layout->IsGCPtr(j)); + // if more than 1, call the batch version if (gcSlotCount > 1) { + // Number of continuous GC slots is passed in R8 instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_R8, gcSlotCount); genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF_BATCH, 0, EA_PTRSIZE); + gcPtrCount -= gcSlotCount; + i += gcSlotCount; + continue; } - else - { - genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); - } - gcPtrCount -= gcSlotCount; - continue; } #endif genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); From 6170bba127709ad73c24380dd7b11515ddf37f77 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 29 Feb 2024 14:54:54 +0100 Subject: [PATCH 08/14] Clean up jit part --- src/coreclr/jit/codegenxarch.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index d3270d02cbdd54..c24a0823573b50 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -4277,10 +4277,12 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) j++; } while ((j < slots) && layout->IsGCPtr(j)); - // if more than 1, call the batch version + // Use a batched version of write-barrier if there are more than 1 continuous GC slots if (gcSlotCount > 1) { // Number of continuous GC slots is passed in R8 + assert((genRegMask(REG_R8) & (RBM_INT_CALLEE_TRASH)) == genRegMask(REG_R8)); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_R8, gcSlotCount); genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF_BATCH, 0, EA_PTRSIZE); gcPtrCount -= gcSlotCount; From cd7c2a2fa1943f82f0f1dbcd3885c0e9e7f3fe3c Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 29 Feb 2024 16:53:34 +0100 Subject: [PATCH 09/14] Max size of a batch --- src/coreclr/jit/codegenxarch.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index c24a0823573b50..65ac6d864d0add 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -4277,6 +4277,10 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) j++; } while ((j < slots) && layout->IsGCPtr(j)); + // Limit the max size of a batch, we don't want to get stuck in the write-barrier + // moving a huge batch while GC is suspending threads. + gcSlotCount = min(gcSlotCount, 256); + // Use a batched version of write-barrier if there are more than 1 continuous GC slots if (gcSlotCount > 1) { From 23e42bb13a1b801bc8482e1ada834feba60bdd96 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 29 Feb 2024 17:58:10 +0100 Subject: [PATCH 10/14] Enable for Unix-x64 --- src/coreclr/inc/jithelpers.h | 2 +- src/coreclr/jit/codegenxarch.cpp | 2 +- src/coreclr/vm/amd64/JitHelpers_Fast.asm | 13 +- src/coreclr/vm/amd64/jithelpers_fast.S | 245 +++++++++++++++++++++++ src/coreclr/vm/excep.cpp | 4 +- src/coreclr/vm/jitinterface.h | 2 +- 6 files changed, 260 insertions(+), 8 deletions(-) diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index e7d0998c773024..30a90de33cea44 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -153,7 +153,7 @@ JITHELPER(CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP, JIT_WriteBarrierEnsureNonHeapTarget,CORINFO_HELP_SIG_REG_ONLY) DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF, JIT_ByRefWriteBarrier,CORINFO_HELP_SIG_NO_ALIGN_STUB) -#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) +#if defined(TARGET_AMD64) DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF_BATCH, JIT_ByRefWriteBarrierBatch,CORINFO_HELP_SIG_NO_ALIGN_STUB) #else DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF_BATCH, NULL,CORINFO_HELP_SIG_NO_ALIGN_STUB) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 65ac6d864d0add..79d7c44d3901d7 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -4266,7 +4266,7 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) else { #if defined(TARGET_AMD64) - if (!compiler->opts.IsReadyToRun() && TargetOS::IsWindows) + if (!compiler->opts.IsReadyToRun()) { // How many continuous GC slots we have? unsigned gcSlotCount = 0; diff --git a/src/coreclr/vm/amd64/JitHelpers_Fast.asm b/src/coreclr/vm/amd64/JitHelpers_Fast.asm index d2e83b9c1a4b0c..e96b2d017d2630 100644 --- a/src/coreclr/vm/amd64/JitHelpers_Fast.asm +++ b/src/coreclr/vm/amd64/JitHelpers_Fast.asm @@ -52,6 +52,8 @@ extern JIT_InternalThrow:proc ; JIT_ByRefWriteBarrier has weird semantics, see usage in StubLinkerX86.cpp ; +; Keep in sync with JIT_ByRefWriteBarrierBatch!! +; ; Entry: ; RDI - address of ref-field (assigned to) ; RSI - address of the data (source) @@ -257,17 +259,17 @@ endif LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT -; JIT_ByRefWriteBarrierBatch has weird semantics, see usage in StubLinkerX86.cpp +; JIT_ByRefWriteBarrierBatch is a batch version of JIT_ByRefWriteBarrier, so see comments there first ; ; Entry: ; RDI - address of ref-field (assigned to) ; RSI - address of the data (source) -; R8D - number of byrefs to write +; R8 - number of byrefs to write ; RCX is trashed ; RAX is trashed when FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP is defined ; Exit: ; RDI, RSI are incremented by SIZEOF(LPVOID) -; R8D is zeroed +; R8 is zeroed LEAF_ENTRY JIT_ByRefWriteBarrierBatch, _TEXT NextByref: mov rcx, [rsi] @@ -407,6 +409,7 @@ ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP ; Check if this card table bit is already set test byte ptr [rcx], al je SetCardTableBit + ; Check if we have more in the batch and run again dec r8d jne NextByref REPRET @@ -430,6 +433,7 @@ CheckCardTableByte: ; Check if this card is dirty cmp byte ptr [rcx], 0FFh jne UpdateCardTable + ; Check if we have more in the batch and run again dec r8d jne NextByref REPRET @@ -447,6 +451,7 @@ ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES add rcx, [g_card_bundle_table] cmp byte ptr [rcx], 0FFh jne UpdateCardBundleTable + ; Check if we have more in the batch and run again dec r8d jne NextByref REPRET @@ -454,6 +459,7 @@ ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES UpdateCardBundleTable: mov byte ptr [rcx], 0FFh endif + ; Check if we have more in the batch and run again dec r8d jne NextByref ret @@ -487,6 +493,7 @@ endif ; Increment the pointers before leaving add rdi, 8h add rsi, 8h + ; Check if we have more in the batch and run again dec r8d jne NextByref ret diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index 3a2d803a1460fb..d0e37307a1c101 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -16,6 +16,7 @@ // except RDI and RSI. This helper uses and defines RDI and RSI, so // they remain as live GC refs or byrefs, and are not killed. // +// Keep in sync with JIT_ByRefWriteBarrierBatch!! // // RCX is trashed // RAX is trashed @@ -219,6 +220,250 @@ LEAF_ENTRY JIT_ByRefWriteBarrier, _TEXT ret LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT + +// JIT_ByRefWriteBarrierBatch is a batch version of JIT_ByRefWriteBarrier, so see comments there first +// +// Entry: +// RDI - address of ref-field (assigned to) +// RSI - address of the data (source) +// R8 - number of byrefs to write +// +// RCX is trashed +// RAX is trashed +// R10 is trashed +// R11 is trashed on Debug build +// Exit: +// RDI, RSI are incremented by SIZEOF(LPVOID) +// R8 is zeroed +LEAF_ENTRY JIT_ByRefWriteBarrierBatch, _TEXT + LOCAL_LABEL(NextByref_ByRefWriteBarrier): + mov rcx, [rsi] + +// If !WRITE_BARRIER_CHECK do the write first, otherwise we might have to do some ShadowGC stuff +#ifndef WRITE_BARRIER_CHECK + // rcx is [rsi] + mov [rdi], rcx +#endif + + // When WRITE_BARRIER_CHECK is defined _NotInHeap will write the reference + // but if it isn't then it will just return. + // + // See if this is in GCHeap + PREPARE_EXTERNAL_VAR g_lowest_address, rax + cmp rdi, [rax] + jb LOCAL_LABEL(NotInHeap_ByRefWriteBarrier) + PREPARE_EXTERNAL_VAR g_highest_address, rax + cmp rdi, [rax] + jnb LOCAL_LABEL(NotInHeap_ByRefWriteBarrier) + +#ifdef WRITE_BARRIER_CHECK + // **ALSO update the shadow GC heap if that is enabled** + // Do not perform the work if g_GCShadow is 0 + PREPARE_EXTERNAL_VAR g_GCShadow, rax + cmp qword ptr [rax], 0 + je LOCAL_LABEL(NoShadow_ByRefWriteBarrier) + + // If we end up outside of the heap don't corrupt random memory + mov r10, rdi + PREPARE_EXTERNAL_VAR g_lowest_address, rax + sub r10, [rax] + jb LOCAL_LABEL(NoShadow_ByRefWriteBarrier) + + // Check that our adjusted destination is somewhere in the shadow gc + PREPARE_EXTERNAL_VAR g_GCShadow, rax + add r10, [rax] + PREPARE_EXTERNAL_VAR g_GCShadowEnd, rax + cmp r10, [rax] + jnb LOCAL_LABEL(NoShadow_ByRefWriteBarrier) + + // Write ref into real GC + mov [rdi], rcx + // Write ref into shadow GC + mov [r10], rcx + + // Ensure that the write to the shadow heap occurs before the read from + // the GC heap so that race conditions are caught by INVALIDGCVALUE + mfence + + // Check that GC/ShadowGC values match + mov r11, [rdi] + mov rax, [r10] + cmp rax, r11 + je LOCAL_LABEL(DoneShadow_ByRefWriteBarrier) + movabs r11, INVALIDGCVALUE + mov [r10], r11 + + jmp LOCAL_LABEL(DoneShadow_ByRefWriteBarrier) + + // If we don't have a shadow GC we won't have done the write yet + LOCAL_LABEL(NoShadow_ByRefWriteBarrier): + mov [rdi], rcx + + // If we had a shadow GC then we already wrote to the real GC at the same time + // as the shadow GC so we want to jump over the real write immediately above. + // Additionally we know for sure that we are inside the heap and therefore don't + // need to replicate the above checks. + LOCAL_LABEL(DoneShadow_ByRefWriteBarrier): +#endif + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + // Update the write watch table if necessary + PREPARE_EXTERNAL_VAR g_sw_ww_enabled_for_gc_heap, rax + cmp byte ptr [rax], 0x0 + je LOCAL_LABEL(CheckCardTable_ByRefWriteBarrier) + mov rax, rdi + shr rax, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift + PREPARE_EXTERNAL_VAR g_sw_ww_table, r10 + add rax, qword ptr [r10] + cmp byte ptr [rax], 0x0 + jne LOCAL_LABEL(CheckCardTable_ByRefWriteBarrier) + mov byte ptr [rax], 0xFF +#endif + + LOCAL_LABEL(CheckCardTable_ByRefWriteBarrier): + // See if we can just quick out + PREPARE_EXTERNAL_VAR g_ephemeral_low, rax + cmp rcx, [rax] + jb LOCAL_LABEL(Exit_ByRefWriteBarrier) + PREPARE_EXTERNAL_VAR g_ephemeral_high, rax + cmp rcx, [rax] + jnb LOCAL_LABEL(Exit_ByRefWriteBarrier) + + mov rax, rcx + + PREPARE_EXTERNAL_VAR g_region_shr, rcx + mov cl, [rcx] + test cl, cl + je LOCAL_LABEL(SkipCheck_ByRefWriteBarrier) + + // check if the source is in gen 2 - then it's not an ephemeral pointer + shr rax, cl + PREPARE_EXTERNAL_VAR g_region_to_generation_table, r10 + mov r10, [r10] + cmp byte ptr [rax + r10], 0x82 + je LOCAL_LABEL(Exit_ByRefWriteBarrier) + + // check if the destination happens to be in gen 0 + mov rax, rdi + shr rax, cl + cmp byte ptr [rax + r10], 0 + je LOCAL_LABEL(Exit_ByRefWriteBarrier) + LOCAL_LABEL(SkipCheck_ByRefWriteBarrier): + + PREPARE_EXTERNAL_VAR g_card_table, r10 + mov r10, [r10] + + PREPARE_EXTERNAL_VAR g_region_use_bitwise_write_barrier, rax + cmp byte ptr [rax], 0 + je LOCAL_LABEL(CheckCardTableByte_ByRefWriteBarrier) + + // compute card table bit + mov ecx, edi + mov al, 1 + shr ecx, 8 + and cl, 7 + shl al, cl + + // move current rdi value into rcx and then increment the pointers + mov rcx, rdi + add rsi, 0x8 + add rdi, 0x8 + + // Check if we need to update the card table + // Calc pCardByte + shr rcx, 0xB + // Check if this card table bit is already set + test byte ptr [rcx + r10], al + je LOCAL_LABEL(SetCardTableBit_ByRefWriteBarrier) + // Check if we have more in the batch and run again + dec r8d + jne LOCAL_LABEL(NextByref_ByRefWriteBarrier) + REPRET + + LOCAL_LABEL(SetCardTableBit_ByRefWriteBarrier): + lock or byte ptr [rcx + r10], al + + jmp LOCAL_LABEL(CheckCardBundle_ByRefWriteBarrier) + + LOCAL_LABEL(CheckCardTableByte_ByRefWriteBarrier): + // move current rdi value into rcx and then increment the pointers + mov rcx, rdi + add rsi, 0x8 + add rdi, 0x8 + + shr rcx, 0xB + cmp byte ptr [rcx + r10], 0xFF + jne LOCAL_LABEL(SetCardTableByte_ByRefWriteBarrier) + // Check if we have more in the batch and run again + dec r8d + jne LOCAL_LABEL(NextByref_ByRefWriteBarrier) + REPRET + LOCAL_LABEL(SetCardTableByte_ByRefWriteBarrier): + mov byte ptr [rcx + r10], 0xFF + + LOCAL_LABEL(CheckCardBundle_ByRefWriteBarrier): + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + // Shift rcx by 0x0A more to get the card bundle byte (we shifted by 0x0B already) + shr rcx, 0x0A + + PREPARE_EXTERNAL_VAR g_card_bundle_table, rax + add rcx, [rax] + + // Check if this bundle byte is dirty + cmp byte ptr [rcx], 0xFF + + jne LOCAL_LABEL(UpdateCardBundle_ByRefWriteBarrier) + // Check if we have more in the batch and run again + dec r8d + jne LOCAL_LABEL(NextByref_ByRefWriteBarrier) + REPRET + + LOCAL_LABEL(UpdateCardBundle_ByRefWriteBarrier): + mov byte ptr [rcx], 0xFF +#endif + // Check if we have more in the batch and run again + dec r8d + jne LOCAL_LABEL(NextByref_ByRefWriteBarrier) + ret + + .balign 16 + LOCAL_LABEL(NotInHeap_ByRefWriteBarrier): +// If WRITE_BARRIER_CHECK then we won't have already done the mov and should do it here +// If !WRITE_BARRIER_CHECK we want _NotInHeap and _Leave to be the same and have both +// 16 byte aligned. +#ifdef WRITE_BARRIER_CHECK + // rcx is [rsi] + mov [rdi], rcx +#endif + + // At least one write is already done, increment the pointers + add rdi, 8h + add rsi, 8h + dec r8d + je LOCAL_LABEL(NotInHeapExit) + // Now we can do the rest of the writes without checking the heap + LOCAL_LABEL(NextByrefUnchecked): + mov rcx, [rsi] + mov [rdi], rcx + add rdi, 8h + add rsi, 8h + dec r8d + jne LOCAL_LABEL(NextByrefUnchecked) + LOCAL_LABEL(NotInHeapExit): + ret + + LOCAL_LABEL(Exit_ByRefWriteBarrier): + // Increment the pointers before leaving + add rdi, 0x8 + add rsi, 0x8 + // Check if we have more in the batch and run again + dec r8d + jne LOCAL_LABEL(NextByref_ByRefWriteBarrier) + ret +LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT + + // When JIT_WriteBarrier is copied into an allocated page, // helpers use this global variable to jump to it. This variable is set in InitThreadManager. .global C_FUNC(JIT_WriteBarrier_Loc) diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index b0ee75743345b3..815f311bb4b9bb 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6295,7 +6295,7 @@ EXTERN_C void JIT_StackProbe_End(); EXTERN_C void JIT_WriteBarrier_End(); EXTERN_C void JIT_CheckedWriteBarrier_End(); EXTERN_C void JIT_ByRefWriteBarrier_End(); -#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) +#if defined(TARGET_AMD64) EXTERN_C void JIT_ByRefWriteBarrierBatch_End(); #endif // TARGET_AMD64 && TARGET_WINDOWS #endif // TARGET_X86 @@ -6348,7 +6348,7 @@ bool IsIPInMarkedJitHelper(UINT_PTR uControlPc) CHECK_RANGE(JIT_WriteBarrier) CHECK_RANGE(JIT_CheckedWriteBarrier) CHECK_RANGE(JIT_ByRefWriteBarrier) -#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) +#if defined(TARGET_AMD64) CHECK_RANGE(JIT_ByRefWriteBarrierBatch) #endif // TARGET_AMD64 && TARGET_WINDOWS #if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index 594dde587b29fd..19fdfc136c40eb 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -390,7 +390,7 @@ extern "C" void STDCALL JIT_ByRefWriteBarrier(); // JIThelp.asm/JIThelp.s -#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS) +#if defined(TARGET_AMD64) void STDCALL JIT_ByRefWriteBarrierBatch(); // JIThelp.asm/JIThelp.s #endif // TARGET_AMD64 && TARGET_WINDOWS From 2f9019290a7e01a165e63bae6a880b28c0505686 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Thu, 29 Feb 2024 18:08:53 +0100 Subject: [PATCH 11/14] 8h -> 0x8 (fixes build on linux) --- src/coreclr/vm/amd64/jithelpers_fast.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index d0e37307a1c101..26feba957ee5cb 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -438,16 +438,16 @@ LEAF_ENTRY JIT_ByRefWriteBarrierBatch, _TEXT #endif // At least one write is already done, increment the pointers - add rdi, 8h - add rsi, 8h + add rdi, 0x8 + add rsi, 0x8 dec r8d je LOCAL_LABEL(NotInHeapExit) // Now we can do the rest of the writes without checking the heap LOCAL_LABEL(NextByrefUnchecked): mov rcx, [rsi] mov [rdi], rcx - add rdi, 8h - add rsi, 8h + add rdi, 0x8 + add rsi, 0x8 dec r8d jne LOCAL_LABEL(NextByrefUnchecked) LOCAL_LABEL(NotInHeapExit): From a96af65aff59a937c918679fc20b0b216604cfe4 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 29 Feb 2024 19:05:51 +0100 Subject: [PATCH 12/14] Fix build on unix --- src/coreclr/vm/amd64/jithelpers_fast.S | 75 +++++++++++++------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/src/coreclr/vm/amd64/jithelpers_fast.S b/src/coreclr/vm/amd64/jithelpers_fast.S index 26feba957ee5cb..1115d8f9405368 100644 --- a/src/coreclr/vm/amd64/jithelpers_fast.S +++ b/src/coreclr/vm/amd64/jithelpers_fast.S @@ -236,7 +236,7 @@ LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT // RDI, RSI are incremented by SIZEOF(LPVOID) // R8 is zeroed LEAF_ENTRY JIT_ByRefWriteBarrierBatch, _TEXT - LOCAL_LABEL(NextByref_ByRefWriteBarrier): + LOCAL_LABEL(NextByref_ByRefWriteBarrierBatch): mov rcx, [rsi] // If !WRITE_BARRIER_CHECK do the write first, otherwise we might have to do some ShadowGC stuff @@ -251,30 +251,30 @@ LEAF_ENTRY JIT_ByRefWriteBarrierBatch, _TEXT // See if this is in GCHeap PREPARE_EXTERNAL_VAR g_lowest_address, rax cmp rdi, [rax] - jb LOCAL_LABEL(NotInHeap_ByRefWriteBarrier) + jb LOCAL_LABEL(NotInHeap_ByRefWriteBarrierBatch) PREPARE_EXTERNAL_VAR g_highest_address, rax cmp rdi, [rax] - jnb LOCAL_LABEL(NotInHeap_ByRefWriteBarrier) + jnb LOCAL_LABEL(NotInHeap_ByRefWriteBarrierBatch) #ifdef WRITE_BARRIER_CHECK // **ALSO update the shadow GC heap if that is enabled** // Do not perform the work if g_GCShadow is 0 PREPARE_EXTERNAL_VAR g_GCShadow, rax cmp qword ptr [rax], 0 - je LOCAL_LABEL(NoShadow_ByRefWriteBarrier) + je LOCAL_LABEL(NoShadow_ByRefWriteBarrierBatch) // If we end up outside of the heap don't corrupt random memory mov r10, rdi PREPARE_EXTERNAL_VAR g_lowest_address, rax sub r10, [rax] - jb LOCAL_LABEL(NoShadow_ByRefWriteBarrier) + jb LOCAL_LABEL(NoShadow_ByRefWriteBarrierBatch) // Check that our adjusted destination is somewhere in the shadow gc PREPARE_EXTERNAL_VAR g_GCShadow, rax add r10, [rax] PREPARE_EXTERNAL_VAR g_GCShadowEnd, rax cmp r10, [rax] - jnb LOCAL_LABEL(NoShadow_ByRefWriteBarrier) + jnb LOCAL_LABEL(NoShadow_ByRefWriteBarrierBatch) // Write ref into real GC mov [rdi], rcx @@ -289,73 +289,73 @@ LEAF_ENTRY JIT_ByRefWriteBarrierBatch, _TEXT mov r11, [rdi] mov rax, [r10] cmp rax, r11 - je LOCAL_LABEL(DoneShadow_ByRefWriteBarrier) + je LOCAL_LABEL(DoneShadow_ByRefWriteBarrierBatch) movabs r11, INVALIDGCVALUE mov [r10], r11 - jmp LOCAL_LABEL(DoneShadow_ByRefWriteBarrier) + jmp LOCAL_LABEL(DoneShadow_ByRefWriteBarrierBatch) // If we don't have a shadow GC we won't have done the write yet - LOCAL_LABEL(NoShadow_ByRefWriteBarrier): + LOCAL_LABEL(NoShadow_ByRefWriteBarrierBatch): mov [rdi], rcx // If we had a shadow GC then we already wrote to the real GC at the same time // as the shadow GC so we want to jump over the real write immediately above. // Additionally we know for sure that we are inside the heap and therefore don't // need to replicate the above checks. - LOCAL_LABEL(DoneShadow_ByRefWriteBarrier): + LOCAL_LABEL(DoneShadow_ByRefWriteBarrierBatch): #endif #ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP // Update the write watch table if necessary PREPARE_EXTERNAL_VAR g_sw_ww_enabled_for_gc_heap, rax cmp byte ptr [rax], 0x0 - je LOCAL_LABEL(CheckCardTable_ByRefWriteBarrier) + je LOCAL_LABEL(CheckCardTable_ByRefWriteBarrierBatch) mov rax, rdi shr rax, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift PREPARE_EXTERNAL_VAR g_sw_ww_table, r10 add rax, qword ptr [r10] cmp byte ptr [rax], 0x0 - jne LOCAL_LABEL(CheckCardTable_ByRefWriteBarrier) + jne LOCAL_LABEL(CheckCardTable_ByRefWriteBarrierBatch) mov byte ptr [rax], 0xFF #endif - LOCAL_LABEL(CheckCardTable_ByRefWriteBarrier): + LOCAL_LABEL(CheckCardTable_ByRefWriteBarrierBatch): // See if we can just quick out PREPARE_EXTERNAL_VAR g_ephemeral_low, rax cmp rcx, [rax] - jb LOCAL_LABEL(Exit_ByRefWriteBarrier) + jb LOCAL_LABEL(Exit_ByRefWriteBarrierBatch) PREPARE_EXTERNAL_VAR g_ephemeral_high, rax cmp rcx, [rax] - jnb LOCAL_LABEL(Exit_ByRefWriteBarrier) + jnb LOCAL_LABEL(Exit_ByRefWriteBarrierBatch) mov rax, rcx PREPARE_EXTERNAL_VAR g_region_shr, rcx mov cl, [rcx] test cl, cl - je LOCAL_LABEL(SkipCheck_ByRefWriteBarrier) + je LOCAL_LABEL(SkipCheck_ByRefWriteBarrierBatch) // check if the source is in gen 2 - then it's not an ephemeral pointer shr rax, cl PREPARE_EXTERNAL_VAR g_region_to_generation_table, r10 mov r10, [r10] cmp byte ptr [rax + r10], 0x82 - je LOCAL_LABEL(Exit_ByRefWriteBarrier) + je LOCAL_LABEL(Exit_ByRefWriteBarrierBatch) // check if the destination happens to be in gen 0 mov rax, rdi shr rax, cl cmp byte ptr [rax + r10], 0 - je LOCAL_LABEL(Exit_ByRefWriteBarrier) - LOCAL_LABEL(SkipCheck_ByRefWriteBarrier): + je LOCAL_LABEL(Exit_ByRefWriteBarrierBatch) + LOCAL_LABEL(SkipCheck_ByRefWriteBarrierBatch): PREPARE_EXTERNAL_VAR g_card_table, r10 mov r10, [r10] PREPARE_EXTERNAL_VAR g_region_use_bitwise_write_barrier, rax cmp byte ptr [rax], 0 - je LOCAL_LABEL(CheckCardTableByte_ByRefWriteBarrier) + je LOCAL_LABEL(CheckCardTableByte_ByRefWriteBarrierBatch) // compute card table bit mov ecx, edi @@ -374,18 +374,18 @@ LEAF_ENTRY JIT_ByRefWriteBarrierBatch, _TEXT shr rcx, 0xB // Check if this card table bit is already set test byte ptr [rcx + r10], al - je LOCAL_LABEL(SetCardTableBit_ByRefWriteBarrier) + je LOCAL_LABEL(SetCardTableBit_ByRefWriteBarrierBatch) // Check if we have more in the batch and run again dec r8d - jne LOCAL_LABEL(NextByref_ByRefWriteBarrier) + jne LOCAL_LABEL(NextByref_ByRefWriteBarrierBatch) REPRET - LOCAL_LABEL(SetCardTableBit_ByRefWriteBarrier): + LOCAL_LABEL(SetCardTableBit_ByRefWriteBarrierBatch): lock or byte ptr [rcx + r10], al - jmp LOCAL_LABEL(CheckCardBundle_ByRefWriteBarrier) + jmp LOCAL_LABEL(CheckCardBundle_ByRefWriteBarrierBatch) - LOCAL_LABEL(CheckCardTableByte_ByRefWriteBarrier): + LOCAL_LABEL(CheckCardTableByte_ByRefWriteBarrierBatch): // move current rdi value into rcx and then increment the pointers mov rcx, rdi add rsi, 0x8 @@ -393,15 +393,15 @@ LEAF_ENTRY JIT_ByRefWriteBarrierBatch, _TEXT shr rcx, 0xB cmp byte ptr [rcx + r10], 0xFF - jne LOCAL_LABEL(SetCardTableByte_ByRefWriteBarrier) + jne LOCAL_LABEL(SetCardTableByte_ByRefWriteBarrierBatch) // Check if we have more in the batch and run again dec r8d - jne LOCAL_LABEL(NextByref_ByRefWriteBarrier) + jne LOCAL_LABEL(NextByref_ByRefWriteBarrierBatch) REPRET - LOCAL_LABEL(SetCardTableByte_ByRefWriteBarrier): + LOCAL_LABEL(SetCardTableByte_ByRefWriteBarrierBatch): mov byte ptr [rcx + r10], 0xFF - LOCAL_LABEL(CheckCardBundle_ByRefWriteBarrier): + LOCAL_LABEL(CheckCardBundle_ByRefWriteBarrierBatch): #ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES // Shift rcx by 0x0A more to get the card bundle byte (we shifted by 0x0B already) @@ -413,22 +413,22 @@ LEAF_ENTRY JIT_ByRefWriteBarrierBatch, _TEXT // Check if this bundle byte is dirty cmp byte ptr [rcx], 0xFF - jne LOCAL_LABEL(UpdateCardBundle_ByRefWriteBarrier) + jne LOCAL_LABEL(UpdateCardBundle_ByRefWriteBarrierBatch) // Check if we have more in the batch and run again dec r8d - jne LOCAL_LABEL(NextByref_ByRefWriteBarrier) + jne LOCAL_LABEL(NextByref_ByRefWriteBarrierBatch) REPRET - LOCAL_LABEL(UpdateCardBundle_ByRefWriteBarrier): + LOCAL_LABEL(UpdateCardBundle_ByRefWriteBarrierBatch): mov byte ptr [rcx], 0xFF #endif // Check if we have more in the batch and run again dec r8d - jne LOCAL_LABEL(NextByref_ByRefWriteBarrier) + jne LOCAL_LABEL(NextByref_ByRefWriteBarrierBatch) ret .balign 16 - LOCAL_LABEL(NotInHeap_ByRefWriteBarrier): + LOCAL_LABEL(NotInHeap_ByRefWriteBarrierBatch): // If WRITE_BARRIER_CHECK then we won't have already done the mov and should do it here // If !WRITE_BARRIER_CHECK we want _NotInHeap and _Leave to be the same and have both // 16 byte aligned. @@ -453,16 +453,15 @@ LEAF_ENTRY JIT_ByRefWriteBarrierBatch, _TEXT LOCAL_LABEL(NotInHeapExit): ret - LOCAL_LABEL(Exit_ByRefWriteBarrier): + LOCAL_LABEL(Exit_ByRefWriteBarrierBatch): // Increment the pointers before leaving add rdi, 0x8 add rsi, 0x8 // Check if we have more in the batch and run again dec r8d - jne LOCAL_LABEL(NextByref_ByRefWriteBarrier) + jne LOCAL_LABEL(NextByref_ByRefWriteBarrierBatch) ret -LEAF_END_MARKED JIT_ByRefWriteBarrier, _TEXT - +LEAF_END_MARKED JIT_ByRefWriteBarrierBatch, _TEXT // When JIT_WriteBarrier is copied into an allocated page, // helpers use this global variable to jump to it. This variable is set in InitThreadManager. From 810777b303cf50c88999b4b9c4b68e05f0ffe042 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 1 Mar 2024 00:31:06 +0100 Subject: [PATCH 13/14] NAOT/R2R for x64 --- src/coreclr/inc/jiteeversionguid.h | 10 +- src/coreclr/inc/readytorun.h | 4 +- src/coreclr/inc/readytorunhelpers.h | 1 + src/coreclr/jit/codegenxarch.cpp | 45 ++++---- src/coreclr/nativeaot/Runtime/EHHelpers.cpp | 9 ++ .../nativeaot/Runtime/amd64/WriteBarriers.S | 102 +++++++++++++++++ .../nativeaot/Runtime/amd64/WriteBarriers.asm | 104 ++++++++++++++++++ .../nativeaot/Runtime/inc/ModuleHeaders.h | 2 +- .../Common/Internal/Runtime/ModuleHeaders.cs | 2 +- .../Internal/Runtime/ReadyToRunConstants.cs | 1 + .../ILCompiler.Compiler/Compiler/JitHelper.cs | 5 + .../JitInterface/CorInfoImpl.ReadyToRun.cs | 3 + .../ReadyToRunSignature.cs | 3 + .../JitInterface/CorInfoImpl.RyuJit.cs | 3 + .../tools/aot/ILCompiler/repro/Program.cs | 36 +++++- 15 files changed, 295 insertions(+), 35 deletions(-) diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index c1799ef65fde00..4bc7184d145628 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* 121a4c9e-57c9-4e20-a02b-713cd8fd1ecc */ - 0x121a4c9e, - 0x57c9, - 0x4e20, - {0xa0, 0x2b, 0x71, 0x3c, 0xd8, 0xfd, 0x1e, 0xcc} +constexpr GUID JITEEVersionIdentifier = { /* e9339b0c-8569-4bd3-ba2a-cd098e174073 */ + 0xe9339b0c, + 0x8569, + 0x4bd3, + {0xba, 0x2a, 0xcd, 0x09, 0x8e, 0x17, 0x40, 0x73} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h index 41a4aa251fa742..443ff7bdb7dfde 100644 --- a/src/coreclr/inc/readytorun.h +++ b/src/coreclr/inc/readytorun.h @@ -20,7 +20,7 @@ // If you update this, ensure you run `git grep MINIMUM_READYTORUN_MAJOR_VERSION` // and handle pending work. #define READYTORUN_MAJOR_VERSION 0x0009 -#define READYTORUN_MINOR_VERSION 0x0002 +#define READYTORUN_MINOR_VERSION 0x0003 #define MINIMUM_READYTORUN_MAJOR_VERSION 0x009 @@ -34,6 +34,7 @@ // R2R Version 9.0 adds support for the Vector512 type // R2R Version 9.1 adds new helpers to allocate objects on frozen segments // R2R Version 9.2 adds MemZero and NativeMemSet helpers +// R2R Version 9.3 adds ByRefWriteBarrierBatch helper struct READYTORUN_CORE_HEADER @@ -322,6 +323,7 @@ enum ReadyToRunHelper READYTORUN_HELPER_WriteBarrier = 0x30, READYTORUN_HELPER_CheckedWriteBarrier = 0x31, READYTORUN_HELPER_ByRefWriteBarrier = 0x32, + READYTORUN_HELPER_ByRefWriteBarrierBatch = 0x33, // Array helpers READYTORUN_HELPER_Stelem_Ref = 0x38, diff --git a/src/coreclr/inc/readytorunhelpers.h b/src/coreclr/inc/readytorunhelpers.h index bbb586e8eb4a30..2c6c8a46e6da1f 100644 --- a/src/coreclr/inc/readytorunhelpers.h +++ b/src/coreclr/inc/readytorunhelpers.h @@ -24,6 +24,7 @@ HELPER(READYTORUN_HELPER_ThrowDivZero, CORINFO_HELP_THROWDIVZERO, HELPER(READYTORUN_HELPER_WriteBarrier, CORINFO_HELP_ASSIGN_REF, ) HELPER(READYTORUN_HELPER_CheckedWriteBarrier, CORINFO_HELP_CHECKED_ASSIGN_REF, ) HELPER(READYTORUN_HELPER_ByRefWriteBarrier, CORINFO_HELP_ASSIGN_BYREF, ) +HELPER(READYTORUN_HELPER_ByRefWriteBarrierBatch, CORINFO_HELP_ASSIGN_BYREF_BATCH, ) HELPER(READYTORUN_HELPER_Stelem_Ref, CORINFO_HELP_ARRADDR_ST, ) HELPER(READYTORUN_HELPER_Ldelema_Ref, CORINFO_HELP_LDELEMA_REF, ) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 79d7c44d3901d7..5b39d99f62f602 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -4266,33 +4266,30 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) else { #if defined(TARGET_AMD64) - if (!compiler->opts.IsReadyToRun()) + // How many continuous GC slots we have? + unsigned gcSlotCount = 0; + unsigned j = i; + do { - // How many continuous GC slots we have? - unsigned gcSlotCount = 0; - unsigned j = i; - do - { - gcSlotCount++; - j++; - } while ((j < slots) && layout->IsGCPtr(j)); + gcSlotCount++; + j++; + } while ((j < slots) && layout->IsGCPtr(j)); - // Limit the max size of a batch, we don't want to get stuck in the write-barrier - // moving a huge batch while GC is suspending threads. - gcSlotCount = min(gcSlotCount, 256); + // Limit the max size of a batch, we don't want to get stuck in the write-barrier + // moving a huge batch while GC is suspending threads. + gcSlotCount = min(gcSlotCount, 256); - // Use a batched version of write-barrier if there are more than 1 continuous GC slots - if (gcSlotCount > 1) - { - // Number of continuous GC slots is passed in R8 - assert((genRegMask(REG_R8) & (RBM_INT_CALLEE_TRASH)) == genRegMask(REG_R8)); - - instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_R8, gcSlotCount); - genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF_BATCH, 0, EA_PTRSIZE); - gcPtrCount -= gcSlotCount; - i += gcSlotCount; - continue; - } + // Use a batched version of write-barrier if there are more than 1 continuous GC slots + if (gcSlotCount > 1) + { + // Number of continuous GC slots is passed in R8 + assert((genRegMask(REG_R8) & (RBM_INT_CALLEE_TRASH)) == genRegMask(REG_R8)); + + instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_R8, gcSlotCount); + genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF_BATCH, 0, EA_PTRSIZE); + gcPtrCount -= gcSlotCount; + i += gcSlotCount; + continue; } #endif genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp index 1a54b9bcc9b55b..e6e27cad59f4d6 100644 --- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp @@ -309,6 +309,11 @@ EXTERN_C CODE_LOCATION RhpByRefAssignRefAVLocation1; EXTERN_C CODE_LOCATION RhpByRefAssignRefAVLocation2; #endif +#if defined(HOST_AMD64) +EXTERN_C CODE_LOCATION RhpByRefAssignRefBatchAVLocation1; +EXTERN_C CODE_LOCATION RhpByRefAssignRefBatchAVLocation2; +#endif + #if defined(HOST_ARM64) && !defined(LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT) EXTERN_C CODE_LOCATION RhpCheckedLockCmpXchgAVLocation2; EXTERN_C CODE_LOCATION RhpCheckedXchgAVLocation2; @@ -333,6 +338,10 @@ static bool InWriteBarrierHelper(uintptr_t faultingIP) #if !defined(HOST_ARM64) (uintptr_t)&RhpByRefAssignRefAVLocation2, #endif +#if defined(HOST_AMD64) + (uintptr_t)&RhpByRefAssignRefBatchAVLocation1, + (uintptr_t)&RhpByRefAssignRefBatchAVLocation2, +#endif #if defined(HOST_ARM64) && !defined(LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT) (uintptr_t)&RhpCheckedLockCmpXchgAVLocation2, (uintptr_t)&RhpCheckedXchgAVLocation2, diff --git a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S index cc740a9c0601f7..a4b886a8c283ac 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S @@ -332,3 +332,105 @@ LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired): add rsi, 0x8 ret LEAF_END RhpByRefAssignRef, _TEXT + +// +// RhpByRefAssignRefBatch "Batch" version of RhpByRefAssignRef. +// +// On entry: +// rdi: address of ref-field (assigned to) +// rsi: address of the data (source) +// r8: number of byrefs +// +// On exit: +// rdi, rsi are incremented by 8, +// rcx, r10, r11: trashed +// +LEAF_ENTRY RhpByRefAssignRefBatch, _TEXT +LOCAL_LABEL(RhpByRefAssignRefBatch_NextByref): +ALTERNATE_ENTRY RhpByRefAssignRefBatchAVLocation1 + mov rcx, [rsi] +ALTERNATE_ENTRY RhpByRefAssignRefBatchAVLocation2 + mov [rdi], rcx + + // Check whether the writes were even into the heap. If not there's no card update required. + cmp rdi, [C_VAR(g_lowest_address)] + jb LOCAL_LABEL(RhpByRefAssignRefBatch_NotInHeap) + cmp rdi, [C_VAR(g_highest_address)] + jae LOCAL_LABEL(RhpByRefAssignRefBatch_NotInHeap) + + // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + // we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW BASENAME, rcx, rdi + +#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + mov r11, [C_VAR(g_write_watch_table)] + cmp r11, 0x0 + je LOCAL_LABEL(RhpByRefAssignRefBatch_CheckCardTable) + + mov r10, rdi + shr r10, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift + add r10, r11 + cmp byte ptr [r10], 0x0 + jne LOCAL_LABEL(RhpByRefAssignRefBatch_CheckCardTable) + mov byte ptr [r10], 0xFF +#endif + +LOCAL_LABEL(RhpByRefAssignRefBatch_CheckCardTable): + + // If the reference is to an object that's not in an ephemeral generation we have no need to track it + // (since the object won't be collected or moved by an ephemeral collection). + cmp rcx, [C_VAR(g_ephemeral_low)] + jb LOCAL_LABEL(RhpByRefAssignRefBatch_NoBarrierRequired) + cmp rcx, [C_VAR(g_ephemeral_high)] + jae LOCAL_LABEL(RhpByRefAssignRefBatch_NoBarrierRequired) + + // move current rdi value into rcx, we need to keep rdi and eventually increment by 8 + mov rcx, rdi + + // We have a location on the GC heap being updated with a reference to an ephemeral object so we must + // track this write. The location address is translated into an offset in the card table bitmap. We set + // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + // the byte if it hasn't already been done since writes are expensive and impact scaling. + shr rcx, 0x0B + mov r10, [C_VAR(g_card_table)] + cmp byte ptr [rcx + r10], 0x0FF + je LOCAL_LABEL(RhpByRefAssignRefBatch_NoBarrierRequired) + +// We get here if it's necessary to update the card table. + mov byte ptr [rcx + r10], 0xFF + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + // Shift rcx by 0x0A more to get the card bundle byte (we shifted by 0x0B already) + shr rcx, 0x0A + add rcx, [C_VAR(g_card_bundle_table)] + cmp byte ptr [rcx], 0xFF + je LOCAL_LABEL(RhpByRefAssignRefBatch_NoBarrierRequired) + + mov byte ptr [rcx], 0xFF +#endif + +LOCAL_LABEL(RhpByRefAssignRefBatch_NotInHeap): + // At least one write is already done, increment the pointers + add rdi, 8h + add rsi, 8h + dec r8d + je LOCAL_LABEL(RhpByRefAssignRefBatch_NotInHeapExit) + // Now we can do the rest of the writes without checking the heap +LOCAL_LABEL(RhpByRefAssignRefBatch_NextByrefUnchecked): + mov rcx, [rsi] + mov [rdi], rcx + add rdi, 8h + add rsi, 8h + dec r8d + jne LOCAL_LABEL(RhpByRefAssignRefBatch_NextByrefUnchecked) +LOCAL_LABEL(RhpByRefAssignRefBatch_NotInHeapExit): + ret + +LOCAL_LABEL(RhpByRefAssignRefBatch_NoBarrierRequired): + // Increment the pointers before leaving + add rdi, 0x8 + add rsi, 0x8 + dec r8d + jne LOCAL_LABEL(RhpByRefAssignRefBatch_NextByref) + ret +LEAF_END RhpByRefAssignRefBatch, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.asm b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.asm index 148aa7d1301b1c..8148427ff594a1 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.asm @@ -349,4 +349,108 @@ RhpByRefAssignRef_NoBarrierRequired: ret LEAF_END RhpByRefAssignRef, _TEXT +;; +;; RhpByRefAssignRefBatch "Batch" version of RhpByRefAssignRef. +;; +;; On entry: +;; rdi: address of ref-field (assigned to) +;; rsi: address of the data (source) +;; r8: number of byrefs +;; +;; On exit: +;; rdi, rsi are incremented by 8, +;; r8 is 0 +;; rcx, r10, r11: trashed +;; +LEAF_ENTRY RhpByRefAssignRefBatch, _TEXT +RhpByRefAssignRefBatch_NextByref: + +ALTERNATE_ENTRY RhpByRefAssignRefBatchAVLocation1 + mov rcx, [rsi] +ALTERNATE_ENTRY RhpByRefAssignRefBatchAVLocation2 + mov [rdi], rcx + + ;; Check whether the writes were even into the heap. If not there's no card update required. + cmp rdi, [g_lowest_address] + jb RhpByRefAssignRefBatch_NotInHeap + cmp rdi, [g_highest_address] + jae RhpByRefAssignRefBatch_NotInHeap + + ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + ;; we're in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW BASENAME, rcx, rdi + +ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP + mov r11, [g_write_watch_table] + cmp r11, 0 + je RhpByRefAssignRefBatch_CheckCardTable + + mov r10, rdi + shr r10, 0Ch ;; SoftwareWriteWatch::AddressToTableByteIndexShift + add r10, r11 + cmp byte ptr [r10], 0 + jne RhpByRefAssignRefBatch_CheckCardTable + mov byte ptr [r10], 0FFh +endif + +RhpByRefAssignRefBatch_CheckCardTable: + + ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it + ;; (since the object won't be collected or moved by an ephemeral collection). + cmp rcx, [g_ephemeral_low] + jb RhpByRefAssignRefBatch_NoBarrierRequired + cmp rcx, [g_ephemeral_high] + jae RhpByRefAssignRefBatch_NoBarrierRequired + + ;; move current rdi value into rcx, we need to keep rdi and eventually increment by 8 + mov rcx, rdi + + ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must + ;; track this write. The location address is translated into an offset in the card table bitmap. We set + ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write + ;; the byte if it hasn't already been done since writes are expensive and impact scaling. + shr rcx, 0Bh + mov r10, [g_card_table] + cmp byte ptr [rcx + r10], 0FFh + je RhpByRefAssignRefBatch_NoBarrierRequired + +;; We get here if it's necessary to update the card table. + mov byte ptr [rcx + r10], 0FFh + +ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + ;; Shift rcx by 0Ah more to get the card bundle byte (we shifted by 0Bh already) + shr rcx, 0Ah + add rcx, [g_card_bundle_table] + cmp byte ptr [rcx], 0FFh + je RhpByRefAssignRefBatch_NoBarrierRequired + + mov byte ptr [rcx], 0FFh +endif + +RhpByRefAssignRefBatch_NotInHeap: + ;; At least one write is already done, increment the pointers + add rdi, 8h + add rsi, 8h + dec r8d + je RhpByRefAssignRefBatch_NotInHeapExit + ;; Now we can do the rest of the writes without checking the heap +RhpByRefAssignRefBatch_NextByrefUnchecked: + mov rcx, [rsi] + mov [rdi], rcx + add rdi, 8h + add rsi, 8h + dec r8d + jne RhpByRefAssignRefBatch_NextByrefUnchecked +RhpByRefAssignRefBatch_NotInHeapExit: + ret + +RhpByRefAssignRefBatch_NoBarrierRequired: + ;; Increment the pointers before leaving + add rdi, 8h + add rsi, 8h + dec r8d + jne RhpByRefAssignRefBatch_NextByref + ret +LEAF_END RhpByRefAssignRefBatch, _TEXT + end diff --git a/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h b/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h index 6a3b24a3944870..d4360273d28ac8 100644 --- a/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h +++ b/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h @@ -12,7 +12,7 @@ struct ReadyToRunHeaderConstants static const uint32_t Signature = 0x00525452; // 'RTR' static const uint32_t CurrentMajorVersion = 9; - static const uint32_t CurrentMinorVersion = 2; + static const uint32_t CurrentMinorVersion = 3; }; struct ReadyToRunHeader diff --git a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs index 6fc5d9542e1609..8420c9a4803b28 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs @@ -16,7 +16,7 @@ internal struct ReadyToRunHeaderConstants public const uint Signature = 0x00525452; // 'RTR' public const ushort CurrentMajorVersion = 9; - public const ushort CurrentMinorVersion = 2; + public const ushort CurrentMinorVersion = 3; } #if READYTORUN #pragma warning disable 0169 diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs index a37945534865bf..768439e12a791f 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs @@ -234,6 +234,7 @@ public enum ReadyToRunHelper WriteBarrier = 0x30, CheckedWriteBarrier = 0x31, ByRefWriteBarrier = 0x32, + ByRefWriteBarrierBatch = 0x33, // Array helpers Stelem_Ref = 0x38, diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index cf1d04ca666af1..ef70a02e198a86 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -70,6 +70,11 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, case ReadyToRunHelper.ByRefWriteBarrier: mangledName = context.Target.Architecture == TargetArchitecture.ARM64 ? "RhpByRefAssignRefArm64" : "RhpByRefAssignRef"; break; + case ReadyToRunHelper.ByRefWriteBarrierBatch: + if (context.Target.Architecture != TargetArchitecture.X64) + throw new NotImplementedException(); + mangledName = "RhpByRefAssignRefBatch"; + break; case ReadyToRunHelper.WriteBarrier_EAX: mangledName = "RhpAssignRefEAX"; break; diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index ad83b1eb42a5d6..63d8811512583c 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -999,6 +999,9 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_ASSIGN_BYREF: id = ReadyToRunHelper.ByRefWriteBarrier; break; + case CorInfoHelpFunc.CORINFO_HELP_ASSIGN_BYREF_BATCH: + id = ReadyToRunHelper.ByRefWriteBarrierBatch; + break; case CorInfoHelpFunc.CORINFO_HELP_ARRADDR_ST: id = ReadyToRunHelper.Stelem_Ref; diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs index 0eae2f10cb8f00..df14fe42ac545f 100644 --- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs +++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs @@ -1671,6 +1671,9 @@ private void ParseHelper(StringBuilder builder) case ReadyToRunHelper.ByRefWriteBarrier: builder.Append("BYREF_WRITE_BARRIER"); break; + case ReadyToRunHelper.ByRefWriteBarrierBatch: + builder.Append("BYREF_WRITE_BARRIER_BATCH"); + break; // Array helpers case ReadyToRunHelper.Stelem_Ref: diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index 8755580e3f2903..54c6245f0b2e49 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -501,6 +501,9 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_ASSIGN_BYREF: id = ReadyToRunHelper.ByRefWriteBarrier; break; + case CorInfoHelpFunc.CORINFO_HELP_ASSIGN_BYREF_BATCH: + id = ReadyToRunHelper.ByRefWriteBarrierBatch; + break; case CorInfoHelpFunc.CORINFO_HELP_ASSIGN_REF_EAX: id = ReadyToRunHelper.WriteBarrier_EAX; break; diff --git a/src/coreclr/tools/aot/ILCompiler/repro/Program.cs b/src/coreclr/tools/aot/ILCompiler/repro/Program.cs index 9e71394c3732a5..2386e61c81b91d 100644 --- a/src/coreclr/tools/aot/ILCompiler/repro/Program.cs +++ b/src/coreclr/tools/aot/ILCompiler/repro/Program.cs @@ -2,11 +2,41 @@ // The .NET Foundation licenses this file to you under the MIT license. using System; +using System.Runtime.CompilerServices; -class Program +public class Prog { static void Main() { - Console.WriteLine("Hello world"); + Foo(); + Console.ReadKey(); } -} + + struct MyStruct + { + public string a1; + public string a2; + public string a3; + public string a4; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void Foo() + { + MyStruct ms = new MyStruct + { + a1 = 10001.ToString(), + a2 = 10002.ToString(), + a3 = 10003.ToString(), + a4 = 10004.ToString() + }; + ms = Test(ms); + Console.WriteLine(ms.a1); + Console.WriteLine(ms.a2); + Console.WriteLine(ms.a3); + Console.WriteLine(ms.a4); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static MyStruct Test(MyStruct ms) => ms; +} \ No newline at end of file From ec35a6d74e2266222b0ab88557a1afa187bde9c4 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 1 Mar 2024 00:44:41 +0100 Subject: [PATCH 14/14] 0x8 --- src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S index a4b886a8c283ac..55018bbe5b3e1f 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S @@ -411,16 +411,16 @@ LOCAL_LABEL(RhpByRefAssignRefBatch_CheckCardTable): LOCAL_LABEL(RhpByRefAssignRefBatch_NotInHeap): // At least one write is already done, increment the pointers - add rdi, 8h - add rsi, 8h + add rdi, 0x8 + add rsi, 0x8 dec r8d je LOCAL_LABEL(RhpByRefAssignRefBatch_NotInHeapExit) // Now we can do the rest of the writes without checking the heap LOCAL_LABEL(RhpByRefAssignRefBatch_NextByrefUnchecked): mov rcx, [rsi] mov [rdi], rcx - add rdi, 8h - add rsi, 8h + add rdi, 0x8 + add rsi, 0x8 dec r8d jne LOCAL_LABEL(RhpByRefAssignRefBatch_NextByrefUnchecked) LOCAL_LABEL(RhpByRefAssignRefBatch_NotInHeapExit):