Skip to content
1 change: 1 addition & 0 deletions src/coreclr/inc/corinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,7 @@ enum CorInfoHelpFunc
CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP, // Do the store, and ensure that the target was not in the heap.

CORINFO_HELP_ASSIGN_BYREF,
CORINFO_HELP_ASSIGN_BYREF_BATCH,
CORINFO_HELP_ASSIGN_STRUCT,


Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* 86eab154-5d93-4fad-bc07-e94fd9268b70 */
0x86eab154,
0x5d93,
0x4fad,
{0xbc, 0x07, 0xe9, 0x4f, 0xd9, 0x26, 0x8b, 0x70}
constexpr GUID JITEEVersionIdentifier = { /* e9339b0c-8569-4bd3-ba2a-cd098e174073 */
0xe9339b0c,
0x8569,
0x4bd3,
{0xba, 0x2a, 0xcd, 0x09, 0x8e, 0x17, 0x40, 0x73}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/inc/jithelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@
JITHELPER(CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP, JIT_WriteBarrierEnsureNonHeapTarget,CORINFO_HELP_SIG_REG_ONLY)

DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF, JIT_ByRefWriteBarrier,CORINFO_HELP_SIG_NO_ALIGN_STUB)
#if defined(TARGET_AMD64)
DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF_BATCH, JIT_ByRefWriteBarrierBatch,CORINFO_HELP_SIG_NO_ALIGN_STUB)
#else
DYNAMICJITHELPER(CORINFO_HELP_ASSIGN_BYREF_BATCH, NULL,CORINFO_HELP_SIG_NO_ALIGN_STUB)
#endif

JITHELPER(CORINFO_HELP_ASSIGN_STRUCT, JIT_StructWriteBarrier,CORINFO_HELP_SIG_4_STACK)

Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/inc/readytorun.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
// If you update this, ensure you run `git grep MINIMUM_READYTORUN_MAJOR_VERSION`
// and handle pending work.
#define READYTORUN_MAJOR_VERSION 0x0009
#define READYTORUN_MINOR_VERSION 0x0002
#define READYTORUN_MINOR_VERSION 0x0003

#define MINIMUM_READYTORUN_MAJOR_VERSION 0x009

Expand All @@ -34,6 +34,7 @@
// R2R Version 9.0 adds support for the Vector512 type
// R2R Version 9.1 adds new helpers to allocate objects on frozen segments
// R2R Version 9.2 adds MemZero and NativeMemSet helpers
// R2R Version 9.3 adds ByRefWriteBarrierBatch helper


struct READYTORUN_CORE_HEADER
Expand Down Expand Up @@ -322,6 +323,7 @@ enum ReadyToRunHelper
READYTORUN_HELPER_WriteBarrier = 0x30,
READYTORUN_HELPER_CheckedWriteBarrier = 0x31,
READYTORUN_HELPER_ByRefWriteBarrier = 0x32,
READYTORUN_HELPER_ByRefWriteBarrierBatch = 0x33,

// Array helpers
READYTORUN_HELPER_Stelem_Ref = 0x38,
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/inc/readytorunhelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ HELPER(READYTORUN_HELPER_ThrowDivZero, CORINFO_HELP_THROWDIVZERO,
HELPER(READYTORUN_HELPER_WriteBarrier, CORINFO_HELP_ASSIGN_REF, )
HELPER(READYTORUN_HELPER_CheckedWriteBarrier, CORINFO_HELP_CHECKED_ASSIGN_REF, )
HELPER(READYTORUN_HELPER_ByRefWriteBarrier, CORINFO_HELP_ASSIGN_BYREF, )
HELPER(READYTORUN_HELPER_ByRefWriteBarrierBatch, CORINFO_HELP_ASSIGN_BYREF_BATCH, )

HELPER(READYTORUN_HELPER_Stelem_Ref, CORINFO_HELP_ARRADDR_ST, )
HELPER(READYTORUN_HELPER_Ldelema_Ref, CORINFO_HELP_LDELEMA_REF, )
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,7 @@ regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
return RBM_CALLEE_TRASH_WRITEBARRIER;

case CORINFO_HELP_ASSIGN_BYREF:
case CORINFO_HELP_ASSIGN_BYREF_BATCH:
return RBM_CALLEE_TRASH_WRITEBARRIER_BYREF;

case CORINFO_HELP_PROF_FCN_ENTER:
Expand Down
27 changes: 27 additions & 0 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4265,6 +4265,33 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
}
else
{
#if defined(TARGET_AMD64)
// Count the run of contiguous GC-pointer slots starting at slot i. Slot i itself is counted
// unconditionally (presumably it is a GC pointer in this branch -- the branch condition is
// outside this view); following slots are counted while the layout reports them as GC pointers.
unsigned gcSlotCount = 0;
unsigned j = i;
do
{
gcSlotCount++;
j++;
} while ((j < slots) && layout->IsGCPtr(j));

// Limit the max size of a batch, we don't want to get stuck in the write-barrier
// moving a huge batch while GC is suspending threads. Slots beyond the cap are left
// for later iterations of the enclosing loop.
gcSlotCount = min(gcSlotCount, 256);

// Use the batched version of the write barrier only if the run has more than one contiguous
// GC slot; a single slot falls through to the regular CORINFO_HELP_ASSIGN_BYREF call below.
if (gcSlotCount > 1)
{
// The number of contiguous GC slots is passed to the helper in R8.
// Sanity check that R8 belongs to the integer callee-trash register set.
assert((genRegMask(REG_R8) & (RBM_INT_CALLEE_TRASH)) == genRegMask(REG_R8));

instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_R8, gcSlotCount);
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF_BATCH, 0, EA_PTRSIZE);
// Account for every slot covered by the batched call and skip past them.
gcPtrCount -= gcSlotCount;
i += gcSlotCount;
continue;
}
#endif
genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
gcPtrCount--;
i++;
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2850,6 +2850,7 @@ bool emitter::emitNoGChelper(CorInfoHelpFunc helpFunc)
case CORINFO_HELP_ASSIGN_REF:
case CORINFO_HELP_CHECKED_ASSIGN_REF:
case CORINFO_HELP_ASSIGN_BYREF:
case CORINFO_HELP_ASSIGN_BYREF_BATCH:

case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR:
case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR:
Expand Down Expand Up @@ -10422,6 +10423,7 @@ regMaskTP emitter::emitGetGCRegsKilledByNoGCCall(CorInfoHelpFunc helper)
break;

case CORINFO_HELP_ASSIGN_BYREF:
case CORINFO_HELP_ASSIGN_BYREF_BATCH:
result = RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF;
break;

Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1747,6 +1747,7 @@ void HelperCallProperties::init()
case CORINFO_HELP_CHECKED_ASSIGN_REF:
case CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP:
case CORINFO_HELP_ASSIGN_BYREF:
case CORINFO_HELP_ASSIGN_BYREF_BATCH:
case CORINFO_HELP_ASSIGN_STRUCT:

mutatesHeap = true;
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/nativeaot/Runtime/EHHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,11 @@ EXTERN_C CODE_LOCATION RhpByRefAssignRefAVLocation1;
EXTERN_C CODE_LOCATION RhpByRefAssignRefAVLocation2;
#endif

#if defined(HOST_AMD64)
EXTERN_C CODE_LOCATION RhpByRefAssignRefBatchAVLocation1;
EXTERN_C CODE_LOCATION RhpByRefAssignRefBatchAVLocation2;
#endif

#if defined(HOST_ARM64) && !defined(LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT)
EXTERN_C CODE_LOCATION RhpCheckedLockCmpXchgAVLocation2;
EXTERN_C CODE_LOCATION RhpCheckedXchgAVLocation2;
Expand All @@ -333,6 +338,10 @@ static bool InWriteBarrierHelper(uintptr_t faultingIP)
#if !defined(HOST_ARM64)
(uintptr_t)&RhpByRefAssignRefAVLocation2,
#endif
#if defined(HOST_AMD64)
(uintptr_t)&RhpByRefAssignRefBatchAVLocation1,
(uintptr_t)&RhpByRefAssignRefBatchAVLocation2,
#endif
#if defined(HOST_ARM64) && !defined(LSE_INSTRUCTIONS_ENABLED_BY_DEFAULT)
(uintptr_t)&RhpCheckedLockCmpXchgAVLocation2,
(uintptr_t)&RhpCheckedXchgAVLocation2,
Expand Down
102 changes: 102 additions & 0 deletions src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.S
Original file line number Diff line number Diff line change
Expand Up @@ -332,3 +332,105 @@ LOCAL_LABEL(RhpByRefAssignRef_NoBarrierRequired):
add rsi, 0x8
ret
LEAF_END RhpByRefAssignRef, _TEXT

//
// RhpByRefAssignRefBatch "Batch" version of RhpByRefAssignRef: copies r8 consecutive
// object references from [rsi] to [rdi], applying the GC write barrier to each store
// that lands in the GC heap.
//
// On entry:
//      rdi: address of the first ref-field (assigned to)
//      rsi: address of the first data slot (source)
//      r8:  number of byrefs to copy (must be at least 1; only the low 32 bits are used)
//
// On exit:
//      rdi, rsi are each incremented by 8 * (number of byrefs copied)
//      r8 is 0
//      rcx, r10, r11: trashed
//
LEAF_ENTRY RhpByRefAssignRefBatch, _TEXT
LOCAL_LABEL(RhpByRefAssignRefBatch_NextByref):
ALTERNATE_ENTRY RhpByRefAssignRefBatchAVLocation1
    mov     rcx, [rsi]
ALTERNATE_ENTRY RhpByRefAssignRefBatchAVLocation2
    mov     [rdi], rcx

    // Check whether the write was even into the heap. If not there's no card update required.
    cmp     rdi, [C_VAR(g_lowest_address)]
    jb      LOCAL_LABEL(RhpByRefAssignRefBatch_NotInHeap)
    cmp     rdi, [C_VAR(g_highest_address)]
    jae     LOCAL_LABEL(RhpByRefAssignRefBatch_NotInHeap)

    // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
    // we're in a debug build and write barrier checking has been enabled).
    UPDATE_GC_SHADOW BASENAME, rcx, rdi

#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    mov     r11, [C_VAR(g_write_watch_table)]
    cmp     r11, 0x0
    je      LOCAL_LABEL(RhpByRefAssignRefBatch_CheckCardTable)

    mov     r10, rdi
    shr     r10, 0xC // SoftwareWriteWatch::AddressToTableByteIndexShift
    add     r10, r11
    cmp     byte ptr [r10], 0x0
    jne     LOCAL_LABEL(RhpByRefAssignRefBatch_CheckCardTable)
    mov     byte ptr [r10], 0xFF
#endif

LOCAL_LABEL(RhpByRefAssignRefBatch_CheckCardTable):

    // If the reference is to an object that's not in an ephemeral generation we have no need to track it
    // (since the object won't be collected or moved by an ephemeral collection).
    cmp     rcx, [C_VAR(g_ephemeral_low)]
    jb      LOCAL_LABEL(RhpByRefAssignRefBatch_NoBarrierRequired)
    cmp     rcx, [C_VAR(g_ephemeral_high)]
    jae     LOCAL_LABEL(RhpByRefAssignRefBatch_NoBarrierRequired)

    // Move the current rdi value into rcx; rdi itself must be preserved so it can be advanced below.
    mov     rcx, rdi

    // We have a location on the GC heap being updated with a reference to an ephemeral object so we must
    // track this write. The location address is translated into an offset in the card table bitmap. We set
    // an entire byte in the card table since it's quicker than messing around with bitmasks and we only write
    // the byte if it hasn't already been done since writes are expensive and impact scaling.
    shr     rcx, 0x0B
    mov     r10, [C_VAR(g_card_table)]
    cmp     byte ptr [rcx + r10], 0x0FF
    je      LOCAL_LABEL(RhpByRefAssignRefBatch_NoBarrierRequired)

    // We get here if it's necessary to update the card table.
    mov     byte ptr [rcx + r10], 0xFF

#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
    // Shift rcx by 0x0A more to get the card bundle byte (we shifted by 0x0B already)
    shr     rcx, 0x0A
    add     rcx, [C_VAR(g_card_bundle_table)]
    cmp     byte ptr [rcx], 0xFF
    je      LOCAL_LABEL(RhpByRefAssignRefBatch_NoBarrierRequired)

    mov     byte ptr [rcx], 0xFF
#endif

    // Fall through: marking the card for this slot says nothing about the following slots (they may
    // live in a different card or write-watch page), so the remaining slots must keep going through
    // the fully checked loop rather than the unchecked copy below.

LOCAL_LABEL(RhpByRefAssignRefBatch_NoBarrierRequired):
    // This slot is done; advance the pointers and process the next slot with full checks.
    add     rdi, 0x8
    add     rsi, 0x8
    dec     r8d
    jne     LOCAL_LABEL(RhpByRefAssignRefBatch_NextByref)
    ret

LOCAL_LABEL(RhpByRefAssignRefBatch_NotInHeap):
    // The first write is already done and its destination is outside the GC heap; advance the pointers.
    add     rdi, 0x8
    add     rsi, 0x8
    dec     r8d
    je      LOCAL_LABEL(RhpByRefAssignRefBatch_NotInHeapExit)
    // Do the rest of the writes without checking the heap bounds.
    // NOTE(review): this assumes the remaining destination slots are also outside the heap because
    // they are contiguous with the first one -- confirm against the JIT call site.
LOCAL_LABEL(RhpByRefAssignRefBatch_NextByrefUnchecked):
    mov     rcx, [rsi]
    mov     [rdi], rcx
    add     rdi, 0x8
    add     rsi, 0x8
    dec     r8d
    jne     LOCAL_LABEL(RhpByRefAssignRefBatch_NextByrefUnchecked)
LOCAL_LABEL(RhpByRefAssignRefBatch_NotInHeapExit):
    ret
LEAF_END RhpByRefAssignRefBatch, _TEXT
104 changes: 104 additions & 0 deletions src/coreclr/nativeaot/Runtime/amd64/WriteBarriers.asm
Original file line number Diff line number Diff line change
Expand Up @@ -349,4 +349,108 @@ RhpByRefAssignRef_NoBarrierRequired:
ret
LEAF_END RhpByRefAssignRef, _TEXT

;;
;; RhpByRefAssignRefBatch "Batch" version of RhpByRefAssignRef.
;;
;; On entry:
;; rdi: address of ref-field (assigned to)
;; rsi: address of the data (source)
;; r8: number of byrefs
;;
;; On exit:
;; rdi, rsi are each incremented by 8 * (number of byrefs copied),
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't they be increased by the number of bytes processed? The same applies to the comments in the other asm code.

;; r8 is 0
;; rcx, r10, r11: trashed
;;
LEAF_ENTRY RhpByRefAssignRefBatch, _TEXT
    ;; Copies r8 consecutive object references from [rsi] to [rdi], applying the GC write barrier
    ;; to each store that lands in the GC heap. r8 must be at least 1 on entry (only the low 32 bits
    ;; are used); on exit rdi and rsi have each advanced by 8 * (number of byrefs copied) and r8 is 0.
RhpByRefAssignRefBatch_NextByref:

ALTERNATE_ENTRY RhpByRefAssignRefBatchAVLocation1
    mov     rcx, [rsi]
ALTERNATE_ENTRY RhpByRefAssignRefBatchAVLocation2
    mov     [rdi], rcx

    ;; Check whether the write was even into the heap. If not there's no card update required.
    cmp     rdi, [g_lowest_address]
    jb      RhpByRefAssignRefBatch_NotInHeap
    cmp     rdi, [g_highest_address]
    jae     RhpByRefAssignRefBatch_NotInHeap

    ;; Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
    ;; we're in a debug build and write barrier checking has been enabled).
    UPDATE_GC_SHADOW BASENAME, rcx, rdi

ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
    mov     r11, [g_write_watch_table]
    cmp     r11, 0
    je      RhpByRefAssignRefBatch_CheckCardTable

    mov     r10, rdi
    shr     r10, 0Ch ;; SoftwareWriteWatch::AddressToTableByteIndexShift
    add     r10, r11
    cmp     byte ptr [r10], 0
    jne     RhpByRefAssignRefBatch_CheckCardTable
    mov     byte ptr [r10], 0FFh
endif

RhpByRefAssignRefBatch_CheckCardTable:

    ;; If the reference is to an object that's not in an ephemeral generation we have no need to track it
    ;; (since the object won't be collected or moved by an ephemeral collection).
    cmp     rcx, [g_ephemeral_low]
    jb      RhpByRefAssignRefBatch_NoBarrierRequired
    cmp     rcx, [g_ephemeral_high]
    jae     RhpByRefAssignRefBatch_NoBarrierRequired

    ;; Move the current rdi value into rcx; rdi itself must be preserved so it can be advanced below.
    mov     rcx, rdi

    ;; We have a location on the GC heap being updated with a reference to an ephemeral object so we must
    ;; track this write. The location address is translated into an offset in the card table bitmap. We set
    ;; an entire byte in the card table since it's quicker than messing around with bitmasks and we only write
    ;; the byte if it hasn't already been done since writes are expensive and impact scaling.
    shr     rcx, 0Bh
    mov     r10, [g_card_table]
    cmp     byte ptr [rcx + r10], 0FFh
    je      RhpByRefAssignRefBatch_NoBarrierRequired

    ;; We get here if it's necessary to update the card table.
    mov     byte ptr [rcx + r10], 0FFh

ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES
    ;; Shift rcx by 0Ah more to get the card bundle byte (we shifted by 0Bh already)
    shr     rcx, 0Ah
    add     rcx, [g_card_bundle_table]
    cmp     byte ptr [rcx], 0FFh
    je      RhpByRefAssignRefBatch_NoBarrierRequired

    mov     byte ptr [rcx], 0FFh
endif

    ;; Fall through: marking the card for this slot says nothing about the following slots (they may
    ;; live in a different card or write-watch page), so the remaining slots must keep going through
    ;; the fully checked loop rather than the unchecked copy below.

RhpByRefAssignRefBatch_NoBarrierRequired:
    ;; This slot is done; advance the pointers and process the next slot with full checks.
    add     rdi, 8h
    add     rsi, 8h
    dec     r8d
    jne     RhpByRefAssignRefBatch_NextByref
    ret

RhpByRefAssignRefBatch_NotInHeap:
    ;; The first write is already done and its destination is outside the GC heap; advance the pointers.
    add     rdi, 8h
    add     rsi, 8h
    dec     r8d
    je      RhpByRefAssignRefBatch_NotInHeapExit
    ;; Do the rest of the writes without checking the heap bounds.
    ;; NOTE(review): this assumes the remaining destination slots are also outside the heap because
    ;; they are contiguous with the first one -- confirm against the JIT call site.
RhpByRefAssignRefBatch_NextByrefUnchecked:
    mov     rcx, [rsi]
    mov     [rdi], rcx
    add     rdi, 8h
    add     rsi, 8h
    dec     r8d
    jne     RhpByRefAssignRefBatch_NextByrefUnchecked
RhpByRefAssignRefBatch_NotInHeapExit:
    ret
LEAF_END RhpByRefAssignRefBatch, _TEXT

end
2 changes: 1 addition & 1 deletion src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ struct ReadyToRunHeaderConstants
static const uint32_t Signature = 0x00525452; // 'RTR'

static const uint32_t CurrentMajorVersion = 9;
static const uint32_t CurrentMinorVersion = 2;
static const uint32_t CurrentMinorVersion = 3;
};

struct ReadyToRunHeader
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ internal struct ReadyToRunHeaderConstants
public const uint Signature = 0x00525452; // 'RTR'

public const ushort CurrentMajorVersion = 9;
public const ushort CurrentMinorVersion = 2;
public const ushort CurrentMinorVersion = 3;
}
#if READYTORUN
#pragma warning disable 0169
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ public enum ReadyToRunHelper
WriteBarrier = 0x30,
CheckedWriteBarrier = 0x31,
ByRefWriteBarrier = 0x32,
ByRefWriteBarrierBatch = 0x33,

// Array helpers
Stelem_Ref = 0x38,
Expand Down
Loading