From 508eb9c464c7b11e30f1cd62e5a61d36b167c68a Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 18 Feb 2024 04:18:21 +0100 Subject: [PATCH 01/40] Remove JIT_MemSet/JIT_MemCpy --- docs/design/coreclr/botr/guide-for-porting.md | 2 - src/coreclr/inc/jithelpers.h | 9 +-- .../Runtime/CompilerHelpers/MemoryHelpers.cs | 9 ++- src/coreclr/vm/CMakeLists.txt | 7 -- src/coreclr/vm/amd64/CrtHelpers.asm | 79 ------------------ src/coreclr/vm/amd64/crthelpers.S | 74 ----------------- src/coreclr/vm/appdomain.cpp | 2 +- src/coreclr/vm/arm/crthelpers.S | 51 ------------ src/coreclr/vm/arm64/crthelpers.S | 33 -------- src/coreclr/vm/arm64/crthelpers.asm | 81 ------------------- src/coreclr/vm/corelib.h | 2 + src/coreclr/vm/ecall.cpp | 10 ++- src/coreclr/vm/ecall.h | 2 +- src/coreclr/vm/excep.cpp | 6 -- src/coreclr/vm/exceptionhandling.cpp | 2 +- src/coreclr/vm/jitinterface.cpp | 4 +- src/coreclr/vm/jitinterface.h | 3 - src/coreclr/vm/loongarch64/crthelpers.S | 37 --------- src/coreclr/vm/metasig.h | 2 + src/coreclr/vm/riscv64/crthelpers.S | 36 --------- .../CompilerServices/RuntimeHelpers.cs | 21 +++++ 21 files changed, 48 insertions(+), 424 deletions(-) delete mode 100644 src/coreclr/vm/amd64/CrtHelpers.asm delete mode 100644 src/coreclr/vm/amd64/crthelpers.S delete mode 100644 src/coreclr/vm/arm/crthelpers.S delete mode 100644 src/coreclr/vm/arm64/crthelpers.S delete mode 100644 src/coreclr/vm/arm64/crthelpers.asm delete mode 100644 src/coreclr/vm/loongarch64/crthelpers.S delete mode 100644 src/coreclr/vm/riscv64/crthelpers.S diff --git a/docs/design/coreclr/botr/guide-for-porting.md b/docs/design/coreclr/botr/guide-for-porting.md index 5d2c01aa52d066..f5549cde22521b 100644 --- a/docs/design/coreclr/botr/guide-for-porting.md +++ b/docs/design/coreclr/botr/guide-for-porting.md @@ -417,8 +417,6 @@ Here is an annotated list of the stubs implemented for Unix on Arm64. locations for NullReferenceExceptions to be generated out of a SIGSEGV signal. - 1. `JIT_MemSet`, and `JIT_MemCpy` have this requirement - #### cgencpu.h This header is included by various code in the VM directory. 
It provides a large diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index 65167abd6a4dd6..fc555ad444a42c 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -235,13 +235,8 @@ DYNAMICJITHELPER(CORINFO_HELP_INIT_PINVOKE_FRAME, NULL, CORINFO_HELP_SIG_REG_ONLY) #endif -#ifdef TARGET_X86 - JITHELPER(CORINFO_HELP_MEMSET, NULL, CORINFO_HELP_SIG_CANNOT_USE_ALIGN_STUB) - JITHELPER(CORINFO_HELP_MEMCPY, NULL, CORINFO_HELP_SIG_CANNOT_USE_ALIGN_STUB) -#else - JITHELPER(CORINFO_HELP_MEMSET, JIT_MemSet, CORINFO_HELP_SIG_REG_ONLY) - JITHELPER(CORINFO_HELP_MEMCPY, JIT_MemCpy, CORINFO_HELP_SIG_REG_ONLY) -#endif + DYNAMICJITHELPER(CORINFO_HELP_MEMSET, NULL, CORINFO_HELP_SIG_REG_ONLY) + DYNAMICJITHELPER(CORINFO_HELP_MEMCPY, NULL, CORINFO_HELP_SIG_REG_ONLY) // Generics JITHELPER(CORINFO_HELP_RUNTIMEHANDLE_METHOD, JIT_GenericHandleMethod, CORINFO_HELP_SIG_REG_ONLY) diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MemoryHelpers.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MemoryHelpers.cs index 644fcf1a59940e..c87063ab114e46 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MemoryHelpers.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MemoryHelpers.cs @@ -3,6 +3,7 @@ using System; using System.Runtime; +using System.Runtime.CompilerServices; namespace Internal.Runtime.CompilerHelpers { @@ -15,7 +16,8 @@ private static unsafe void MemSet(ref byte dest, byte value, nuint size) { if (size > 0) { - _ = dest; + // Implicit nullcheck + _ = Unsafe.ReadUnaligned(ref dest); SpanHelpers.Fill(ref dest, size, value); } } @@ -24,8 +26,9 @@ private static unsafe void MemCopy(ref byte dest, ref byte src, nuint size) { if (size > 0) { - _ = dest; - _ = src; + // Implicit nullchecks + _ = Unsafe.ReadUnaligned(ref dest); + _ = Unsafe.ReadUnaligned(ref src); Buffer.Memmove(ref dest, ref src, size); } } diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt index eb00b7c6a6757d..ccd8bc35c8bf4e 100644 --- a/src/coreclr/vm/CMakeLists.txt +++ b/src/coreclr/vm/CMakeLists.txt @@ -636,7 +636,6 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/AsmHelpers.asm ${ARCH_SOURCES_DIR}/CallDescrWorkerAMD64.asm ${ARCH_SOURCES_DIR}/ComCallPreStub.asm - ${ARCH_SOURCES_DIR}/CrtHelpers.asm ${ARCH_SOURCES_DIR}/GenericComCallStubs.asm ${ARCH_SOURCES_DIR}/GenericComPlusCallStubs.asm ${ARCH_SOURCES_DIR}/getstate.asm @@ -676,7 +675,6 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM64) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/AsmHelpers.asm ${ARCH_SOURCES_DIR}/CallDescrWorkerARM64.asm - ${ARCH_SOURCES_DIR}/CrtHelpers.asm ${ARCH_SOURCES_DIR}/patchedcode.asm ${ARCH_SOURCES_DIR}/PInvokeStubs.asm ${ARCH_SOURCES_DIR}/thunktemplates.asm @@ -693,7 +691,6 @@ else(CLR_CMAKE_TARGET_WIN32) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/calldescrworkeramd64.S - ${ARCH_SOURCES_DIR}/crthelpers.S ${ARCH_SOURCES_DIR}/externalmethodfixupthunk.S ${ARCH_SOURCES_DIR}/getstate.S ${ARCH_SOURCES_DIR}/jithelpers_fast.S @@ -723,7 +720,6 @@ else(CLR_CMAKE_TARGET_WIN32) elseif(CLR_CMAKE_TARGET_ARCH_ARM) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S - ${ARCH_SOURCES_DIR}/crthelpers.S ${ARCH_SOURCES_DIR}/ehhelpers.S ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S @@ -733,7 +729,6 @@ else(CLR_CMAKE_TARGET_WIN32) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S 
${ARCH_SOURCES_DIR}/calldescrworkerarm64.S - ${ARCH_SOURCES_DIR}/crthelpers.S ${ARCH_SOURCES_DIR}/patchedcode.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S @@ -742,7 +737,6 @@ else(CLR_CMAKE_TARGET_WIN32) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/calldescrworkerloongarch64.S - ${ARCH_SOURCES_DIR}/crthelpers.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S ) @@ -750,7 +744,6 @@ else(CLR_CMAKE_TARGET_WIN32) set(VM_SOURCES_WKS_ARCH_ASM ${ARCH_SOURCES_DIR}/asmhelpers.S ${ARCH_SOURCES_DIR}/calldescrworkerriscv64.S - ${ARCH_SOURCES_DIR}/crthelpers.S ${ARCH_SOURCES_DIR}/pinvokestubs.S ${ARCH_SOURCES_DIR}/thunktemplates.S ) diff --git a/src/coreclr/vm/amd64/CrtHelpers.asm b/src/coreclr/vm/amd64/CrtHelpers.asm deleted file mode 100644 index 09f48fa5879bd1..00000000000000 --- a/src/coreclr/vm/amd64/CrtHelpers.asm +++ /dev/null @@ -1,79 +0,0 @@ -; Licensed to the .NET Foundation under one or more agreements. -; The .NET Foundation licenses this file to you under the MIT license. - -include AsmMacros.inc - -extern memset:proc -extern memmove:proc - -; JIT_MemSet/JIT_MemCpy -; -; It is IMPORTANT that the exception handling code is able to find these guys -; on the stack, but on windows platforms we can just defer to the platform -; implementation. -; - -; void JIT_MemSet(void* dest, int c, size_t count) -; -; Purpose: -; Sets the first "count" bytes of the block of memory pointed byte -; "dest" to the specified value (interpreted as an unsigned char). -; -; Entry: -; RCX: void* dest - Pointer to the block of memory to fill. -; RDX: int c - Value to be set. -; R8: size_t count - Number of bytes to be set to the value. -; -; Exit: -; -; Uses: -; -; Exceptions: -; -LEAF_ENTRY JIT_MemSet, _TEXT - test r8, r8 ; check if count is zero - jz Exit_MemSet ; if zero, no bytes to set - - cmp byte ptr [rcx], 0 ; check dest for null - - jmp memset ; forward to the CRT implementation - -Exit_MemSet: - ret - -LEAF_END_MARKED JIT_MemSet, _TEXT - -; void JIT_MemCpy(void* dest, const void* src, size_t count) -; -; Purpose: -; Copies the values of "count" bytes from the location pointed to -; by "src" to the memory block pointed by "dest". -; -; Entry: -; RCX: void* dest - Pointer to the destination array where content is to be copied. -; RDX: const void* src - Pointer to the source of the data to be copied. -; R8: size_t count - Number of bytes to copy. -; -; Exit: -; -; Uses: -; -; Exceptions: -; -LEAF_ENTRY JIT_MemCpy, _TEXT - test r8, r8 ; check if count is zero - jz Exit_MemCpy ; if zero, no bytes to copy - - cmp byte ptr [rcx], 0 ; check dest for null - cmp byte ptr [rdx], 0 ; check src for null - - ; Use memmove to handle overlapping buffers for better - ; compatibility with .NET Framework. Needing to handle - ; overlapping buffers in cpblk is undefined by the spec. - jmp memmove ; forward to the CRT implementation - -Exit_MemCpy: - ret - -LEAF_END_MARKED JIT_MemCpy, _TEXT - end diff --git a/src/coreclr/vm/amd64/crthelpers.S b/src/coreclr/vm/amd64/crthelpers.S deleted file mode 100644 index 82219e574092da..00000000000000 --- a/src/coreclr/vm/amd64/crthelpers.S +++ /dev/null @@ -1,74 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -.intel_syntax noprefix -#include "unixasmmacros.inc" -#include "asmconstants.h" - -// JIT_MemSet/JIT_MemCpy -// -// It is IMPORTANT that the exception handling code is able to find these guys -// on the stack, but on non-windows platforms we can just defer to the platform -// implementation. -// - -// void JIT_MemSet(void* dest, int c, size_t count) -// -// Purpose: -// Sets the first "count" bytes of the block of memory pointed byte -// "dest" to the specified value (interpreted as an unsigned char). -// -// Entry: -// RDI: void* dest - Pointer to the block of memory to fill. -// RSI: int c - Value to be set. -// RDX: size_t count - Number of bytes to be set to the value. -// -// Exit: -// -// Uses: -// -// Exceptions: -// -LEAF_ENTRY JIT_MemSet, _TEXT - test rdx, rdx // check if count is zero - jz Exit_MemSet // if zero, no bytes to set - - cmp byte ptr [rdi], 0 // check dest for null - - jmp C_PLTFUNC(memset) // forward to the CRT implementation - -Exit_MemSet: - ret - -LEAF_END_MARKED JIT_MemSet, _TEXT - -// void JIT_MemCpy(void* dest, const void* src, size_t count) -// -// Purpose: -// Copies the values of "count" bytes from the location pointed to -// by "src" to the memory block pointed by "dest". -// -// Entry: -// RDI: void* dest - Pointer to the destination array where content is to be copied. -// RSI: const void* src - Pointer to the source of the data to be copied. -// RDX: size_t count - Number of bytes to copy. -// -// Exit: -// -// Uses: -// -// Exceptions: -// -LEAF_ENTRY JIT_MemCpy, _TEXT - test rdx, rdx // check if count is zero - jz Exit_MemCpy // if zero, no bytes to set - - cmp byte ptr [rdi], 0 // check dest for null - cmp byte ptr [rsi], 0 // check src for null - - jmp C_PLTFUNC(memcpy) // forward to the CRT implementation - -Exit_MemCpy: - ret - -LEAF_END_MARKED JIT_MemCpy, _TEXT diff --git a/src/coreclr/vm/appdomain.cpp b/src/coreclr/vm/appdomain.cpp index feafd1f8abad6d..bb5d3d17e00534 100644 --- a/src/coreclr/vm/appdomain.cpp +++ b/src/coreclr/vm/appdomain.cpp @@ -1352,7 +1352,7 @@ void SystemDomain::LoadBaseSystemClasses() // further loading of nonprimitive types may need casting support. // initialize cast cache here. CastCache::Initialize(); - ECall::PopulateManagedCastHelpers(); + ECall::PopulateManagedHelpers(); // used by IsImplicitInterfaceOfSZArray CoreLibBinder::GetClass(CLASS__IENUMERABLEGENERIC); diff --git a/src/coreclr/vm/arm/crthelpers.S b/src/coreclr/vm/arm/crthelpers.S deleted file mode 100644 index db0ed192c4d60f..00000000000000 --- a/src/coreclr/vm/arm/crthelpers.S +++ /dev/null @@ -1,51 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -// *********************************************************************** -// File: crthelpers.S -// -// *********************************************************************** - -#include "unixasmmacros.inc" -#include "asmconstants.h" - -.syntax unified -.thumb - -// JIT_MemSet/JIT_MemCpy -// -// It is IMPORANT that the exception handling code is able to find these guys -// on the stack, but to keep them from being tailcalled by VC++ we need to turn -// off optimization and it ends up being a wasteful implementation. -// -// Hence these assembly helpers. 
-// -//EXTERN_C void __stdcall JIT_MemSet(void* _dest, int c, size_t count) -LEAF_ENTRY JIT_MemSet, _TEXT - - cmp r2, #0 - it eq - bxeq lr - - ldrb r3, [r0] - - b C_PLTFUNC(memset) - -LEAF_END_MARKED JIT_MemSet, _TEXT - - -//EXTERN_C void __stdcall JIT_MemCpy(void* _dest, const void *_src, size_t count) -LEAF_ENTRY JIT_MemCpy, _TEXT -// - - cmp r2, #0 - it eq - bxeq lr - - ldrb r3, [r0] - ldrb r3, [r1] - - b C_PLTFUNC(memcpy) - -LEAF_END_MARKED JIT_MemCpy, _TEXT - diff --git a/src/coreclr/vm/arm64/crthelpers.S b/src/coreclr/vm/arm64/crthelpers.S deleted file mode 100644 index e123fc82808d16..00000000000000 --- a/src/coreclr/vm/arm64/crthelpers.S +++ /dev/null @@ -1,33 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include "unixasmmacros.inc" - -// JIT_MemSet/JIT_MemCpy -// -// It is IMPORTANT that the exception handling code is able to find these guys -// on the stack, but on non-windows platforms we can just defer to the platform -// implementation. -// -LEAF_ENTRY JIT_MemSet, _TEXT - cbz x2, LOCAL_LABEL(JIT_MemSet_ret) - - ldrb wzr, [x0] - - b C_PLTFUNC(memset) - -LOCAL_LABEL(JIT_MemSet_ret): - ret lr -LEAF_END_MARKED JIT_MemSet, _TEXT - -LEAF_ENTRY JIT_MemCpy, _TEXT - cbz x2, LOCAL_LABEL(JIT_MemCpy_ret) - - ldrb wzr, [x0] - ldrb wzr, [x1] - - b C_PLTFUNC(memcpy) - -LOCAL_LABEL(JIT_MemCpy_ret): - ret lr -LEAF_END_MARKED JIT_MemCpy, _TEXT diff --git a/src/coreclr/vm/arm64/crthelpers.asm b/src/coreclr/vm/arm64/crthelpers.asm deleted file mode 100644 index d4d13351365c95..00000000000000 --- a/src/coreclr/vm/arm64/crthelpers.asm +++ /dev/null @@ -1,81 +0,0 @@ -; Licensed to the .NET Foundation under one or more agreements. -; The .NET Foundation licenses this file to you under the MIT license. - -#include "ksarm64.h" -#include "asmconstants.h" -#include "asmmacros.h" - - IMPORT memset - IMPORT memmove - -; JIT_MemSet/JIT_MemCpy -; -; It is IMPORTANT that the exception handling code is able to find these guys -; on the stack, but on windows platforms we can just defer to the platform -; implementation. -; - -; void JIT_MemSet(void* dest, int c, size_t count) -; -; Purpose: -; Sets the first "count" bytes of the block of memory pointed byte -; "dest" to the specified value (interpreted as an unsigned char). -; -; Entry: -; RCX: void* dest - Pointer to the block of memory to fill. -; RDX: int c - Value to be set. -; R8: size_t count - Number of bytes to be set to the value. -; -; Exit: -; -; Uses: -; -; Exceptions: -; - - TEXTAREA - - LEAF_ENTRY JIT_MemSet - cbz x2, JIT_MemSet_ret ; check if count is zero, no bytes to set - - ldrb wzr, [x0] ; check dest for null - - b memset ; forward to the CRT implementation - -JIT_MemSet_ret - ret lr - - LEAF_END_MARKED JIT_MemSet - -; void JIT_MemCpy(void* dest, const void* src, size_t count) -; -; Purpose: -; Copies the values of "count" bytes from the location pointed to -; by "src" to the memory block pointed by "dest". -; -; Entry: -; RCX: void* dest - Pointer to the destination array where content is to be copied. -; RDX: const void* src - Pointer to the source of the data to be copied. -; R8: size_t count - Number of bytes to copy. 
-; -; Exit: -; -; Uses: -; -; Exceptions: -; - LEAF_ENTRY JIT_MemCpy - cbz x2, JIT_MemCpy_ret ; check if count is zero, no bytes to set - - ldrb wzr, [x0] ; check dest for null - ldrb wzr, [x1] ; check src for null - - b memmove ; forward to the CRT implementation - -JIT_MemCpy_ret - ret lr - - LEAF_END_MARKED JIT_MemCpy - -; Must be at very end of file - END diff --git a/src/coreclr/vm/corelib.h b/src/coreclr/vm/corelib.h index bd4a2090166522..8421c0984ba716 100644 --- a/src/coreclr/vm/corelib.h +++ b/src/coreclr/vm/corelib.h @@ -638,6 +638,8 @@ DEFINE_METHOD(RUNTIME_HELPERS, ENUM_COMPARE_TO, EnumCompareTo, NoSig DEFINE_METHOD(RUNTIME_HELPERS, ALLOC_TAILCALL_ARG_BUFFER, AllocTailCallArgBuffer, SM_Int_IntPtr_RetIntPtr) DEFINE_METHOD(RUNTIME_HELPERS, GET_TAILCALL_INFO, GetTailCallInfo, NoSig) DEFINE_METHOD(RUNTIME_HELPERS, DISPATCH_TAILCALLS, DispatchTailCalls, NoSig) +DEFINE_METHOD(RUNTIME_HELPERS, MEMSET, MemSet, SM_RefByte_Byte_UIntPtr_RetVoid) +DEFINE_METHOD(RUNTIME_HELPERS, MEMCOPY, MemCopy, SM_RefByte_RefByte_UIntPtr_RetVoid) DEFINE_CLASS(UNSAFE, CompilerServices, Unsafe) DEFINE_METHOD(UNSAFE, AS_POINTER, AsPointer, NoSig) diff --git a/src/coreclr/vm/ecall.cpp b/src/coreclr/vm/ecall.cpp index 37ac50d124f6f6..6a93b575d9f0ca 100644 --- a/src/coreclr/vm/ecall.cpp +++ b/src/coreclr/vm/ecall.cpp @@ -96,7 +96,7 @@ void ECall::PopulateManagedStringConstructors() INDEBUG(fInitialized = true); } -void ECall::PopulateManagedCastHelpers() +void ECall::PopulateManagedHelpers() { STANDARD_VM_CONTRACT; @@ -144,6 +144,14 @@ void ECall::PopulateManagedCastHelpers() pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__CASTHELPERS__LDELEMAREF)); pDest = pMD->GetMultiCallableAddrOfCode(); SetJitHelperFunction(CORINFO_HELP_LDELEMA_REF, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__RUNTIME_HELPERS__MEMSET)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_MEMSET, pDest); + + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__RUNTIME_HELPERS__MEMCOPY)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_MEMCPY, pDest); } static CrstStatic gFCallLock; diff --git a/src/coreclr/vm/ecall.h b/src/coreclr/vm/ecall.h index bc9d63ae467137..792eea633e8f7a 100644 --- a/src/coreclr/vm/ecall.h +++ b/src/coreclr/vm/ecall.h @@ -94,7 +94,7 @@ class ECall static void PopulateManagedStringConstructors(); - static void PopulateManagedCastHelpers(); + static void PopulateManagedHelpers(); #ifdef DACCESS_COMPILE // Enumerates all gFCallMethods for minidumps. 
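With the two registrations added to ECall::PopulateManagedHelpers above, CORINFO_HELP_MEMSET and CORINFO_HELP_MEMCPY now resolve to the managed RuntimeHelpers.MemSet and MemCopy bodies instead of the deleted assembly stubs. The observable contract is that a null destination or source raises NullReferenceException even for very large block sizes, because each helper touches the first byte before filling or copying. A standalone sketch of that contract, mirroring the MemsetMemcpyNullref test added in patch 02 (the class name is illustrative, not part of this change):

    using System;
    using System.Runtime.CompilerServices;

    internal static class MemHelperNullCheckSketch // hypothetical name
    {
        private static void Main()
        {
            try
            {
                // initblk over a null ref lowers to CORINFO_HELP_MEMSET.
                Unsafe.InitBlockUnaligned(ref Unsafe.NullRef<byte>(), 42, 1_000_000);
            }
            catch (NullReferenceException)
            {
                Console.WriteLine("MemSet null check observed");
            }

            try
            {
                // cpblk over null refs lowers to CORINFO_HELP_MEMCPY.
                Unsafe.CopyBlockUnaligned(ref Unsafe.NullRef<byte>(), ref Unsafe.NullRef<byte>(), 1_000_000);
            }
            catch (NullReferenceException)
            {
                Console.WriteLine("MemCpy null check observed");
            }
        }
    }

A large length such as 1,000,000 is well past the JIT's inline-unroll limit for block operations, so the call actually reaches the registered helper rather than being expanded inline.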
diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index e723e087d57f4c..e4aa0b044de1e7 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6292,9 +6292,6 @@ EXTERN_C void JIT_StackProbe_End(); #ifdef FEATURE_EH_FUNCLETS #ifndef TARGET_X86 -EXTERN_C void JIT_MemSet_End(); -EXTERN_C void JIT_MemCpy_End(); - EXTERN_C void JIT_WriteBarrier_End(); EXTERN_C void JIT_CheckedWriteBarrier_End(); EXTERN_C void JIT_ByRefWriteBarrier_End(); @@ -6345,9 +6342,6 @@ bool IsIPInMarkedJitHelper(UINT_PTR uControlPc) if (GetEEFuncEntryPoint(name) <= uControlPc && uControlPc < GetEEFuncEntryPoint(name##_End)) return true; #ifndef TARGET_X86 - CHECK_RANGE(JIT_MemSet) - CHECK_RANGE(JIT_MemCpy) - CHECK_RANGE(JIT_WriteBarrier) CHECK_RANGE(JIT_CheckedWriteBarrier) CHECK_RANGE(JIT_ByRefWriteBarrier) diff --git a/src/coreclr/vm/exceptionhandling.cpp b/src/coreclr/vm/exceptionhandling.cpp index 8c10895a4f12ea..a00edf9a59a012 100644 --- a/src/coreclr/vm/exceptionhandling.cpp +++ b/src/coreclr/vm/exceptionhandling.cpp @@ -5438,7 +5438,7 @@ BOOL HandleHardwareException(PAL_SEHException* ex) if (ex->GetExceptionRecord()->ExceptionCode != STATUS_BREAKPOINT && ex->GetExceptionRecord()->ExceptionCode != STATUS_SINGLE_STEP) { // A hardware exception is handled only if it happened in a jitted code or - // in one of the JIT helper functions (JIT_MemSet, ...) + // in one of the JIT helper functions PCODE controlPc = GetIP(ex->GetContextRecord()); if (ExecutionManager::IsManagedCode(controlPc) && IsGcMarker(ex->GetContextRecord(), ex->GetExceptionRecord())) { diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 20c8321a7c4960..3a38c7c8f1d11d 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -10680,7 +10680,9 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc ftnNum, /* IN */ dynamicFtnNum == DYNAMIC_CORINFO_HELP_CHKCASTCLASS_SPECIAL || dynamicFtnNum == DYNAMIC_CORINFO_HELP_UNBOX || dynamicFtnNum == DYNAMIC_CORINFO_HELP_ARRADDR_ST || - dynamicFtnNum == DYNAMIC_CORINFO_HELP_LDELEMA_REF) + dynamicFtnNum == DYNAMIC_CORINFO_HELP_LDELEMA_REF || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMSET || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMCPY) { Precode* pPrecode = Precode::GetPrecodeFromEntryPoint((PCODE)hlpDynamicFuncTable[dynamicFtnNum].pfnHelper); _ASSERTE(pPrecode->GetType() == PRECODE_FIXUP); diff --git a/src/coreclr/vm/jitinterface.h b/src/coreclr/vm/jitinterface.h index 1708a05df5e314..9ce9a622213c88 100644 --- a/src/coreclr/vm/jitinterface.h +++ b/src/coreclr/vm/jitinterface.h @@ -400,9 +400,6 @@ extern "C" #endif // TARGET_AMD64 || TARGET_ARM - void STDCALL JIT_MemSet(void *dest, int c, SIZE_T count); - void STDCALL JIT_MemCpy(void *dest, const void *src, SIZE_T count); - void STDMETHODCALLTYPE JIT_ProfilerEnterLeaveTailcallStub(UINT_PTR ProfilerHandle); #if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) void STDCALL JIT_StackProbe(); diff --git a/src/coreclr/vm/loongarch64/crthelpers.S b/src/coreclr/vm/loongarch64/crthelpers.S deleted file mode 100644 index 88fd21938fdaa2..00000000000000 --- a/src/coreclr/vm/loongarch64/crthelpers.S +++ /dev/null @@ -1,37 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -#include "unixasmmacros.inc" - -// JIT_MemSet/JIT_MemCpy -// -// It is IMPORTANT that the exception handling code is able to find these guys -// on the stack, but on non-windows platforms we can just defer to the platform -// implementation. -// -LEAF_ENTRY JIT_MemSet, _TEXT - beq $a2, $zero, LOCAL_LABEL(JIT_MemSet_ret) - - ld.b $zero, $a0, 0 //Is this really needed ? - - b memset - -LOCAL_LABEL(JIT_MemSet_ret): - jirl $r0, $ra, 0 - -////NOTO: Here must use LEAF_END_MARKED! not LEAF_END !!! -LEAF_END_MARKED JIT_MemSet, _TEXT - -LEAF_ENTRY JIT_MemCpy, _TEXT - beq $a2, $zero, LOCAL_LABEL(JIT_MemCpy_ret) - - ld.b $zero, $a0, 0 - ld.b $zero, $a1, 0 //Is this really needed ? - - b memcpy - -LOCAL_LABEL(JIT_MemCpy_ret): - jirl $r0, $ra, 0 - -////NOTO: Here must use LEAF_END_MARKED! not LEAF_END !!! -LEAF_END_MARKED JIT_MemCpy, _TEXT diff --git a/src/coreclr/vm/metasig.h b/src/coreclr/vm/metasig.h index 45cb5700db5293..ef8ac7943fd7cc 100644 --- a/src/coreclr/vm/metasig.h +++ b/src/coreclr/vm/metasig.h @@ -237,6 +237,8 @@ DEFINE_METASIG(SM(PtrSByt_RetInt, P(B), i)) DEFINE_METASIG(SM(IntPtr_RetIntPtr, I, I)) DEFINE_METASIG(SM(UIntPtr_RetIntPtr, U, I)) DEFINE_METASIG(SM(PtrByte_PtrByte_Int_RetVoid, P(b) P(b) i, v)) +DEFINE_METASIG(SM(RefByte_RefByte_UIntPtr_RetVoid, r(b) r(b) U, v)) +DEFINE_METASIG(SM(RefByte_Byte_UIntPtr_RetVoid, r(b) b U, v)) DEFINE_METASIG(SM(PtrVoid_Byte_UInt_RetVoid, P(v) b K, v)) DEFINE_METASIG(SM(RefObj_IntPtr_RetVoid, r(j) I, v)) DEFINE_METASIG(SM(RefObj_RefIntPtr_RetVoid, r(j) r(I), v)) diff --git a/src/coreclr/vm/riscv64/crthelpers.S b/src/coreclr/vm/riscv64/crthelpers.S deleted file mode 100644 index 3151387b3cafd3..00000000000000 --- a/src/coreclr/vm/riscv64/crthelpers.S +++ /dev/null @@ -1,36 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include "unixasmmacros.inc" - -// JIT_MemSet/JIT_MemCpy -// -// It is IMPORTANT that the exception handling code is able to find these guys -// on the stack, but on non-windows platforms we can just defer to the platform -// implementation. -// -LEAF_ENTRY JIT_MemSet, _TEXT - beq a2, zero, LOCAL_LABEL(JIT_MemSet_ret) - - lb zero, 0(a0) // Is this really needed ? - - tail memset - -LOCAL_LABEL(JIT_MemSet_ret): - ret -LEAF_END_MARKED JIT_MemSet, _TEXT - -////NOTE: Here must use LEAF_END_MARKED! not LEAF_END !!! -LEAF_ENTRY JIT_MemCpy, _TEXT - beq a2, zero, LOCAL_LABEL(JIT_MemCpy_ret) - - lb zero, 0(a0) - lb zero, 0(a1) // Is this really needed ? - - tail memcpy - -LOCAL_LABEL(JIT_MemCpy_ret): - ret - -////NOTE: Here must use LEAF_END_MARKED! not LEAF_END !!! 
-LEAF_END_MARKED JIT_MemCpy, _TEXT diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs index cf213590851497..f8a610f8729d12 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs @@ -129,5 +129,26 @@ internal static bool IsPrimitiveType(this CorElementType et) [Intrinsic] internal static bool IsKnownConstant(int t) => false; #pragma warning restore IDE0060 + + private static unsafe void MemSet(ref byte dest, byte value, nuint size) + { + if (size > 0) + { + // Implicit nullcheck + _ = Unsafe.ReadUnaligned(ref dest); + SpanHelpers.Fill(ref dest, size, value); + } + } + + private static unsafe void MemCopy(ref byte dest, ref byte src, nuint size) + { + if (size > 0) + { + // Implicit nullchecks + _ = Unsafe.ReadUnaligned(ref dest); + _ = Unsafe.ReadUnaligned(ref src); + Buffer.Memmove(ref dest, ref src, size); + } + } } } From 32ec60e0f6b4c945da27c6ce13b64b977eb38107 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 18 Feb 2024 11:10:37 +0100 Subject: [PATCH 02/40] Add a test --- .../System.Private.CoreLib/src/System/Span.cs | 14 +---- .../src/System/SpanHelpers.cs | 5 +- .../JIT/opt/Structs/MemsetMemcpyNullref.cs | 57 +++++++++++++++++++ .../opt/Structs/MemsetMemcpyNullref.csproj | 10 ++++ 4 files changed, 71 insertions(+), 15 deletions(-) create mode 100644 src/tests/JIT/opt/Structs/MemsetMemcpyNullref.cs create mode 100644 src/tests/JIT/opt/Structs/MemsetMemcpyNullref.csproj diff --git a/src/libraries/System.Private.CoreLib/src/System/Span.cs b/src/libraries/System.Private.CoreLib/src/System/Span.cs index aaf3763d81b755..1c66a341b0fde1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Span.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Span.cs @@ -300,19 +300,7 @@ public unsafe void Clear() [MethodImpl(MethodImplOptions.AggressiveInlining)] public unsafe void Fill(T value) { - if (sizeof(T) == 1) - { - // Special-case single-byte types like byte / sbyte / bool. - // The runtime eventually calls memset, which can efficiently support large buffers. - // We don't need to check IsReferenceOrContainsReferences because no references - // can ever be stored in types this small. - Unsafe.InitBlockUnaligned(ref Unsafe.As(ref _reference), *(byte*)&value, (uint)_length); - } - else - { - // Call our optimized workhorse method for all other types. - SpanHelpers.Fill(ref _reference, (uint)_length, value); - } + SpanHelpers.Fill(ref _reference, (uint)_length, value); } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs index a7e5f48d63180d..95c94a42a08154 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs @@ -18,11 +18,12 @@ public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength) return; #if TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 - // The exact matrix on when ZeroMemory is faster than InitBlockUnaligned is very complex. The factors to consider include + // The exact matrix on when ZeroMemory is faster than SpanHelpers.Fill is very complex. The factors to consider include // type of hardware and memory alignment. 
This threshold was chosen as a good balance across different configurations. if (byteLength > 768) goto PInvoke; - Unsafe.InitBlockUnaligned(ref b, 0, (uint)byteLength); + + SpanHelpers.Fill(ref b, byteLength, (byte)0); return; #else // TODO: Optimize other platforms to be on par with AMD64 CoreCLR diff --git a/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.cs b/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.cs new file mode 100644 index 00000000000000..1c958a99e4aada --- /dev/null +++ b/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.cs @@ -0,0 +1,57 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using Xunit; + +public unsafe class MemsetMemcpyNullref +{ + [Fact] + public static void MemsetMemcpyThrowNullRefonNull() + { + Assert.Throws(() => MemoryInit(null)); + Assert.Throws(() => MemoryCopy(null, null)); + Assert.Throws(() => + { + // Check when only src is null + HugeStruct hs = default; + MemoryCopy(&hs, null); + }); + Assert.Throws(() => + { + // Check when only dst is null + HugeStruct hs = default; + MemoryCopy(null, &hs); + }); + + // Check various lengths + uint[] lengths = [1, 10, 100, 1000, 10000, 100000, 1000000]; + foreach (uint length in lengths) + { + Assert.Throws(() => MemoryInitByref(ref Unsafe.NullRef(), length)); + Assert.Throws(() => MemoryCopyByref(ref Unsafe.NullRef(), ref Unsafe.NullRef(), length)); + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void MemoryCopy(HugeStruct* dst, HugeStruct* src) => + *dst = *src; + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void MemoryCopyByref(ref byte dst, ref byte src, uint len) => + Unsafe.CopyBlockUnaligned(ref dst, ref src, len); + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void MemoryInit(HugeStruct* dst) => + *dst = default; + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void MemoryInitByref(ref byte dst, uint len) => + Unsafe.InitBlockUnaligned(ref dst, 42, len); + + private struct HugeStruct + { + public fixed byte Data[20_000]; + } +} diff --git a/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.csproj b/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.csproj new file mode 100644 index 00000000000000..23d7b90be5361c --- /dev/null +++ b/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.csproj @@ -0,0 +1,10 @@ + + + true + None + True + + + + + From 0d4f4f4b95b6e5e79b92c5a3dc04cb3821c639ef Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 18 Feb 2024 12:16:23 +0100 Subject: [PATCH 03/40] Address feedback --- .../Runtime/CompilerHelpers/MemoryHelpers.cs | 36 ------------------- .../src/System.Private.CoreLib.csproj | 1 - .../Common/TypeSystem/IL/HelperExtensions.cs | 13 +++++-- .../ILCompiler.Compiler/Compiler/JitHelper.cs | 4 +-- .../IL/ILImporter.Scanner.cs | 3 ++ 5 files changed, 15 insertions(+), 42 deletions(-) delete mode 100644 src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MemoryHelpers.cs diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MemoryHelpers.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MemoryHelpers.cs deleted file mode 100644 index c87063ab114e46..00000000000000 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/Internal/Runtime/CompilerHelpers/MemoryHelpers.cs +++ /dev/null @@ -1,36 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. 
-// The .NET Foundation licenses this file to you under the MIT license. - -using System; -using System.Runtime; -using System.Runtime.CompilerServices; - -namespace Internal.Runtime.CompilerHelpers -{ - /// - /// These methods are used to implement memcpy and memset intrinsics with null checks. - /// - internal static class MemoryHelpers - { - private static unsafe void MemSet(ref byte dest, byte value, nuint size) - { - if (size > 0) - { - // Implicit nullcheck - _ = Unsafe.ReadUnaligned(ref dest); - SpanHelpers.Fill(ref dest, size, value); - } - } - - private static unsafe void MemCopy(ref byte dest, ref byte src, nuint size) - { - if (size > 0) - { - // Implicit nullchecks - _ = Unsafe.ReadUnaligned(ref dest); - _ = Unsafe.ReadUnaligned(ref src); - Buffer.Memmove(ref dest, ref src, size); - } - } - } -} diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System.Private.CoreLib.csproj b/src/coreclr/nativeaot/System.Private.CoreLib/src/System.Private.CoreLib.csproj index d6e8c3a7e90274..4ca91458c70e71 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System.Private.CoreLib.csproj +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System.Private.CoreLib.csproj @@ -100,7 +100,6 @@ - diff --git a/src/coreclr/tools/Common/TypeSystem/IL/HelperExtensions.cs b/src/coreclr/tools/Common/TypeSystem/IL/HelperExtensions.cs index 4ccaff2d6dd9f0..802377523134e2 100644 --- a/src/coreclr/tools/Common/TypeSystem/IL/HelperExtensions.cs +++ b/src/coreclr/tools/Common/TypeSystem/IL/HelperExtensions.cs @@ -12,17 +12,24 @@ namespace Internal.IL { internal static class HelperExtensions { - private const string HelperTypesNamespace = "Internal.Runtime.CompilerHelpers"; + private static string GetTypeNamespace(string type) + { + return type switch + { + "RuntimeHelpers" => "System.Runtime.CompilerServices", + _ => "Internal.Runtime.CompilerHelpers" + }; + } public static MetadataType GetHelperType(this TypeSystemContext context, string name) { - MetadataType helperType = context.SystemModule.GetKnownType(HelperTypesNamespace, name); + MetadataType helperType = context.SystemModule.GetKnownType(GetTypeNamespace(name), name); return helperType; } public static MetadataType GetOptionalHelperType(this TypeSystemContext context, string name) { - MetadataType helperType = context.SystemModule.GetType(HelperTypesNamespace, name, throwIfNotFound: false); + MetadataType helperType = context.SystemModule.GetType(GetTypeNamespace(name), name, throwIfNotFound: false); return helperType; } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index c55dc58175b05f..004c09ae745fc6 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -134,10 +134,10 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, break; case ReadyToRunHelper.MemCpy: - methodDesc = context.GetHelperEntryPoint("MemoryHelpers", "MemCopy"); + methodDesc = context.GetHelperEntryPoint("RuntimeHelpers", "MemCopy"); break; case ReadyToRunHelper.MemSet: - methodDesc = context.GetHelperEntryPoint("MemoryHelpers", "MemSet"); + methodDesc = context.GetHelperEntryPoint("RuntimeHelpers", "MemSet"); break; case ReadyToRunHelper.GetRuntimeTypeHandle: diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs index 7ae867370bdf40..f90cfdbaa83574 
100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs @@ -170,6 +170,9 @@ public DependencyList Import() } + _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.MemCpy), "Can be used by multiple IL opcodes implicitly"); + _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.MemSet), "Can be used by multiple IL opcodes implicitly"); + FindBasicBlocks(); ImportBasicBlocks(); From 321bb14c4aa97b77c338576bdc59e3c6482b69de Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 18 Feb 2024 17:33:32 +0100 Subject: [PATCH 04/40] Address feedback --- src/coreclr/jit/codegenarm64.cpp | 5 +---- .../tools/Common/TypeSystem/IL/HelperExtensions.cs | 13 +++---------- .../aot/ILCompiler.Compiler/Compiler/JitHelper.cs | 4 ++-- src/tests/issues.targets | 3 +++ 4 files changed, 9 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 3883b20118ad8a..aa7fc5eeb7bdb4 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -5108,10 +5108,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, callTarget = callTargetReg; - // adrp + add with relocations will be emitted - GetEmitter()->emitIns_R_AI(INS_adrp, EA_PTR_DSP_RELOC, callTarget, - (ssize_t)pAddr DEBUGARG((size_t)compiler->eeFindHelper(helper)) - DEBUGARG(GTF_ICON_METHOD_HDL)); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, callTarget, (ssize_t)pAddr); GetEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, callTarget, callTarget); callType = emitter::EC_INDIR_R; } diff --git a/src/coreclr/tools/Common/TypeSystem/IL/HelperExtensions.cs b/src/coreclr/tools/Common/TypeSystem/IL/HelperExtensions.cs index 802377523134e2..4ccaff2d6dd9f0 100644 --- a/src/coreclr/tools/Common/TypeSystem/IL/HelperExtensions.cs +++ b/src/coreclr/tools/Common/TypeSystem/IL/HelperExtensions.cs @@ -12,24 +12,17 @@ namespace Internal.IL { internal static class HelperExtensions { - private static string GetTypeNamespace(string type) - { - return type switch - { - "RuntimeHelpers" => "System.Runtime.CompilerServices", - _ => "Internal.Runtime.CompilerHelpers" - }; - } + private const string HelperTypesNamespace = "Internal.Runtime.CompilerHelpers"; public static MetadataType GetHelperType(this TypeSystemContext context, string name) { - MetadataType helperType = context.SystemModule.GetKnownType(GetTypeNamespace(name), name); + MetadataType helperType = context.SystemModule.GetKnownType(HelperTypesNamespace, name); return helperType; } public static MetadataType GetOptionalHelperType(this TypeSystemContext context, string name) { - MetadataType helperType = context.SystemModule.GetType(GetTypeNamespace(name), name, throwIfNotFound: false); + MetadataType helperType = context.SystemModule.GetType(HelperTypesNamespace, name, throwIfNotFound: false); return helperType; } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index 004c09ae745fc6..8f01a08eea6ee3 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -134,10 +134,10 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, break; case ReadyToRunHelper.MemCpy: - methodDesc = context.GetHelperEntryPoint("RuntimeHelpers", "MemCopy"); + methodDesc = context.SystemModule.GetKnownType("System.Runtime", 
"RuntimeHelpers").GetKnownMethod("MemCopy", null); break; case ReadyToRunHelper.MemSet: - methodDesc = context.GetHelperEntryPoint("RuntimeHelpers", "MemSet"); + methodDesc = context.SystemModule.GetKnownType("System.Runtime", "RuntimeHelpers").GetKnownMethod("MemSet", null); break; case ReadyToRunHelper.GetRuntimeTypeHandle: diff --git a/src/tests/issues.targets b/src/tests/issues.targets index 60f4dd83b52c09..71263b60902e68 100644 --- a/src/tests/issues.targets +++ b/src/tests/issues.targets @@ -1873,6 +1873,9 @@ https://github.com/dotnet/runtime/issues/90374 + + https://github.com/dotnet/runtime/issues/98628 + From 4904b4b6bcf2d03358feeaec3eed5630744d785f Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 18 Feb 2024 17:37:24 +0100 Subject: [PATCH 05/40] Address feedback --- .../tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index 8f01a08eea6ee3..572ff50c4d5791 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -134,10 +134,10 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, break; case ReadyToRunHelper.MemCpy: - methodDesc = context.SystemModule.GetKnownType("System.Runtime", "RuntimeHelpers").GetKnownMethod("MemCopy", null); + methodDesc = context.SystemModule.GetKnownType("System.Runtime.CompilerServices", "RuntimeHelpers").GetKnownMethod("MemCopy", null); break; case ReadyToRunHelper.MemSet: - methodDesc = context.SystemModule.GetKnownType("System.Runtime", "RuntimeHelpers").GetKnownMethod("MemSet", null); + methodDesc = context.SystemModule.GetKnownType("System.Runtime.CompilerServices", "RuntimeHelpers").GetKnownMethod("MemSet", null); break; case ReadyToRunHelper.GetRuntimeTypeHandle: From 1573925d846d82f468efb9babc88d4f18b6bc12d Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 18 Feb 2024 18:32:36 +0100 Subject: [PATCH 06/40] Address feedback --- .../tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs | 4 ++-- .../tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs | 3 --- .../src/System/Runtime/CompilerServices/RuntimeHelpers.cs | 6 ++++++ 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index 572ff50c4d5791..28d8442442226e 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -134,10 +134,10 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, break; case ReadyToRunHelper.MemCpy: - methodDesc = context.SystemModule.GetKnownType("System.Runtime.CompilerServices", "RuntimeHelpers").GetKnownMethod("MemCopy", null); + mangledName = "RhRuntimeHelpers_MemCopy"; break; case ReadyToRunHelper.MemSet: - methodDesc = context.SystemModule.GetKnownType("System.Runtime.CompilerServices", "RuntimeHelpers").GetKnownMethod("MemSet", null); + mangledName = "RhRuntimeHelpers_MemSet"; break; case ReadyToRunHelper.GetRuntimeTypeHandle: diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs index f90cfdbaa83574..7ae867370bdf40 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs 
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/IL/ILImporter.Scanner.cs @@ -170,9 +170,6 @@ public DependencyList Import() } - _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.MemCpy), "Can be used by multiple IL opcodes implicitly"); - _dependencies.Add(GetHelperEntrypoint(ReadyToRunHelper.MemSet), "Can be used by multiple IL opcodes implicitly"); - FindBasicBlocks(); ImportBasicBlocks(); diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs index f8a610f8729d12..c62a72ac491891 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs @@ -130,6 +130,9 @@ internal static bool IsPrimitiveType(this CorElementType et) internal static bool IsKnownConstant(int t) => false; #pragma warning restore IDE0060 +#if NATIVEAOT + [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemSet")] +#endif private static unsafe void MemSet(ref byte dest, byte value, nuint size) { if (size > 0) @@ -140,6 +143,9 @@ private static unsafe void MemSet(ref byte dest, byte value, nuint size) } } +#if NATIVEAOT + [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemCopy")] +#endif private static unsafe void MemCopy(ref byte dest, ref byte src, nuint size) { if (size > 0) From cbcfaf428578f79152d76d496002bbdca2ef9ebd Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 18 Feb 2024 18:44:42 +0100 Subject: [PATCH 07/40] Copy Fill's impl --- .../src/System/SpanHelpers.cs | 361 ++++-------------- 1 file changed, 73 insertions(+), 288 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs index 95c94a42a08154..d4d9178c3ce3f6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; +using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -12,326 +13,110 @@ namespace System { internal static partial class SpanHelpers { - public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength) + public static void ClearWithoutReferences(ref byte b, nuint byteLength) { - if (byteLength == 0) - return; - -#if TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 - // The exact matrix on when ZeroMemory is faster than SpanHelpers.Fill is very complex. The factors to consider include - // type of hardware and memory alignment. This threshold was chosen as a good balance across different configurations. - if (byteLength > 768) - goto PInvoke; - - SpanHelpers.Fill(ref b, byteLength, (byte)0); - return; -#else - // TODO: Optimize other platforms to be on par with AMD64 CoreCLR - // Note: It's important that this switch handles lengths at least up to 22. - // See notes below near the main loop for why. - - // The switch will be very fast since it can be implemented using a jump - // table in assembly. See http://stackoverflow.com/a/449297/4077294 for more info. 
+ if (!Vector.IsHardwareAccelerated) { goto CannotVectorize; } + if (byteLength > (nuint)Vector.Count) { goto CannotVectorize; } - switch (byteLength) + if (byteLength >= (uint)(Vector.Count)) { - case 1: - b = 0; - return; - case 2: - Unsafe.As(ref b) = 0; - return; - case 3: - Unsafe.As(ref b) = 0; - Unsafe.Add(ref b, 2) = 0; - return; - case 4: - Unsafe.As(ref b) = 0; - return; - case 5: - Unsafe.As(ref b) = 0; - Unsafe.Add(ref b, 4) = 0; - return; - case 6: - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - return; - case 7: - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.Add(ref b, 6) = 0; - return; - case 8: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - return; - case 9: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.Add(ref b, 8) = 0; - return; - case 10: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - return; - case 11: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.Add(ref b, 10) = 0; - return; - case 12: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - return; - case 13: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.Add(ref b, 12) = 0; - return; - case 14: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; - return; - case 15: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; - Unsafe.Add(ref b, 14) = 0; - return; - case 16: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - return; - case 17: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.Add(ref b, 16) = 0; - return; - case 18: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; - return; - case 19: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; - Unsafe.Add(ref b, 18) = 0; - return; - case 20: -#if TARGET_64BIT - 
Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; - return; - case 21: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; - Unsafe.Add(ref b, 20) = 0; - return; - case 22: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 20)) = 0; - return; - } + // We have enough data for at least one vectorized write. - // P/Invoke into the native version for large lengths - if (byteLength >= 512) goto PInvoke; + Vector vector = Vector.Zero; - nuint i = 0; // byte offset at which we're copying + nuint stopLoopAtOffset = byteLength & (nuint)(nint)(2 * (int)-Vector.Count); // intentional sign extension carries the negative bit + nuint offset = 0; - if (((nuint)Unsafe.AsPointer(ref b) & 3) != 0) - { - if (((nuint)Unsafe.AsPointer(ref b) & 1) != 0) + // Loop, writing 2 vectors at a time. + // Compare 'numElements' rather than 'stopLoopAtOffset' because we don't want a dependency + // on the very recently calculated 'stopLoopAtOffset' value. + + if (byteLength >= (uint)(2 * Vector.Count)) { - b = 0; - i += 1; - if (((nuint)Unsafe.AsPointer(ref b) & 2) != 0) - goto IntAligned; + do + { + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref b, offset), vector); + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref b, offset + (nuint)Vector.Count), vector); + offset += (uint)(2 * Vector.Count); + } while (offset < stopLoopAtOffset); } - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - i += 2; - } - IntAligned: + // At this point, if any data remains to be written, it's strictly less than + // 2 * sizeof(Vector) bytes. The loop above had us write an even number of vectors. + // If the total byte length instead involves us writing an odd number of vectors, write + // one additional vector now. The bit check below tells us if we're in an "odd vector + // count" situation. - // On 64-bit IntPtr.Size == 8, so we want to advance to the next 8-aligned address. If - // (int)b % 8 is 0, 5, 6, or 7, we will already have advanced by 0, 3, 2, or 1 - // bytes to the next aligned address (respectively), so do nothing. On the other hand, - // if it is 1, 2, 3, or 4 we will want to copy-and-advance another 4 bytes until - // we're aligned. - // The thing 1, 2, 3, and 4 have in common that the others don't is that if you - // subtract one from them, their 3rd lsb will not be set. Hence, the below check. 
- - if ((((nuint)Unsafe.AsPointer(ref b) - 1) & 4) == 0) - { - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - i += 4; - } - - nuint end = byteLength - 16; - byteLength -= i; // lower 4 bits of byteLength represent how many bytes are left *after* the unrolled loop + if ((byteLength & (nuint)Vector.Count) != 0) + { + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref b, offset), vector); + } - // We know due to the above switch-case that this loop will always run 1 iteration; max - // bytes we clear before checking is 23 (7 to align the pointers, 16 for 1 iteration) so - // the switch handles lengths 0-22. - Debug.Assert(end >= 7 && i <= end); + // It's possible that some small buffer remains to be populated - something that won't + // fit an entire vector's worth of data. Instead of falling back to a loop, we'll write + // a vector at the very end of the buffer. This may involve overwriting previously + // populated data, which is fine since we're splatting the same value for all entries. + // There's no need to perform a length check here because we already performed this + // check before entering the vectorized code path. - // This is separated out into a different variable, so the i + 16 addition can be - // performed at the start of the pipeline and the loop condition does not have - // a dependency on the writes. - nuint counter; + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref b, byteLength - (nuint)Vector.Count), vector); - do - { - counter = i + 16; + // And we're done! - // This loop looks very costly since there appear to be a bunch of temporary values - // being created with the adds, but the jit (for x86 anyways) will convert each of - // these to use memory addressing operands. + return; + } - // So the only cost is a bit of code size, which is made up for by the fact that - // we save on writes to b. + CannotVectorize: -#if TARGET_64BIT - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 8)) = 0; -#else - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 4)) = 0; - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 8)) = 0; - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 12)) = 0; -#endif + // If we reached this point, we cannot vectorize this data, or there are too few + // elements for us to vectorize. Fall back to an unrolled loop. 
- i = counter; + nuint i = 0; - // See notes above for why this wasn't used instead - // i += 16; - } - while (counter <= end); + // Write 8 elements at a time - if ((byteLength & 8) != 0) + if (byteLength >= 8) { -#if TARGET_64BIT - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; -#else - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 4)) = 0; -#endif - i += 8; + nuint stopLoopAtOffset = byteLength & ~(nuint)7; + do + { + Unsafe.Add(ref b, (nint)i + 0) = 0; + Unsafe.Add(ref b, (nint)i + 1) = 0; + Unsafe.Add(ref b, (nint)i + 2) = 0; + Unsafe.Add(ref b, (nint)i + 3) = 0; + Unsafe.Add(ref b, (nint)i + 4) = 0; + Unsafe.Add(ref b, (nint)i + 5) = 0; + Unsafe.Add(ref b, (nint)i + 6) = 0; + Unsafe.Add(ref b, (nint)i + 7) = 0; + } while ((i += 8) < stopLoopAtOffset); } + + // Write next 4 elements if needed + if ((byteLength & 4) != 0) { - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; + Unsafe.Add(ref b, (nint)i + 0) = 0; + Unsafe.Add(ref b, (nint)i + 1) = 0; + Unsafe.Add(ref b, (nint)i + 2) = 0; + Unsafe.Add(ref b, (nint)i + 3) = 0; i += 4; } + + // Write next 2 elements if needed + if ((byteLength & 2) != 0) { - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; + Unsafe.Add(ref b, (nint)i + 0) = 0; + Unsafe.Add(ref b, (nint)i + 1) = 0; i += 2; } + + // Write final element if needed + if ((byteLength & 1) != 0) { - Unsafe.AddByteOffset(ref b, i) = 0; - // We're not using i after this, so not needed - // i += 1; + Unsafe.Add(ref b, (nint)i) = 0; } - - return; -#endif - - PInvoke: - Buffer._ZeroMemory(ref b, byteLength); } public static unsafe void ClearWithReferences(ref IntPtr ip, nuint pointerSizeLength) From 63c893cb216dd13654c98d3c4ab2bd758d787553 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 18 Feb 2024 21:10:51 +0100 Subject: [PATCH 08/40] Address feedback + clean up --- .../src/System/Buffer.Unix.cs | 1 + .../src/System/Buffer.Windows.cs | 1 + .../src/System/SpanHelpers.cs | 53 ++++++------------- src/tests/issues.targets | 2 +- 4 files changed, 19 insertions(+), 38 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs index 008bc9310a2417..edbe2fbb69f14d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs @@ -15,5 +15,6 @@ public static partial class Buffer #else private const nuint MemmoveNativeThreshold = 2048; #endif + internal const nuint ZeroMemoryNativeThreshold = 768; } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs index 4dea08790b91a1..5a86ee780c14d1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs @@ -12,5 +12,6 @@ public static partial class Buffer #else private const nuint MemmoveNativeThreshold = 2048; #endif + internal const nuint ZeroMemoryNativeThreshold = 768; } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs index d4d9178c3ce3f6..5486cba9ba6ccf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs @@ -15,28 +15,24 @@ internal static partial class SpanHelpers { public static void ClearWithoutReferences(ref byte b, nuint byteLength) { - if 
From 63c893cb216dd13654c98d3c4ab2bd758d787553 Mon Sep 17 00:00:00 2001
From: EgorBo
Date: Sun, 18 Feb 2024 21:10:51 +0100
Subject: [PATCH 08/40] Address feedback + clean up

---
 .../src/System/Buffer.Unix.cs    |  1 +
 .../src/System/Buffer.Windows.cs |  1 +
 .../src/System/SpanHelpers.cs    | 53 ++++++-------------
 src/tests/issues.targets         |  2 +-
 4 files changed, 19 insertions(+), 38 deletions(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs
index 008bc9310a2417..edbe2fbb69f14d 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs
@@ -15,5 +15,6 @@ public static partial class Buffer
 #else
         private const nuint MemmoveNativeThreshold = 2048;
 #endif
+        internal const nuint ZeroMemoryNativeThreshold = 768;
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs
index 4dea08790b91a1..5a86ee780c14d1 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs
@@ -12,5 +12,6 @@ public static partial class Buffer
 #else
         private const nuint MemmoveNativeThreshold = 2048;
 #endif
+        internal const nuint ZeroMemoryNativeThreshold = 768;
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
index d4d9178c3ce3f6..5486cba9ba6ccf 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
@@ -15,28 +15,24 @@ internal static partial class SpanHelpers
 {
     public static void ClearWithoutReferences(ref byte b, nuint byteLength)
     {
-        if (!Vector.IsHardwareAccelerated) { goto CannotVectorize; }
-        if (byteLength > (nuint)Vector<byte>.Count) { goto CannotVectorize; }
+        if (byteLength > Buffer.ZeroMemoryNativeThreshold)
+            goto PInvoke;

-        if (byteLength >= (uint)(Vector<byte>.Count))
+        if (Vector.IsHardwareAccelerated && byteLength >= (uint)(Vector<byte>.Count))
         {
             // We have enough data for at least one vectorized write.
-
-            Vector<byte> vector = Vector<byte>.Zero;
-            nuint stopLoopAtOffset = byteLength & (nuint)(nint)(2 * (int)-Vector<byte>.Count); // intentional sign extension carries the negative bit
             nuint offset = 0;

             // Loop, writing 2 vectors at a time.
             // Compare 'numElements' rather than 'stopLoopAtOffset' because we don't want a dependency
             // on the very recently calculated 'stopLoopAtOffset' value.
-            if (byteLength >= (uint)(2 * Vector<byte>.Count))
             {
                 do
                 {
-                    Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref b, offset), vector);
-                    Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref b, offset + (nuint)Vector<byte>.Count), vector);
+                    Vector<byte>.Zero.StoreUnsafe(ref b, offset);
+                    Vector<byte>.Zero.StoreUnsafe(ref b, offset + (nuint)Vector<byte>.Count);
                     offset += (uint)(2 * Vector<byte>.Count);
                 } while (offset < stopLoopAtOffset);
             }
@@ -46,10 +42,9 @@ public static void ClearWithoutReferences(ref byte b, nuint byteLength)
             // If the total byte length instead involves us writing an odd number of vectors, write
             // one additional vector now. The bit check below tells us if we're in an "odd vector
             // count" situation.
-            if ((byteLength & (nuint)Vector<byte>.Count) != 0)
             {
-                Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref b, offset), vector);
+                Vector<byte>.Zero.StoreUnsafe(ref b, offset);
             }

             // It's possible that some small buffer remains to be populated - something that won't
@@ -58,65 +53,49 @@ public static void ClearWithoutReferences(ref byte b, nuint byteLength)
             // populated data, which is fine since we're splatting the same value for all entries.
             // There's no need to perform a length check here because we already performed this
             // check before entering the vectorized code path.
-
-            Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref b, byteLength - (nuint)Vector<byte>.Count), vector);
+            Vector<byte>.Zero.StoreUnsafe(ref b, byteLength - (nuint)Vector<byte>.Count);

             // And we're done!
-            return;
         }

-    CannotVectorize:
-
         // If we reached this point, we cannot vectorize this T, or there are too few
         // elements for us to vectorize. Fall back to an unrolled loop.
-        nuint i = 0;

         // Write 8 elements at a time
-        if (byteLength >= 8)
         {
             nuint stopLoopAtOffset = byteLength & ~(nuint)7;
             do
             {
-                Unsafe.Add(ref b, (nint)i + 0) = 0;
-                Unsafe.Add(ref b, (nint)i + 1) = 0;
-                Unsafe.Add(ref b, (nint)i + 2) = 0;
-                Unsafe.Add(ref b, (nint)i + 3) = 0;
-                Unsafe.Add(ref b, (nint)i + 4) = 0;
-                Unsafe.Add(ref b, (nint)i + 5) = 0;
-                Unsafe.Add(ref b, (nint)i + 6) = 0;
-                Unsafe.Add(ref b, (nint)i + 7) = 0;
+                Unsafe.AddByteOffset(ref Unsafe.As<byte, uint>(ref b), (nint)i) = 0;
+                Unsafe.AddByteOffset(ref Unsafe.As<byte, uint>(ref b), (nint)i + 4) = 0;
             } while ((i += 8) < stopLoopAtOffset);
         }

         // Write next 4 elements if needed
-        if ((byteLength & 4) != 0)
         {
-            Unsafe.Add(ref b, (nint)i + 0) = 0;
-            Unsafe.Add(ref b, (nint)i + 1) = 0;
-            Unsafe.Add(ref b, (nint)i + 2) = 0;
-            Unsafe.Add(ref b, (nint)i + 3) = 0;
+            Unsafe.AddByteOffset(ref Unsafe.As<byte, uint>(ref b), (nint)i) = 0;
             i += 4;
         }

         // Write next 2 elements if needed
-        if ((byteLength & 2) != 0)
         {
-            Unsafe.Add(ref b, (nint)i + 0) = 0;
-            Unsafe.Add(ref b, (nint)i + 1) = 0;
+            Unsafe.AddByteOffset(ref Unsafe.As<byte, ushort>(ref b), (nint)i) = 0;
            i += 2;
         }

         // Write final element if needed
-        if ((byteLength & 1) != 0)
         {
-            Unsafe.Add(ref b, (nint)i) = 0;
+            Unsafe.AddByteOffset(ref b, (nint)i) = 0;
         }
+
+    PInvoke:
+        Buffer._ZeroMemory(ref b, byteLength);
     }

     public static unsafe void ClearWithReferences(ref IntPtr ip, nuint pointerSizeLength)
diff --git a/src/tests/issues.targets b/src/tests/issues.targets
index 71263b60902e68..dc2d50cb40ead8 100644
--- a/src/tests/issues.targets
+++ b/src/tests/issues.targets
@@ -1873,7 +1873,7 @@
         https://github.com/dotnet/runtime/issues/90374
-
+
         https://github.com/dotnet/runtime/issues/98628
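The vectorized path this patch settles on is a standard wide-store shape: two Vector<byte> zero stores per iteration, one conditional store for an odd vector count, and a final store that deliberately overlaps already-cleared bytes instead of falling into a scalar tail. The same shape in isolation (a sketch under the patch's own preconditions; StoreUnsafe is the System.Numerics.Vector extension method, and its offset is in elements, which equals bytes for Vector<byte>):

    static void ClearVectorized(ref byte b, nuint byteLength)
    {
        Debug.Assert(Vector.IsHardwareAccelerated);
        Debug.Assert(byteLength >= (uint)Vector<byte>.Count);
        // Round byteLength down to a multiple of two vector widths.
        nuint stopLoopAtOffset = byteLength & (nuint)(nint)(2 * (int)-Vector<byte>.Count);
        nuint offset = 0;
        if (byteLength >= (uint)(2 * Vector<byte>.Count))
        {
            do
            {
                Vector<byte>.Zero.StoreUnsafe(ref b, offset);
                Vector<byte>.Zero.StoreUnsafe(ref b, offset + (nuint)Vector<byte>.Count);
                offset += (uint)(2 * Vector<byte>.Count);
            } while (offset < stopLoopAtOffset);
        }
        if ((byteLength & (nuint)Vector<byte>.Count) != 0)
            Vector<byte>.Zero.StoreUnsafe(ref b, offset);
        // Overlapping tail store: rewriting already-zeroed bytes is harmless.
        Vector<byte>.Zero.StoreUnsafe(ref b, byteLength - (nuint)Vector<byte>.Count);
    }
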
From 36e9c5f09ff2fc91ff3e7db52c8e26460215aa8c Mon Sep 17 00:00:00 2001
From: EgorBo
Date: Sun, 18 Feb 2024 21:16:43 +0100
Subject: [PATCH 09/40] Fix comments

---
 src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
index 5486cba9ba6ccf..66eda011187508 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
@@ -59,7 +59,7 @@ public static void ClearWithoutReferences(ref byte b, nuint byteLength)
             return;
         }

-        // If we reached this point, we cannot vectorize this T, or there are too few
+        // If we reached this point, we cannot vectorize this data, or there are too few
         // elements for us to vectorize. Fall back to an unrolled loop.

         nuint i = 0;
@@ -69,6 +69,7 @@ public static void ClearWithoutReferences(ref byte b, nuint byteLength)
             nuint stopLoopAtOffset = byteLength & ~(nuint)7;
             do
             {
+                // JIT is expected to coalesce these stores into a single 8-byte store on 64-bit platforms
                 Unsafe.AddByteOffset(ref Unsafe.As<byte, uint>(ref b), (nint)i) = 0;
                 Unsafe.AddByteOffset(ref Unsafe.As<byte, uint>(ref b), (nint)i + 4) = 0;
             } while ((i += 8) < stopLoopAtOffset);

From 4bc7578b2304b434446fc213f6e00dd6ca073dd6 Mon Sep 17 00:00:00 2001
From: EgorBo
Date: Sun, 18 Feb 2024 23:46:05 +0100
Subject: [PATCH 10/40] Fix regressions

---
 src/coreclr/jit/codegenarm64.cpp | 13 ++++++++++++-
 src/tests/issues.targets         |  2 +-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index aa7fc5eeb7bdb4..477d5ebc9b971b 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -5108,7 +5108,18 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,

         callTarget = callTargetReg;

-        instGen_Set_Reg_To_Imm(EA_PTRSIZE, callTarget, (ssize_t)pAddr);
+        if (compiler->opts.compReloc)
+        {
+            // adrp + add with relocations will be emitted
+            GetEmitter()->emitIns_R_AI(INS_adrp, EA_PTR_DSP_RELOC, callTarget,
+                                       (ssize_t)pAddr DEBUGARG((size_t)compiler->eeFindHelper(helper))
+                                           DEBUGARG(GTF_ICON_METHOD_HDL));
+        }
+        else
+        {
+            instGen_Set_Reg_To_Imm(EA_PTRSIZE, callTarget, (ssize_t)pAddr);
+        }
+
         GetEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, callTarget, callTarget);
         callType = emitter::EC_INDIR_R;
     }
diff --git a/src/tests/issues.targets b/src/tests/issues.targets
index dc2d50cb40ead8..8be6b601c25716 100644
--- a/src/tests/issues.targets
+++ b/src/tests/issues.targets
@@ -1873,7 +1873,7 @@
         https://github.com/dotnet/runtime/issues/90374
-
+
         https://github.com/dotnet/runtime/issues/98628

From 41584034f3ee2f152ccf24b20f77471bb692b525 Mon Sep 17 00:00:00 2001
From: EgorBo
Date: Mon, 19 Feb 2024 02:49:38 +0100
Subject: [PATCH 11/40] CI test

---
 .../src/System/Runtime/CompilerServices/RuntimeHelpers.cs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs
index c62a72ac491891..b95fa47490b03e 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs
@@ -133,6 +133,7 @@ internal static bool IsPrimitiveType(this CorElementType et)
 #if NATIVEAOT
     [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemSet")]
 #endif
+    [MethodImpl(MethodImplOptions.AggressiveOptimization)]
     private static unsafe void MemSet(ref byte dest, byte value, nuint size)
     {
         if (size > 0)
@@ -146,6 +147,7 @@ private static unsafe void MemSet(ref byte dest, byte value, nuint size)
 #if NATIVEAOT
     [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemCopy")]
 #endif
+    [MethodImpl(MethodImplOptions.AggressiveOptimization)]
     private static unsafe void MemCopy(ref byte dest, ref byte src, nuint size)
     {
         if (size > 0)

From 4d7991b3ffec9de2bb364abfbccf75123101d86e Mon Sep 17 00:00:00 2001
From: Egor Bogatov
Date: Wed, 21 Feb 2024 03:35:21 +0100
Subject: [PATCH 12/40] Update src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs

Co-authored-by: Jan Kotas
---
 src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs | 2 ++
 1 file changed, 2 insertions(+)

diff --git 
a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs index 66eda011187508..6102f62921ed65 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs @@ -95,6 +95,8 @@ public static void ClearWithoutReferences(ref byte b, nuint byteLength) Unsafe.AddByteOffset(ref b, (nint)i) = 0; } + return; + PInvoke: Buffer._ZeroMemory(ref b, byteLength); } From a76a5900655f70b403c334609b07330fac1e4430 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 21 Feb 2024 03:48:33 +0100 Subject: [PATCH 13/40] Address feedback --- .../src/System/Buffer.Unix.cs | 3 ++- .../src/System/Buffer.Windows.cs | 3 ++- .../Runtime/CompilerServices/RuntimeHelpers.cs | 2 -- .../src/System/SpanHelpers.cs | 14 +++++++++----- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs index edbe2fbb69f14d..4a54e8698da41e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs @@ -15,6 +15,7 @@ public static partial class Buffer #else private const nuint MemmoveNativeThreshold = 2048; #endif - internal const nuint ZeroMemoryNativeThreshold = 768; + // TODO: Determine optimal value + internal const nuint ZeroMemoryNativeThreshold = 1024; } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs index 5a86ee780c14d1..b0f0fe29b20680 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs @@ -12,6 +12,7 @@ public static partial class Buffer #else private const nuint MemmoveNativeThreshold = 2048; #endif - internal const nuint ZeroMemoryNativeThreshold = 768; + // TODO: Determine optimal value + internal const nuint ZeroMemoryNativeThreshold = 1024; } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs index b95fa47490b03e..c62a72ac491891 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs @@ -133,7 +133,6 @@ internal static bool IsPrimitiveType(this CorElementType et) #if NATIVEAOT [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemSet")] #endif - [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static unsafe void MemSet(ref byte dest, byte value, nuint size) { if (size > 0) @@ -147,7 +146,6 @@ private static unsafe void MemSet(ref byte dest, byte value, nuint size) #if NATIVEAOT [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemCopy")] #endif - [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static unsafe void MemCopy(ref byte dest, ref byte src, nuint size) { if (size > 0) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs index 6102f62921ed65..e833a59c006ec7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs @@ -31,6 +31,7 @@ public static void ClearWithoutReferences(ref 
byte b, nuint byteLength)
         {
             do
             {
+                Vector<byte>.Zero.StoreUnsafe(ref b, offset);
                 Vector<byte>.Zero.StoreUnsafe(ref b, offset);
                 Vector<byte>.Zero.StoreUnsafe(ref b, offset + (nuint)Vector<byte>.Count);
                 offset += (uint)(2 * Vector<byte>.Count);
@@ -69,23 +70,26 @@ public static void ClearWithoutReferences(ref byte b, nuint byteLength)
             nuint stopLoopAtOffset = byteLength & ~(nuint)7;
             do
             {
-                // JIT is expected to coalesce these stores into a single 8-byte store on 64-bit platforms
-                Unsafe.AddByteOffset(ref Unsafe.As<byte, uint>(ref b), (nint)i) = 0;
-                Unsafe.AddByteOffset(ref Unsafe.As<byte, uint>(ref b), (nint)i + 4) = 0;
+#if TARGET_64BIT
+                Unsafe.WriteUnaligned<long>(ref Unsafe.AddByteOffset(ref b, i), 0);
+#else
+                Unsafe.WriteUnaligned<int>(ref Unsafe.AddByteOffset(ref b, i), 0);
+                Unsafe.WriteUnaligned<int>(ref Unsafe.AddByteOffset(ref b, i + 4), 0);
+#endif
             } while ((i += 8) < stopLoopAtOffset);
         }

         // Write next 4 elements if needed
         if ((byteLength & 4) != 0)
         {
-            Unsafe.AddByteOffset(ref Unsafe.As<byte, uint>(ref b), (nint)i) = 0;
+            Unsafe.WriteUnaligned<int>(ref Unsafe.AddByteOffset(ref b, i), 0);
             i += 4;
         }

         // Write next 2 elements if needed
         if ((byteLength & 2) != 0)
         {
-            Unsafe.AddByteOffset(ref Unsafe.As<byte, ushort>(ref b), (nint)i) = 0;
+            Unsafe.WriteUnaligned<short>(ref Unsafe.AddByteOffset(ref b, i), 0);
             i += 2;
         }

From e896a47202406a008371104a3d1690dee54e4a9b Mon Sep 17 00:00:00 2001
From: EgorBo
Date: Wed, 21 Feb 2024 03:52:59 +0100
Subject: [PATCH 14/40] revert jit change

---
 src/coreclr/jit/codegenarm64.cpp | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index 6fb22451d7f471..4587bace1697ab 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -5109,18 +5109,10 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,

         callTarget = callTargetReg;

-        if (compiler->opts.compReloc)
-        {
-            // adrp + add with relocations will be emitted
-            GetEmitter()->emitIns_R_AI(INS_adrp, EA_PTR_DSP_RELOC, callTarget,
-                                       (ssize_t)pAddr DEBUGARG((size_t)compiler->eeFindHelper(helper))
-                                           DEBUGARG(GTF_ICON_METHOD_HDL));
-        }
-        else
-        {
-            instGen_Set_Reg_To_Imm(EA_PTRSIZE, callTarget, (ssize_t)pAddr);
-        }
-
+        // adrp + add with relocations will be emitted
+        GetEmitter()->emitIns_R_AI(INS_adrp, EA_PTR_DSP_RELOC, callTarget,
+                                   (ssize_t)pAddr DEBUGARG((size_t)compiler->eeFindHelper(helper))
+                                       DEBUGARG(GTF_ICON_METHOD_HDL));
         GetEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, callTarget, callTarget);
         callType = emitter::EC_INDIR_R;
     }

From a8d3b988b59edebb603df1782788a97d9ff10bd4 Mon Sep 17 00:00:00 2001
From: Egor Bogatov
Date: Wed, 21 Feb 2024 04:44:23 +0100
Subject: [PATCH 15/40] Update issues.targets

---
 src/tests/issues.targets | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tests/issues.targets b/src/tests/issues.targets
index 5890855c3eb3ed..28e77319517359 100644
--- a/src/tests/issues.targets
+++ b/src/tests/issues.targets
@@ -1870,7 +1870,7 @@
         https://github.com/dotnet/runtime/issues/90374
-
+
         https://github.com/dotnet/runtime/issues/98628
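A note on the store shape these patches converge on: Unsafe.WriteUnaligned never assumes alignment, so zeroing eight bytes with one typed write is safe at any address, and on 64-bit targets it lowers to a single store instruction. A self-contained sketch of the idiom (illustrative only; TARGET_64BIT mirrors the CoreLib build define, not a public API):

    static void WriteZero8(ref byte b, nuint i)
    {
    #if TARGET_64BIT
        // One unaligned 8-byte store.
        Unsafe.WriteUnaligned<long>(ref Unsafe.AddByteOffset(ref b, i), 0);
    #else
        // Two unaligned 4-byte stores on 32-bit targets.
        Unsafe.WriteUnaligned<int>(ref Unsafe.AddByteOffset(ref b, i), 0);
        Unsafe.WriteUnaligned<int>(ref Unsafe.AddByteOffset(ref b, i + 4), 0);
    #endif
    }
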
From 974a361a82b0a53f03be4fe0e6d84c11ae514050 Mon Sep 17 00:00:00 2001
From: EgorBo
Date: Wed, 21 Feb 2024 10:54:31 +0100
Subject: [PATCH 16/40] Clean up

---
 .../System.Private.CoreLib/src/System/SpanHelpers.cs | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
index e833a59c006ec7..284b16d93ab306 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
@@ -31,7 +31,6 @@ public static void ClearWithoutReferences(ref byte b, nuint byteLength)
         {
             do
             {
-                Vector<byte>.Zero.StoreUnsafe(ref b, offset);
                 Vector<byte>.Zero.StoreUnsafe(ref b, offset);
                 Vector<byte>.Zero.StoreUnsafe(ref b, offset + (nuint)Vector<byte>.Count);
                 offset += (uint)(2 * Vector<byte>.Count);
@@ -70,12 +69,7 @@ public static void ClearWithoutReferences(ref byte b, nuint byteLength)
             nuint stopLoopAtOffset = byteLength & ~(nuint)7;
             do
             {
-#if TARGET_64BIT
                 Unsafe.WriteUnaligned<long>(ref Unsafe.AddByteOffset(ref b, i), 0);
-#else
-                Unsafe.WriteUnaligned<int>(ref Unsafe.AddByteOffset(ref b, i), 0);
-                Unsafe.WriteUnaligned<int>(ref Unsafe.AddByteOffset(ref b, i + 4), 0);
-#endif
             } while ((i += 8) < stopLoopAtOffset);
         }
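The next patch wires up a new JIT helper, CORINFO_HELP_NATIVE_MEMSET. Conceptually it is a raw, GC-invisible memset import: no managed null check, no bounds reasoning, and no GC transition, which is why its comment warns it is unsafe for a null destination. A rough managed analogy of that contract (illustrative only; the real helper is the C++ Jit_NativeMemSet shown further below, and NativeMemory.Fill here merely stands in for the CRT memset):

    static unsafe void NativeMemSetAnalogy(void* dest, int value, nuint length)
    {
        // The caller must guarantee dest is non-null and the whole range is
        // writable; nothing here pins, polls, or null-checks on its behalf.
        Debug.Assert(dest != null);
        NativeMemory.Fill(dest, length, (byte)value);
    }
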
From 2e00789d56ce2c6ae38fffa30bb1765aee04ef2a Mon Sep 17 00:00:00 2001
From: EgorBo
Date: Wed, 21 Feb 2024 22:10:42 +0100
Subject: [PATCH 17/40] Add native memset helper

---
 src/coreclr/inc/corinfo.h                          |  1 +
 src/coreclr/inc/jiteeversionguid.h                 | 10 +++++-----
 src/coreclr/inc/jithelpers.h                       |  1 +
 src/coreclr/inc/readytorun.h                       |  6 +++++-
 src/coreclr/inc/readytorunhelpers.h                |  1 +
 src/coreclr/jit/codegencommon.cpp                  |  4 +++-
 src/coreclr/jit/lsrabuild.cpp                      |  2 +-
 src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h  |  2 +-
 .../tools/Common/Internal/Runtime/ModuleHeaders.cs |  2 +-
 .../Common/Internal/Runtime/ReadyToRunConstants.cs |  2 ++
 .../tools/Common/JitInterface/CorInfoHelpFunc.cs   |  1 +
 .../aot/ILCompiler.Compiler/Compiler/JitHelper.cs  |  3 +++
 .../JitInterface/CorInfoImpl.ReadyToRun.cs         |  3 +++
 .../ReadyToRunSignature.cs                         |  4 ++++
 .../JitInterface/CorInfoImpl.RyuJit.cs             |  3 +++
 src/coreclr/vm/jithelpers.cpp                      |  8 ++++++++
 16 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h
index 86680d6e20c91e..b535f9d3da8d14 100644
--- a/src/coreclr/inc/corinfo.h
+++ b/src/coreclr/inc/corinfo.h
@@ -573,6 +573,7 @@ enum CorInfoHelpFunc

     CORINFO_HELP_MEMSET,        // Init block of memory
     CORINFO_HELP_MEMCPY,        // Copy block of memory
+    CORINFO_HELP_NATIVE_MEMSET, // Init block of memory using native memset (not safe for pDst being null)

     CORINFO_HELP_RUNTIMEHANDLE_METHOD,     // determine a type/field/method handle at run-time
     CORINFO_HELP_RUNTIMEHANDLE_METHOD_LOG, // determine a type/field/method handle at run-time, with IBC logging
diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h
index 6355fc20dd0fd5..5f29888ffbf454 100644
--- a/src/coreclr/inc/jiteeversionguid.h
+++ b/src/coreclr/inc/jiteeversionguid.h
@@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
 #define GUID_DEFINED
 #endif // !GUID_DEFINED

-constexpr GUID JITEEVersionIdentifier = { /* 0fb71692-0ee6-4914-88a8-6446e45f23e8 */
-    0x0fb71692,
-    0x0ee6,
-    0x4914,
-    {0x88, 0xa8, 0x64, 0x46, 0xe4, 0x5f, 0x23, 0xe8}
+constexpr GUID JITEEVersionIdentifier = { /* 8340020b-7035-462b-be3f-759932831f20 */
+    0x8340020b,
+    0x7035,
+    0x462b,
+    {0xbe, 0x3f, 0x75, 0x99, 0x32, 0x83, 0x1f, 0x20}
 };

 //////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h
index fc555ad444a42c..323cf79bc208f4 100644
--- a/src/coreclr/inc/jithelpers.h
+++ b/src/coreclr/inc/jithelpers.h
@@ -237,6 +237,7 @@

     DYNAMICJITHELPER(CORINFO_HELP_MEMSET, NULL, CORINFO_HELP_SIG_REG_ONLY)
     DYNAMICJITHELPER(CORINFO_HELP_MEMCPY, NULL, CORINFO_HELP_SIG_REG_ONLY)
+    JITHELPER(CORINFO_HELP_NATIVE_MEMSET, Jit_NativeMemSet, CORINFO_HELP_SIG_REG_ONLY)

     // Generics
     JITHELPER(CORINFO_HELP_RUNTIMEHANDLE_METHOD, JIT_GenericHandleMethod, CORINFO_HELP_SIG_REG_ONLY)
diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h
index b3128cb00e4b73..e17520d855dca4 100644
--- a/src/coreclr/inc/readytorun.h
+++ b/src/coreclr/inc/readytorun.h
@@ -20,7 +20,7 @@
 // If you update this, ensure you run `git grep MINIMUM_READYTORUN_MAJOR_VERSION`
 // and handle pending work.
 #define READYTORUN_MAJOR_VERSION 0x0009
-#define READYTORUN_MINOR_VERSION 0x0001
+#define READYTORUN_MINOR_VERSION 0x0002

 #define MINIMUM_READYTORUN_MAJOR_VERSION 0x009

@@ -33,6 +33,8 @@
 // R2R Version 8.0 Changes the alignment of the Int128 type
 // R2R Version 9.0 adds support for the Vector512 type
 // R2R Version 9.1 adds new helpers to allocate objects on frozen segments
+// R2R Version 9.2 adds new helper to perform native memset
+

 struct READYTORUN_CORE_HEADER
 {
@@ -445,6 +447,8 @@ enum ReadyToRunHelper

     // Array helpers for use with native ints
     READYTORUN_HELPER_Stelem_Ref_I = 0x113,
     READYTORUN_HELPER_Ldelema_Ref_I = 0x114,
+
+    READYTORUN_HELPER_NativeMemSet = 0x115,
 };

 #include "readytoruninstructionset.h"
diff --git a/src/coreclr/inc/readytorunhelpers.h b/src/coreclr/inc/readytorunhelpers.h
index 8691f9b9cb8c0c..f9c713f28d8438 100644
--- a/src/coreclr/inc/readytorunhelpers.h
+++ b/src/coreclr/inc/readytorunhelpers.h
@@ -29,6 +29,7 @@ HELPER(READYTORUN_HELPER_Stelem_Ref, CORINFO_HELP_ARRADDR_ST, )
 HELPER(READYTORUN_HELPER_Ldelema_Ref, CORINFO_HELP_LDELEMA_REF, )

 HELPER(READYTORUN_HELPER_MemSet, CORINFO_HELP_MEMSET, )
+HELPER(READYTORUN_HELPER_NativeMemSet, CORINFO_HELP_NATIVE_MEMSET, )
 HELPER(READYTORUN_HELPER_MemCpy, CORINFO_HELP_MEMCPY, )

 HELPER(READYTORUN_HELPER_LogMethodEnter, CORINFO_HELP_BBT_FCN_ENTER, )
diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp
index 3c001249c9a054..580523ebff95a4 100644
--- a/src/coreclr/jit/codegencommon.cpp
+++ b/src/coreclr/jit/codegencommon.cpp
@@ -8361,7 +8361,9 @@ void CodeGen::genPoisonFrame(regMaskTP regLiveIn)
             GetEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_0, (int)varNum, 0);
             instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_1, static_cast<char>(poisonVal));
             instGen_Set_Reg_To_Imm(EA_PTRSIZE, REG_ARG_2, size);
-            genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
+
+            // Call non-managed memset
+            genEmitHelperCall(CORINFO_HELP_NATIVE_MEMSET, 0, EA_UNKNOWN);
             // May kill REG_SCRATCH, so we need to reload it.
             hasPoisonImm = false;
 #endif
diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp
index 3b9ec7f388aec1..df1af6a419a3ec 100644
--- a/src/coreclr/jit/lsrabuild.cpp
+++ b/src/coreclr/jit/lsrabuild.cpp
@@ -2481,7 +2481,7 @@ void LinearScan::buildIntervals()
             killed = RBM_EDI | RBM_ECX | RBM_EAX;
 #else
             // Poisoning uses REG_SCRATCH for small vars and memset helper for big vars.
- killed = genRegMask(REG_SCRATCH) | compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET); + killed = genRegMask(REG_SCRATCH) | compiler->compHelperCallKillSet(CORINFO_HELP_NATIVE_MEMSET); #endif addRefsForPhysRegMask(killed, currentLoc + 1, RefTypeKill, true); currentLoc += 2; diff --git a/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h b/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h index 750faccc828383..6a3b24a3944870 100644 --- a/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h +++ b/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h @@ -12,7 +12,7 @@ struct ReadyToRunHeaderConstants static const uint32_t Signature = 0x00525452; // 'RTR' static const uint32_t CurrentMajorVersion = 9; - static const uint32_t CurrentMinorVersion = 1; + static const uint32_t CurrentMinorVersion = 2; }; struct ReadyToRunHeader diff --git a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs index 01071442f962b6..6fc5d9542e1609 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs @@ -16,7 +16,7 @@ internal struct ReadyToRunHeaderConstants public const uint Signature = 0x00525452; // 'RTR' public const ushort CurrentMajorVersion = 9; - public const ushort CurrentMinorVersion = 1; + public const ushort CurrentMinorVersion = 2; } #if READYTORUN #pragma warning disable 0169 diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs index 63383b7ddfa679..de7f23cb5ed34f 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs @@ -345,6 +345,8 @@ public enum ReadyToRunHelper GetCurrentManagedThreadId = 0x112, + NativeMemSet = 0x115, + // ********************************************************************************************** // // These are not actually part of the R2R file format. We have them here because it's convenient. diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index 2a9dbe302dac2e..9dd602b2019b2e 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -215,6 +215,7 @@ which is the right helper to use to allocate an object of a given type. 
*/ CORINFO_HELP_MEMSET, // Init block of memory CORINFO_HELP_MEMCPY, // Copy block of memory + CORINFO_HELP_NATIVE_MEMSET, // Init block of memory using native memset (not safe for pDst being null) CORINFO_HELP_RUNTIMEHANDLE_METHOD, // determine a type/field/method handle at run-time CORINFO_HELP_RUNTIMEHANDLE_METHOD_LOG, // determine a type/field/method handle at run-time, with IBC logging diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index 285640e1bcf35f..cb9431ed2970ad 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -144,6 +144,9 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, case ReadyToRunHelper.MemSet: mangledName = "RhRuntimeHelpers_MemSet"; break; + case ReadyToRunHelper.NativeMemSet: + mangledName = "memset"; + break; case ReadyToRunHelper.GetRuntimeTypeHandle: methodDesc = context.GetHelperEntryPoint("LdTokenHelpers", "GetRuntimeTypeHandle"); diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index f7527c96dd9ddb..7a89fa850575d8 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -1031,6 +1031,9 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_MEMCPY: id = ReadyToRunHelper.MemCpy; break; + case CorInfoHelpFunc.CORINFO_HELP_NATIVE_MEMSET: + id = ReadyToRunHelper.NativeMemSet; + break; case CorInfoHelpFunc.CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD: id = ReadyToRunHelper.GetRuntimeMethodHandle; diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs index 8d325f467d600f..41f7e343c17d95 100644 --- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs +++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs @@ -1689,6 +1689,10 @@ private void ParseHelper(StringBuilder builder) builder.Append("MEM_CPY"); break; + case ReadyToRunHelper.NativeMemSet: + builder.Append("NATIVE_MEM_SET"); + break; + // PInvoke helpers case ReadyToRunHelper.PInvokeBegin: builder.Append("PINVOKE_BEGIN"); diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index 4495e0322d4a78..432b7b2c4ec671 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -545,6 +545,9 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_MEMCPY: id = ReadyToRunHelper.MemCpy; break; + case CorInfoHelpFunc.CORINFO_HELP_NATIVE_MEMSET: + id = ReadyToRunHelper.NativeMemSet; + break; case CorInfoHelpFunc.CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE: id = ReadyToRunHelper.GetRuntimeType; diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp index 450752ae367789..1da02114960293 100644 --- a/src/coreclr/vm/jithelpers.cpp +++ b/src/coreclr/vm/jithelpers.cpp @@ -3596,6 +3596,14 @@ NOINLINE HCIMPL3(CORINFO_MethodPtr, 
JIT_VirtualFunctionPointer_Framed, Object * } HCIMPLEND +HCIMPL3(void, Jit_NativeMemSet, void* pDest, int value, size_t length) +{ + _ASSERTE(pDest != nullptr); + FCALL_CONTRACT; + memset(pDest, value, length); +} +HCIMPLEND + HCIMPL1(Object*, JIT_GetRuntimeFieldStub, CORINFO_FIELD_HANDLE field) { FCALL_CONTRACT; From c87288d9d2a4c0ff99096bd05dfc0e9c882c836a Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Thu, 22 Feb 2024 00:03:30 +0100 Subject: [PATCH 18/40] Update src/coreclr/inc/corinfo.h Co-authored-by: Jan Kotas --- src/coreclr/inc/corinfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 600672ee4921c2..4663a3574ccbc3 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -573,7 +573,7 @@ enum CorInfoHelpFunc CORINFO_HELP_MEMSET, // Init block of memory CORINFO_HELP_MEMCPY, // Copy block of memory - CORINFO_HELP_NATIVE_MEMSET, // Init block of memory using native memset (not safe for pDst being null) + CORINFO_HELP_NATIVE_MEMSET, // Init block of memory using native memset (not safe for pDst being null, not safe for unbounded size, does not trigger GC) CORINFO_HELP_RUNTIMEHANDLE_METHOD, // determine a type/field/method handle at run-time CORINFO_HELP_RUNTIMEHANDLE_METHOD_LOG, // determine a type/field/method handle at run-time, with IBC logging From f06ed6191b59f9ca680aec34ea009b014a5200f9 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 22 Feb 2024 00:09:48 +0100 Subject: [PATCH 19/40] Address feedback --- src/coreclr/inc/corinfo.h | 3 ++- src/coreclr/inc/readytorun.h | 9 ++------- .../tools/Common/Internal/Runtime/ReadyToRunConstants.cs | 3 ++- src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs | 3 ++- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 4663a3574ccbc3..d6a2bb202c953c 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -573,7 +573,8 @@ enum CorInfoHelpFunc CORINFO_HELP_MEMSET, // Init block of memory CORINFO_HELP_MEMCPY, // Copy block of memory - CORINFO_HELP_NATIVE_MEMSET, // Init block of memory using native memset (not safe for pDst being null, not safe for unbounded size, does not trigger GC) + CORINFO_HELP_NATIVE_MEMSET, // Init block of memory using native memset (not safe for pDst being null, + // not safe for unbounded size, does not trigger GC) CORINFO_HELP_RUNTIMEHANDLE_METHOD, // determine a type/field/method handle at run-time CORINFO_HELP_RUNTIMEHANDLE_METHOD_LOG, // determine a type/field/method handle at run-time, with IBC logging diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h index e17520d855dca4..97eca9f8026536 100644 --- a/src/coreclr/inc/readytorun.h +++ b/src/coreclr/inc/readytorun.h @@ -327,7 +327,8 @@ enum ReadyToRunHelper READYTORUN_HELPER_Stelem_Ref = 0x38, READYTORUN_HELPER_Ldelema_Ref = 0x39, - READYTORUN_HELPER_MemSet = 0x40, + READYTORUN_HELPER_MemSet = 0x3F, + READYTORUN_HELPER_NativeMemSet = 0x40, READYTORUN_HELPER_MemCpy = 0x41, // PInvoke helpers @@ -443,12 +444,6 @@ enum ReadyToRunHelper READYTORUN_HELPER_StackProbe = 0x111, READYTORUN_HELPER_GetCurrentManagedThreadId = 0x112, - - // Array helpers for use with native ints - READYTORUN_HELPER_Stelem_Ref_I = 0x113, - READYTORUN_HELPER_Ldelema_Ref_I = 0x114, - - READYTORUN_HELPER_NativeMemSet = 0x115, }; #include "readytoruninstructionset.h" diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs 
b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
index de7f23cb5ed34f..86e0aba7f1bbfc 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
@@ -239,7 +239,8 @@ public enum ReadyToRunHelper
     Stelem_Ref = 0x38,
     Ldelema_Ref = 0x39,

-    MemSet = 0x40,
+    MemSet = 0x3F,
+    NativeMemSet = 0x40,
     MemCpy = 0x41,

     // P/Invoke support
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
index 9dd602b2019b2e..15dec39bfae26b 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
@@ -215,7 +215,8 @@ which is the right helper to use to allocate an object of a given type.
         */
         CORINFO_HELP_MEMSET,        // Init block of memory
         CORINFO_HELP_MEMCPY,        // Copy block of memory
-        CORINFO_HELP_NATIVE_MEMSET, // Init block of memory using native memset (not safe for pDst being null)
+        CORINFO_HELP_NATIVE_MEMSET, // Init block of memory using native memset (not safe for pDst being null,
+                                    // not safe for unbounded size, does not trigger GC)

         CORINFO_HELP_RUNTIMEHANDLE_METHOD,     // determine a type/field/method handle at run-time
         CORINFO_HELP_RUNTIMEHANDLE_METHOD_LOG, // determine a type/field/method handle at run-time, with IBC logging

From 96fa40b7de9cbb1ebe3ff728d9c36a8b3f00481f Mon Sep 17 00:00:00 2001
From: EgorBo
Date: Thu, 22 Feb 2024 00:37:55 +0100
Subject: [PATCH 20/40] Better impl for ClearWithoutReferences

---
 .../src/System/Buffer.cs      |   4 +-
 .../src/System/SpanHelpers.cs | 237 +++++++++++++-----
 2 files changed, 171 insertions(+), 70 deletions(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.cs
index 51ec733aaef590..c854c22110eacd 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Buffer.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Buffer.cs
@@ -343,10 +343,10 @@ private static unsafe void _Memmove(ref byte dest, ref byte src, nuint len)

 #if HAS_CUSTOM_BLOCKS
         [StructLayout(LayoutKind.Sequential, Size = 16)]
-        private struct Block16 { }
+        internal struct Block16 { }

         [StructLayout(LayoutKind.Sequential, Size = 64)]
-        private struct Block64 { }
+        internal struct Block64 { }
 #endif // HAS_CUSTOM_BLOCKS

         // Non-inlinable wrapper around the QCall that avoids polluting the fast path
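Block16 and Block64 (made internal above) are empty structs whose only property is their declared Size: writing default of such a struct through Unsafe.WriteUnaligned hands the JIT a single 16- or 64-byte zero store, typically one SIMD register store, with no per-element loop. The trick in isolation (illustrative sketch; the struct mirrors the one in Buffer.cs):

    [StructLayout(LayoutKind.Sequential, Size = 16)]
    struct Block16 { }

    static void Zero16(ref byte dest)
    {
        // One unaligned 16-byte store; on x64 this is a single xmm store.
        Unsafe.WriteUnaligned<Block16>(ref dest, default);
    }
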
diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
index 284b16d93ab306..5db436b7ebceb7 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
@@ -1,6 +1,10 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.

+#if TARGET_AMD64 || TARGET_ARM64 || (TARGET_32BIT && !TARGET_ARM) || TARGET_LOONGARCH64
+#define HAS_CUSTOM_BLOCKS
+#endif
+
 using System.Diagnostics;
 using System.Numerics;
 using System.Runtime.CompilerServices;
@@ -15,84 +19,181 @@ internal static partial class SpanHelpers
 {
     public static void ClearWithoutReferences(ref byte b, nuint byteLength)
     {
-        if (byteLength > Buffer.ZeroMemoryNativeThreshold)
-            goto PInvoke;
-
-        if (Vector.IsHardwareAccelerated && byteLength >= (uint)(Vector<byte>.Count))
-        {
-            // We have enough data for at least one vectorized write.
-            nuint stopLoopAtOffset = byteLength & (nuint)(nint)(2 * (int)-Vector<byte>.Count); // intentional sign extension carries the negative bit
-            nuint offset = 0;
-
-            // Loop, writing 2 vectors at a time.
-            // Compare 'numElements' rather than 'stopLoopAtOffset' because we don't want a dependency
-            // on the very recently calculated 'stopLoopAtOffset' value.
-            if (byteLength >= (uint)(2 * Vector<byte>.Count))
-            {
-                do
-                {
-                    Vector<byte>.Zero.StoreUnsafe(ref b, offset);
-                    Vector<byte>.Zero.StoreUnsafe(ref b, offset + (nuint)Vector<byte>.Count);
-                    offset += (uint)(2 * Vector<byte>.Count);
-                } while (offset < stopLoopAtOffset);
-            }
+        ref byte bEnd = ref Unsafe.Add(ref b, byteLength);
+
+        if (byteLength <= 16)
+            goto MZER02;
+        if (byteLength > 64)
+            goto MZER05;
+
+    MZER00:
+        // Clear bytes which are multiples of 16 and leave the remainder for MZER01 to handle.
+        Debug.Assert(byteLength > 16 && byteLength <= 64);
+#if HAS_CUSTOM_BLOCKS
+        Unsafe.WriteUnaligned<Block16>(ref b, default); // [0,16]
+#elif TARGET_64BIT
+        Unsafe.WriteUnaligned<long>(ref b, 0);
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 8), 0); // [0,16]
+#else
+        Unsafe.WriteUnaligned<int>(ref b, 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 4), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 8), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 12), 0); // [0,16]
+#endif
+        if (byteLength <= 32)
+            goto MZER01;
+#if HAS_CUSTOM_BLOCKS
+        Unsafe.WriteUnaligned<Block16>(ref Unsafe.Add(ref b, 16), default); // [0,32]
+#elif TARGET_64BIT
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 16), 0);
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 24), 0); // [0,32]
+#else
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 16), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 20), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 24), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 28), 0); // [0,32]
+#endif
+        if (byteLength <= 48)
+            goto MZER01;
+#if HAS_CUSTOM_BLOCKS
+        Unsafe.WriteUnaligned<Block16>(ref Unsafe.Add(ref b, 32), default); // [0,48]
+#elif TARGET_64BIT
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 32), 0);
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 40), 0); // [0,48]
+#else
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 32), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 36), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 40), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 44), 0); // [0,48]
+#endif

+    MZER01:
+        // Unconditionally clear the last 16 bytes using bEnd and return.
+        Debug.Assert(byteLength > 16 && byteLength <= 64);
+#if HAS_CUSTOM_BLOCKS
+        Unsafe.WriteUnaligned<Block16>(ref Unsafe.Add(ref bEnd, -16), default);
+#elif TARGET_64BIT
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref bEnd, -16), 0);
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref bEnd, -8), 0);
+#else
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -16), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -12), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -8), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -4), 0);
+#endif
+        return;

+    MZER02:
+        // Clear the first 8 bytes and then unconditionally clear the last 8 bytes and return.
+        if ((byteLength & 24) == 0)
+            goto MZER03;
+        Debug.Assert(byteLength >= 8 && byteLength <= 16);
+#if TARGET_64BIT
+        Unsafe.WriteUnaligned<long>(ref b, 0);
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref bEnd, -8), 0);
+#else
+        Unsafe.WriteUnaligned<int>(ref b, 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 4), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -8), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -4), 0);
+#endif
+        return;

+    MZER03:
+        // Clear the first 4 bytes and then unconditionally clear the last 4 bytes and return.
+        if ((byteLength & 4) == 0)
+            goto MZER04;
+        Debug.Assert(byteLength >= 4 && byteLength < 8);
+        Unsafe.WriteUnaligned<int>(ref b, 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -4), 0);
+        return;

+    MZER04:
+        // Clear the first byte. For pending bytes, do an unconditional clear of the last 2 bytes and return.
+        Debug.Assert(byteLength < 4);
+        if (byteLength == 0)
+            return;
+        b = 0;
+        if ((byteLength & 2) == 0)
+            return;
+        Unsafe.WriteUnaligned<short>(ref Unsafe.Add(ref bEnd, -2), 0);
+        return;

+    MZER05:
+        // PInvoke to the native version when the clear length exceeds the threshold.
+        if (byteLength > Buffer.ZeroMemoryNativeThreshold)
+        {
+            goto PInvoke;
+        }

+#if HAS_CUSTOM_BLOCKS
+        if (byteLength >= 256)
+        {
+            unsafe
+            {
+                // Try to opportunistically align the destination below. The input isn't pinned, so the GC
+                // is free to move the references. We're therefore assuming that reads may still be unaligned.
+                nuint misalignedElements = 64 - (nuint)Unsafe.AsPointer(ref b) & 63;
+                Unsafe.WriteUnaligned<Block64>(ref b, default);
+                b = ref Unsafe.Add(ref b, misalignedElements);
+                byteLength -= misalignedElements;
+            }
+        }
+#endif
+        // Clear 64 bytes at a time until the remainder is less than 64.
+        // If the remainder is greater than 16 bytes, then jump to MZER00. Otherwise, unconditionally clear the last 16 bytes and return.
+        Debug.Assert(byteLength > 64 && byteLength <= Buffer.ZeroMemoryNativeThreshold);
+        nuint n = byteLength >> 6;

+    MZER06:
+#if HAS_CUSTOM_BLOCKS
+        Unsafe.WriteUnaligned<Block64>(ref b, default);
+#elif TARGET_64BIT
+        Unsafe.WriteUnaligned<long>(ref b, 0);
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 8), 0);
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 16), 0);
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 24), 0);
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 32), 0);
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 40), 0);
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 48), 0);
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 56), 0);
+#else
+        Unsafe.WriteUnaligned<int>(ref b, 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 4), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 8), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 12), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 16), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 20), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 24), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 28), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 32), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 36), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 40), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 44), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 48), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 52), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 56), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 60), 0);
+#endif
+        b = ref Unsafe.Add(ref b, 64);
+        n--;
+        if (n != 0)
+            goto MZER06;

+        byteLength %= 64;
+        if (byteLength > 16)
+            goto MZER00;
+#if HAS_CUSTOM_BLOCKS
+        Unsafe.WriteUnaligned<Block16>(ref Unsafe.Add(ref bEnd, -16), default);
+#elif TARGET_64BIT
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref bEnd, -16), 0);
+        Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref bEnd, -8), 0);
+#else
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -16), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -12), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -8), 0);
+        Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -4), 0);
+#endif
+        return;

     PInvoke:
         Buffer._ZeroMemory(ref b, byteLength);
     }

From 8dbe6d75d7648a449200c0a7a9ee58b1ce7b47e3 Mon Sep 17 00:00:00 2001
From: EgorBo
Date: Thu, 22 Feb 2024 00:38:20 +0100
Subject: [PATCH 21/40] Fix build

---
 .../tools/Common/Internal/Runtime/ReadyToRunConstants.cs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
index 86e0aba7f1bbfc..dabc51c553564a 100644
--- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
+++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs
@@ -346,8 +346,6 @@ public enum ReadyToRunHelper

     GetCurrentManagedThreadId = 0x112,

-    NativeMemSet = 0x115,
-
     // **********************************************************************************************
     //
     // These are not actually part of the R2R file format. We have them here because it's convenient.
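Taken together, patch 20 gives ClearWithoutReferences a three-tier structure; condensed into ordinary control flow, it reads as follows (illustrative outline only, eliding the MZER labels and the exact store sequences):

    static void ClearOutline(ref byte b, nuint byteLength)
    {
        if (byteLength <= 16)
        {
            // MZER02..MZER04: a handful of possibly-overlapping 2/4/8-byte stores.
        }
        else if (byteLength <= 64)
        {
            // MZER00/MZER01: 16-byte blocks plus one overlapping 16-byte tail store.
        }
        else if (byteLength <= Buffer.ZeroMemoryNativeThreshold) // 1024 at this point in the series
        {
            // MZER05/MZER06: optionally align to a 64-byte boundary, then 64-byte
            // blocks, then reuse the 16-byte logic for the remainder.
        }
        else
        {
            Buffer._ZeroMemory(ref b, byteLength); // native memset via QCall
        }
    }
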
From 86c25d784ec8c438bc4a9b48a1fdcb5cce2135d8 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 22 Feb 2024 00:41:17 +0100 Subject: [PATCH 22/40] Remove whitespace --- src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index 15dec39bfae26b..1c0eb87f969d57 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -215,7 +215,7 @@ which is the right helper to use to allocate an object of a given type. */ CORINFO_HELP_MEMSET, // Init block of memory CORINFO_HELP_MEMCPY, // Copy block of memory - CORINFO_HELP_NATIVE_MEMSET, // Init block of memory using native memset (not safe for pDst being null, + CORINFO_HELP_NATIVE_MEMSET, // Init block of memory using native memset (not safe for pDst being null, // not safe for unbounded size, does not trigger GC) CORINFO_HELP_RUNTIMEHANDLE_METHOD, // determine a type/field/method handle at run-time From 7d4ff29de316a4e6af61483c1ceba69ece59abe6 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 23 Feb 2024 20:02:58 +0100 Subject: [PATCH 23/40] Clean up --- src/coreclr/inc/corinfo.h | 1 + src/coreclr/inc/jithelpers.h | 1 + src/coreclr/inc/readytorun.h | 3 +- src/coreclr/inc/readytorunhelpers.h | 1 + src/coreclr/jit/lower.cpp | 30 +- .../Internal/Runtime/ReadyToRunConstants.cs | 1 + .../Common/JitInterface/CorInfoHelpFunc.cs | 1 + .../ILCompiler.Compiler/Compiler/JitHelper.cs | 3 + .../JitInterface/CorInfoImpl.ReadyToRun.cs | 3 + .../ReadyToRunSignature.cs | 4 + .../JitInterface/CorInfoImpl.RyuJit.cs | 3 + src/coreclr/vm/corelib.h | 1 + src/coreclr/vm/ecall.cpp | 4 + src/coreclr/vm/jitinterface.cpp | 1 + src/coreclr/vm/metasig.h | 1 + .../System.Private.CoreLib.Shared.projitems | 1 + .../src/System/Buffer.Windows.cs | 4 +- .../src/System/Buffer.cs | 328 +++++++++++++++++- .../CompilerServices/RuntimeHelpers.cs | 27 -- .../System.Private.CoreLib/src/System/Span.cs | 14 +- .../src/System/SpanHelpers.cs | 189 ---------- 21 files changed, 393 insertions(+), 228 deletions(-) diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index d6a2bb202c953c..5fad5e4b2429e4 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -572,6 +572,7 @@ enum CorInfoHelpFunc CORINFO_HELP_INIT_PINVOKE_FRAME, // initialize an inlined PInvoke Frame for the JIT-compiler CORINFO_HELP_MEMSET, // Init block of memory + CORINFO_HELP_MEMZERO, // Init block of memory with zeroes CORINFO_HELP_MEMCPY, // Copy block of memory CORINFO_HELP_NATIVE_MEMSET, // Init block of memory using native memset (not safe for pDst being null, // not safe for unbounded size, does not trigger GC) diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index 323cf79bc208f4..a0982f3ac6520f 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -236,6 +236,7 @@ #endif DYNAMICJITHELPER(CORINFO_HELP_MEMSET, NULL, CORINFO_HELP_SIG_REG_ONLY) + DYNAMICJITHELPER(CORINFO_HELP_MEMZERO, NULL, CORINFO_HELP_SIG_REG_ONLY) DYNAMICJITHELPER(CORINFO_HELP_MEMCPY, NULL, CORINFO_HELP_SIG_REG_ONLY) JITHELPER(CORINFO_HELP_NATIVE_MEMSET, Jit_NativeMemSet, CORINFO_HELP_SIG_REG_ONLY) diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h index 97eca9f8026536..41a4aa251fa742 100644 --- a/src/coreclr/inc/readytorun.h +++ b/src/coreclr/inc/readytorun.h @@ -33,7 +33,7 @@ // R2R Version 8.0 
Changes the alignment of the Int128 type // R2R Version 9.0 adds support for the Vector512 type // R2R Version 9.1 adds new helpers to allocate objects on frozen segments -// R2R Version 9.2 adds new helper to perform native memset +// R2R Version 9.2 adds MemZero and NativeMemSet helpers struct READYTORUN_CORE_HEADER @@ -327,6 +327,7 @@ enum ReadyToRunHelper READYTORUN_HELPER_Stelem_Ref = 0x38, READYTORUN_HELPER_Ldelema_Ref = 0x39, + READYTORUN_HELPER_MemZero = 0x3E, READYTORUN_HELPER_MemSet = 0x3F, READYTORUN_HELPER_NativeMemSet = 0x40, READYTORUN_HELPER_MemCpy = 0x41, diff --git a/src/coreclr/inc/readytorunhelpers.h b/src/coreclr/inc/readytorunhelpers.h index f9c713f28d8438..bbb586e8eb4a30 100644 --- a/src/coreclr/inc/readytorunhelpers.h +++ b/src/coreclr/inc/readytorunhelpers.h @@ -29,6 +29,7 @@ HELPER(READYTORUN_HELPER_Stelem_Ref, CORINFO_HELP_ARRADDR_ST, HELPER(READYTORUN_HELPER_Ldelema_Ref, CORINFO_HELP_LDELEMA_REF, ) HELPER(READYTORUN_HELPER_MemSet, CORINFO_HELP_MEMSET, ) +HELPER(READYTORUN_HELPER_MemZero, CORINFO_HELP_MEMZERO, ) HELPER(READYTORUN_HELPER_NativeMemSet, CORINFO_HELP_NATIVE_MEMSET, ) HELPER(READYTORUN_HELPER_MemCpy, CORINFO_HELP_MEMCPY, ) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index f9cd17f6510046..511beb40a23fa0 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8125,7 +8125,18 @@ void Lowering::LowerBlockStoreAsHelperCall(GenTreeBlk* blkNode) GenTree* dataPlaceholder = comp->gtNewZeroConNode(genActualType(data)); GenTree* sizePlaceholder = comp->gtNewZeroConNode(genActualType(size)); - GenTreeCall* call = comp->gtNewHelperCallNode(helper, TYP_VOID, destPlaceholder, dataPlaceholder, sizePlaceholder); + const bool isMemzero = helper == CORINFO_HELP_MEMSET ? data->IsIntegralConst(0) : false; + + GenTreeCall* call; + if (isMemzero) + { + BlockRange().Remove(data); + call = comp->gtNewHelperCallNode(CORINFO_HELP_MEMZERO, TYP_VOID, destPlaceholder, sizePlaceholder); + } + else + { + call = comp->gtNewHelperCallNode(helper, TYP_VOID, destPlaceholder, dataPlaceholder, sizePlaceholder); + } comp->fgMorphArgs(call); LIR::Range range = LIR::SeqTree(comp, call); @@ -8136,18 +8147,22 @@ void Lowering::LowerBlockStoreAsHelperCall(GenTreeBlk* blkNode) blkNode->gtBashToNOP(); LIR::Use destUse; - LIR::Use dataUse; LIR::Use sizeUse; BlockRange().TryGetUse(destPlaceholder, &destUse); - BlockRange().TryGetUse(dataPlaceholder, &dataUse); BlockRange().TryGetUse(sizePlaceholder, &sizeUse); destUse.ReplaceWith(dest); - dataUse.ReplaceWith(data); sizeUse.ReplaceWith(size); destPlaceholder->SetUnusedValue(); - dataPlaceholder->SetUnusedValue(); sizePlaceholder->SetUnusedValue(); + if (!isMemzero) + { + LIR::Use dataUse; + BlockRange().TryGetUse(dataPlaceholder, &dataUse); + dataUse.ReplaceWith(data); + dataPlaceholder->SetUnusedValue(); + } + LowerRange(rangeStart, rangeEnd); // Finally move all GT_PUTARG_* nodes @@ -8155,8 +8170,11 @@ void Lowering::LowerBlockStoreAsHelperCall(GenTreeBlk* blkNode) MoveCFGCallArgs(call); BlockRange().Remove(destPlaceholder); - BlockRange().Remove(dataPlaceholder); BlockRange().Remove(sizePlaceholder); + if (!isMemzero) + { + BlockRange().Remove(dataPlaceholder); + } // Wrap with memory barriers on weak memory models // if the block store was volatile diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs index dabc51c553564a..a37945534865bf 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs 
+++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunConstants.cs @@ -239,6 +239,7 @@ public enum ReadyToRunHelper Stelem_Ref = 0x38, Ldelema_Ref = 0x39, + MemZero = 0x3E, MemSet = 0x3F, NativeMemSet = 0x40, MemCpy = 0x41, diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index 1c0eb87f969d57..5346806c1aff60 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -214,6 +214,7 @@ which is the right helper to use to allocate an object of a given type. */ CORINFO_HELP_INIT_PINVOKE_FRAME, // initialize an inlined PInvoke Frame for the JIT-compiler CORINFO_HELP_MEMSET, // Init block of memory + CORINFO_HELP_MEMZERO, // Init block of memory with zeroes CORINFO_HELP_MEMCPY, // Copy block of memory CORINFO_HELP_NATIVE_MEMSET, // Init block of memory using native memset (not safe for pDst being null, // not safe for unbounded size, does not trigger GC) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index cb9431ed2970ad..5240ff4d013fed 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -144,6 +144,9 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, case ReadyToRunHelper.MemSet: mangledName = "RhRuntimeHelpers_MemSet"; break; + case ReadyToRunHelper.MemZero: + mangledName = "RhRuntimeHelpers_MemZero"; + break; case ReadyToRunHelper.NativeMemSet: mangledName = "memset"; break; diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index 7a89fa850575d8..ad83b1eb42a5d6 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -1028,6 +1028,9 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_MEMSET: id = ReadyToRunHelper.MemSet; break; + case CorInfoHelpFunc.CORINFO_HELP_MEMZERO: + id = ReadyToRunHelper.MemZero; + break; case CorInfoHelpFunc.CORINFO_HELP_MEMCPY: id = ReadyToRunHelper.MemCpy; break; diff --git a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs index 41f7e343c17d95..0eae2f10cb8f00 100644 --- a/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs +++ b/src/coreclr/tools/aot/ILCompiler.Reflection.ReadyToRun/ReadyToRunSignature.cs @@ -1685,6 +1685,10 @@ private void ParseHelper(StringBuilder builder) builder.Append("MEM_SET"); break; + case ReadyToRunHelper.MemZero: + builder.Append("MEM_ZERO"); + break; + case ReadyToRunHelper.MemCpy: builder.Append("MEM_CPY"); break; diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index 432b7b2c4ec671..8755580e3f2903 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -542,6 +542,9 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_MEMSET: id = 
ReadyToRunHelper.MemSet; break; + case CorInfoHelpFunc.CORINFO_HELP_MEMZERO: + id = ReadyToRunHelper.MemZero; + break; case CorInfoHelpFunc.CORINFO_HELP_MEMCPY: id = ReadyToRunHelper.MemCpy; break; diff --git a/src/coreclr/vm/corelib.h b/src/coreclr/vm/corelib.h index cf8e7b0a8a172a..5f1a86d9203ecd 100644 --- a/src/coreclr/vm/corelib.h +++ b/src/coreclr/vm/corelib.h @@ -633,6 +633,7 @@ DEFINE_METHOD(RUNTIME_HELPERS, ALLOC_TAILCALL_ARG_BUFFER, AllocTailCallArgB DEFINE_METHOD(RUNTIME_HELPERS, GET_TAILCALL_INFO, GetTailCallInfo, NoSig) DEFINE_METHOD(RUNTIME_HELPERS, DISPATCH_TAILCALLS, DispatchTailCalls, NoSig) DEFINE_METHOD(RUNTIME_HELPERS, MEMSET, MemSet, SM_RefByte_Byte_UIntPtr_RetVoid) +DEFINE_METHOD(RUNTIME_HELPERS, MEMZERO, MemZero, SM_RefByte_UIntPtr_RetVoid) DEFINE_METHOD(RUNTIME_HELPERS, MEMCOPY, MemCopy, SM_RefByte_RefByte_UIntPtr_RetVoid) DEFINE_CLASS(UNSAFE, CompilerServices, Unsafe) diff --git a/src/coreclr/vm/ecall.cpp b/src/coreclr/vm/ecall.cpp index 6a93b575d9f0ca..0d914bd4bd4ebc 100644 --- a/src/coreclr/vm/ecall.cpp +++ b/src/coreclr/vm/ecall.cpp @@ -149,6 +149,10 @@ void ECall::PopulateManagedHelpers() pDest = pMD->GetMultiCallableAddrOfCode(); SetJitHelperFunction(CORINFO_HELP_MEMSET, pDest); + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__RUNTIME_HELPERS__MEMZERO)); + pDest = pMD->GetMultiCallableAddrOfCode(); + SetJitHelperFunction(CORINFO_HELP_MEMZERO, pDest); + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__RUNTIME_HELPERS__MEMCOPY)); pDest = pMD->GetMultiCallableAddrOfCode(); SetJitHelperFunction(CORINFO_HELP_MEMCPY, pDest); diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 72ed7893eee505..5e6b0cbeeafdd1 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -10691,6 +10691,7 @@ void* CEEJitInfo::getHelperFtn(CorInfoHelpFunc ftnNum, /* IN */ dynamicFtnNum == DYNAMIC_CORINFO_HELP_ARRADDR_ST || dynamicFtnNum == DYNAMIC_CORINFO_HELP_LDELEMA_REF || dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMSET || + dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMZERO || dynamicFtnNum == DYNAMIC_CORINFO_HELP_MEMCPY) { Precode* pPrecode = Precode::GetPrecodeFromEntryPoint((PCODE)hlpDynamicFuncTable[dynamicFtnNum].pfnHelper); diff --git a/src/coreclr/vm/metasig.h b/src/coreclr/vm/metasig.h index ef8ac7943fd7cc..182acc55e643fe 100644 --- a/src/coreclr/vm/metasig.h +++ b/src/coreclr/vm/metasig.h @@ -239,6 +239,7 @@ DEFINE_METASIG(SM(UIntPtr_RetIntPtr, U, I)) DEFINE_METASIG(SM(PtrByte_PtrByte_Int_RetVoid, P(b) P(b) i, v)) DEFINE_METASIG(SM(RefByte_RefByte_UIntPtr_RetVoid, r(b) r(b) U, v)) DEFINE_METASIG(SM(RefByte_Byte_UIntPtr_RetVoid, r(b) b U, v)) +DEFINE_METASIG(SM(RefByte_UIntPtr_RetVoid, r(b) U, v)) DEFINE_METASIG(SM(PtrVoid_Byte_UInt_RetVoid, P(v) b K, v)) DEFINE_METASIG(SM(RefObj_IntPtr_RetVoid, r(j) I, v)) DEFINE_METASIG(SM(RefObj_RefIntPtr_RetVoid, r(j) r(I), v)) diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index a73e8247a58e72..849a03e3128751 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -882,6 +882,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs index b0f0fe29b20680..3f579e9cc23741 100644 --- 
a/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs @@ -8,9 +8,9 @@ public static partial class Buffer #if TARGET_ARM64 // Determine optimal value for Windows. // https://github.com/dotnet/runtime/issues/8896 - private static nuint MemmoveNativeThreshold => nuint.MaxValue; + internal static nuint MemmoveNativeThreshold => nuint.MaxValue; #else - private const nuint MemmoveNativeThreshold = 2048; + internal const nuint MemmoveNativeThreshold = 2048; #endif // TODO: Determine optimal value internal const nuint ZeroMemoryNativeThreshold = 1024; diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.cs index c854c22110eacd..ebe99ff1847aa8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffer.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffer.cs @@ -8,7 +8,6 @@ using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; namespace System { @@ -334,7 +333,7 @@ internal static unsafe void Memmove(ref byte dest, ref byte src, nuint len) // Non-inlinable wrapper around the QCall that avoids polluting the fast path // with P/Invoke prolog/epilog. [MethodImpl(MethodImplOptions.NoInlining)] - private static unsafe void _Memmove(ref byte dest, ref byte src, nuint len) + internal static unsafe void _Memmove(ref byte dest, ref byte src, nuint len) { fixed (byte* pDest = &dest) fixed (byte* pSrc = &src) @@ -440,4 +439,329 @@ private static void _BulkMoveWithWriteBarrier(ref byte destination, ref byte sou #endif // !MONO } + + internal static partial class SpanHelpers + { + [Intrinsic] // Unrolled for small sizes + public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength) + { + if (byteLength == 0) + return; + +#if TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 + // The exact matrix on when ZeroMemory is faster than InitBlockUnaligned is very complex. The factors to consider include + // type of hardware and memory alignment. This threshold was chosen as a good balance across different configurations. + if (byteLength > 768) + goto PInvoke; + Unsafe.InitBlockUnaligned(ref b, 0, (uint)byteLength); + return; +#else + // TODO: Optimize other platforms to be on par with AMD64 CoreCLR + // Note: It's important that this switch handles lengths at least up to 22. + // See notes below near the main loop for why. + + // The switch will be very fast since it can be implemented using a jump + // table in assembly. See http://stackoverflow.com/a/449297/4077294 for more info. 
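As a point of reference for the switch that follows, here is a minimal sketch (illustrative only, not part of the patch) of how this unrolled path is typically reached, assuming the usual routing of Span<byte>.Clear through ClearWithoutReferences for byte-sized elements:

    Span<byte> header = stackalloc byte[22];
    header.Clear(); // lengths 0-22 resolve in the switch below: a jump table plus a few stores, no loop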
+ + switch (byteLength) + { + case 1: + b = 0; + return; + case 2: + Unsafe.As(ref b) = 0; + return; + case 3: + Unsafe.As(ref b) = 0; + Unsafe.Add(ref b, 2) = 0; + return; + case 4: + Unsafe.As(ref b) = 0; + return; + case 5: + Unsafe.As(ref b) = 0; + Unsafe.Add(ref b, 4) = 0; + return; + case 6: + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; + return; + case 7: + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; + Unsafe.Add(ref b, 6) = 0; + return; + case 8: +#if TARGET_64BIT + Unsafe.As(ref b) = 0; +#else + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; +#endif + return; + case 9: +#if TARGET_64BIT + Unsafe.As(ref b) = 0; +#else + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; +#endif + Unsafe.Add(ref b, 8) = 0; + return; + case 10: +#if TARGET_64BIT + Unsafe.As(ref b) = 0; +#else + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; +#endif + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; + return; + case 11: +#if TARGET_64BIT + Unsafe.As(ref b) = 0; +#else + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; +#endif + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; + Unsafe.Add(ref b, 10) = 0; + return; + case 12: +#if TARGET_64BIT + Unsafe.As(ref b) = 0; +#else + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; +#endif + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; + return; + case 13: +#if TARGET_64BIT + Unsafe.As(ref b) = 0; +#else + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; +#endif + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; + Unsafe.Add(ref b, 12) = 0; + return; + case 14: +#if TARGET_64BIT + Unsafe.As(ref b) = 0; +#else + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; +#endif + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; + return; + case 15: +#if TARGET_64BIT + Unsafe.As(ref b) = 0; +#else + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; +#endif + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; + Unsafe.Add(ref b, 14) = 0; + return; + case 16: +#if TARGET_64BIT + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; +#else + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; +#endif + return; + case 17: +#if TARGET_64BIT + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; +#else + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; +#endif + Unsafe.Add(ref b, 16) = 0; + return; + case 18: +#if TARGET_64BIT + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; +#else + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; +#endif + Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; + return; + case 19: +#if TARGET_64BIT + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; +#else + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; +#endif + Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; + Unsafe.Add(ref b, 18) = 0; + return; + case 20: +#if TARGET_64BIT + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; +#else + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; + Unsafe.As(ref 
Unsafe.Add(ref b, 8)) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; +#endif + Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; + return; + case 21: +#if TARGET_64BIT + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; +#else + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; +#endif + Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; + Unsafe.Add(ref b, 20) = 0; + return; + case 22: +#if TARGET_64BIT + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; +#else + Unsafe.As(ref b) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; +#endif + Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; + Unsafe.As(ref Unsafe.Add(ref b, 20)) = 0; + return; + } + + // P/Invoke into the native version for large lengths + if (byteLength >= 512) goto PInvoke; + + nuint i = 0; // byte offset at which we're copying + + if (((nuint)Unsafe.AsPointer(ref b) & 3) != 0) + { + if (((nuint)Unsafe.AsPointer(ref b) & 1) != 0) + { + b = 0; + i += 1; + if (((nuint)Unsafe.AsPointer(ref b) & 2) != 0) + goto IntAligned; + } + Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; + i += 2; + } + + IntAligned: + + // On 64-bit IntPtr.Size == 8, so we want to advance to the next 8-aligned address. If + // (int)b % 8 is 0, 5, 6, or 7, we will already have advanced by 0, 3, 2, or 1 + // bytes to the next aligned address (respectively), so do nothing. On the other hand, + // if it is 1, 2, 3, or 4 we will want to copy-and-advance another 4 bytes until + // we're aligned. + // The thing 1, 2, 3, and 4 have in common that the others don't is that if you + // subtract one from them, their 3rd lsb will not be set. Hence, the below check. + + if ((((nuint)Unsafe.AsPointer(ref b) - 1) & 4) == 0) + { + Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; + i += 4; + } + + nuint end = byteLength - 16; + byteLength -= i; // lower 4 bits of byteLength represent how many bytes are left *after* the unrolled loop + + // We know due to the above switch-case that this loop will always run 1 iteration; max + // bytes we clear before checking is 23 (7 to align the pointers, 16 for 1 iteration) so + // the switch handles lengths 0-22. + Debug.Assert(end >= 7 && i <= end); + + // This is separated out into a different variable, so the i + 16 addition can be + // performed at the start of the pipeline and the loop condition does not have + // a dependency on the writes. + nuint counter; + + do + { + counter = i + 16; + + // This loop looks very costly since there appear to be a bunch of temporary values + // being created with the adds, but the jit (for x86 anyways) will convert each of + // these to use memory addressing operands. + + // So the only cost is a bit of code size, which is made up for by the fact that + // we save on writes to b. 
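A worked example of the IntAligned check above (an explanatory note, not from the patch) - the check reads the original address of b, since only i has advanced so far:

    // addr % 8 == 5: the byte/short writes advanced i by 3, so addr + i is already 8-aligned;
    //                (5 - 1) & 4 == 4, so the extra 4-byte write is correctly skipped.
    // addr % 8 == 4: i is still 0; (4 - 1) & 4 == 0, so the 4-byte write runs and
    //                addr + i becomes 8-aligned.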
+ +#if TARGET_64BIT + Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; + Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 8)) = 0; +#else + Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; + Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 4)) = 0; + Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 8)) = 0; + Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 12)) = 0; +#endif + + i = counter; + + // See notes above for why this wasn't used instead + // i += 16; + } + while (counter <= end); + + if ((byteLength & 8) != 0) + { +#if TARGET_64BIT + Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; +#else + Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; + Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 4)) = 0; +#endif + i += 8; + } + if ((byteLength & 4) != 0) + { + Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; + i += 4; + } + if ((byteLength & 2) != 0) + { + Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; + i += 2; + } + if ((byteLength & 1) != 0) + { + Unsafe.AddByteOffset(ref b, i) = 0; + // We're not using i after this, so not needed + // i += 1; + } + + return; +#endif + + PInvoke: + Buffer._ZeroMemory(ref b, byteLength); + } + } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs index c62a72ac491891..cf213590851497 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs @@ -129,32 +129,5 @@ internal static bool IsPrimitiveType(this CorElementType et) [Intrinsic] internal static bool IsKnownConstant(int t) => false; #pragma warning restore IDE0060 - -#if NATIVEAOT - [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemSet")] -#endif - private static unsafe void MemSet(ref byte dest, byte value, nuint size) - { - if (size > 0) - { - // Implicit nullcheck - _ = Unsafe.ReadUnaligned(ref dest); - SpanHelpers.Fill(ref dest, size, value); - } - } - -#if NATIVEAOT - [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemCopy")] -#endif - private static unsafe void MemCopy(ref byte dest, ref byte src, nuint size) - { - if (size > 0) - { - // Implicit nullchecks - _ = Unsafe.ReadUnaligned(ref dest); - _ = Unsafe.ReadUnaligned(ref src); - Buffer.Memmove(ref dest, ref src, size); - } - } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Span.cs b/src/libraries/System.Private.CoreLib/src/System/Span.cs index 1c66a341b0fde1..aaf3763d81b755 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Span.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Span.cs @@ -300,7 +300,19 @@ public unsafe void Clear() [MethodImpl(MethodImplOptions.AggressiveInlining)] public unsafe void Fill(T value) { - SpanHelpers.Fill(ref _reference, (uint)_length, value); + if (sizeof(T) == 1) + { + // Special-case single-byte types like byte / sbyte / bool. + // The runtime eventually calls memset, which can efficiently support large buffers. + // We don't need to check IsReferenceOrContainsReferences because no references + // can ever be stored in types this small. + Unsafe.InitBlockUnaligned(ref Unsafe.As(ref _reference), *(byte*)&value, (uint)_length); + } + else + { + // Call our optimized workhorse method for all other types. 
+ SpanHelpers.Fill(ref _reference, (uint)_length, value); + } } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs index 9d1ae8aaf8e0ea..aa7ed473d9feff 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs @@ -1,12 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -#if TARGET_AMD64 || TARGET_ARM64 || (TARGET_32BIT && !TARGET_ARM) || TARGET_LOONGARCH64 -#define HAS_CUSTOM_BLOCKS -#endif - using System.Diagnostics; -using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -17,190 +12,6 @@ namespace System { internal static partial class SpanHelpers { - [Intrinsic] // Unrolled for small sizes - public static void ClearWithoutReferences(ref byte b, nuint byteLength) - { - ref byte bEnd = ref Unsafe.Add(ref b, byteLength); - - if (byteLength <= 16) - goto MZER02; - if (byteLength > 64) - goto MZER05; - - MZER00: - // Clear bytes which are multiples of 16 and leave the remainder for MZER01 to handle. - Debug.Assert(byteLength > 16 && byteLength <= 64); -#if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref b, default); // [0,16] -#elif TARGET_64BIT - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 8), 0); // [0,16] -#else - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 4), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 12), 0); // [0,16] -#endif - if (byteLength <= 32) - goto MZER01; -#if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), default); // [0,32] -#elif TARGET_64BIT - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 24), 0); // [0,32] -#else - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 20), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 24), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 28), 0); // [0,32] -#endif - if (byteLength <= 48) - goto MZER01; -#if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), default); // [0,48] -#elif TARGET_64BIT - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 40), 0); // [0,48] -#else - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 36), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 40), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 44), 0); // [0,48] -#endif - - MZER01: - // Unconditionally clear the last 16 bytes using bEnd and return. - Debug.Assert(byteLength > 16 && byteLength <= 64); -#if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), default); -#elif TARGET_64BIT - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); -#else - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -12), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -4), 0); -#endif - return; - - MZER02: - // Clear the first 8 bytes and then unconditionally clear the last 8 bytes and return. 
- if ((byteLength & 24) == 0) - goto MZER03; - Debug.Assert(byteLength >= 8 && byteLength <= 16); -#if TARGET_64BIT - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); -#else - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 4), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -4), 0); -#endif - return; - - MZER03: - // Clear the first 4 bytes and then unconditionally clear the last 4 bytes and return. - if ((byteLength & 4) == 0) - goto MZER04; - Debug.Assert(byteLength >= 4 && byteLength < 8); - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -4), 0); - return; - - MZER04: - // Clear the first byte. For pending bytes, do an unconditionally clear of the last 2 bytes and return. - Debug.Assert(byteLength < 4); - if (byteLength == 0) - return; - b = 0; - if ((byteLength & 2) == 0) - return; - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -2), 0); - return; - - MZER05: - // PInvoke to the native version when the clear length exceeds the threshold. - if (byteLength > Buffer.ZeroMemoryNativeThreshold) - { - goto PInvoke; - } - -#if HAS_CUSTOM_BLOCKS - if (byteLength >= 256) - { - unsafe - { - // Try to opportunistically align the destination below. The input isn't pinned, so the GC - // is free to move the references. We're therefore assuming that reads may still be unaligned. - nuint misalignedElements = 64 - (nuint)Unsafe.AsPointer(ref b) & 63; - Unsafe.WriteUnaligned(ref b, default); - b = ref Unsafe.Add(ref b, misalignedElements); - byteLength -= misalignedElements; - } - } -#endif - // Clear 64-bytes at a time until the remainder is less than 64. - // If remainder is greater than 16 bytes, then jump to MZER00. Otherwise, unconditionally clear the last 16 bytes and return. 
- Debug.Assert(byteLength > 64 && byteLength <= Buffer.ZeroMemoryNativeThreshold); - nuint n = byteLength >> 6; - - MZER06: -#if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref b, default); -#elif TARGET_64BIT - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 24), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 40), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 48), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 56), 0); -#else - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 4), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 12), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 20), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 24), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 28), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 36), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 40), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 44), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 48), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 52), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 56), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 60), 0); -#endif - b = ref Unsafe.Add(ref b, 64); - n--; - if (n != 0) - goto MZER06; - - byteLength %= 64; - if (byteLength > 16) - goto MZER00; -#if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), default); -#elif TARGET_64BIT - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); -#else - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -12), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -4), 0); -#endif - return; - - PInvoke: - Buffer._ZeroMemory(ref b, byteLength); - } - public static unsafe void ClearWithReferences(ref IntPtr ip, nuint pointerSizeLength) { Debug.Assert((int)Unsafe.AsPointer(ref ip) % sizeof(IntPtr) == 0, "Should've been aligned on natural word boundary."); From f10915b60ee58eac56329e6da89a7932e88baf27 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 23 Feb 2024 20:03:14 +0100 Subject: [PATCH 24/40] Add file --- .../CompilerServices/RuntimeHelpers.MemOps.cs | 537 ++++++++++++++++++ 1 file changed, 537 insertions(+) create mode 100644 src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.MemOps.cs diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.MemOps.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.MemOps.cs new file mode 100644 index 00000000000000..ab7f098f5db548 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.MemOps.cs @@ -0,0 +1,537 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+#if TARGET_AMD64 || TARGET_ARM64 || (TARGET_32BIT && !TARGET_ARM) || TARGET_LOONGARCH64
+#define HAS_CUSTOM_BLOCKS
+#endif
+
+using System.Diagnostics;
+using System.Numerics;
+
+namespace System.Runtime.CompilerServices
+{
+    public static partial class RuntimeHelpers
+    {
+#if NATIVEAOT
+        [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemSet")]
+#endif
+        private static unsafe void MemSet(ref byte dest, byte value, nuint numElements)
+        {
+            if (numElements == 0)
+            {
+                return;
+            }
+
+            // Implicit nullcheck
+            _ = Unsafe.ReadUnaligned<byte>(ref dest);
+
+            // Early check to see if it's even possible to vectorize - the JIT will turn this
+            // check into a const. Since the element type here is byte, the only requirement
+            // is that vectorization must be hardware-accelerated.
+
+            if (!Vector.IsHardwareAccelerated) { goto CannotVectorize; }
+
+            if (numElements >= (uint)(Vector<byte>.Count))
+            {
+                // We have enough data for at least one vectorized write.
+
+                Vector<byte> vector = new(value);
+
+                nuint stopLoopAtOffset = numElements & (nuint)(nint)(2 * (int)-Vector<byte>.Count); // intentional sign extension carries the negative bit
+                nuint offset = 0;
+
+                // Loop, writing 2 vectors at a time.
+                // Compare 'numElements' rather than 'stopLoopAtOffset' because we don't want a dependency
+                // on the very recently calculated 'stopLoopAtOffset' value.
+
+                if (numElements >= (uint)(2 * Vector<byte>.Count))
+                {
+                    do
+                    {
+                        Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, offset), vector);
+                        Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, offset + (nuint)Vector<byte>.Count), vector);
+                        offset += (uint)(2 * Vector<byte>.Count);
+                    } while (offset < stopLoopAtOffset);
+                }
+
+                // At this point, if any data remains to be written, it's strictly less than
+                // 2 * sizeof(Vector<byte>) bytes. The loop above had us write an even number of vectors.
+                // If the total byte length instead involves us writing an odd number of vectors, write
+                // one additional vector now. The bit check below tells us if we're in an "odd vector
+                // count" situation.
+
+                if ((numElements & (nuint)Vector<byte>.Count) != 0)
+                {
+                    Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, offset), vector);
+                }
+
+                // It's possible that some small buffer remains to be populated - something that won't
+                // fit an entire vector's worth of data. Instead of falling back to a loop, we'll write
+                // a vector at the very end of the buffer. This may involve overwriting previously
+                // populated data, which is fine since we're splatting the same value for all entries.
+                // There's no need to perform a length check here because we already performed this
+                // check before entering the vectorized code path.
+
+                Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, numElements - (nuint)Vector<byte>.Count), vector);
+
+                // And we're done!
+                return;
+            }
+
+        CannotVectorize:
+            // If we reached this point, vectorization is unavailable or there are too few
+            // bytes for it to pay off. Fall back to an unrolled loop.
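The closing store of the vectorized path deserves a concrete number; a worked illustration (not part of the patch), assuming Vector<byte>.Count == 16:

    // numElements == 40: the main loop writes bytes [0,32); 40 & 16 == 0, so no odd
    // trailing vector; the final store targets offset 40 - 16 = 24 and covers [24,40),
    // harmlessly re-splatting [24,32) with the same value.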
+
+            nuint i = 0;
+
+            // Write 8 elements at a time
+            if (numElements >= 8)
+            {
+                nuint stopLoopAtOffset = numElements & ~(nuint)7;
+                do
+                {
+                    Unsafe.Add(ref dest, (nint)i + 0) = value;
+                    Unsafe.Add(ref dest, (nint)i + 1) = value;
+                    Unsafe.Add(ref dest, (nint)i + 2) = value;
+                    Unsafe.Add(ref dest, (nint)i + 3) = value;
+                    Unsafe.Add(ref dest, (nint)i + 4) = value;
+                    Unsafe.Add(ref dest, (nint)i + 5) = value;
+                    Unsafe.Add(ref dest, (nint)i + 6) = value;
+                    Unsafe.Add(ref dest, (nint)i + 7) = value;
+                } while ((i += 8) < stopLoopAtOffset);
+            }
+
+            // Write next 4 elements if needed
+            if ((numElements & 4) != 0)
+            {
+                Unsafe.Add(ref dest, (nint)i + 0) = value;
+                Unsafe.Add(ref dest, (nint)i + 1) = value;
+                Unsafe.Add(ref dest, (nint)i + 2) = value;
+                Unsafe.Add(ref dest, (nint)i + 3) = value;
+                i += 4;
+            }
+
+            // Write next 2 elements if needed
+            if ((numElements & 2) != 0)
+            {
+                Unsafe.Add(ref dest, (nint)i + 0) = value;
+                Unsafe.Add(ref dest, (nint)i + 1) = value;
+                i += 2;
+            }
+
+            // Write final element if needed
+            if ((numElements & 1) != 0)
+            {
+                Unsafe.Add(ref dest, (nint)i) = value;
+            }
+        }
+
+#if NATIVEAOT
+        [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemZero")]
+#endif
+        private static unsafe void MemZero(ref byte b, nuint byteLength)
+        {
+            if (byteLength == 0)
+                return;
+
+            // Implicit nullcheck
+            _ = Unsafe.ReadUnaligned<byte>(ref b);
+
+            ref byte bEnd = ref Unsafe.Add(ref b, byteLength);
+
+            if (byteLength <= 16)
+                goto MZER02;
+            if (byteLength > 64)
+                goto MZER05;
+
+        MZER00:
+            // Clear bytes which are multiples of 16 and leave the remainder for MZER01 to handle.
+            Debug.Assert(byteLength > 16 && byteLength <= 64);
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.WriteUnaligned<Buffer.Block16>(ref b, default);
+#elif TARGET_64BIT
+            Unsafe.WriteUnaligned<long>(ref b, 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 8), 0);
+#else
+            Unsafe.WriteUnaligned<int>(ref b, 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 4), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 8), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 12), 0);
+#endif
+            if (byteLength <= 32)
+                goto MZER01;
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.WriteUnaligned<Buffer.Block16>(ref Unsafe.Add(ref b, 16), default);
+#elif TARGET_64BIT
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 16), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 24), 0);
+#else
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 16), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 20), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 24), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 28), 0);
+#endif
+            if (byteLength <= 48)
+                goto MZER01;
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.WriteUnaligned<Buffer.Block16>(ref Unsafe.Add(ref b, 32), default);
+#elif TARGET_64BIT
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 32), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 40), 0);
+#else
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 32), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 36), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 40), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 44), 0);
+#endif
+
+        MZER01:
+            // Unconditionally clear the last 16 bytes using bEnd and return.
+            Debug.Assert(byteLength > 16 && byteLength <= 64);
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.WriteUnaligned<Buffer.Block16>(ref Unsafe.Add(ref bEnd, -16), default);
+#elif TARGET_64BIT
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref bEnd, -16), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref bEnd, -8), 0);
+#else
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -16), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -12), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -8), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -4), 0);
+#endif
+            return;
+
+        MZER02:
+            // Clear the first 8 bytes and then unconditionally clear the last 8 bytes and return.
+            if ((byteLength & 24) == 0)
+                goto MZER03;
+            Debug.Assert(byteLength >= 8 && byteLength <= 16);
+#if TARGET_64BIT
+            Unsafe.WriteUnaligned<long>(ref b, 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref bEnd, -8), 0);
+#else
+            Unsafe.WriteUnaligned<int>(ref b, 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 4), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -8), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -4), 0);
+#endif
+            return;
+
+        MZER03:
+            // Clear the first 4 bytes and then unconditionally clear the last 4 bytes and return.
+            if ((byteLength & 4) == 0)
+                goto MZER04;
+            Debug.Assert(byteLength >= 4 && byteLength < 8);
+            Unsafe.WriteUnaligned<int>(ref b, 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -4), 0);
+            return;
+
+        MZER04:
+            // Clear the first byte. For pending bytes, do an unconditional clear of the last 2 bytes and return.
+            Debug.Assert(byteLength < 4);
+            if (byteLength == 0)
+                return;
+            b = 0;
+            if ((byteLength & 2) == 0)
+                return;
+            Unsafe.WriteUnaligned<short>(ref Unsafe.Add(ref bEnd, -2), 0);
+            return;
+
+        MZER05:
+            // PInvoke to the native version when the clear length exceeds the threshold.
+            if (byteLength > Buffer.ZeroMemoryNativeThreshold)
+            {
+                goto PInvoke;
+            }
+
+#if HAS_CUSTOM_BLOCKS
+            if (byteLength >= 256)
+            {
+                unsafe
+                {
+                    // Try to opportunistically align the destination below. The input isn't pinned, so the GC
+                    // is free to move the references. We're therefore assuming that reads may still be unaligned.
+                    nuint misalignedElements = 64 - (nuint)Unsafe.AsPointer(ref b) & 63;
+                    Unsafe.WriteUnaligned<Buffer.Block64>(ref b, default);
+                    b = ref Unsafe.Add(ref b, misalignedElements);
+                    byteLength -= misalignedElements;
+                }
+            }
+#endif
+            // Clear 64 bytes at a time until the remainder is less than 64.
+            // If remainder is greater than 16 bytes, then jump to MZER00. Otherwise, unconditionally clear the last 16 bytes and return.
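One subtlety in the alignment block above (an explanatory note, not from the patch): in C# the subtraction binds tighter than `&`, so `64 - ptr & 63` parses as `(64 - ptr) & 63`, the distance to the next 64-byte boundary:

    // addr % 64 == 1  ->  (64 - 1) & 63 == 63; skip 63 bytes, all covered by the 64-byte write
    // addr % 64 == 0  ->  (64 - 0) & 63 == 0;  already aligned, advance nothing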
+            Debug.Assert(byteLength > 64 && byteLength <= Buffer.ZeroMemoryNativeThreshold);
+            nuint n = byteLength >> 6;
+
+        MZER06:
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.WriteUnaligned<Buffer.Block64>(ref b, default);
+#elif TARGET_64BIT
+            Unsafe.WriteUnaligned<long>(ref b, 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 8), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 16), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 24), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 32), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 40), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 48), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref b, 56), 0);
+#else
+            Unsafe.WriteUnaligned<int>(ref b, 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 4), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 8), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 12), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 16), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 20), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 24), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 28), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 32), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 36), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 40), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 44), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 48), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 52), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 56), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref b, 60), 0);
+#endif
+            b = ref Unsafe.Add(ref b, 64);
+            n--;
+            if (n != 0)
+                goto MZER06;
+
+            byteLength %= 64;
+            if (byteLength > 16)
+                goto MZER00;
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.WriteUnaligned<Buffer.Block16>(ref Unsafe.Add(ref bEnd, -16), default);
+#elif TARGET_64BIT
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref bEnd, -16), 0);
+            Unsafe.WriteUnaligned<long>(ref Unsafe.Add(ref bEnd, -8), 0);
+#else
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -16), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -12), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -8), 0);
+            Unsafe.WriteUnaligned<int>(ref Unsafe.Add(ref bEnd, -4), 0);
+#endif
+            return;
+
+        PInvoke:
+            Buffer._ZeroMemory(ref b, byteLength);
+        }
+
+#if NATIVEAOT
+        [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemCopy")]
+#endif
+        private static unsafe void MemCopy(ref byte dest, ref byte src, nuint len)
+        {
+            if (len == 0)
+            {
+                return;
+            }
+
+            // Implicit nullchecks
+            _ = Unsafe.ReadUnaligned<byte>(ref dest);
+            _ = Unsafe.ReadUnaligned<byte>(ref src);
+
+            // P/Invoke into the native version when the buffers are overlapping.
+            if (((nuint)(nint)Unsafe.ByteOffset(ref src, ref dest) < len) || ((nuint)(nint)Unsafe.ByteOffset(ref dest, ref src) < len))
+            {
+                goto BuffersOverlap;
+            }
+
+            // Use "(IntPtr)(nint)len" to avoid overflow checking on the explicit cast to IntPtr
+
+            ref byte srcEnd = ref Unsafe.Add(ref src, (IntPtr)(nint)len);
+            ref byte destEnd = ref Unsafe.Add(ref dest, (IntPtr)(nint)len);
+
+            if (len <= 16)
+                goto MCPY02;
+            if (len > 64)
+                goto MCPY05;
+
+        MCPY00:
+            // Copy bytes which are multiples of 16 and leave the remainder for MCPY01 to handle.
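The unsigned wrap-around in MemCopy's overlap test above is worth a worked example (addresses are illustrative):

    // src = 0x1000, dest = 0x1008, len = 16:
    //   dest - src == 8, and 8 < 16             -> regions overlap, take the memmove path
    //   src - dest == (nuint)(-8), a huge value -> the symmetric test stays quiet
    // Disjoint buffers make both differences >= len, so neither test fires.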
+            Debug.Assert(len > 16 && len <= 64);
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.As<byte, Buffer.Block16>(ref dest) = Unsafe.As<byte, Buffer.Block16>(ref src); // [0,16]
+#elif TARGET_64BIT
+            Unsafe.As<byte, long>(ref dest) = Unsafe.As<byte, long>(ref src);
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 8)); // [0,16]
+#else
+            Unsafe.As<byte, int>(ref dest) = Unsafe.As<byte, int>(ref src);
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 4));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 8));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 12)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 12)); // [0,16]
+#endif
+            if (len <= 32)
+                goto MCPY01;
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.As<byte, Buffer.Block16>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, Buffer.Block16>(ref Unsafe.Add(ref src, 16)); // [0,32]
+#elif TARGET_64BIT
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 16));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 24)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 24)); // [0,32]
+#else
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 16));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 20)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 20));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 24)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 24));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 28)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 28)); // [0,32]
+#endif
+            if (len <= 48)
+                goto MCPY01;
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.As<byte, Buffer.Block16>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, Buffer.Block16>(ref Unsafe.Add(ref src, 32)); // [0,48]
+#elif TARGET_64BIT
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 32));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 40)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 40)); // [0,48]
+#else
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 32));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 36)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 36));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 40)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 40));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 44)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 44)); // [0,48]
+#endif
+
+        MCPY01:
+            // Unconditionally copy the last 16 bytes using destEnd and srcEnd and return.
+            Debug.Assert(len > 16 && len <= 64);
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.As<byte, Buffer.Block16>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, Buffer.Block16>(ref Unsafe.Add(ref srcEnd, -16));
+#elif TARGET_64BIT
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -16));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -8));
+#else
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -16));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -12)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -12));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -8));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -4));
+#endif
+            return;
+
+        MCPY02:
+            // Copy the first 8 bytes and then unconditionally copy the last 8 bytes and return.
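Why the `(len & 24) == 0` test below works (an explanatory note, not from the patch): len is at most 16 on this path, so only bits 3 and 4 matter:

    // len in [8,15] sets bit 3; len == 16 sets bit 4 -> (len & 24) != 0, the 8-byte copies here suffice
    // len in [0,7] leaves both bits clear            -> fall through to the 4-byte / tail handling at MCPY03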
+            if ((len & 24) == 0)
+                goto MCPY03;
+            Debug.Assert(len >= 8 && len <= 16);
+#if TARGET_64BIT
+            Unsafe.As<byte, long>(ref dest) = Unsafe.As<byte, long>(ref src);
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -8));
+#else
+            Unsafe.As<byte, int>(ref dest) = Unsafe.As<byte, int>(ref src);
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 4));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -8));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -4));
+#endif
+            return;
+
+        MCPY03:
+            // Copy the first 4 bytes and then unconditionally copy the last 4 bytes and return.
+            if ((len & 4) == 0)
+                goto MCPY04;
+            Debug.Assert(len >= 4 && len < 8);
+            Unsafe.As<byte, int>(ref dest) = Unsafe.As<byte, int>(ref src);
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -4));
+            return;
+
+        MCPY04:
+            // Copy the first byte. For pending bytes, do an unconditional copy of the last 2 bytes and return.
+            Debug.Assert(len < 4);
+            if (len == 0)
+                return;
+            dest = src;
+            if ((len & 2) == 0)
+                return;
+            Unsafe.As<byte, short>(ref Unsafe.Add(ref destEnd, -2)) = Unsafe.As<byte, short>(ref Unsafe.Add(ref srcEnd, -2));
+            return;
+
+        MCPY05:
+            // PInvoke to the native version when the copy length exceeds the threshold.
+            if (len > Buffer.MemmoveNativeThreshold)
+            {
+                goto PInvoke;
+            }
+
+#if HAS_CUSTOM_BLOCKS
+            if (len >= 256)
+            {
+                // Try to opportunistically align the destination below. The input isn't pinned, so the GC
+                // is free to move the references. We're therefore assuming that reads may still be unaligned.
+                //
+                // dest is more important to align than src because an unaligned store is more expensive
+                // than an unaligned load.
+                nuint misalignedElements = 64 - (nuint)Unsafe.AsPointer(ref dest) & 63;
+                Unsafe.As<byte, Buffer.Block64>(ref dest) = Unsafe.As<byte, Buffer.Block64>(ref src);
+                src = ref Unsafe.Add(ref src, misalignedElements);
+                dest = ref Unsafe.Add(ref dest, misalignedElements);
+                len -= misalignedElements;
+            }
+#endif
+
+            // Copy 64 bytes at a time until the remainder is less than 64.
+            // If remainder is greater than 16 bytes, then jump to MCPY00. Otherwise, unconditionally copy the last 16 bytes and return.
+            Debug.Assert(len > 64 && len <= Buffer.MemmoveNativeThreshold);
+            nuint n = len >> 6;
+
+        MCPY06:
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.As<byte, Buffer.Block64>(ref dest) = Unsafe.As<byte, Buffer.Block64>(ref src);
+#elif TARGET_64BIT
+            Unsafe.As<byte, long>(ref dest) = Unsafe.As<byte, long>(ref src);
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 8));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 16));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 24)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 24));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 32));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 40)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 40));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 48)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 48));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 56)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 56));
+#else
+            Unsafe.As<byte, int>(ref dest) = Unsafe.As<byte, int>(ref src);
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 4));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 8));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 12)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 12));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 16));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 20)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 20));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 24)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 24));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 28)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 28));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 32));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 36)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 36));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 40)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 40));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 44)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 44));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 48)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 48));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 52)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 52));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 56)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 56));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 60)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 60));
+#endif
+            dest = ref Unsafe.Add(ref dest, 64);
+            src = ref Unsafe.Add(ref src, 64);
+            n--;
+            if (n != 0)
+                goto MCPY06;
+
+            len %= 64;
+            if (len > 16)
+                goto MCPY00;
+#if HAS_CUSTOM_BLOCKS
+            Unsafe.As<byte, Buffer.Block16>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, Buffer.Block16>(ref Unsafe.Add(ref srcEnd, -16));
+#elif TARGET_64BIT
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -16));
+            Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -8));
+#else
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -16));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -12)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -12));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -8));
+            Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -4));
+#endif
+            return;
+
+        BuffersOverlap:
+            // If the buffers overlap perfectly, there's no point to copying the data.
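For contrast with the perfect-overlap early-out below: partially overlapping regions must go through native memmove, which copies in the safe direction. An illustrative use through the public API (not from the patch):

    byte* p = stackalloc byte[16];
    Buffer.MemoryCopy(p, p + 4, 12, 12); // source [0,12) overlaps destination [4,16): memmove semantics apply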
+ if (Unsafe.AreSame(ref dest, ref src)) + { + return; + } + + PInvoke: + Buffer._Memmove(ref dest, ref src, len); + } + } +} From 6dfe8c0368ab735d2676a13c9aa357d193c3c4f3 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 23 Feb 2024 20:05:09 +0100 Subject: [PATCH 25/40] Fix unix build --- .../System.Private.CoreLib/src/System/Buffer.Unix.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs index 4a54e8698da41e..d114ec3e3ce1cb 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs @@ -9,11 +9,11 @@ public static partial class Buffer // Managed code is currently faster than glibc unoptimized memmove // TODO-ARM64-UNIX-OPT revisit when glibc optimized memmove is in Linux distros // https://github.com/dotnet/runtime/issues/8897 - private static nuint MemmoveNativeThreshold => nuint.MaxValue; + internal static nuint MemmoveNativeThreshold => nuint.MaxValue; #elif TARGET_ARM - private const nuint MemmoveNativeThreshold = 512; + internal const nuint MemmoveNativeThreshold = 512; #else - private const nuint MemmoveNativeThreshold = 2048; + internal const nuint MemmoveNativeThreshold = 2048; #endif // TODO: Determine optimal value internal const nuint ZeroMemoryNativeThreshold = 1024; From ffce47ea49c29a84514499af250650a352e69db0 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 23 Feb 2024 20:27:29 +0100 Subject: [PATCH 26/40] Improve Unsafe.InitBlock --- src/coreclr/jit/importer.cpp | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 5469904c4ce554..0436583627647c 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -10295,9 +10295,9 @@ void Compiler::impImportBlockCode(BasicBlock* block) } #endif - op3 = impPopStack().val; // Size - op2 = impPopStack().val; // Value / Src addr - op1 = impPopStack().val; // Dst addr + op3 = gtFoldExpr(impPopStack().val); // Size + op2 = gtFoldExpr(impPopStack().val); // Value / Src addr + op1 = impPopStack().val; // Dst addr if (op3->IsCnsIntOrI()) { @@ -10343,18 +10343,33 @@ void Compiler::impImportBlockCode(BasicBlock* block) // TODO: enable for X86 as well, it currently doesn't support memset/memcpy helpers // Then, get rid of GT_STORE_DYN_BLK entirely. #ifndef TARGET_X86 - const unsigned helper = opcode == CEE_INITBLK ? 
CORINFO_HELP_MEMSET : CORINFO_HELP_MEMCPY; + GenTreeCall* call; + if (opcode == CEE_INITBLK) + { + if (op2->IsIntegralConst(0)) + { + call = gtNewHelperCallNode(CORINFO_HELP_MEMZERO, TYP_VOID, op1, op3); + } + else + { + call = gtNewHelperCallNode(CORINFO_HELP_MEMSET, TYP_VOID, op1, op2, op3); + } + } + else + { + call = gtNewHelperCallNode(CORINFO_HELP_MEMCPY, TYP_VOID, op1, op2, op3); + } + if (isVolatile) { // Wrap with memory barriers: full-barrier + call + load-barrier impAppendTree(gtNewMemoryBarrier(), CHECK_SPILL_ALL, impCurStmtDI); - impAppendTree(gtNewHelperCallNode(helper, TYP_VOID, op1, op2, op3), CHECK_SPILL_ALL, - impCurStmtDI); + impAppendTree(call, CHECK_SPILL_ALL, impCurStmtDI); op1 = gtNewMemoryBarrier(true); } else { - op1 = gtNewHelperCallNode(helper, TYP_VOID, op1, op2, op3); + op1 = call; } #else if (opcode == CEE_INITBLK) From 2dd0c11651884aa300e4a905885d14582ca071b7 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 23 Feb 2024 20:37:22 +0100 Subject: [PATCH 27/40] Tier up helpers --- src/coreclr/vm/appdomain.cpp | 1 + src/coreclr/vm/callcounting.cpp | 5 +++-- src/coreclr/vm/vars.cpp | 1 + src/coreclr/vm/vars.hpp | 1 + 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/appdomain.cpp b/src/coreclr/vm/appdomain.cpp index bb5d3d17e00534..fbbb4ee4c12ee5 100644 --- a/src/coreclr/vm/appdomain.cpp +++ b/src/coreclr/vm/appdomain.cpp @@ -1378,6 +1378,7 @@ void SystemDomain::LoadBaseSystemClasses() g_pWeakReferenceOfTClass = CoreLibBinder::GetClass(CLASS__WEAKREFERENCEGENERIC); g_pCastHelpers = CoreLibBinder::GetClass(CLASS__CASTHELPERS); + g_pRuntimeHelpers = CoreLibBinder::GetClass(CLASS__RUNTIME_HELPERS); #ifdef FEATURE_COMINTEROP if (g_pConfig->IsBuiltInCOMSupported()) diff --git a/src/coreclr/vm/callcounting.cpp b/src/coreclr/vm/callcounting.cpp index c464949f7aeee0..5a7c6daee3d7b0 100644 --- a/src/coreclr/vm/callcounting.cpp +++ b/src/coreclr/vm/callcounting.cpp @@ -660,9 +660,10 @@ bool CallCountingManager::SetCodeEntryPoint( CallCount callCountThreshold = g_pConfig->TieredCompilation_CallCountThreshold(); _ASSERTE(callCountThreshold != 0); - // Let's tier up all cast helpers faster than other methods. This is because we want to import them as + // Let's tier up all cast and runtime helpers faster than other methods. This is because we want to import them as // direct calls in codegen and they need to be promoted earlier than their callers. 
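An illustrative note on the effect (30 is the usual default for TieredCompilation_CallCountThreshold - an assumption about typical configuration, not something this patch sets): methods on CastHelpers and RuntimeHelpers now promote to the optimizing tier after roughly half as many calls, and the clamp keeps a configured threshold of 1 from becoming 0:

    // 30 / 2 -> promote after 15 calls; max(1, 1 / 2) -> 1, so the threshold never reaches 0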
- if (methodDesc->GetMethodTable() == g_pCastHelpers) + PTR_MethodTable mt = methodDesc->GetMethodTable(); + if (mt == g_pCastHelpers || mt == g_pRuntimeHelpers) { callCountThreshold = max(1, (CallCount)(callCountThreshold / 2)); } diff --git a/src/coreclr/vm/vars.cpp b/src/coreclr/vm/vars.cpp index ba72d19f2b1e48..26d01466bdbbff 100644 --- a/src/coreclr/vm/vars.cpp +++ b/src/coreclr/vm/vars.cpp @@ -102,6 +102,7 @@ GVAL_IMPL_INIT(DWORD, g_debuggerWordTLSIndex, TLS_OUT_OF_INDEXES); GVAL_IMPL_INIT(DWORD, g_TlsIndex, TLS_OUT_OF_INDEXES); MethodTable* g_pCastHelpers; +MethodTable* g_pRuntimeHelpers; #ifdef FEATURE_EH_FUNCLETS GPTR_IMPL(MethodTable, g_pEHClass); GPTR_IMPL(MethodTable, g_pExceptionServicesInternalCallsClass); diff --git a/src/coreclr/vm/vars.hpp b/src/coreclr/vm/vars.hpp index 65712d031512d1..ef1bddfa3d39c5 100644 --- a/src/coreclr/vm/vars.hpp +++ b/src/coreclr/vm/vars.hpp @@ -412,6 +412,7 @@ EXTERN OBJECTHANDLE g_pPreallocatedExecutionEngineException; EXTERN OBJECTHANDLE g_pPreallocatedSentinelObject; EXTERN MethodTable* g_pCastHelpers; +EXTERN MethodTable* g_pRuntimeHelpers; GPTR_DECL(Thread,g_pFinalizerThread); GPTR_DECL(Thread,g_pSuspensionThread); From 7ce4cb721187493f881a3c2890f7e6a2bd8d0dbc Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 24 Feb 2024 17:26:37 +0100 Subject: [PATCH 28/40] Address feedback --- .../System.Private.CoreLib.Shared.projitems | 4 +- .../src/System/Buffer.Unix.cs | 21 - .../src/System/Buffer.Windows.cs | 18 - .../src/System/Buffer.cs | 542 +---------------- .../CompilerServices/RuntimeHelpers.cs | 43 +- .../System.Private.CoreLib/src/System/Span.cs | 14 +- ...rs.MemOps.cs => SpanHelpers.ByteMemOps.cs} | 544 +++++++----------- 7 files changed, 259 insertions(+), 927 deletions(-) delete mode 100644 src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs delete mode 100644 src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs rename src/libraries/System.Private.CoreLib/src/System/{Runtime/CompilerServices/RuntimeHelpers.MemOps.cs => SpanHelpers.ByteMemOps.cs} (69%) diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index 849a03e3128751..671d5a3dec585c 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -882,7 +882,6 @@ - @@ -1119,6 +1118,7 @@ + @@ -2141,7 +2141,6 @@ - @@ -2459,7 +2458,6 @@ - diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs deleted file mode 100644 index d114ec3e3ce1cb..00000000000000 --- a/src/libraries/System.Private.CoreLib/src/System/Buffer.Unix.cs +++ /dev/null @@ -1,21 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -namespace System -{ - public static partial class Buffer - { -#if TARGET_ARM64 || TARGET_LOONGARCH64 - // Managed code is currently faster than glibc unoptimized memmove - // TODO-ARM64-UNIX-OPT revisit when glibc optimized memmove is in Linux distros - // https://github.com/dotnet/runtime/issues/8897 - internal static nuint MemmoveNativeThreshold => nuint.MaxValue; -#elif TARGET_ARM - internal const nuint MemmoveNativeThreshold = 512; -#else - internal const nuint MemmoveNativeThreshold = 2048; -#endif - // TODO: Determine optimal value - internal const nuint ZeroMemoryNativeThreshold = 1024; - } -} diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs deleted file mode 100644 index 3f579e9cc23741..00000000000000 --- a/src/libraries/System.Private.CoreLib/src/System/Buffer.Windows.cs +++ /dev/null @@ -1,18 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -namespace System -{ - public static partial class Buffer - { -#if TARGET_ARM64 - // Determine optimal value for Windows. - // https://github.com/dotnet/runtime/issues/8896 - internal static nuint MemmoveNativeThreshold => nuint.MaxValue; -#else - internal const nuint MemmoveNativeThreshold = 2048; -#endif - // TODO: Determine optimal value - internal const nuint ZeroMemoryNativeThreshold = 1024; - } -} diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffer.cs b/src/libraries/System.Private.CoreLib/src/System/Buffer.cs index ebe99ff1847aa8..543bf79beba7ef 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffer.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffer.cs @@ -1,10 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -#if TARGET_AMD64 || TARGET_ARM64 || (TARGET_32BIT && !TARGET_ARM) || TARGET_LOONGARCH64 -#define HAS_CUSTOM_BLOCKS -#endif - using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -127,209 +123,6 @@ public static unsafe void MemoryCopy(void* source, void* destination, ulong dest Memmove(ref *(byte*)destination, ref *(byte*)source, checked((nuint)sourceBytesToCopy)); } - [Intrinsic] // Unrolled for small constant lengths - internal static unsafe void Memmove(ref byte dest, ref byte src, nuint len) - { - // P/Invoke into the native version when the buffers are overlapping. - if (((nuint)(nint)Unsafe.ByteOffset(ref src, ref dest) < len) || ((nuint)(nint)Unsafe.ByteOffset(ref dest, ref src) < len)) - { - goto BuffersOverlap; - } - - // Use "(IntPtr)(nint)len" to avoid overflow checking on the explicit cast to IntPtr - - ref byte srcEnd = ref Unsafe.Add(ref src, (IntPtr)(nint)len); - ref byte destEnd = ref Unsafe.Add(ref dest, (IntPtr)(nint)len); - - if (len <= 16) - goto MCPY02; - if (len > 64) - goto MCPY05; - - MCPY00: - // Copy bytes which are multiples of 16 and leave the remainder for MCPY01 to handle. 
- Debug.Assert(len > 16 && len <= 64); -#if HAS_CUSTOM_BLOCKS - Unsafe.As(ref dest) = Unsafe.As(ref src); // [0,16] -#elif TARGET_64BIT - Unsafe.As(ref dest) = Unsafe.As(ref src); - Unsafe.As(ref Unsafe.Add(ref dest, 8)) = Unsafe.As(ref Unsafe.Add(ref src, 8)); // [0,16] -#else - Unsafe.As(ref dest) = Unsafe.As(ref src); - Unsafe.As(ref Unsafe.Add(ref dest, 4)) = Unsafe.As(ref Unsafe.Add(ref src, 4)); - Unsafe.As(ref Unsafe.Add(ref dest, 8)) = Unsafe.As(ref Unsafe.Add(ref src, 8)); - Unsafe.As(ref Unsafe.Add(ref dest, 12)) = Unsafe.As(ref Unsafe.Add(ref src, 12)); // [0,16] -#endif - if (len <= 32) - goto MCPY01; -#if HAS_CUSTOM_BLOCKS - Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); // [0,32] -#elif TARGET_64BIT - Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); - Unsafe.As(ref Unsafe.Add(ref dest, 24)) = Unsafe.As(ref Unsafe.Add(ref src, 24)); // [0,32] -#else - Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); - Unsafe.As(ref Unsafe.Add(ref dest, 20)) = Unsafe.As(ref Unsafe.Add(ref src, 20)); - Unsafe.As(ref Unsafe.Add(ref dest, 24)) = Unsafe.As(ref Unsafe.Add(ref src, 24)); - Unsafe.As(ref Unsafe.Add(ref dest, 28)) = Unsafe.As(ref Unsafe.Add(ref src, 28)); // [0,32] -#endif - if (len <= 48) - goto MCPY01; -#if HAS_CUSTOM_BLOCKS - Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); // [0,48] -#elif TARGET_64BIT - Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); - Unsafe.As(ref Unsafe.Add(ref dest, 40)) = Unsafe.As(ref Unsafe.Add(ref src, 40)); // [0,48] -#else - Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); - Unsafe.As(ref Unsafe.Add(ref dest, 36)) = Unsafe.As(ref Unsafe.Add(ref src, 36)); - Unsafe.As(ref Unsafe.Add(ref dest, 40)) = Unsafe.As(ref Unsafe.Add(ref src, 40)); - Unsafe.As(ref Unsafe.Add(ref dest, 44)) = Unsafe.As(ref Unsafe.Add(ref src, 44)); // [0,48] -#endif - - MCPY01: - // Unconditionally copy the last 16 bytes using destEnd and srcEnd and return. - Debug.Assert(len > 16 && len <= 64); -#if HAS_CUSTOM_BLOCKS - Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); -#elif TARGET_64BIT - Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); -#else - Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -12)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -12)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -4)); -#endif - return; - - MCPY02: - // Copy the first 8 bytes and then unconditionally copy the last 8 bytes and return. 
- if ((len & 24) == 0) - goto MCPY03; - Debug.Assert(len >= 8 && len <= 16); -#if TARGET_64BIT - Unsafe.As(ref dest) = Unsafe.As(ref src); - Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); -#else - Unsafe.As(ref dest) = Unsafe.As(ref src); - Unsafe.As(ref Unsafe.Add(ref dest, 4)) = Unsafe.As(ref Unsafe.Add(ref src, 4)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -4)); -#endif - return; - - MCPY03: - // Copy the first 4 bytes and then unconditionally copy the last 4 bytes and return. - if ((len & 4) == 0) - goto MCPY04; - Debug.Assert(len >= 4 && len < 8); - Unsafe.As(ref dest) = Unsafe.As(ref src); - Unsafe.As(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -4)); - return; - - MCPY04: - // Copy the first byte. For pending bytes, do an unconditionally copy of the last 2 bytes and return. - Debug.Assert(len < 4); - if (len == 0) - return; - dest = src; - if ((len & 2) == 0) - return; - Unsafe.As(ref Unsafe.Add(ref destEnd, -2)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -2)); - return; - - MCPY05: - // PInvoke to the native version when the copy length exceeds the threshold. - if (len > MemmoveNativeThreshold) - { - goto PInvoke; - } - -#if HAS_CUSTOM_BLOCKS - if (len >= 256) - { - // Try to opportunistically align the destination below. The input isn't pinned, so the GC - // is free to move the references. We're therefore assuming that reads may still be unaligned. - // - // dest is more important to align than src because an unaligned store is more expensive - // than an unaligned load. - nuint misalignedElements = 64 - (nuint)Unsafe.AsPointer(ref dest) & 63; - Unsafe.As(ref dest) = Unsafe.As(ref src); - src = ref Unsafe.Add(ref src, misalignedElements); - dest = ref Unsafe.Add(ref dest, misalignedElements); - len -= misalignedElements; - } -#endif - - // Copy 64-bytes at a time until the remainder is less than 64. - // If remainder is greater than 16 bytes, then jump to MCPY00. Otherwise, unconditionally copy the last 16 bytes and return. 
- Debug.Assert(len > 64 && len <= MemmoveNativeThreshold); - nuint n = len >> 6; - - MCPY06: -#if HAS_CUSTOM_BLOCKS - Unsafe.As(ref dest) = Unsafe.As(ref src); -#elif TARGET_64BIT - Unsafe.As(ref dest) = Unsafe.As(ref src); - Unsafe.As(ref Unsafe.Add(ref dest, 8)) = Unsafe.As(ref Unsafe.Add(ref src, 8)); - Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); - Unsafe.As(ref Unsafe.Add(ref dest, 24)) = Unsafe.As(ref Unsafe.Add(ref src, 24)); - Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); - Unsafe.As(ref Unsafe.Add(ref dest, 40)) = Unsafe.As(ref Unsafe.Add(ref src, 40)); - Unsafe.As(ref Unsafe.Add(ref dest, 48)) = Unsafe.As(ref Unsafe.Add(ref src, 48)); - Unsafe.As(ref Unsafe.Add(ref dest, 56)) = Unsafe.As(ref Unsafe.Add(ref src, 56)); -#else - Unsafe.As(ref dest) = Unsafe.As(ref src); - Unsafe.As(ref Unsafe.Add(ref dest, 4)) = Unsafe.As(ref Unsafe.Add(ref src, 4)); - Unsafe.As(ref Unsafe.Add(ref dest, 8)) = Unsafe.As(ref Unsafe.Add(ref src, 8)); - Unsafe.As(ref Unsafe.Add(ref dest, 12)) = Unsafe.As(ref Unsafe.Add(ref src, 12)); - Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); - Unsafe.As(ref Unsafe.Add(ref dest, 20)) = Unsafe.As(ref Unsafe.Add(ref src, 20)); - Unsafe.As(ref Unsafe.Add(ref dest, 24)) = Unsafe.As(ref Unsafe.Add(ref src, 24)); - Unsafe.As(ref Unsafe.Add(ref dest, 28)) = Unsafe.As(ref Unsafe.Add(ref src, 28)); - Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); - Unsafe.As(ref Unsafe.Add(ref dest, 36)) = Unsafe.As(ref Unsafe.Add(ref src, 36)); - Unsafe.As(ref Unsafe.Add(ref dest, 40)) = Unsafe.As(ref Unsafe.Add(ref src, 40)); - Unsafe.As(ref Unsafe.Add(ref dest, 44)) = Unsafe.As(ref Unsafe.Add(ref src, 44)); - Unsafe.As(ref Unsafe.Add(ref dest, 48)) = Unsafe.As(ref Unsafe.Add(ref src, 48)); - Unsafe.As(ref Unsafe.Add(ref dest, 52)) = Unsafe.As(ref Unsafe.Add(ref src, 52)); - Unsafe.As(ref Unsafe.Add(ref dest, 56)) = Unsafe.As(ref Unsafe.Add(ref src, 56)); - Unsafe.As(ref Unsafe.Add(ref dest, 60)) = Unsafe.As(ref Unsafe.Add(ref src, 60)); -#endif - dest = ref Unsafe.Add(ref dest, 64); - src = ref Unsafe.Add(ref src, 64); - n--; - if (n != 0) - goto MCPY06; - - len %= 64; - if (len > 16) - goto MCPY00; -#if HAS_CUSTOM_BLOCKS - Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); -#elif TARGET_64BIT - Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); -#else - Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -12)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -12)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); - Unsafe.As(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -4)); -#endif - return; - - BuffersOverlap: - // If the buffers overlap perfectly, there's no point to copying the data. - if (Unsafe.AreSame(ref dest, ref src)) - { - return; - } - - PInvoke: - _Memmove(ref dest, ref src, len); - } - // Non-inlinable wrapper around the QCall that avoids polluting the fast path // with P/Invoke prolog/epilog. 
[MethodImpl(MethodImplOptions.NoInlining)] @@ -340,14 +133,6 @@ internal static unsafe void _Memmove(ref byte dest, ref byte src, nuint len) __Memmove(pDest, pSrc, len); } -#if HAS_CUSTOM_BLOCKS - [StructLayout(LayoutKind.Sequential, Size = 16)] - internal struct Block16 { } - - [StructLayout(LayoutKind.Sequential, Size = 64)] - internal struct Block64 { } -#endif // HAS_CUSTOM_BLOCKS - // Non-inlinable wrapper around the QCall that avoids polluting the fast path // with P/Invoke prolog/epilog. [MethodImpl(MethodImplOptions.NoInlining)] @@ -369,7 +154,7 @@ internal static unsafe void Memmove(ref T destination, ref T source, nuint el if (!RuntimeHelpers.IsReferenceOrContainsReferences()) { // Blittable memmove - Memmove( + SpanHelpers.Memmove( ref Unsafe.As(ref destination), ref Unsafe.As(ref source), elementCount * (nuint)sizeof(T)); @@ -439,329 +224,4 @@ private static void _BulkMoveWithWriteBarrier(ref byte destination, ref byte sou #endif // !MONO } - - internal static partial class SpanHelpers - { - [Intrinsic] // Unrolled for small sizes - public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength) - { - if (byteLength == 0) - return; - -#if TARGET_AMD64 || TARGET_ARM64 || TARGET_LOONGARCH64 - // The exact matrix on when ZeroMemory is faster than InitBlockUnaligned is very complex. The factors to consider include - // type of hardware and memory alignment. This threshold was chosen as a good balance across different configurations. - if (byteLength > 768) - goto PInvoke; - Unsafe.InitBlockUnaligned(ref b, 0, (uint)byteLength); - return; -#else - // TODO: Optimize other platforms to be on par with AMD64 CoreCLR - // Note: It's important that this switch handles lengths at least up to 22. - // See notes below near the main loop for why. - - // The switch will be very fast since it can be implemented using a jump - // table in assembly. See http://stackoverflow.com/a/449297/4077294 for more info. 
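An aside on the jump-table remark above, with a minimal illustrative snippet; the method name is made up and the exact codegen is up to the JIT:

    // A switch over a dense, contiguous range typically lowers to one bounds
    // check plus an indirect jump, so the cost does not grow with the number
    // of cases - which is what makes the big length switch below cheap.
    static int DenseDispatch(int x)
    {
        switch (x)
        {
            case 0: return 10;
            case 1: return 11;
            case 2: return 12;
            case 3: return 13;
            default: return -1;
        }
    }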
- - switch (byteLength) - { - case 1: - b = 0; - return; - case 2: - Unsafe.As(ref b) = 0; - return; - case 3: - Unsafe.As(ref b) = 0; - Unsafe.Add(ref b, 2) = 0; - return; - case 4: - Unsafe.As(ref b) = 0; - return; - case 5: - Unsafe.As(ref b) = 0; - Unsafe.Add(ref b, 4) = 0; - return; - case 6: - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - return; - case 7: - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.Add(ref b, 6) = 0; - return; - case 8: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - return; - case 9: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.Add(ref b, 8) = 0; - return; - case 10: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - return; - case 11: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.Add(ref b, 10) = 0; - return; - case 12: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - return; - case 13: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.Add(ref b, 12) = 0; - return; - case 14: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; - return; - case 15: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; - Unsafe.Add(ref b, 14) = 0; - return; - case 16: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - return; - case 17: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.Add(ref b, 16) = 0; - return; - case 18: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; - return; - case 19: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; - Unsafe.Add(ref b, 18) = 0; - return; - case 20: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref 
Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; - return; - case 21: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; - Unsafe.Add(ref b, 20) = 0; - return; - case 22: -#if TARGET_64BIT - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; -#else - Unsafe.As(ref b) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 4)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 8)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 12)) = 0; -#endif - Unsafe.As(ref Unsafe.Add(ref b, 16)) = 0; - Unsafe.As(ref Unsafe.Add(ref b, 20)) = 0; - return; - } - - // P/Invoke into the native version for large lengths - if (byteLength >= 512) goto PInvoke; - - nuint i = 0; // byte offset at which we're copying - - if (((nuint)Unsafe.AsPointer(ref b) & 3) != 0) - { - if (((nuint)Unsafe.AsPointer(ref b) & 1) != 0) - { - b = 0; - i += 1; - if (((nuint)Unsafe.AsPointer(ref b) & 2) != 0) - goto IntAligned; - } - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - i += 2; - } - - IntAligned: - - // On 64-bit IntPtr.Size == 8, so we want to advance to the next 8-aligned address. If - // (int)b % 8 is 0, 5, 6, or 7, we will already have advanced by 0, 3, 2, or 1 - // bytes to the next aligned address (respectively), so do nothing. On the other hand, - // if it is 1, 2, 3, or 4 we will want to copy-and-advance another 4 bytes until - // we're aligned. - // The thing 1, 2, 3, and 4 have in common that the others don't is that if you - // subtract one from them, their 3rd lsb will not be set. Hence, the below check. - - if ((((nuint)Unsafe.AsPointer(ref b) - 1) & 4) == 0) - { - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - i += 4; - } - - nuint end = byteLength - 16; - byteLength -= i; // lower 4 bits of byteLength represent how many bytes are left *after* the unrolled loop - - // We know due to the above switch-case that this loop will always run 1 iteration; max - // bytes we clear before checking is 23 (7 to align the pointers, 16 for 1 iteration) so - // the switch handles lengths 0-22. - Debug.Assert(end >= 7 && i <= end); - - // This is separated out into a different variable, so the i + 16 addition can be - // performed at the start of the pipeline and the loop condition does not have - // a dependency on the writes. - nuint counter; - - do - { - counter = i + 16; - - // This loop looks very costly since there appear to be a bunch of temporary values - // being created with the adds, but the jit (for x86 anyways) will convert each of - // these to use memory addressing operands. - - // So the only cost is a bit of code size, which is made up for by the fact that - // we save on writes to b. 
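(A quick check of the "subtract one" trick a bit further above: for (int)b % 8 = 1, 2, 3, or 4, subtracting one yields 0, 1, 2, or 3 modulo 8, whose 3rd lsb (value 4) is clear, so the extra 4-byte store runs; for 0, 5, 6, or 7 it yields 7, 4, 5, or 6 modulo 8, whose 3rd lsb is set, so the store is skipped - exactly the residues that do, and do not, still need 4 bytes to reach 8-byte alignment.)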
- -#if TARGET_64BIT - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 8)) = 0; -#else - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 4)) = 0; - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 8)) = 0; - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 12)) = 0; -#endif - - i = counter; - - // See notes above for why this wasn't used instead - // i += 16; - } - while (counter <= end); - - if ((byteLength & 8) != 0) - { -#if TARGET_64BIT - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; -#else - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i + 4)) = 0; -#endif - i += 8; - } - if ((byteLength & 4) != 0) - { - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - i += 4; - } - if ((byteLength & 2) != 0) - { - Unsafe.As(ref Unsafe.AddByteOffset(ref b, i)) = 0; - i += 2; - } - if ((byteLength & 1) != 0) - { - Unsafe.AddByteOffset(ref b, i) = 0; - // We're not using i after this, so not needed - // i += 1; - } - - return; -#endif - - PInvoke: - Buffer._ZeroMemory(ref b, byteLength); - } - } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs index cf213590851497..bb6b2599ac52a1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs @@ -125,9 +125,46 @@ internal static bool IsPrimitiveType(this CorElementType et) [Intrinsic] internal static bool IsKnownConstant(char t) => false; - - [Intrinsic] - internal static bool IsKnownConstant(int t) => false; #pragma warning restore IDE0060 + +#if NATIVEAOT + [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemCopy")] +#endif + private static void MemCopy(ref byte dest, ref byte src, nuint size) + { + if (size > 0) + { + // Implicit nullchecks + _ = Unsafe.ReadUnaligned(ref dest); + _ = Unsafe.ReadUnaligned(ref src); + SpanHelpers.Memmove(ref dest, ref src, size); + } + } + +#if NATIVEAOT + [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemZero")] +#endif + private static void MemZero(ref byte dest, nuint size) + { + if (size > 0) + { + // Implicit nullcheck + _ = Unsafe.ReadUnaligned(ref dest); + SpanHelpers.ClearWithoutReferences(ref dest, size); + } + } + +#if NATIVEAOT + [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemSet")] +#endif + private static void MemSet(ref byte dest, byte value, nuint size) + { + if (size > 0) + { + // Implicit nullcheck + _ = Unsafe.ReadUnaligned(ref dest); + SpanHelpers.Fill(ref dest, size, value); + } + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Span.cs b/src/libraries/System.Private.CoreLib/src/System/Span.cs index aaf3763d81b755..1c66a341b0fde1 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Span.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Span.cs @@ -300,19 +300,7 @@ public unsafe void Clear() [MethodImpl(MethodImplOptions.AggressiveInlining)] public unsafe void Fill(T value) { - if (sizeof(T) == 1) - { - // Special-case single-byte types like byte / sbyte / bool. - // The runtime eventually calls memset, which can efficiently support large buffers. - // We don't need to check IsReferenceOrContainsReferences because no references - // can ever be stored in types this small. 
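To make the deleted special case concrete, a hypothetical usage that used to take this shortcut; after this patch it goes through SpanHelpers.Fill like every other element type:

    Span<bool> flags = stackalloc bool[64];
    flags.Fill(true);   // sizeof(bool) == 1, so this previously hit the
                        // InitBlockUnaligned path shown just below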
- Unsafe.InitBlockUnaligned(ref Unsafe.As(ref _reference), *(byte*)&value, (uint)_length); - } - else - { - // Call our optimized workhorse method for all other types. - SpanHelpers.Fill(ref _reference, (uint)_length, value); - } + SpanHelpers.Fill(ref _reference, (uint)_length, value); } /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.MemOps.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs similarity index 69% rename from src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.MemOps.cs rename to src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs index ab7f098f5db548..e9fc88e7234675 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.MemOps.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs @@ -2,338 +2,42 @@ // The .NET Foundation licenses this file to you under the MIT license. #if TARGET_AMD64 || TARGET_ARM64 || (TARGET_32BIT && !TARGET_ARM) || TARGET_LOONGARCH64 +// JIT is guaranteed to unroll blocks up to 64 bytes in size #define HAS_CUSTOM_BLOCKS #endif using System.Diagnostics; -using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; -namespace System.Runtime.CompilerServices +namespace System { - public static partial class RuntimeHelpers + internal static partial class SpanHelpers // .ByteMemOps { -#if NATIVEAOT - [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemSet")] -#endif - private static unsafe void MemSet(ref byte dest, byte value, nuint numElements) - { - if (numElements == 0) - { - return; - } - - // Implicit nullcheck - _ = Unsafe.ReadUnaligned(ref dest); - - // Early checks to see if it's even possible to vectorize - JIT will turn these checks into consts. - // - T cannot contain references (GC can't track references in vectors) - // - Vectorization must be hardware-accelerated - // - T's size must not exceed the vector's size - // - T's size must be a whole power of 2 - - if (!Vector.IsHardwareAccelerated) { goto CannotVectorize; } - - if (numElements >= (uint)(Vector.Count)) - { - // We have enough data for at least one vectorized write. - - Vector vector = new(value); - - nuint stopLoopAtOffset = numElements & (nuint)(nint)(2 * (int)-Vector.Count); // intentional sign extension carries the negative bit - nuint offset = 0; - - // Loop, writing 2 vectors at a time. - // Compare 'numElements' rather than 'stopLoopAtOffset' because we don't want a dependency - // on the very recently calculated 'stopLoopAtOffset' value. - - if (numElements >= (uint)(2 * Vector.Count)) - { - do - { - Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, offset), vector); - Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, offset + (nuint)Vector.Count), vector); - offset += (uint)(2 * Vector.Count); - } while (offset < stopLoopAtOffset); - } - - // At this point, if any data remains to be written, it's strictly less than - // 2 * sizeof(Vector) bytes. The loop above had us write an even number of vectors. - // If the total byte length instead involves us writing an odd number of vectors, write - // one additional vector now. The bit check below tells us if we're in an "odd vector - // count" situation. 
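(Concretely, assuming 16-byte vectors: for numElements = 56, stopLoopAtOffset = 56 & ~31 = 32, so the paired loop fills [0,32); 56 & 16 is non-zero, so the check below writes one more vector over [32,48); and the final unconditional vector at offset 56 - 16 = 40 covers [40,56), harmlessly overwriting [40,48) with the same value.)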
- - if ((numElements & (nuint)Vector.Count) != 0) - { - Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, offset), vector); - } - - // It's possible that some small buffer remains to be populated - something that won't - // fit an entire vector's worth of data. Instead of falling back to a loop, we'll write - // a vector at the very end of the buffer. This may involve overwriting previously - // populated data, which is fine since we're splatting the same value for all entries. - // There's no need to perform a length check here because we already performed this - // check before entering the vectorized code path. - - Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, numElements - (nuint)Vector.Count), vector); - - // And we're done! - return; - } - - CannotVectorize: - // If we reached this point, we cannot vectorize this T, or there are too few - // elements for us to vectorize. Fall back to an unrolled loop. - - nuint i = 0; - - // Write 8 elements at a time - if (numElements >= 8) - { - nuint stopLoopAtOffset = numElements & ~(nuint)7; - do - { - Unsafe.Add(ref dest, (nint)i + 0) = value; - Unsafe.Add(ref dest, (nint)i + 1) = value; - Unsafe.Add(ref dest, (nint)i + 2) = value; - Unsafe.Add(ref dest, (nint)i + 3) = value; - Unsafe.Add(ref dest, (nint)i + 4) = value; - Unsafe.Add(ref dest, (nint)i + 5) = value; - Unsafe.Add(ref dest, (nint)i + 6) = value; - Unsafe.Add(ref dest, (nint)i + 7) = value; - } while ((i += 8) < stopLoopAtOffset); - } - - // Write next 4 elements if needed - if ((numElements & 4) != 0) - { - Unsafe.Add(ref dest, (nint)i + 0) = value; - Unsafe.Add(ref dest, (nint)i + 1) = value; - Unsafe.Add(ref dest, (nint)i + 2) = value; - Unsafe.Add(ref dest, (nint)i + 3) = value; - i += 4; - } - - // Write next 2 elements if needed - if ((numElements & 2) != 0) - { - Unsafe.Add(ref dest, (nint)i + 0) = value; - Unsafe.Add(ref dest, (nint)i + 1) = value; - i += 2; - } - - // Write final element if needed - if ((numElements & 1) != 0) - { - Unsafe.Add(ref dest, (nint)i) = value; - } - } - -#if NATIVEAOT - [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemZero")] -#endif - private static unsafe void MemZero(ref byte b, nuint byteLength) - { - if (byteLength == 0) - return; - - // Implicit nullcheck - _ = Unsafe.ReadUnaligned(ref b); - - ref byte bEnd = ref Unsafe.Add(ref b, byteLength); - - if (byteLength <= 16) - goto MZER02; - if (byteLength > 64) - goto MZER05; - - MZER00: - // Clear bytes which are multiples of 16 and leave the remainder for MZER01 to handle. 
- Debug.Assert(byteLength > 16 && byteLength <= 64); -#if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref b, default); -#elif TARGET_64BIT - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 8), 0); -#else - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 4), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 12), 0); -#endif - if (byteLength <= 32) - goto MZER01; -#if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), default); -#elif TARGET_64BIT - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 24), 0); -#else - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 20), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 24), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 28), 0); -#endif - if (byteLength <= 48) - goto MZER01; -#if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), default); -#elif TARGET_64BIT - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 40), 0); +#if TARGET_ARM64 || TARGET_LOONGARCH64 + // TODO: Determine optimal value + // https://github.com/dotnet/runtime/issues/8897 (Linux) + // https://github.com/dotnet/runtime/issues/8896 (Windows) + private static nuint MemmoveNativeThreshold => nuint.MaxValue; +#elif TARGET_ARM + private const nuint MemmoveNativeThreshold = 512; #else - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 36), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 40), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 44), 0); + private const nuint MemmoveNativeThreshold = 2048; #endif + // TODO: Determine optimal value + private const nuint ZeroMemoryNativeThreshold = 1024; - MZER01: - // Unconditionally clear the last 16 bytes using bEnd and return. - Debug.Assert(byteLength > 16 && byteLength <= 64); #if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), default); -#elif TARGET_64BIT - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); -#else - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -12), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -4), 0); -#endif - return; + [StructLayout(LayoutKind.Sequential, Size = 16)] + private struct Block16 {} - MZER02: - // Clear the first 8 bytes and then unconditionally clear the last 8 bytes and return. - if ((byteLength & 24) == 0) - goto MZER03; - Debug.Assert(byteLength >= 8 && byteLength <= 16); -#if TARGET_64BIT - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); -#else - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 4), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -4), 0); -#endif - return; - - MZER03: - // Clear the first 4 bytes and then unconditionally clear the last 4 bytes and return. - if ((byteLength & 4) == 0) - goto MZER04; - Debug.Assert(byteLength >= 4 && byteLength < 8); - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -4), 0); - return; - - MZER04: - // Clear the first byte. For pending bytes, do an unconditionally clear of the last 2 bytes and return. 
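(Worked through: byteLength = 3 writes b[0] and then the 2-byte store at bEnd - 2 zeroes b[1] and b[2]; byteLength = 2 zeroes b[0] twice, once directly and once via the overlapping tail store, which is harmless because the value is the same; byteLength = 1 returns after the single byte.)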
- Debug.Assert(byteLength < 4); - if (byteLength == 0) - return; - b = 0; - if ((byteLength & 2) == 0) - return; - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -2), 0); - return; - - MZER05: - // PInvoke to the native version when the clear length exceeds the threshold. - if (byteLength > Buffer.ZeroMemoryNativeThreshold) - { - goto PInvoke; - } - -#if HAS_CUSTOM_BLOCKS - if (byteLength >= 256) - { - unsafe - { - // Try to opportunistically align the destination below. The input isn't pinned, so the GC - // is free to move the references. We're therefore assuming that reads may still be unaligned. - nuint misalignedElements = 64 - (nuint)Unsafe.AsPointer(ref b) & 63; - Unsafe.WriteUnaligned(ref b, default); - b = ref Unsafe.Add(ref b, misalignedElements); - byteLength -= misalignedElements; - } - } -#endif - // Clear 64-bytes at a time until the remainder is less than 64. - // If remainder is greater than 16 bytes, then jump to MZER00. Otherwise, unconditionally clear the last 16 bytes and return. - Debug.Assert(byteLength > 64 && byteLength <= Buffer.ZeroMemoryNativeThreshold); - nuint n = byteLength >> 6; + [StructLayout(LayoutKind.Sequential, Size = 64)] + private struct Block64 {} +#endif // HAS_CUSTOM_BLOCKS - MZER06: -#if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref b, default); -#elif TARGET_64BIT - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 24), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 40), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 48), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 56), 0); -#else - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 4), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 12), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 20), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 24), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 28), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 36), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 40), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 44), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 48), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 52), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 56), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 60), 0); -#endif - b = ref Unsafe.Add(ref b, 64); - n--; - if (n != 0) - goto MZER06; - - byteLength %= 64; - if (byteLength > 16) - goto MZER00; -#if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), default); -#elif TARGET_64BIT - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); -#else - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -12), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -4), 0); -#endif - return; - - PInvoke: - Buffer._ZeroMemory(ref b, byteLength); - } - -#if NATIVEAOT - [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemCopy")] -#endif - private static unsafe void MemCopy(ref byte dest, ref byte src, nuint len) + [Intrinsic] // Unrolled for small constant lengths + internal static unsafe void Memmove(ref byte 
dest, ref byte src, nuint len) { - if (len == 0) - { - return; - } - - // Implicit nullchecks - _ = Unsafe.ReadUnaligned(ref dest); - _ = Unsafe.ReadUnaligned(ref src); - // P/Invoke into the native version when the buffers are overlapping. if (((nuint)(nint)Unsafe.ByteOffset(ref src, ref dest) < len) || ((nuint)(nint)Unsafe.ByteOffset(ref dest, ref src) < len)) { @@ -350,11 +54,11 @@ private static unsafe void MemCopy(ref byte dest, ref byte src, nuint len) if (len > 64) goto MCPY05; - MCPY00: + MCPY00: // Copy bytes which are multiples of 16 and leave the remainder for MCPY01 to handle. Debug.Assert(len > 16 && len <= 64); #if HAS_CUSTOM_BLOCKS - Unsafe.As(ref dest) = Unsafe.As(ref src); // [0,16] + Unsafe.As(ref dest) = Unsafe.As(ref src); // [0,16] #elif TARGET_64BIT Unsafe.As(ref dest) = Unsafe.As(ref src); Unsafe.As(ref Unsafe.Add(ref dest, 8)) = Unsafe.As(ref Unsafe.Add(ref src, 8)); // [0,16] @@ -367,7 +71,7 @@ private static unsafe void MemCopy(ref byte dest, ref byte src, nuint len) if (len <= 32) goto MCPY01; #if HAS_CUSTOM_BLOCKS - Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); // [0,32] + Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); // [0,32] #elif TARGET_64BIT Unsafe.As(ref Unsafe.Add(ref dest, 16)) = Unsafe.As(ref Unsafe.Add(ref src, 16)); Unsafe.As(ref Unsafe.Add(ref dest, 24)) = Unsafe.As(ref Unsafe.Add(ref src, 24)); // [0,32] @@ -380,7 +84,7 @@ private static unsafe void MemCopy(ref byte dest, ref byte src, nuint len) if (len <= 48) goto MCPY01; #if HAS_CUSTOM_BLOCKS - Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); // [0,48] + Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); // [0,48] #elif TARGET_64BIT Unsafe.As(ref Unsafe.Add(ref dest, 32)) = Unsafe.As(ref Unsafe.Add(ref src, 32)); Unsafe.As(ref Unsafe.Add(ref dest, 40)) = Unsafe.As(ref Unsafe.Add(ref src, 40)); // [0,48] @@ -395,7 +99,7 @@ private static unsafe void MemCopy(ref byte dest, ref byte src, nuint len) // Unconditionally copy the last 16 bytes using destEnd and srcEnd and return. Debug.Assert(len > 16 && len <= 64); #if HAS_CUSTOM_BLOCKS - Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); + Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); #elif TARGET_64BIT Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); @@ -445,7 +149,7 @@ private static unsafe void MemCopy(ref byte dest, ref byte src, nuint len) MCPY05: // PInvoke to the native version when the copy length exceeds the threshold. - if (len > Buffer.MemmoveNativeThreshold) + if (len > MemmoveNativeThreshold) { goto PInvoke; } @@ -459,7 +163,7 @@ private static unsafe void MemCopy(ref byte dest, ref byte src, nuint len) // dest is more important to align than src because an unaligned store is more expensive // than an unaligned load. nuint misalignedElements = 64 - (nuint)Unsafe.AsPointer(ref dest) & 63; - Unsafe.As(ref dest) = Unsafe.As(ref src); + Unsafe.As(ref dest) = Unsafe.As(ref src); src = ref Unsafe.Add(ref src, misalignedElements); dest = ref Unsafe.Add(ref dest, misalignedElements); len -= misalignedElements; @@ -468,12 +172,12 @@ private static unsafe void MemCopy(ref byte dest, ref byte src, nuint len) // Copy 64-bytes at a time until the remainder is less than 64. 
// If remainder is greater than 16 bytes, then jump to MCPY00. Otherwise, unconditionally copy the last 16 bytes and return. - Debug.Assert(len > 64 && len <= Buffer.MemmoveNativeThreshold); + Debug.Assert(len > 64 && len <= MemmoveNativeThreshold); nuint n = len >> 6; MCPY06: #if HAS_CUSTOM_BLOCKS - Unsafe.As(ref dest) = Unsafe.As(ref src); + Unsafe.As(ref dest) = Unsafe.As(ref src); #elif TARGET_64BIT Unsafe.As(ref dest) = Unsafe.As(ref src); Unsafe.As(ref Unsafe.Add(ref dest, 8)) = Unsafe.As(ref Unsafe.Add(ref src, 8)); @@ -511,7 +215,7 @@ private static unsafe void MemCopy(ref byte dest, ref byte src, nuint len) if (len > 16) goto MCPY00; #if HAS_CUSTOM_BLOCKS - Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); + Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); #elif TARGET_64BIT Unsafe.As(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -16)); Unsafe.As(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As(ref Unsafe.Add(ref srcEnd, -8)); @@ -533,5 +237,189 @@ private static unsafe void MemCopy(ref byte dest, ref byte src, nuint len) PInvoke: Buffer._Memmove(ref dest, ref src, len); } + + [Intrinsic] // Unrolled for small sizes + public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength) + { + if (byteLength == 0) + return; + + ref byte bEnd = ref Unsafe.Add(ref b, byteLength); + + if (byteLength <= 16) + goto MZER02; + if (byteLength > 64) + goto MZER05; + + MZER00: + // Clear bytes which are multiples of 16 and leave the remainder for MZER01 to handle. + Debug.Assert(byteLength > 16 && byteLength <= 64); +#if HAS_CUSTOM_BLOCKS + Unsafe.WriteUnaligned(ref b, default); +#elif TARGET_64BIT + Unsafe.WriteUnaligned(ref b, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 8), 0); +#else + Unsafe.WriteUnaligned(ref b, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 4), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 12), 0); +#endif + if (byteLength <= 32) + goto MZER01; +#if HAS_CUSTOM_BLOCKS + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), default); +#elif TARGET_64BIT + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 24), 0); +#else + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 20), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 24), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 28), 0); +#endif + if (byteLength <= 48) + goto MZER01; +#if HAS_CUSTOM_BLOCKS + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), default); +#elif TARGET_64BIT + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 40), 0); +#else + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 36), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 40), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 44), 0); +#endif + + MZER01: + // Unconditionally clear the last 16 bytes using bEnd and return. 
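(For example, byteLength = 40: the block stores above clear [0,32), and the write at bEnd - 16 clears [24,40); the overlap on [24,32) redundantly rewrites zeros, which is the price of keeping the tail handling branch-free.)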
+ Debug.Assert(byteLength > 16 && byteLength <= 64); +#if HAS_CUSTOM_BLOCKS + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), default); +#elif TARGET_64BIT + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); +#else + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -12), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -4), 0); +#endif + return; + + MZER02: + // Clear the first 8 bytes and then unconditionally clear the last 8 bytes and return. + if ((byteLength & 24) == 0) + goto MZER03; + Debug.Assert(byteLength >= 8 && byteLength <= 16); +#if TARGET_64BIT + Unsafe.WriteUnaligned(ref b, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); +#else + Unsafe.WriteUnaligned(ref b, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 4), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -4), 0); +#endif + return; + + MZER03: + // Clear the first 4 bytes and then unconditionally clear the last 4 bytes and return. + if ((byteLength & 4) == 0) + goto MZER04; + Debug.Assert(byteLength >= 4 && byteLength < 8); + Unsafe.WriteUnaligned(ref b, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -4), 0); + return; + + MZER04: + // Clear the first byte. For pending bytes, do an unconditionally clear of the last 2 bytes and return. + Debug.Assert(byteLength < 4); + if (byteLength == 0) + return; + b = 0; + if ((byteLength & 2) == 0) + return; + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -2), 0); + return; + + MZER05: + // PInvoke to the native version when the clear length exceeds the threshold. + if (byteLength > ZeroMemoryNativeThreshold) + { + goto PInvoke; + } + +#if HAS_CUSTOM_BLOCKS + if (byteLength >= 256) + { + // Try to opportunistically align the destination below. The input isn't pinned, so the GC + // is free to move the references. We're therefore assuming that reads may still be unaligned. + nuint misalignedElements = 64 - (nuint)Unsafe.AsPointer(ref b) & 63; + Unsafe.WriteUnaligned(ref b, default); + b = ref Unsafe.Add(ref b, misalignedElements); + byteLength -= misalignedElements; + } +#endif + // Clear 64-bytes at a time until the remainder is less than 64. + // If remainder is greater than 16 bytes, then jump to MZER00. Otherwise, unconditionally clear the last 16 bytes and return. 
+ Debug.Assert(byteLength > 64 && byteLength <= ZeroMemoryNativeThreshold); + nuint n = byteLength >> 6; + + MZER06: +#if HAS_CUSTOM_BLOCKS + Unsafe.WriteUnaligned(ref b, default); +#elif TARGET_64BIT + Unsafe.WriteUnaligned(ref b, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 24), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 40), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 48), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 56), 0); +#else + Unsafe.WriteUnaligned(ref b, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 4), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 12), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 20), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 24), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 28), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 36), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 40), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 44), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 48), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 52), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 56), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 60), 0); +#endif + b = ref Unsafe.Add(ref b, 64); + n--; + if (n != 0) + goto MZER06; + + byteLength %= 64; + if (byteLength > 16) + goto MZER00; +#if HAS_CUSTOM_BLOCKS + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), default); +#elif TARGET_64BIT + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); +#else + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -12), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -4), 0); +#endif + return; + + PInvoke: + Buffer._ZeroMemory(ref b, byteLength); + } } } From 54639a1ea5b274ec56ec238ecbfeea3405ba0ef7 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 24 Feb 2024 17:55:38 +0100 Subject: [PATCH 29/40] Clean up --- .../src/System/Array.CoreCLR.cs | 4 +- .../src/System/Object.CoreCLR.cs | 2 +- .../InteropServices/Marshal.CoreCLR.cs | 4 +- .../src/System/String.CoreCLR.cs | 2 +- .../src/System/StubHelpers.cs | 10 +- .../src/System/Array.NativeAot.cs | 4 +- .../src/System/Object.NativeAot.cs | 2 +- .../InteropServices/Marshal.NativeAot.cs | 4 +- .../src/System/Array.cs | 6 +- .../src/System/IO/UnmanagedMemoryStream.cs | 8 +- .../CompilerServices/RuntimeHelpers.cs | 11 +-- .../Runtime/InteropServices/NativeMemory.cs | 2 +- .../Runtime/InteropServices/SafeBuffer.cs | 4 +- .../src/System/SpanHelpers.ByteMemOps.cs | 99 +++++++++++++++++++ .../src/System/String.Mono.cs | 2 +- 15 files changed, 128 insertions(+), 36 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Array.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Array.CoreCLR.cs index 16d9067567ee58..de7b3021c458fe 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Array.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Array.CoreCLR.cs @@ -74,7 +74,7 @@ private static unsafe void CopyImpl(Array sourceArray, int sourceIndex, Array de if (pMT->ContainsGCPointers) Buffer.BulkMoveWithWriteBarrier(ref dst, ref src, 
byteCount); else - Buffer.Memmove(ref dst, ref src, byteCount); + SpanHelpers.Memmove(ref dst, ref src, byteCount); // GC.KeepAlive(sourceArray) not required. pMT kept alive via sourceArray return; @@ -184,7 +184,7 @@ private static unsafe void CopyImplUnBoxEachElement(Array sourceArray, int sourc } else { - Buffer.Memmove(ref dest, ref obj.GetRawData(), destSize); + SpanHelpers.Memmove(ref dest, ref obj.GetRawData(), destSize); } } } diff --git a/src/coreclr/System.Private.CoreLib/src/System/Object.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Object.CoreCLR.cs index 70cff629fc28e6..88c929dbe74cbe 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Object.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Object.CoreCLR.cs @@ -30,7 +30,7 @@ protected internal unsafe object MemberwiseClone() if (RuntimeHelpers.GetMethodTable(clone)->ContainsGCPointers) Buffer.BulkMoveWithWriteBarrier(ref dst, ref src, byteCount); else - Buffer.Memmove(ref dst, ref src, byteCount); + SpanHelpers.Memmove(ref dst, ref src, byteCount); return clone; } diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.CoreCLR.cs index c04665aa6c22f4..bbdccc6cd2eed4 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.CoreCLR.cs @@ -266,7 +266,7 @@ public static unsafe void StructureToPtr(object structure, IntPtr ptr, bool fDel } else { - Buffer.Memmove(ref *(byte*)ptr, ref structure.GetRawData(), size); + SpanHelpers.Memmove(ref *(byte*)ptr, ref structure.GetRawData(), size); } } @@ -291,7 +291,7 @@ private static unsafe void PtrToStructureHelper(IntPtr ptr, object structure, bo } else { - Buffer.Memmove(ref structure.GetRawData(), ref *(byte*)ptr, size); + SpanHelpers.Memmove(ref structure.GetRawData(), ref *(byte*)ptr, size); } } diff --git a/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs b/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs index f15ad03d82182b..d2785251613efa 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/String.CoreCLR.cs @@ -39,7 +39,7 @@ internal static unsafe void InternalCopy(string src, IntPtr dest, int len) { if (len != 0) { - Buffer.Memmove(ref *(byte*)dest, ref Unsafe.As(ref src.GetRawStringData()), (nuint)len); + SpanHelpers.Memmove(ref *(byte*)dest, ref Unsafe.As(ref src.GetRawStringData()), (nuint)len); } } diff --git a/src/coreclr/System.Private.CoreLib/src/System/StubHelpers.cs b/src/coreclr/System.Private.CoreLib/src/System/StubHelpers.cs index 9874eef6dc2292..81c0dd8e1afecd 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/StubHelpers.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/StubHelpers.cs @@ -103,7 +103,7 @@ internal static unsafe IntPtr ConvertToNative(int flags, string strManaged, IntP // + 1 for the null character from the user. + 1 for the null character we put in. 
pbNativeBuffer = (byte*)Marshal.AllocCoTaskMem(nb + 2); - Buffer.Memmove(ref *pbNativeBuffer, ref MemoryMarshal.GetArrayDataReference(bytes), (nuint)nb); + SpanHelpers.Memmove(ref *pbNativeBuffer, ref MemoryMarshal.GetArrayDataReference(bytes), (nuint)nb); } } @@ -360,7 +360,7 @@ internal static unsafe IntPtr ConvertToNative(string strManaged, bool fBestFit, Debug.Assert(nbytesused >= 0 && nbytesused < nbytes, "Insufficient buffer allocated in VBByValStrMarshaler.ConvertToNative"); - Buffer.Memmove(ref *pNative, ref MemoryMarshal.GetArrayDataReference(bytes), (nuint)nbytesused); + SpanHelpers.Memmove(ref *pNative, ref MemoryMarshal.GetArrayDataReference(bytes), (nuint)nbytesused); pNative[nbytesused] = 0; *pLength = nbytesused; @@ -409,7 +409,7 @@ internal static unsafe IntPtr ConvertToNative(int flags, string strManaged) IntPtr bstr = Marshal.AllocBSTRByteLen(length); if (bytes != null) { - Buffer.Memmove(ref *(byte*)bstr, ref MemoryMarshal.GetArrayDataReference(bytes), length); + SpanHelpers.Memmove(ref *(byte*)bstr, ref MemoryMarshal.GetArrayDataReference(bytes), length); } return bstr; @@ -1484,7 +1484,7 @@ internal static unsafe void FmtClassUpdateNativeInternal(object obj, byte* pNati } else { - Buffer.Memmove(ref *pNative, ref obj.GetRawData(), size); + SpanHelpers.Memmove(ref *pNative, ref obj.GetRawData(), size); } } @@ -1503,7 +1503,7 @@ internal static unsafe void FmtClassUpdateCLRInternal(object obj, byte* pNative) } else { - Buffer.Memmove(ref obj.GetRawData(), ref *pNative, size); + SpanHelpers.Memmove(ref obj.GetRawData(), ref *pNative, size); } } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs index 64ba6597446a81..61f70e212483c1 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Array.NativeAot.cs @@ -494,7 +494,7 @@ private static unsafe void CopyImplValueTypeArrayNoInnerGcRefs(Array sourceArray // Copy scenario: ValueType-array to value-type array with no embedded gc-refs. nuint elementSize = sourceArray.ElementSize; - Buffer.Memmove( + SpanHelpers.Memmove( ref Unsafe.AddByteOffset(ref MemoryMarshal.GetArrayDataReference(destinationArray), (nuint)destinationIndex * elementSize), ref Unsafe.AddByteOffset(ref MemoryMarshal.GetArrayDataReference(sourceArray), (nuint)sourceIndex * elementSize), elementSize * (nuint)length); @@ -534,7 +534,7 @@ private static unsafe void CopyImplPrimitiveTypeWithWidening(Array sourceArray, if (sourceElementType == destElementType) { // Multidim arrays and enum->int copies can still reach this path. 
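(For instance - an illustrative call, not from the patch - Array.Copy(new DayOfWeek[4], new int[4], 4) compares equal element types here because DayOfWeek's underlying primitive is int, so the copy degenerates into the raw memmove below.)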
- Buffer.Memmove(ref *data, ref *srcData, (nuint)length * srcElementSize); + SpanHelpers.Memmove(ref *data, ref *srcData, (nuint)length * srcElementSize); return; } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs index 727fbc9fbfdd2b..9f8dbe11a212eb 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Object.NativeAot.cs @@ -41,7 +41,7 @@ protected internal unsafe object MemberwiseClone() if (this.GetMethodTable()->ContainsGCPointers) Buffer.BulkMoveWithWriteBarrier(ref dst, ref src, byteCount); else - Buffer.Memmove(ref dst, ref src, byteCount); + SpanHelpers.Memmove(ref dst, ref src, byteCount); return clone; } diff --git a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.NativeAot.cs b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.NativeAot.cs index a3ccfc5a8c431f..490997c1da90f5 100644 --- a/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.NativeAot.cs +++ b/src/coreclr/nativeaot/System.Private.CoreLib/src/System/Runtime/InteropServices/Marshal.NativeAot.cs @@ -93,7 +93,7 @@ internal static unsafe void PtrToStructureImpl(IntPtr ptr, object structure) { nuint size = (nuint)RuntimeInteropData.GetStructUnsafeStructSize(structureTypeHandle); - Buffer.Memmove(ref structure.GetRawData(), ref *(byte*)ptr, size); + SpanHelpers.Memmove(ref structure.GetRawData(), ref *(byte*)ptr, size); } } @@ -180,7 +180,7 @@ public static unsafe void StructureToPtr(object structure, IntPtr ptr, bool fDel { nuint size = (nuint)RuntimeInteropData.GetStructUnsafeStructSize(structureTypeHandle); - Buffer.Memmove(ref *(byte*)ptr, ref structure.GetRawData(), size); + SpanHelpers.Memmove(ref *(byte*)ptr, ref structure.GetRawData(), size); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Array.cs b/src/libraries/System.Private.CoreLib/src/System/Array.cs index c21caa8cc1d70a..84bd5ed20eed1f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Array.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Array.cs @@ -58,7 +58,7 @@ public static void Resize([NotNull] ref T[]? array, int newSize) // actually of type U[], where U:T; or that an int[] <-> uint[] or // similar cast has occurred. In any case, since it's always legal // to reinterpret U as T in this scenario (but not necessarily the - // other way around), we can use Buffer.Memmove here. + // other way around), we can use SpanHelpers.Memmove here. T[] newArray = new T[newSize]; Buffer.Memmove( @@ -377,7 +377,7 @@ public static unsafe void Copy(Array sourceArray, Array destinationArray, int le if (pMT->ContainsGCPointers) Buffer.BulkMoveWithWriteBarrier(ref dst, ref src, byteCount); else - Buffer.Memmove(ref dst, ref src, byteCount); + SpanHelpers.Memmove(ref dst, ref src, byteCount); // GC.KeepAlive(sourceArray) not required. pMT kept alive via sourceArray return; @@ -408,7 +408,7 @@ public static unsafe void Copy(Array sourceArray, int sourceIndex, Array destina if (pMT->ContainsGCPointers) Buffer.BulkMoveWithWriteBarrier(ref dst, ref src, byteCount); else - Buffer.Memmove(ref dst, ref src, byteCount); + SpanHelpers.Memmove(ref dst, ref src, byteCount); // GC.KeepAlive(sourceArray) not required. 
pMT kept alive via sourceArray return; diff --git a/src/libraries/System.Private.CoreLib/src/System/IO/UnmanagedMemoryStream.cs b/src/libraries/System.Private.CoreLib/src/System/IO/UnmanagedMemoryStream.cs index b1b18a2c343135..68adbf72bc6b96 100644 --- a/src/libraries/System.Private.CoreLib/src/System/IO/UnmanagedMemoryStream.cs +++ b/src/libraries/System.Private.CoreLib/src/System/IO/UnmanagedMemoryStream.cs @@ -390,7 +390,7 @@ internal int ReadCore(Span buffer) try { _buffer.AcquirePointer(ref pointer); - Buffer.Memmove(ref MemoryMarshal.GetReference(buffer), ref *(pointer + pos + _offset), (nuint)nInt); + SpanHelpers.Memmove(ref MemoryMarshal.GetReference(buffer), ref *(pointer + pos + _offset), (nuint)nInt); } finally { @@ -402,7 +402,7 @@ internal int ReadCore(Span buffer) } else { - Buffer.Memmove(ref MemoryMarshal.GetReference(buffer), ref *(_mem + pos), (nuint)nInt); + SpanHelpers.Memmove(ref MemoryMarshal.GetReference(buffer), ref *(_mem + pos), (nuint)nInt); } } @@ -669,7 +669,7 @@ internal unsafe void WriteCore(ReadOnlySpan buffer) try { _buffer.AcquirePointer(ref pointer); - Buffer.Memmove(ref *(pointer + pos + _offset), ref MemoryMarshal.GetReference(buffer), (nuint)buffer.Length); + SpanHelpers.Memmove(ref *(pointer + pos + _offset), ref MemoryMarshal.GetReference(buffer), (nuint)buffer.Length); } finally { @@ -681,7 +681,7 @@ internal unsafe void WriteCore(ReadOnlySpan buffer) } else { - Buffer.Memmove(ref *(_mem + pos), ref MemoryMarshal.GetReference(buffer), (nuint)buffer.Length); + SpanHelpers.Memmove(ref *(_mem + pos), ref MemoryMarshal.GetReference(buffer), (nuint)buffer.Length); } _position = n; diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs index bb6b2599ac52a1..ca2c8995580632 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs @@ -50,7 +50,7 @@ public static T[] GetSubArray(T[] array, Range range) } // In either case, the newly-allocated array is the exact same type as the - // original incoming array. It's safe for us to Buffer.Memmove the contents + // original incoming array. It's safe for us to SpanHelpers.Memmove the contents // from the source array to the destination array, otherwise the contents // wouldn't have been valid for the source array in the first place. 
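As a sketch of the covariance scenario this comment is defending against (illustrative code, not part of the patch; assumes using System.Runtime.CompilerServices):

    object[] array = new string[] { "a", "b", "c" };      // runtime type is string[]
    object[] slice = RuntimeHelpers.GetSubArray(array, 1..3);
    // The new array is allocated with the source's actual type (string[]),
    // so moving the elements as raw memory only ever puts string references
    // into a string[] - the reinterpretation is safe in this direction.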
@@ -135,8 +135,6 @@ private static void MemCopy(ref byte dest, ref byte src, nuint size)
             if (size > 0)
             {
                 // Implicit nullchecks
-                _ = Unsafe.ReadUnaligned<byte>(ref dest);
-                _ = Unsafe.ReadUnaligned<byte>(ref src);
                 SpanHelpers.Memmove(ref dest, ref src, size);
             }
         }
@@ -159,12 +157,7 @@ private static void MemZero(ref byte dest, nuint size)
 #endif
         private static void MemSet(ref byte dest, byte value, nuint size)
         {
-            if (size > 0)
-            {
-                // Implicit nullcheck
-                _ = Unsafe.ReadUnaligned<byte>(ref dest);
-                SpanHelpers.Fill(ref dest, size, value);
-            }
+            SpanHelpers.Fill(ref dest, size, value);
         }
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeMemory.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeMemory.cs
index 069d67e5e4621e..7fb4af35480a9b 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeMemory.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/NativeMemory.cs
@@ -61,7 +61,7 @@ public static unsafe void Clear(void* ptr, nuint byteCount)
         [CLSCompliant(false)]
         public static void Copy(void* source, void* destination, nuint byteCount)
         {
-            Buffer.Memmove(ref *(byte*)destination, ref *(byte*)source, byteCount);
+            SpanHelpers.Memmove(ref *(byte*)destination, ref *(byte*)source, byteCount);
         }
 
         /// <summary>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/SafeBuffer.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/SafeBuffer.cs
index d35b5dd174fc73..76858298feb217 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/SafeBuffer.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/InteropServices/SafeBuffer.cs
@@ -194,7 +194,7 @@ public T Read<T>(ulong byteOffset) where T : struct
             {
                 DangerousAddRef(ref mustCallRelease);
 
-                Buffer.Memmove(ref Unsafe.As<T, byte>(ref value), ref *ptr, sizeofT);
+                SpanHelpers.Memmove(ref Unsafe.As<T, byte>(ref value), ref *ptr, sizeofT);
             }
             finally
             {
@@ -281,7 +281,7 @@ public void Write<T>(ulong byteOffset, T value) where T : struct
             {
                 DangerousAddRef(ref mustCallRelease);
 
-                Buffer.Memmove(ref *ptr, ref Unsafe.As<T, byte>(ref value), sizeofT);
+                SpanHelpers.Memmove(ref *ptr, ref Unsafe.As<T, byte>(ref value), sizeofT);
             }
             finally
             {
diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs
index e9fc88e7234675..7a55cf5190188d 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs
@@ -7,6 +7,7 @@
 #endif
 
 using System.Diagnostics;
+using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 
@@ -421,5 +422,103 @@ public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength)
         PInvoke:
             Buffer._ZeroMemory(ref b, byteLength);
         }
+
+        // TODO: This implementation is suboptimal
+        public static unsafe void Fill(ref byte refData, nuint numElements, byte value)
+        {
+            if (!Vector.IsHardwareAccelerated)
+            {
+                goto CannotVectorize;
+            }
+
+            if (numElements >= (nuint)Vector<byte>.Count)
+            {
+                // We have enough data for at least one vectorized write.
+                Vector<byte> vector = new(value);
+                nuint stopLoopAtOffset = numElements & (nuint)(nint)(2 * (int)-Vector<byte>.Count); // intentional sign extension carries the negative bit
+                nuint offset = 0;
+
+                // Loop, writing 2 vectors at a time.
+                // Compare 'numElements' rather than 'stopLoopAtOffset' because we don't want a dependency
+                // on the very recently calculated 'stopLoopAtOffset' value.
+                if (numElements >= (uint)(2 * Vector<byte>.Count))
+                {
+                    do
+                    {
+                        Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref refData, offset), vector);
+                        Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref refData, offset + (nuint)Vector<byte>.Count), vector);
+                        offset += (uint)(2 * Vector<byte>.Count);
+                    } while (offset < stopLoopAtOffset);
+                }
+
+                // At this point, if any data remains to be written, it's strictly less than
+                // 2 * sizeof(Vector<byte>) bytes. The loop above had us write an even number of vectors.
+                // If the total byte length instead involves us writing an odd number of vectors, write
+                // one additional vector now. The bit check below tells us if we're in an "odd vector
+                // count" situation.
+                if ((numElements & (nuint)Vector<byte>.Count) != 0)
+                {
+                    Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref refData, offset), vector);
+                }
+
+                // It's possible that some small buffer remains to be populated - something that won't
+                // fit an entire vector's worth of data. Instead of falling back to a loop, we'll write
+                // a vector at the very end of the buffer. This may involve overwriting previously
+                // populated data, which is fine since we're splatting the same value for all entries.
+                // There's no need to perform a length check here because we already performed this
+                // check before entering the vectorized code path.
+                Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref refData, numElements - (nuint)Vector<byte>.Count), vector);
+
+                // And we're done!
+                return;
+            }
+
+        CannotVectorize:
+
+            // If we reached this point, we cannot vectorize this T, or there are too few
+            // elements for us to vectorize. Fall back to an unrolled loop.
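(E.g. numElements = 13: the 8-wide loop just below runs once, filling bytes [0,8); the '& 4' block fills [8,12); the '& 2' block is skipped; the '& 1' write sets byte 12.)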
+ nuint i = 0; + + // Write 8 elements at a time + if (numElements >= 8) + { + nuint stopLoopAtOffset = numElements & ~(nuint)7; + do + { + Unsafe.Add(ref refData, (nint)i + 0) = value; + Unsafe.Add(ref refData, (nint)i + 1) = value; + Unsafe.Add(ref refData, (nint)i + 2) = value; + Unsafe.Add(ref refData, (nint)i + 3) = value; + Unsafe.Add(ref refData, (nint)i + 4) = value; + Unsafe.Add(ref refData, (nint)i + 5) = value; + Unsafe.Add(ref refData, (nint)i + 6) = value; + Unsafe.Add(ref refData, (nint)i + 7) = value; + } while ((i += 8) < stopLoopAtOffset); + } + + // Write next 4 elements if needed + if ((numElements & 4) != 0) + { + Unsafe.Add(ref refData, (nint)i + 0) = value; + Unsafe.Add(ref refData, (nint)i + 1) = value; + Unsafe.Add(ref refData, (nint)i + 2) = value; + Unsafe.Add(ref refData, (nint)i + 3) = value; + i += 4; + } + + // Write next 2 elements if needed + if ((numElements & 2) != 0) + { + Unsafe.Add(ref refData, (nint)i + 0) = value; + Unsafe.Add(ref refData, (nint)i + 1) = value; + i += 2; + } + + // Write final element if needed + if ((numElements & 1) != 0) + { + Unsafe.Add(ref refData, (nint)i) = value; + } + } } } diff --git a/src/mono/System.Private.CoreLib/src/System/String.Mono.cs b/src/mono/System.Private.CoreLib/src/System/String.Mono.cs index 7314504aff9a22..7dedf5a6e536d0 100644 --- a/src/mono/System.Private.CoreLib/src/System/String.Mono.cs +++ b/src/mono/System.Private.CoreLib/src/System/String.Mono.cs @@ -116,7 +116,7 @@ private static unsafe void memset(byte* dest, int val, int len) private static unsafe void memcpy(byte* dest, byte* src, int size) { - Buffer.Memmove(ref *dest, ref *src, (nuint)size); + SpanHelpers.Memmove(ref *dest, ref *src, (nuint)size); } /* Used by the runtime */ From d8da20ae3c31c51a42b8712689e71696dfd2ec7c Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 24 Feb 2024 18:27:45 +0100 Subject: [PATCH 30/40] Fix StackOverflow in Mono --- src/mono/System.Private.CoreLib/src/System/Buffer.Mono.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mono/System.Private.CoreLib/src/System/Buffer.Mono.cs b/src/mono/System.Private.CoreLib/src/System/Buffer.Mono.cs index 8f45f602e6fb11..bcb9b6b38f2e39 100644 --- a/src/mono/System.Private.CoreLib/src/System/Buffer.Mono.cs +++ b/src/mono/System.Private.CoreLib/src/System/Buffer.Mono.cs @@ -23,7 +23,7 @@ internal static unsafe void Memmove(ref T destination, ref T source, nuint el { #pragma warning disable 8500 // sizeof of managed types // Blittable memmove - Memmove( + SpanHelpers.Memmove( ref Unsafe.As(ref destination), ref Unsafe.As(ref source), elementCount * (nuint)sizeof(T)); From ec82a4d063bc4b8cb30f7706e9699c449777dbd0 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 24 Feb 2024 21:12:48 +0100 Subject: [PATCH 31/40] Fix IL test --- src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.il | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.il b/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.il index 068f11ad7b6176..b84f69120bb4fd 100644 --- a/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.il +++ b/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.il @@ -44,7 +44,7 @@ // C#: // byte* data = stackalloc byte[64]; // to trigger slow helper-based tail calls - // Buffer.Memmove(ref Unsafe.AsRef(data), ref src, 64); + // SpanHelpers.Memmove(ref Unsafe.AsRef(data), ref src, 64); ldc.i4.s 64 conv.u @@ -53,7 +53,7 @@ ldarg.0 ldc.i4.s 64 conv.i - tail. 
call void [System.Runtime]System.Buffer::Memmove(uint8&, uint8&, native uint) + tail. call void [System.Runtime]System.SpanHelpers::Memmove(uint8&, uint8&, native uint) ret } } From f3c01414107f9faf331029f9891de214f29895a4 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 24 Feb 2024 22:28:28 +0100 Subject: [PATCH 32/40] Address feedback --- src/coreclr/jit/importer.cpp | 1 + src/coreclr/vm/appdomain.cpp | 1 - src/coreclr/vm/callcounting.cpp | 3 +- src/coreclr/vm/corelib.h | 8 +- src/coreclr/vm/ecall.cpp | 6 +- src/coreclr/vm/vars.cpp | 1 - src/coreclr/vm/vars.hpp | 1 - .../CompilerServices/RuntimeHelpers.cs | 33 --- .../src/System/SpanHelpers.ByteMemOps.cs | 277 +++++++++--------- .../Vectorization/BufferMemmoveTailCall.il | 99 ------- .../BufferMemmoveTailCall.ilproj | 8 - 11 files changed, 154 insertions(+), 284 deletions(-) delete mode 100644 src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.il delete mode 100644 src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.ilproj diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 0436583627647c..e0d472b2538e1c 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -10346,6 +10346,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) GenTreeCall* call; if (opcode == CEE_INITBLK) { + // value is zero -> memzero, otherwise -> memset if (op2->IsIntegralConst(0)) { call = gtNewHelperCallNode(CORINFO_HELP_MEMZERO, TYP_VOID, op1, op3); diff --git a/src/coreclr/vm/appdomain.cpp b/src/coreclr/vm/appdomain.cpp index fbbb4ee4c12ee5..bb5d3d17e00534 100644 --- a/src/coreclr/vm/appdomain.cpp +++ b/src/coreclr/vm/appdomain.cpp @@ -1378,7 +1378,6 @@ void SystemDomain::LoadBaseSystemClasses() g_pWeakReferenceOfTClass = CoreLibBinder::GetClass(CLASS__WEAKREFERENCEGENERIC); g_pCastHelpers = CoreLibBinder::GetClass(CLASS__CASTHELPERS); - g_pRuntimeHelpers = CoreLibBinder::GetClass(CLASS__RUNTIME_HELPERS); #ifdef FEATURE_COMINTEROP if (g_pConfig->IsBuiltInCOMSupported()) diff --git a/src/coreclr/vm/callcounting.cpp b/src/coreclr/vm/callcounting.cpp index 5a7c6daee3d7b0..e7c70dccf40964 100644 --- a/src/coreclr/vm/callcounting.cpp +++ b/src/coreclr/vm/callcounting.cpp @@ -662,8 +662,7 @@ bool CallCountingManager::SetCodeEntryPoint( // Let's tier up all cast and runtime helpers faster than other methods. This is because we want to import them as // direct calls in codegen and they need to be promoted earlier than their callers. 
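    // (Illustration: assuming the default call-count threshold of 30, these helpers are now
    // promoted to Tier-1 after 15 calls instead of 30.)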
- PTR_MethodTable mt = methodDesc->GetMethodTable(); - if (mt == g_pCastHelpers || mt == g_pRuntimeHelpers) + if (methodDesc->GetMethodTable() == g_pCastHelpers) { callCountThreshold = max(1, (CallCount)(callCountThreshold / 2)); } diff --git a/src/coreclr/vm/corelib.h b/src/coreclr/vm/corelib.h index 5f1a86d9203ecd..c52c58954165a2 100644 --- a/src/coreclr/vm/corelib.h +++ b/src/coreclr/vm/corelib.h @@ -632,9 +632,11 @@ DEFINE_METHOD(RUNTIME_HELPERS, ENUM_COMPARE_TO, EnumCompareTo, NoSig DEFINE_METHOD(RUNTIME_HELPERS, ALLOC_TAILCALL_ARG_BUFFER, AllocTailCallArgBuffer, SM_Int_IntPtr_RetIntPtr) DEFINE_METHOD(RUNTIME_HELPERS, GET_TAILCALL_INFO, GetTailCallInfo, NoSig) DEFINE_METHOD(RUNTIME_HELPERS, DISPATCH_TAILCALLS, DispatchTailCalls, NoSig) -DEFINE_METHOD(RUNTIME_HELPERS, MEMSET, MemSet, SM_RefByte_Byte_UIntPtr_RetVoid) -DEFINE_METHOD(RUNTIME_HELPERS, MEMZERO, MemZero, SM_RefByte_UIntPtr_RetVoid) -DEFINE_METHOD(RUNTIME_HELPERS, MEMCOPY, MemCopy, SM_RefByte_RefByte_UIntPtr_RetVoid) + +DEFINE_CLASS(SPAN_HELPERS, System, SpanHelpers) +DEFINE_METHOD(SPAN_HELPERS, MEMSET, Fill, SM_RefByte_Byte_UIntPtr_RetVoid) +DEFINE_METHOD(SPAN_HELPERS, MEMZERO, ClearWithoutReferences, SM_RefByte_UIntPtr_RetVoid) +DEFINE_METHOD(SPAN_HELPERS, MEMCOPY, Memmove, SM_RefByte_RefByte_UIntPtr_RetVoid) DEFINE_CLASS(UNSAFE, CompilerServices, Unsafe) DEFINE_METHOD(UNSAFE, AS_POINTER, AsPointer, NoSig) diff --git a/src/coreclr/vm/ecall.cpp b/src/coreclr/vm/ecall.cpp index 0d914bd4bd4ebc..7a9538d8ea7dd9 100644 --- a/src/coreclr/vm/ecall.cpp +++ b/src/coreclr/vm/ecall.cpp @@ -145,15 +145,15 @@ void ECall::PopulateManagedHelpers() pDest = pMD->GetMultiCallableAddrOfCode(); SetJitHelperFunction(CORINFO_HELP_LDELEMA_REF, pDest); - pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__RUNTIME_HELPERS__MEMSET)); + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__SPAN_HELPERS__MEMSET)); pDest = pMD->GetMultiCallableAddrOfCode(); SetJitHelperFunction(CORINFO_HELP_MEMSET, pDest); - pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__RUNTIME_HELPERS__MEMZERO)); + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__SPAN_HELPERS__MEMZERO)); pDest = pMD->GetMultiCallableAddrOfCode(); SetJitHelperFunction(CORINFO_HELP_MEMZERO, pDest); - pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__RUNTIME_HELPERS__MEMCOPY)); + pMD = CoreLibBinder::GetMethod((BinderMethodID)(METHOD__SPAN_HELPERS__MEMCOPY)); pDest = pMD->GetMultiCallableAddrOfCode(); SetJitHelperFunction(CORINFO_HELP_MEMCPY, pDest); } diff --git a/src/coreclr/vm/vars.cpp b/src/coreclr/vm/vars.cpp index 26d01466bdbbff..ba72d19f2b1e48 100644 --- a/src/coreclr/vm/vars.cpp +++ b/src/coreclr/vm/vars.cpp @@ -102,7 +102,6 @@ GVAL_IMPL_INIT(DWORD, g_debuggerWordTLSIndex, TLS_OUT_OF_INDEXES); GVAL_IMPL_INIT(DWORD, g_TlsIndex, TLS_OUT_OF_INDEXES); MethodTable* g_pCastHelpers; -MethodTable* g_pRuntimeHelpers; #ifdef FEATURE_EH_FUNCLETS GPTR_IMPL(MethodTable, g_pEHClass); GPTR_IMPL(MethodTable, g_pExceptionServicesInternalCallsClass); diff --git a/src/coreclr/vm/vars.hpp b/src/coreclr/vm/vars.hpp index ef1bddfa3d39c5..65712d031512d1 100644 --- a/src/coreclr/vm/vars.hpp +++ b/src/coreclr/vm/vars.hpp @@ -412,7 +412,6 @@ EXTERN OBJECTHANDLE g_pPreallocatedExecutionEngineException; EXTERN OBJECTHANDLE g_pPreallocatedSentinelObject; EXTERN MethodTable* g_pCastHelpers; -EXTERN MethodTable* g_pRuntimeHelpers; GPTR_DECL(Thread,g_pFinalizerThread); GPTR_DECL(Thread,g_pSuspensionThread); diff --git 
a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs index ca2c8995580632..38b923764e9764 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/CompilerServices/RuntimeHelpers.cs @@ -126,38 +126,5 @@ internal static bool IsPrimitiveType(this CorElementType et) [Intrinsic] internal static bool IsKnownConstant(char t) => false; #pragma warning restore IDE0060 - -#if NATIVEAOT - [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemCopy")] -#endif - private static void MemCopy(ref byte dest, ref byte src, nuint size) - { - if (size > 0) - { - // Implicit nullchecks - SpanHelpers.Memmove(ref dest, ref src, size); - } - } - -#if NATIVEAOT - [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemZero")] -#endif - private static void MemZero(ref byte dest, nuint size) - { - if (size > 0) - { - // Implicit nullcheck - _ = Unsafe.ReadUnaligned(ref dest); - SpanHelpers.ClearWithoutReferences(ref dest, size); - } - } - -#if NATIVEAOT - [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemSet")] -#endif - private static void MemSet(ref byte dest, byte value, nuint size) - { - SpanHelpers.Fill(ref dest, size, value); - } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs index 7a55cf5190188d..c58ee91ad07b9a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs @@ -16,10 +16,7 @@ namespace System internal static partial class SpanHelpers // .ByteMemOps { #if TARGET_ARM64 || TARGET_LOONGARCH64 - // TODO: Determine optimal value - // https://github.com/dotnet/runtime/issues/8897 (Linux) - // https://github.com/dotnet/runtime/issues/8896 (Windows) - private static nuint MemmoveNativeThreshold => nuint.MaxValue; + private const ulong MemmoveNativeThreshold = ulong.MaxValue; #elif TARGET_ARM private const nuint MemmoveNativeThreshold = 512; #else @@ -28,6 +25,7 @@ internal static partial class SpanHelpers // .ByteMemOps // TODO: Determine optimal value private const nuint ZeroMemoryNativeThreshold = 1024; + #if HAS_CUSTOM_BLOCKS [StructLayout(LayoutKind.Sequential, Size = 16)] private struct Block16 {} @@ -36,6 +34,9 @@ private struct Block16 {} private struct Block64 {} #endif // HAS_CUSTOM_BLOCKS +#if NATIVEAOT + [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemSet")] +#endif [Intrinsic] // Unrolled for small constant lengths internal static unsafe void Memmove(ref byte dest, ref byte src, nuint len) { @@ -236,217 +237,227 @@ internal static unsafe void Memmove(ref byte dest, ref byte src, nuint len) } PInvoke: + // Implicit nullchecks + _ = Unsafe.ReadUnaligned(ref dest); + _ = Unsafe.ReadUnaligned(ref src); Buffer._Memmove(ref dest, ref src, len); } +#if NATIVEAOT + [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemSet")] +#endif [Intrinsic] // Unrolled for small sizes - public static unsafe void ClearWithoutReferences(ref byte b, nuint byteLength) + public static unsafe void ClearWithoutReferences(ref byte dest, nuint len) { - if (byteLength == 0) + if (len == 0) return; - ref byte bEnd = ref Unsafe.Add(ref b, byteLength); + ref byte destEnd = ref Unsafe.Add(ref dest, len); - if (byteLength <= 16) + if (len <= 16) goto MZER02; - if (byteLength 
> 64) + if (len > 64) goto MZER05; MZER00: // Clear bytes which are multiples of 16 and leave the remainder for MZER01 to handle. - Debug.Assert(byteLength > 16 && byteLength <= 64); + Debug.Assert(len > 16 && len <= 64); #if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref b, default); + Unsafe.WriteUnaligned(ref dest, default); #elif TARGET_64BIT - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 8), 0); + Unsafe.WriteUnaligned(ref dest, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 8), 0); #else - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 4), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 12), 0); + Unsafe.WriteUnaligned(ref dest, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 4), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 12), 0); #endif - if (byteLength <= 32) + if (len <= 32) goto MZER01; #if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), default); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 16), default); #elif TARGET_64BIT - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 24), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 24), 0); #else - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 20), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 24), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 28), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 20), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 24), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 28), 0); #endif - if (byteLength <= 48) + if (len <= 48) goto MZER01; #if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), default); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 32), default); #elif TARGET_64BIT - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 40), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 32), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 40), 0); #else - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 36), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 40), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 44), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 32), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 36), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 40), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 44), 0); #endif MZER01: - // Unconditionally clear the last 16 bytes using bEnd and return. - Debug.Assert(byteLength > 16 && byteLength <= 64); + // Unconditionally clear the last 16 bytes using destEnd and return. 
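+            // e.g. len == 50: MZER00 zeroed bytes 0..47 with three 16-byte writes, and this
+            // trailing write covers bytes 34..49; re-zeroing bytes 34..47 is harmless. (Illustrative.)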
+ Debug.Assert(len > 16 && len <= 64); #if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), default); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -16), default); #elif TARGET_64BIT - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -8), 0); #else - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -12), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -4), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -12), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -4), 0); #endif return; MZER02: // Clear the first 8 bytes and then unconditionally clear the last 8 bytes and return. - if ((byteLength & 24) == 0) + if ((len & 24) == 0) goto MZER03; - Debug.Assert(byteLength >= 8 && byteLength <= 16); + Debug.Assert(len >= 8 && len <= 16); #if TARGET_64BIT - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); + Unsafe.WriteUnaligned(ref dest, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -8), 0); #else - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 4), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -4), 0); + Unsafe.WriteUnaligned(ref dest, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 4), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -4), 0); #endif return; MZER03: // Clear the first 4 bytes and then unconditionally clear the last 4 bytes and return. - if ((byteLength & 4) == 0) + if ((len & 4) == 0) goto MZER04; - Debug.Assert(byteLength >= 4 && byteLength < 8); - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -4), 0); + Debug.Assert(len >= 4 && len < 8); + Unsafe.WriteUnaligned(ref dest, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -4), 0); return; MZER04: // Clear the first byte. For pending bytes, do an unconditionally clear of the last 2 bytes and return. - Debug.Assert(byteLength < 4); - if (byteLength == 0) + Debug.Assert(len < 4); + if (len == 0) return; - b = 0; - if ((byteLength & 2) == 0) + dest = 0; + if ((len & 2) == 0) return; - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -2), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -2), 0); return; MZER05: // PInvoke to the native version when the clear length exceeds the threshold. - if (byteLength > ZeroMemoryNativeThreshold) + if (len > ZeroMemoryNativeThreshold) { goto PInvoke; } #if HAS_CUSTOM_BLOCKS - if (byteLength >= 256) + if (len >= 256) { // Try to opportunistically align the destination below. The input isn't pinned, so the GC // is free to move the references. We're therefore assuming that reads may still be unaligned. 
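             // e.g. if 'dest' % 64 == 24, then (64 - dest) & 63 == 40: the single unaligned
             // 64-byte write below covers bytes 0..63, and advancing by 40 leaves 'dest'
             // 64-byte aligned for the main loop. (Note: '-' binds tighter than '&' here.)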
- nuint misalignedElements = 64 - (nuint)Unsafe.AsPointer(ref b) & 63; - Unsafe.WriteUnaligned(ref b, default); - b = ref Unsafe.Add(ref b, misalignedElements); - byteLength -= misalignedElements; + nuint misalignedElements = 64 - (nuint)Unsafe.AsPointer(ref dest) & 63; + Unsafe.WriteUnaligned(ref dest, default); + dest = ref Unsafe.Add(ref dest, misalignedElements); + len -= misalignedElements; } #endif // Clear 64-bytes at a time until the remainder is less than 64. // If remainder is greater than 16 bytes, then jump to MZER00. Otherwise, unconditionally clear the last 16 bytes and return. - Debug.Assert(byteLength > 64 && byteLength <= ZeroMemoryNativeThreshold); - nuint n = byteLength >> 6; + Debug.Assert(len > 64 && len <= ZeroMemoryNativeThreshold); + nuint n = len >> 6; MZER06: #if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref b, default); + Unsafe.WriteUnaligned(ref dest, default); #elif TARGET_64BIT - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 24), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 40), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 48), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 56), 0); + Unsafe.WriteUnaligned(ref dest, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 24), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 32), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 40), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 48), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 56), 0); #else - Unsafe.WriteUnaligned(ref b, 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 4), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 12), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 20), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 24), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 28), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 32), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 36), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 40), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 44), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 48), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 52), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 56), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref b, 60), 0); + Unsafe.WriteUnaligned(ref dest, 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 4), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 12), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 20), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 24), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 28), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 32), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 36), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 40), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 44), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 48), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 52), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 56), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref dest, 60), 0); 
#endif - b = ref Unsafe.Add(ref b, 64); + dest = ref Unsafe.Add(ref dest, 64); n--; if (n != 0) goto MZER06; - byteLength %= 64; - if (byteLength > 16) + len %= 64; + if (len > 16) goto MZER00; #if HAS_CUSTOM_BLOCKS - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), default); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -16), default); #elif TARGET_64BIT - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -8), 0); #else - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -16), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -12), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -8), 0); - Unsafe.WriteUnaligned(ref Unsafe.Add(ref bEnd, -4), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -16), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -12), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -8), 0); + Unsafe.WriteUnaligned(ref Unsafe.Add(ref destEnd, -4), 0); #endif return; PInvoke: - Buffer._ZeroMemory(ref b, byteLength); + // Implicit nullchecks + _ = Unsafe.ReadUnaligned(ref dest); + Buffer._ZeroMemory(ref dest, len); } - // TODO: This implementation is suboptimal - public static unsafe void Fill(ref byte refData, nuint numElements, byte value) +#if NATIVEAOT + [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemSet")] +#endif + public static void Fill(ref byte dest, byte value, nuint len) { if (!Vector.IsHardwareAccelerated) { goto CannotVectorize; } - if (numElements >= (nuint)Vector.Count) + if (len >= (nuint)Vector.Count) { // We have enough data for at least one vectorized write. Vector vector = new (value); - nuint stopLoopAtOffset = numElements & (nuint)(nint)(2 * (int)-Vector.Count); // intentional sign extension carries the negative bit + nuint stopLoopAtOffset = len & (nuint)(nint)(2 * (int)-Vector.Count); // intentional sign extension carries the negative bit nuint offset = 0; // Loop, writing 2 vectors at a time. // Compare 'numElements' rather than 'stopLoopAtOffset' because we don't want a dependency // on the very recently calculated 'stopLoopAtOffset' value. - if (numElements >= (uint)(2 * Vector.Count)) + if (len >= (uint)(2 * Vector.Count)) { do { - Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref refData, offset), vector); - Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref refData, offset + (nuint)Vector.Count), vector); + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, offset), vector); + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, offset + (nuint)Vector.Count), vector); offset += (uint)(2 * Vector.Count); } while (offset < stopLoopAtOffset); } @@ -456,9 +467,9 @@ public static unsafe void Fill(ref byte refData, nuint numElements, byte value) // If the total byte length instead involves us writing an odd number of vectors, write // one additional vector now. The bit check below tells us if we're in an "odd vector // count" situation. 
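             // e.g. a 100-byte fill with 32-byte vectors: the loop above wrote at offsets 0 and
             // 32, this bit check writes a third vector at offset 64, and the trailing write
             // below (at offset 100 - 32 = 68) covers bytes 68..99, harmlessly overlapping.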
- if ((numElements & (nuint)Vector.Count) != 0) + if ((len & (nuint)Vector.Count) != 0) { - Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref refData, offset), vector); + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, offset), vector); } // It's possible that some small buffer remains to be populated - something that won't @@ -467,7 +478,7 @@ public static unsafe void Fill(ref byte refData, nuint numElements, byte value) // populated data, which is fine since we're splatting the same value for all entries. // There's no need to perform a length check here because we already performed this // check before entering the vectorized code path. - Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref refData, numElements - (nuint)Vector.Count), vector); + Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref dest, len - (nuint)Vector.Count), vector); // And we're done! return; @@ -480,44 +491,44 @@ public static unsafe void Fill(ref byte refData, nuint numElements, byte value) nuint i = 0; // Write 8 elements at a time - if (numElements >= 8) + if (len >= 8) { - nuint stopLoopAtOffset = numElements & ~(nuint)7; + nuint stopLoopAtOffset = len & ~(nuint)7; do { - Unsafe.Add(ref refData, (nint)i + 0) = value; - Unsafe.Add(ref refData, (nint)i + 1) = value; - Unsafe.Add(ref refData, (nint)i + 2) = value; - Unsafe.Add(ref refData, (nint)i + 3) = value; - Unsafe.Add(ref refData, (nint)i + 4) = value; - Unsafe.Add(ref refData, (nint)i + 5) = value; - Unsafe.Add(ref refData, (nint)i + 6) = value; - Unsafe.Add(ref refData, (nint)i + 7) = value; + Unsafe.Add(ref dest, (nint)i + 0) = value; + Unsafe.Add(ref dest, (nint)i + 1) = value; + Unsafe.Add(ref dest, (nint)i + 2) = value; + Unsafe.Add(ref dest, (nint)i + 3) = value; + Unsafe.Add(ref dest, (nint)i + 4) = value; + Unsafe.Add(ref dest, (nint)i + 5) = value; + Unsafe.Add(ref dest, (nint)i + 6) = value; + Unsafe.Add(ref dest, (nint)i + 7) = value; } while ((i += 8) < stopLoopAtOffset); } // Write next 4 elements if needed - if ((numElements & 4) != 0) + if ((len & 4) != 0) { - Unsafe.Add(ref refData, (nint)i + 0) = value; - Unsafe.Add(ref refData, (nint)i + 1) = value; - Unsafe.Add(ref refData, (nint)i + 2) = value; - Unsafe.Add(ref refData, (nint)i + 3) = value; + Unsafe.Add(ref dest, (nint)i + 0) = value; + Unsafe.Add(ref dest, (nint)i + 1) = value; + Unsafe.Add(ref dest, (nint)i + 2) = value; + Unsafe.Add(ref dest, (nint)i + 3) = value; i += 4; } // Write next 2 elements if needed - if ((numElements & 2) != 0) + if ((len & 2) != 0) { - Unsafe.Add(ref refData, (nint)i + 0) = value; - Unsafe.Add(ref refData, (nint)i + 1) = value; + Unsafe.Add(ref dest, (nint)i + 0) = value; + Unsafe.Add(ref dest, (nint)i + 1) = value; i += 2; } // Write final element if needed - if ((numElements & 1) != 0) + if ((len & 1) != 0) { - Unsafe.Add(ref refData, (nint)i) = value; + Unsafe.Add(ref dest, (nint)i) = value; } } } diff --git a/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.il b/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.il deleted file mode 100644 index b84f69120bb4fd..00000000000000 --- a/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.il +++ /dev/null @@ -1,99 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -.assembly extern System.Runtime { .publickeytoken = (B0 3F 5F 7F 11 D5 0A 3A ) } -.assembly extern xunit.core {} -.assembly extern System.Runtime.Extensions {} -.assembly BufferMemmoveTailCall { - // Allow access to private members of System.Private.CoreLib - .custom instance void System.Runtime.CompilerServices.IgnoresAccessChecksToAttribute::.ctor(string) = ( - 01 00 16 53 79 73 74 65 6d 2e 50 72 69 76 61 74 - 65 2e 43 6f 72 65 4c 69 62 00 00 - ) -} - -.class public abstract auto ansi sealed beforefieldinit TailCallBufferMemmove - extends [System.Runtime]System.Object -{ - .method public hidebysig static int32 Main() cil managed - { - .custom instance void [xunit.core]Xunit.FactAttribute::.ctor() = ( - 01 00 00 00 - ) - .maxstack 8 - .entrypoint - - // C#: - // byte[] src = new byte[32]; - // Test(ref src[0]); - - ldc.i4.s 32 - newarr [System.Runtime]System.Byte - ldc.i4.0 - ldelema [System.Runtime]System.Byte - call void TailCallBufferMemmove::Test(uint8&) - - // return 100; - ldc.i4.s 100 - ret - } - - .method private hidebysig static void Test (uint8& src) cil managed noinlining - { - .maxstack 3 - - // C#: - // byte* data = stackalloc byte[64]; // to trigger slow helper-based tail calls - // SpanHelpers.Memmove(ref Unsafe.AsRef(data), ref src, 64); - - ldc.i4.s 64 - conv.u - localloc - call !!0& [System.Runtime]System.Runtime.CompilerServices.Unsafe::AsRef(void*) - ldarg.0 - ldc.i4.s 64 - conv.i - tail. call void [System.Runtime]System.SpanHelpers::Memmove(uint8&, uint8&, native uint) - ret - } -} - -// C#: -// namespace System.Runtime.CompilerServices -// { -// public class IgnoresAccessChecksToAttribute : Attribute -// { -// public IgnoresAccessChecksToAttribute(string assemblyName) -// { -// AssemblyName = assemblyName; -// } -// public string AssemblyName { get; } -// } -// } -// -.class public auto ansi beforefieldinit System.Runtime.CompilerServices.IgnoresAccessChecksToAttribute - extends [System.Runtime]System.Attribute -{ - .field private initonly string 'k__BackingField' - .method public hidebysig specialname rtspecialname instance void .ctor (string assemblyName) cil managed - { - .maxstack 8 - ldarg.0 - call instance void [System.Runtime]System.Attribute::.ctor() - ldarg.0 - ldarg.1 - stfld string System.Runtime.CompilerServices.IgnoresAccessChecksToAttribute::'k__BackingField' - ret - } - .method public hidebysig specialname instance string get_AssemblyName () cil managed - { - .maxstack 8 - ldarg.0 - ldfld string System.Runtime.CompilerServices.IgnoresAccessChecksToAttribute::'k__BackingField' - ret - } - .property instance string AssemblyName() - { - .get instance string System.Runtime.CompilerServices.IgnoresAccessChecksToAttribute::get_AssemblyName() - } -} diff --git a/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.ilproj b/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.ilproj deleted file mode 100644 index 5fa250452852d2..00000000000000 --- a/src/tests/JIT/opt/Vectorization/BufferMemmoveTailCall.ilproj +++ /dev/null @@ -1,8 +0,0 @@ - - - True - - - - - From 6e83d82022622953a2c39774004420c8ccf6c454 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 24 Feb 2024 22:40:00 +0100 Subject: [PATCH 33/40] Add zero check to memmove (in case of invalid pointers with len=0) --- .../src/System/SpanHelpers.ByteMemOps.cs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs index 
c58ee91ad07b9a..5d466af4cca5cd 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs
@@ -237,10 +237,13 @@ internal static unsafe void Memmove(ref byte dest, ref byte src, nuint len)
         }

     PInvoke:
-            // Implicit nullchecks
-            _ = Unsafe.ReadUnaligned<byte>(ref dest);
-            _ = Unsafe.ReadUnaligned<byte>(ref src);
-            Buffer._Memmove(ref dest, ref src, len);
+            if (len > 0)
+            {
+                // Implicit nullchecks
+                _ = Unsafe.ReadUnaligned<byte>(ref dest);
+                _ = Unsafe.ReadUnaligned<byte>(ref src);
+                Buffer._Memmove(ref dest, ref src, len);
+            }
         }

 #if NATIVEAOT

From 1bbaa3d74b0264e13b82ee4bddd91baf29947b3b Mon Sep 17 00:00:00 2001
From: EgorBo
Date: Sat, 24 Feb 2024 22:58:34 +0100
Subject: [PATCH 34/40] Clean up symbol name

---
 .../tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs | 6 +++---
 .../src/System/SpanHelpers.ByteMemOps.cs                | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
index 5240ff4d013fed..1845b5ce7848c6 100644
--- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
+++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs
@@ -139,13 +139,13 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id,
                     break;
                 case ReadyToRunHelper.MemCpy:
-                    mangledName = "RhRuntimeHelpers_MemCopy";
+                    mangledName = "RhSpanHelpers_MemCopy";
                     break;
                 case ReadyToRunHelper.MemSet:
-                    mangledName = "RhRuntimeHelpers_MemSet";
+                    mangledName = "RhSpanHelpers_MemSet";
                     break;
                 case ReadyToRunHelper.MemZero:
-                    mangledName = "RhRuntimeHelpers_MemZero";
+                    mangledName = "RhSpanHelpers_MemZero";
                     break;
                 case ReadyToRunHelper.NativeMemSet:
                     mangledName = "memset";
diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs
index 5d466af4cca5cd..7f08ba656974c8 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs
@@ -35,7 +35,7 @@ private struct Block64 {}
 #endif // HAS_CUSTOM_BLOCKS

 #if NATIVEAOT
-    [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemSet")]
+    [System.Runtime.RuntimeExport("RhSpanHelpers_MemSet")]
 #endif
     [Intrinsic] // Unrolled for small constant lengths
     internal static unsafe void Memmove(ref byte dest, ref byte src, nuint len)
@@ -247,7 +247,7 @@ internal static unsafe void Memmove(ref byte dest, ref byte src, nuint len)
 }

 #if NATIVEAOT
-    [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemSet")]
+    [System.Runtime.RuntimeExport("RhSpanHelpers_MemSet")]
 #endif
     [Intrinsic] // Unrolled for small sizes
     public static unsafe void ClearWithoutReferences(ref byte dest, nuint len)
@@ -436,7 +436,7 @@ public static unsafe void ClearWithoutReferences(ref byte dest, nuint len)
 }

 #if NATIVEAOT
-    [System.Runtime.RuntimeExport("RhRuntimeHelpers_MemSet")]
+    [System.Runtime.RuntimeExport("RhSpanHelpers_MemSet")]
 #endif
     public static void Fill(ref byte dest, byte value, nuint len)
     {

From a621ad24649de7b3d1192b9a8031b3b15d8b9191 Mon Sep 17 00:00:00 2001
From: EgorBo
Date: Sat, 24 Feb 2024 23:02:39 +0100
Subject: [PATCH 35/40] Add a test

---
 src/tests/JIT/opt/Structs/MemsetMemcpyNullref.cs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.cs b/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.cs
index 1c958a99e4aada..9d7e4816522c73 100644
--- a/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.cs
+++ b/src/tests/JIT/opt/Structs/MemsetMemcpyNullref.cs
@@ -32,6 +32,13 @@ public static void MemsetMemcpyThrowNullRefonNull()
             Assert.Throws<NullReferenceException>(() => MemoryInitByref(ref Unsafe.NullRef<byte>(), length));
             Assert.Throws<NullReferenceException>(() => MemoryCopyByref(ref Unsafe.NullRef<byte>(), ref Unsafe.NullRef<byte>(), length));
         }
+
+        // These APIs are not expected to fail/throw on zero length, even if pointers are not valid
+        byte valid = 0;
+        MemoryInitByref(ref Unsafe.NullRef<byte>(), 0);
+        MemoryCopyByref(ref Unsafe.NullRef<byte>(), ref valid, 0);
+        MemoryCopyByref(ref valid, ref Unsafe.NullRef<byte>(), 0);
+        MemoryCopyByref(ref Unsafe.NullRef<byte>(), ref Unsafe.NullRef<byte>(), 0);
     }

     [MethodImpl(MethodImplOptions.NoInlining)]

From 6283cf2681c73762268d3d8fb99f131e6bfbdc92 Mon Sep 17 00:00:00 2001
From: EgorBo
Date: Sat, 24 Feb 2024 23:16:05 +0100
Subject: [PATCH 36/40] More clean up (in jit)

---
 src/coreclr/jit/fgbasic.cpp              |  2 +-
 src/coreclr/jit/fgprofile.cpp            |  6 +++---
 src/coreclr/jit/importercalls.cpp        | 19 ++++++++-----------
 src/coreclr/jit/lower.cpp                |  4 ++--
 src/coreclr/jit/namedintrinsiclist.h     |  2 +-
 .../src/System/SpanHelpers.ByteMemOps.cs |  2 +-
 6 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp
index 27aa0966095eac..d1a7bc7fda298a 100644
--- a/src/coreclr/jit/fgbasic.cpp
+++ b/src/coreclr/jit/fgbasic.cpp
@@ -1332,7 +1332,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
                 case NI_System_SpanHelpers_ClearWithoutReferences:
                 case NI_System_SpanHelpers_Fill:
                 case NI_System_SpanHelpers_SequenceEqual:
-                case NI_System_Buffer_Memmove:
+                case NI_System_SpanHelpers_Memmove:
                 {
                     if (FgStack::IsConstArgument(pushedStack.Top(), impInlineInfo))
                     {
diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp
index a92e7d260c3043..a1c9da833fd3f4 100644
--- a/src/coreclr/jit/fgprofile.cpp
+++ b/src/coreclr/jit/fgprofile.cpp
@@ -1947,7 +1947,7 @@ class ValueHistogramProbeVisitor final : public GenTreeVisitor<ValueHistogramProbeVisitor>
         if (node->IsCall() && node->AsCall()->IsSpecialIntrinsic())
         {
             const NamedIntrinsic ni = m_compiler->lookupNamedIntrinsic(node->AsCall()->gtCallMethHnd);
-            if ((ni == NI_System_Buffer_Memmove) || (ni == NI_System_SpanHelpers_SequenceEqual))
+            if ((ni == NI_System_SpanHelpers_Memmove) || (ni == NI_System_SpanHelpers_SequenceEqual))
             {
                 m_functor(m_compiler, node);
             }
@@ -2274,7 +2274,7 @@ class ValueHistogramProbeInserter
                 return;
             }

-            assert(node->AsCall()->IsSpecialIntrinsic(compiler, NI_System_Buffer_Memmove) ||
+            assert(node->AsCall()->IsSpecialIntrinsic(compiler, NI_System_SpanHelpers_Memmove) ||
                    node->AsCall()->IsSpecialIntrinsic(compiler, NI_System_SpanHelpers_SequenceEqual));

             const ICorJitInfo::PgoInstrumentationSchema& countEntry = m_schema[*m_currentSchemaIndex];
@@ -2540,7 +2540,7 @@ PhaseStatus Compiler::fgPrepareToInstrumentMethod()
                 // These are marked as [Intrinsic] only to be handled (unrolled) for constant inputs.
                 // In other cases they have large managed implementations we want to profile.
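                 // (e.g. Memmove gets a value histogram over its length argument, so tiered
                 // recompilation can unroll the copy sizes that actually dominate at run time.)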
case NI_System_String_Equals: - case NI_System_Buffer_Memmove: + case NI_System_SpanHelpers_Memmove: case NI_System_MemoryExtensions_Equals: case NI_System_MemoryExtensions_SequenceEqual: case NI_System_MemoryExtensions_StartsWith: diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index b897e783a7268b..a1f5768ee68321 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -1290,7 +1290,7 @@ var_types Compiler::impImportCall(OPCODE opcode, impAppendTree(call, verCurrentState.esStackDepth - 1, impCurStmtDI); } else if (JitConfig.JitProfileValues() && call->IsCall() && - call->AsCall()->IsSpecialIntrinsic(this, NI_System_Buffer_Memmove)) + call->AsCall()->IsSpecialIntrinsic(this, NI_System_SpanHelpers_Memmove)) { if (opts.IsOptimizedWithProfile()) { @@ -1555,7 +1555,7 @@ GenTree* Compiler::impDuplicateWithProfiledArg(GenTreeCall* call, IL_OFFSET ilOf unsigned argNum = 0; ssize_t minValue = 0; ssize_t maxValue = 0; - if (call->IsSpecialIntrinsic(this, NI_System_Buffer_Memmove)) + if (call->IsSpecialIntrinsic(this, NI_System_SpanHelpers_Memmove)) { // dst(0), src(1), len(2) argNum = 2; @@ -2761,7 +2761,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, betterToExpand = true; break; - case NI_System_Buffer_Memmove: + case NI_System_SpanHelpers_Memmove: case NI_System_SpanHelpers_SequenceEqual: // We're going to instrument these betterToExpand = opts.IsInstrumented(); @@ -3983,7 +3983,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, case NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8: case NI_System_SpanHelpers_SequenceEqual: case NI_System_SpanHelpers_ClearWithoutReferences: - case NI_System_Buffer_Memmove: + case NI_System_SpanHelpers_Memmove: { if (sig->sigInst.methInstCount == 0) { @@ -8874,13 +8874,6 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) result = NI_System_BitConverter_Int64BitsToDouble; } } - else if (strcmp(className, "Buffer") == 0) - { - if (strcmp(methodName, "Memmove") == 0) - { - result = NI_System_Buffer_Memmove; - } - } break; } @@ -9040,6 +9033,10 @@ NamedIntrinsic Compiler::lookupNamedIntrinsic(CORINFO_METHOD_HANDLE method) { result = NI_System_SpanHelpers_ClearWithoutReferences; } + else if (strcmp(methodName, "Memmove") == 0) + { + result = NI_System_SpanHelpers_Memmove; + } } else if (strcmp(className, "String") == 0) { diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 511beb40a23fa0..79935bffcae23e 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -2013,7 +2013,7 @@ bool Lowering::LowerCallMemmove(GenTreeCall* call, GenTree** next) { JITDUMP("Considering Memmove [%06d] for unrolling.. 
", comp->dspTreeID(call)) assert(call->IsHelperCall(comp, CORINFO_HELP_MEMCPY) || - (comp->lookupNamedIntrinsic(call->gtCallMethHnd) == NI_System_Buffer_Memmove)); + (comp->lookupNamedIntrinsic(call->gtCallMethHnd) == NI_System_SpanHelpers_Memmove)); assert(call->gtArgs.CountUserArgs() == 3); @@ -2374,7 +2374,7 @@ GenTree* Lowering::LowerCall(GenTree* node) { switch (comp->lookupNamedIntrinsic(call->gtCallMethHnd)) { - case NI_System_Buffer_Memmove: + case NI_System_SpanHelpers_Memmove: if (LowerCallMemmove(call, &nextNode)) { return nextNode; diff --git a/src/coreclr/jit/namedintrinsiclist.h b/src/coreclr/jit/namedintrinsiclist.h index a68b88f06a4502..9fa128c38f74ee 100644 --- a/src/coreclr/jit/namedintrinsiclist.h +++ b/src/coreclr/jit/namedintrinsiclist.h @@ -20,7 +20,7 @@ enum NamedIntrinsic : unsigned short NI_System_BitConverter_Int64BitsToDouble, NI_System_BitConverter_SingleToInt32Bits, - NI_System_Buffer_Memmove, + NI_System_SpanHelpers_Memmove, NI_SYSTEM_MATH_START, NI_System_Math_Abs, diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs index 7f08ba656974c8..5557ea645ccae2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs @@ -438,7 +438,7 @@ public static unsafe void ClearWithoutReferences(ref byte dest, nuint len) #if NATIVEAOT [System.Runtime.RuntimeExport("RhSpanHelpers_MemSet")] #endif - public static void Fill(ref byte dest, byte value, nuint len) + internal static void Fill(ref byte dest, byte value, nuint len) { if (!Vector.IsHardwareAccelerated) { From c497ca1ecbccc8b19eb8a970fbc1d7c36b176709 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 24 Feb 2024 23:47:26 +0100 Subject: [PATCH 37/40] Fix NAOT --- .../src/System/SpanHelpers.ByteMemOps.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs index 5557ea645ccae2..9672d8c11512af 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs @@ -35,7 +35,7 @@ private struct Block64 {} #endif // HAS_CUSTOM_BLOCKS #if NATIVEAOT - [System.Runtime.RuntimeExport("RhSpanHelpers_MemSet")] + [System.Runtime.RuntimeExport("RhSpanHelpers_MemCopy")] #endif [Intrinsic] // Unrolled for small constant lengths internal static unsafe void Memmove(ref byte dest, ref byte src, nuint len) @@ -247,7 +247,7 @@ internal static unsafe void Memmove(ref byte dest, ref byte src, nuint len) } #if NATIVEAOT - [System.Runtime.RuntimeExport("RhSpanHelpers_MemSet")] + [System.Runtime.RuntimeExport("RhSpanHelpers_MemZero")] #endif [Intrinsic] // Unrolled for small sizes public static unsafe void ClearWithoutReferences(ref byte dest, nuint len) From 0a72fab96df1ba9ab7fbf1ad76cc5e6a16988dd2 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 25 Feb 2024 01:07:21 +0100 Subject: [PATCH 38/40] Update src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs Co-authored-by: Jan Kotas --- .../System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs 
b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs
index 9672d8c11512af..5de5fb738fae63 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs
@@ -41,7 +41,7 @@ private struct Block64 {}
     internal static unsafe void Memmove(ref byte dest, ref byte src, nuint len)
     {
         // P/Invoke into the native version when the buffers are overlapping.
-        if (((nuint)(nint)Unsafe.ByteOffset(ref src, ref dest) < len) || ((nuint)(nint)Unsafe.ByteOffset(ref dest, ref src) < len))
+        if (((nuint)Unsafe.ByteOffset(ref src, ref dest) < len) || ((nuint)Unsafe.ByteOffset(ref dest, ref src) < len))
         {
             goto BuffersOverlap;
         }

From 8aa337717be7c459aa19e6839cea0bc6f41c1e81 Mon Sep 17 00:00:00 2001
From: EgorBo
Date: Sun, 25 Feb 2024 01:18:12 +0100
Subject: [PATCH 39/40] Address feedback

---
 .../src/System/SpanHelpers.ByteMemOps.cs | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs
index 5de5fb738fae63..ed54c495d60d4a 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.ByteMemOps.cs
@@ -41,15 +41,14 @@ private struct Block64 {}
     internal static unsafe void Memmove(ref byte dest, ref byte src, nuint len)
     {
         // P/Invoke into the native version when the buffers are overlapping.
-        if (((nuint)Unsafe.ByteOffset(ref src, ref dest) < len) || ((nuint)Unsafe.ByteOffset(ref dest, ref src) < len))
+        if ((nuint)Unsafe.ByteOffset(ref src, ref dest) < len ||
+            (nuint)Unsafe.ByteOffset(ref dest, ref src) < len)
         {
             goto BuffersOverlap;
         }

-        // Use "(IntPtr)(nint)len" to avoid overflow checking on the explicit cast to IntPtr
-
-        ref byte srcEnd = ref Unsafe.Add(ref src, (IntPtr)(nint)len);
-        ref byte destEnd = ref Unsafe.Add(ref dest, (IntPtr)(nint)len);
+        ref byte srcEnd = ref Unsafe.Add(ref src, len);
+        ref byte destEnd = ref Unsafe.Add(ref dest, len);

         if (len <= 16)
             goto MCPY02;
@@ -230,20 +229,21 @@ internal static unsafe void Memmove(ref byte dest, ref byte src, nuint len)
             return;

         BuffersOverlap:
+            Debug.Assert(len > 0);
             // If the buffers overlap perfectly, there's no point to copying the data.
             if (Unsafe.AreSame(ref dest, ref src))
             {
+                // Both could be null with a non-zero length, perform an implicit null check.
+                _ = Unsafe.ReadUnaligned<byte>(ref dest);
                 return;
             }

         PInvoke:
-            if (len > 0)
-            {
-                // Implicit nullchecks
-                _ = Unsafe.ReadUnaligned<byte>(ref dest);
-                _ = Unsafe.ReadUnaligned<byte>(ref src);
-                Buffer._Memmove(ref dest, ref src, len);
-            }
+            // Implicit nullchecks
+            Debug.Assert(len > 0);
+            _ = Unsafe.ReadUnaligned<byte>(ref dest);
+            _ = Unsafe.ReadUnaligned<byte>(ref src);
+            Buffer._Memmove(ref dest, ref src, len);
         }

From f5e066288f0455b840ae28c829857cbe474d5183 Mon Sep 17 00:00:00 2001
From: Egor Bogatov
Date: Sun, 25 Feb 2024 12:55:43 +0100
Subject: [PATCH 40/40] Update guide-for-porting.md

---
 docs/design/coreclr/botr/guide-for-porting.md | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/docs/design/coreclr/botr/guide-for-porting.md b/docs/design/coreclr/botr/guide-for-porting.md
index f5549cde22521b..f7ca105bf165db 100644
--- a/docs/design/coreclr/botr/guide-for-porting.md
+++ b/docs/design/coreclr/botr/guide-for-porting.md
@@ -413,10 +413,6 @@ Here is an annotated list of the stubs implemented for Unix on Arm64.
    Today use of this feature on Unix requires hand-written IL. On Windows this
    feature is commonly used by C++/CLI

-3. EH Correctness. Some helpers are written in assembly to provide well known
-   locations for NullReferenceExceptions to be generated out of a SIGSEGV
-   signal.
-
 #### cgencpu.h

 This header is included by various code in the VM directory.
 It provides a large
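The bullet deleted above became obsolete with this series: the memset/memcpy helpers are no longer hand-written assembly, so no dedicated fault locations are needed. The managed implementations instead perform an explicit one-byte read, making a null buffer fault inside managed code, where the runtime's ordinary translation of the access violation into a NullReferenceException already applies. A minimal standalone sketch of that pattern (illustrative only; MemZeroLike is a hypothetical stand-in, not the CoreLib code):

    using System;
    using System.Runtime.CompilerServices;

    internal static class ImplicitNullcheckSketch
    {
        // Hypothetical stand-in for the managed helpers: the explicit read below
        // replaces the "well known locations" the assembly helpers needed.
        private static void MemZeroLike(ref byte dest, nuint len)
        {
            if (len > 0)
            {
                _ = Unsafe.ReadUnaligned<byte>(ref dest); // faults here, in managed code, if dest is null

                for (nuint i = 0; i < len; i++) // simplified clear loop, not the vectorized real thing
                {
                    Unsafe.Add(ref dest, i) = 0;
                }
            }
        }

        private static void Main()
        {
            try
            {
                MemZeroLike(ref Unsafe.NullRef<byte>(), 16);
            }
            catch (NullReferenceException)
            {
                Console.WriteLine("null dest surfaced as a managed NullReferenceException");
            }

            MemZeroLike(ref Unsafe.NullRef<byte>(), 0); // zero length: no read, no throw
            Console.WriteLine("zero-length call on a null ref completed without faulting");
        }
    }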