From 9884baa9d377e30a759b4cb90ff81e372f8db20a Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 21 Feb 2024 04:23:36 +0100 Subject: [PATCH 1/7] Replace dynamic store blocks with helper calls early --- src/coreclr/jit/importer.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 36f0fdcc45e55c..d72f420b72d421 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -10324,6 +10324,25 @@ void Compiler::impImportBlockCode(BasicBlock* block) } else { +// TODO: enable for X86 as well, it currently doesn't support memset/memcpy helpers +// Then, get rid of GT_STORE_DYN_BLK entirely. +#ifndef TARGET_X86 + const bool isVolatile = (indirFlags & GTF_IND_VOLATILE) != 0; + unsigned helper = opcode == CEE_INITBLK ? CORINFO_HELP_MEMSET : CORINFO_HELP_MEMCPY; + op3 = gtNewCastNode(TYP_I_IMPL, op3, /* fromUnsigned */ true, TYP_I_IMPL); + + if (isVolatile) + { + op3 = gtNewOperNode(GT_COMMA, TYP_I_IMPL, gtNewMemoryBarrier(), op3); + } + op1 = gtNewHelperCallNode(helper, TYP_VOID, op1, op2, op3); + + if (isVolatile) + { + impAppendTree(op1, CHECK_SPILL_ALL, impCurStmtDI); + op1 = gtNewMemoryBarrier(true); + } +#else if (opcode == CEE_INITBLK) { if (!op2->IsIntegralConst(0)) @@ -10342,6 +10361,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) #endif op1 = gtNewStoreDynBlkNode(op1, op2, op3, indirFlags); +#endif } goto SPILL_APPEND; } From d88a8804de1fdbae9761a2bb8b27f3f322001481 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 21 Feb 2024 10:33:35 +0100 Subject: [PATCH 2/7] Clean up --- src/coreclr/jit/importer.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index d72f420b72d421..7185c5302b7731 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -10329,16 +10329,16 @@ void Compiler::impImportBlockCode(BasicBlock* block) #ifndef TARGET_X86 const bool isVolatile = (indirFlags & GTF_IND_VOLATILE) != 0; unsigned helper = opcode == CEE_INITBLK ? CORINFO_HELP_MEMSET : CORINFO_HELP_MEMCPY; - op3 = gtNewCastNode(TYP_I_IMPL, op3, /* fromUnsigned */ true, TYP_I_IMPL); - - if (isVolatile) - { - op3 = gtNewOperNode(GT_COMMA, TYP_I_IMPL, gtNewMemoryBarrier(), op3); - } +#ifdef TARGET_64BIT + op3 = gtNewCastNode(TYP_I_IMPL, op3, /* fromUnsigned */ true, TYP_I_IMPL); +#endif op1 = gtNewHelperCallNode(helper, TYP_VOID, op1, op2, op3); if (isVolatile) { + // Wrap with memory barriers: full-barrier + call + load-barrier + impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("spilling side-effects")); + impAppendTree(gtNewMemoryBarrier(), CHECK_SPILL_ALL, impCurStmtDI); impAppendTree(op1, CHECK_SPILL_ALL, impCurStmtDI); op1 = gtNewMemoryBarrier(true); } From ef459a61113c2f9b10a286d44812e450b6f1e69e Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 21 Feb 2024 11:51:25 +0100 Subject: [PATCH 3/7] fix volatile --- src/coreclr/jit/importer.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 7185c5302b7731..edc1edd50c8ed1 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -10327,21 +10327,24 @@ void Compiler::impImportBlockCode(BasicBlock* block) // TODO: enable for X86 as well, it currently doesn't support memset/memcpy helpers // Then, get rid of GT_STORE_DYN_BLK entirely. #ifndef TARGET_X86 - const bool isVolatile = (indirFlags & GTF_IND_VOLATILE) != 0; - unsigned helper = opcode == CEE_INITBLK ? CORINFO_HELP_MEMSET : CORINFO_HELP_MEMCPY; + const bool isVolatile = (indirFlags & GTF_IND_VOLATILE) != 0; + const unsigned helper = opcode == CEE_INITBLK ? CORINFO_HELP_MEMSET : CORINFO_HELP_MEMCPY; #ifdef TARGET_64BIT op3 = gtNewCastNode(TYP_I_IMPL, op3, /* fromUnsigned */ true, TYP_I_IMPL); #endif - op1 = gtNewHelperCallNode(helper, TYP_VOID, op1, op2, op3); - if (isVolatile) { // Wrap with memory barriers: full-barrier + call + load-barrier impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("spilling side-effects")); + op1 = gtNewHelperCallNode(helper, TYP_VOID, op1, op2, op3); impAppendTree(gtNewMemoryBarrier(), CHECK_SPILL_ALL, impCurStmtDI); impAppendTree(op1, CHECK_SPILL_ALL, impCurStmtDI); op1 = gtNewMemoryBarrier(true); } + else + { + op1 = gtNewHelperCallNode(helper, TYP_VOID, op1, op2, op3); + } #else if (opcode == CEE_INITBLK) { From e6fffe9ba3683ec6401fc7b89e0e039c960b5958 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 21 Feb 2024 11:51:43 +0100 Subject: [PATCH 4/7] unroll memcpy --- src/coreclr/jit/lower.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 5acb6ff95112e1..ed3d8d29d8e8eb 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1860,7 +1860,8 @@ GenTree* Lowering::AddrGen(void* addr) bool Lowering::LowerCallMemmove(GenTreeCall* call, GenTree** next) { JITDUMP("Considering Memmove [%06d] for unrolling.. ", comp->dspTreeID(call)) - assert(comp->lookupNamedIntrinsic(call->gtCallMethHnd) == NI_System_Buffer_Memmove); + assert(call->IsHelperCall(comp, CORINFO_HELP_MEMCPY) || + (comp->lookupNamedIntrinsic(call->gtCallMethHnd) == NI_System_Buffer_Memmove)); assert(call->gtArgs.CountUserArgs() == 3); @@ -2215,16 +2216,22 @@ GenTree* Lowering::LowerCall(GenTree* node) } #if defined(TARGET_AMD64) || defined(TARGET_ARM64) + GenTree* nextNode = nullptr; if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) { - GenTree* nextNode = nullptr; - NamedIntrinsic ni = comp->lookupNamedIntrinsic(call->gtCallMethHnd); + NamedIntrinsic ni = comp->lookupNamedIntrinsic(call->gtCallMethHnd); if (((ni == NI_System_Buffer_Memmove) && LowerCallMemmove(call, &nextNode)) || ((ni == NI_System_SpanHelpers_SequenceEqual) && LowerCallMemcmp(call, &nextNode))) { return nextNode; } } + + // Try to lower CORINFO_HELP_MEMCPY to unrollable STORE_BLK + if (call->IsHelperCall(comp, CORINFO_HELP_MEMCPY) && LowerCallMemmove(call, &nextNode)) + { + return nextNode; + } #endif call->ClearOtherRegs(); From affafffa9694d56517d6327e7ae6794668e1d3e2 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 21 Feb 2024 12:00:44 +0100 Subject: [PATCH 5/7] Clean up --- src/coreclr/jit/lower.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index ed3d8d29d8e8eb..18d7f388e908a7 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -1844,6 +1844,7 @@ GenTree* Lowering::AddrGen(void* addr) //------------------------------------------------------------------------ // LowerCallMemmove: Replace Buffer.Memmove(DST, SRC, CNS_SIZE) with a GT_STORE_BLK: +// Do the same for CORINFO_HELP_MEMCPY(DST, SRC, CNS_SIZE) // // * STORE_BLK struct (copy) (Unroll) // +--* LCL_VAR byref dst @@ -1895,7 +1896,8 @@ bool Lowering::LowerCallMemmove(GenTreeCall* call, GenTree** next) // TODO-CQ: Use GenTreeBlk::BlkOpKindUnroll here if srcAddr and dstAddr don't overlap, thus, we can // unroll this memmove as memcpy - it doesn't require lots of temp registers - storeBlk->gtBlkOpKind = GenTreeBlk::BlkOpKindUnrollMemmove; + storeBlk->gtBlkOpKind = call->IsHelperCall(comp, CORINFO_HELP_MEMCPY) ? GenTreeBlk::BlkOpKindUnroll + : GenTreeBlk::BlkOpKindUnrollMemmove; BlockRange().InsertBefore(call, srcBlk); BlockRange().InsertBefore(call, storeBlk); From 501ddad77106fbb778cbb9b7e4eb01ccf5249206 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 21 Feb 2024 15:51:45 +0100 Subject: [PATCH 6/7] Address feedback --- src/coreclr/jit/importer.cpp | 37 +++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index edc1edd50c8ed1..6fe7ed6eb90810 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -10285,9 +10285,17 @@ void Compiler::impImportBlockCode(BasicBlock* block) case CEE_CPBLK: { GenTreeFlags indirFlags = impPrefixFlagsToIndirFlags(prefixFlags); - op3 = impPopStack().val; // Size - op2 = impPopStack().val; // Value / Src addr - op1 = impPopStack().val; // Dst addr + const bool isVolatile = (indirFlags & GTF_IND_VOLATILE) != 0; + if (isVolatile && !impStackTop(0).val->IsCnsIntOrI()) + { + // We're going to emit a helper call surrounded by memory barriers, so we need to spill any side + // effects. + impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("spilling side-effects")); + } + + op3 = impPopStack().val; // Size + op2 = impPopStack().val; // Value / Src addr + op1 = impPopStack().val; // Dst addr if (op3->IsCnsIntOrI()) { @@ -10324,21 +10332,22 @@ void Compiler::impImportBlockCode(BasicBlock* block) } else { + if (TARGET_POINTER_SIZE == 8) + { + // Cast size to TYP_LONG on 64-bit targets + op3 = gtNewCastNode(TYP_LONG, op3, /* fromUnsigned */ true, TYP_LONG); + } + // TODO: enable for X86 as well, it currently doesn't support memset/memcpy helpers // Then, get rid of GT_STORE_DYN_BLK entirely. #ifndef TARGET_X86 - const bool isVolatile = (indirFlags & GTF_IND_VOLATILE) != 0; - const unsigned helper = opcode == CEE_INITBLK ? CORINFO_HELP_MEMSET : CORINFO_HELP_MEMCPY; -#ifdef TARGET_64BIT - op3 = gtNewCastNode(TYP_I_IMPL, op3, /* fromUnsigned */ true, TYP_I_IMPL); -#endif + const unsigned helper = opcode == CEE_INITBLK ? CORINFO_HELP_MEMSET : CORINFO_HELP_MEMCPY; if (isVolatile) { // Wrap with memory barriers: full-barrier + call + load-barrier - impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("spilling side-effects")); - op1 = gtNewHelperCallNode(helper, TYP_VOID, op1, op2, op3); impAppendTree(gtNewMemoryBarrier(), CHECK_SPILL_ALL, impCurStmtDI); - impAppendTree(op1, CHECK_SPILL_ALL, impCurStmtDI); + impAppendTree(gtNewHelperCallNode(helper, TYP_VOID, op1, op2, op3), CHECK_SPILL_ALL, + impCurStmtDI); op1 = gtNewMemoryBarrier(true); } else @@ -10357,12 +10366,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) { op2 = gtNewIndir(TYP_STRUCT, op2); } - -#ifdef TARGET_64BIT - // STORE_DYN_BLK takes a native uint size as it turns into call to memcpy. - op3 = gtNewCastNode(TYP_I_IMPL, op3, /* fromUnsigned */ true, TYP_I_IMPL); -#endif - op1 = gtNewStoreDynBlkNode(op1, op2, op3, indirFlags); #endif } From 44c9c7683c7f531d1baeec6ddc5f66f999e08000 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 21 Feb 2024 15:52:54 +0100 Subject: [PATCH 7/7] Clean up --- src/coreclr/jit/importer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 6fe7ed6eb90810..5469904c4ce554 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -10286,12 +10286,14 @@ void Compiler::impImportBlockCode(BasicBlock* block) { GenTreeFlags indirFlags = impPrefixFlagsToIndirFlags(prefixFlags); const bool isVolatile = (indirFlags & GTF_IND_VOLATILE) != 0; +#ifndef TARGET_X86 if (isVolatile && !impStackTop(0).val->IsCnsIntOrI()) { // We're going to emit a helper call surrounded by memory barriers, so we need to spill any side // effects. impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("spilling side-effects")); } +#endif op3 = impPopStack().val; // Size op2 = impPopStack().val; // Value / Src addr