From 244ad4c9e3329f61fdce15ce2459e57dfd5760a4 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 3 Dec 2023 01:42:58 +0100 Subject: [PATCH 01/22] Loop-based impl --- src/coreclr/jit/codegen.h | 1 + src/coreclr/jit/codegenarmarch.cpp | 62 +++++++++++++++++++ src/coreclr/jit/codegenloongarch64.cpp | 16 +++++ src/coreclr/jit/codegenriscv64.cpp | 16 +++++ src/coreclr/jit/codegenxarch.cpp | 48 ++++++++++++++ src/coreclr/jit/gentree.cpp | 5 ++ src/coreclr/jit/gentree.h | 10 +-- src/coreclr/jit/lowerarmarch.cpp | 3 +- src/coreclr/jit/lowerloongarch64.cpp | 3 +- src/coreclr/jit/lowerriscv64.cpp | 3 +- src/coreclr/jit/lowerxarch.cpp | 6 +- src/coreclr/jit/lsraarmarch.cpp | 5 ++ src/coreclr/jit/lsrabuild.cpp | 1 + src/coreclr/jit/lsraloongarch64.cpp | 5 ++ src/coreclr/jit/lsrariscv64.cpp | 5 ++ src/coreclr/jit/lsraxarch.cpp | 5 ++ .../JIT/opt/Structs/StructWithGC_Zeroing.cs | 30 +++++++++ .../opt/Structs/StructWithGC_Zeroing.csproj | 14 +++++ 18 files changed, 229 insertions(+), 9 deletions(-) create mode 100644 src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs create mode 100644 src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index eb190f97f0e479..dda0d1871e4bf1 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1226,6 +1226,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #ifndef TARGET_X86 void genCodeForInitBlkHelper(GenTreeBlk* initBlkNode); #endif + void genCodeForInitBlkLoop(GenTreeBlk* initBlkNode); void genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode); void genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode); void genJumpTable(GenTree* tree); diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 5434ef7b722ef9..6e7a4750a3f0ba 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3114,6 +3114,63 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN); } +//------------------------------------------------------------------------ +// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop +// +// Arguments: +// initBlkNode - the GT_STORE_BLK node +// +void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) +{ + GenTree* const dstNode = initBlkNode->Addr(); + genConsumeReg(dstNode); + const regNumber dstReg = dstNode->GetRegNum(); + +#ifdef TARGET_ARM64 + const regNumber zeroReg = REG_ZR; +#else + GenTree* const zeroNode = initBlkNode->Data(); + genConsumeReg(zeroNode); + const regNumber zeroReg = zeroNode->GetRegNum(); +#endif + + if (initBlkNode->IsVolatile()) + { + // issue a full memory barrier before a volatile initBlock Operation + instGen_MemoryBarrier(); + } + + // mov offsetReg, + //.LOOP: + // str xzr, [dstReg, offsetReg] + // subs offsetReg, offsetReg, #8 + // bne .LOOP + // str xzr, [dstReg] + + const unsigned size = initBlkNode->GetLayout()->GetSize(); + assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); + + if (size > TARGET_POINTER_SIZE) + { + const regNumber offsetReg = initBlkNode->ExtractTempReg(); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); + + BasicBlock* loop = genCreateTempLabel(); + genDefineTempLabel(loop); + + GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); +#ifdef TARGET_ARM64 + GetEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); +#else + // There is no subs (sets flags) instruction on ARM32, so we use sub and cmp instead. + GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); + GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, offsetReg); +#endif + inst_JMP(EJ_ne, loop); + } + GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); +} + //------------------------------------------------------------------------ // genCall: Produce code for a GT_CALL node // @@ -4384,6 +4441,11 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) genCodeForCpObj(blkOp->AsBlk()); break; + case GenTreeBlk::BlkOpKindLoop: + assert(!isCopyBlk); + genCodeForInitBlkLoop(blkOp); + break; + case GenTreeBlk::BlkOpKindHelper: assert(!blkOp->gtBlkOpGcUnsafe); if (isCopyBlk) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 33bd2a77486c65..73d4c4467efcdb 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6375,6 +6375,17 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN); } +//------------------------------------------------------------------------ +// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop +// +// Arguments: +// initBlkNode - the GT_STORE_BLK node +// +void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) +{ + // TODO: +} + // Generate code for a load from some address + offset // base: tree node which can be either a local address or arbitrary node // offset: distance from the base from which to load @@ -7286,6 +7297,11 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) genCodeForCpObj(blkOp->AsBlk()); break; + case GenTreeBlk::BlkOpKindLoop: + assert(!isCopyBlk); + genCodeForInitBlkLoop(blkOp); + break; + case GenTreeBlk::BlkOpKindHelper: if (isCopyBlk) { diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 39d25e316e529e..4e1e04c23e1358 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -5958,6 +5958,17 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) } } +//------------------------------------------------------------------------ +// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop +// +// Arguments: +// initBlkNode - the GT_STORE_BLK node +// +void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) +{ + // TODO: +} + //------------------------------------------------------------------------ // genCodeForInitBlkHelper - Generate code for an InitBlk node by the means of the VM memcpy helper call // @@ -6853,6 +6864,11 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp) genCodeForCpObj(blkOp->AsBlk()); break; + case GenTreeBlk::BlkOpKindLoop: + assert(!isCopyBlk); + genCodeForInitBlkLoop(blkOp); + break; + case GenTreeBlk::BlkOpKindHelper: if (isCopyBlk) { diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 5847b40ffd32e2..562c862e99cf89 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3034,6 +3034,11 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode) genCodeForCpObj(storeBlkNode->AsBlk()); break; + case GenTreeBlk::BlkOpKindLoop: + assert(!isCopyBlk); + genCodeForInitBlkLoop(storeBlkNode); + break; + #ifdef TARGET_AMD64 case GenTreeBlk::BlkOpKindHelper: assert(!storeBlkNode->gtBlkOpGcUnsafe); @@ -3312,6 +3317,49 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) #endif } +//------------------------------------------------------------------------ +// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop +// +// Arguments: +// initBlkNode - the GT_STORE_BLK node +// +void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) +{ + GenTree* const dstNode = initBlkNode->Addr(); + GenTree* const zeroNode = initBlkNode->Data(); + + genConsumeReg(dstNode); + genConsumeReg(zeroNode); + + const regNumber dstReg = dstNode->GetRegNum(); + const regNumber zeroReg = zeroNode->GetRegNum(); + + // xor zeroReg, zeroReg + // mov offsetReg, + //.LOOP: + // mov qword ptr [dstReg + offsetReg], zeroReg + // sub offsetReg, 8 + // jne .LOOP + // mov qword ptr [dstReg], zeroReg + + const unsigned size = initBlkNode->GetLayout()->GetSize(); + assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); + + if (size > TARGET_POINTER_SIZE) + { + const regNumber offsetReg = initBlkNode->ExtractTempReg(); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); + + BasicBlock* loop = genCreateTempLabel(); + genDefineTempLabel(loop); + + GetEmitter()->emitIns_ARX_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, offsetReg, 1, 0); + GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, offsetReg, TARGET_POINTER_SIZE); + inst_JMP(EJ_jne, loop); + } + GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, 0); +} + #ifdef TARGET_AMD64 //------------------------------------------------------------------------ // genCodeForInitBlkHelper - Generate code for an InitBlk node by the means of the VM memcpy helper call diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 7c34c51571e66d..31be03bda15b7e 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -12227,6 +12227,11 @@ void Compiler::gtDispTree(GenTree* tree, printf(" (Helper)"); break; #endif + + case GenTreeBlk::BlkOpKindLoop: + printf(" (Loop)"); + break; + default: unreached(); } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 109da6a15c30d8..7e07e958478da2 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -7242,12 +7242,11 @@ struct GenTreeBlk : public GenTreeIndir #ifdef TARGET_XARCH BlkOpKindCpObjRepInstr, #endif -#ifndef TARGET_X86 BlkOpKindHelper, -#endif #ifdef TARGET_XARCH BlkOpKindRepInstr, #endif + BlkOpKindLoop, BlkOpKindUnroll, BlkOpKindUnrollMemmove, } gtBlkOpKind; @@ -7256,12 +7255,15 @@ struct GenTreeBlk : public GenTreeIndir bool gtBlkOpGcUnsafe; #endif -#ifdef TARGET_XARCH bool IsOnHeapAndContainsReferences() { return (m_layout != nullptr) && m_layout->HasGCPtr() && !Addr()->OperIs(GT_LCL_ADDR); } -#endif + + bool IsZeroingGcPointersOnHeap() + { + return OperIs(GT_STORE_BLK) && Data()->IsIntegralConst(0) && IsOnHeapAndContainsReferences(); + } GenTreeBlk(genTreeOps oper, var_types type, GenTree* addr, ClassLayout* layout) : GenTreeIndir(oper, type, addr, nullptr) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 2536d44aa00c56..e4afd313ac856d 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -600,7 +600,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) } else { - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + blkNode->gtBlkOpKind = + blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindHelper; } } else diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index d89c8723e80f66..217dd69154778a 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -328,7 +328,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) } else { - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + blkNode->gtBlkOpKind = + blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindHelper; } } else diff --git a/src/coreclr/jit/lowerriscv64.cpp b/src/coreclr/jit/lowerriscv64.cpp index 37848fc5909de3..be95a9a41ce760 100644 --- a/src/coreclr/jit/lowerriscv64.cpp +++ b/src/coreclr/jit/lowerriscv64.cpp @@ -282,7 +282,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) } else { - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + blkNode->gtBlkOpKind = + blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindHelper; } } else diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 150ad04a55d99f..13681929f40793 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -375,10 +375,12 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) { TOO_BIG_TO_UNROLL: #ifdef TARGET_AMD64 - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; + blkNode->gtBlkOpKind = + blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindHelper; #else // TODO-X86-CQ: Investigate whether a helper call would be beneficial on x86 - blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr; + blkNode->gtBlkOpKind = + blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindRepInstr; #endif } } diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index e72ff23df3375d..b87e0f80539fdc 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -631,6 +631,11 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) #endif // TARGET_ARM64 break; + case GenTreeBlk::BlkOpKindLoop: + // Needed for offsetReg + buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); + break; + case GenTreeBlk::BlkOpKindHelper: assert(!src->isContained()); dstAddrRegMask = RBM_ARG_0; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 8c9025f61b703a..22efbf53b82cb9 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -968,6 +968,7 @@ regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode) #endif case GenTreeBlk::BlkOpKindUnrollMemmove: case GenTreeBlk::BlkOpKindUnroll: + case GenTreeBlk::BlkOpKindLoop: case GenTreeBlk::BlkOpKindInvalid: // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE break; diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 7ab333630c046a..0312342a38914e 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -1114,6 +1114,11 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) } break; + case GenTreeBlk::BlkOpKindLoop: + // Needed for offsetReg + buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); + break; + case GenTreeBlk::BlkOpKindHelper: assert(!src->isContained()); dstAddrRegMask = RBM_ARG_0; diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index 4be7a391d26a70..e8dcd633928b74 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -1129,6 +1129,11 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) } break; + case GenTreeBlk::BlkOpKindLoop: + // Needed for offsetReg + buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); + break; + case GenTreeBlk::BlkOpKindHelper: assert(!src->isContained()); dstAddrRegMask = RBM_ARG_0; diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 5d54c08ebb7902..678aa6bd596bb9 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1406,6 +1406,11 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) sizeRegMask = RBM_RCX; break; + case GenTreeBlk::BlkOpKindLoop: + // Needed for offsetReg + buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); + break; + #ifdef TARGET_AMD64 case GenTreeBlk::BlkOpKindHelper: dstAddrRegMask = RBM_ARG_0; diff --git a/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs new file mode 100644 index 00000000000000..8fcc19a6a502a5 --- /dev/null +++ b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs @@ -0,0 +1,30 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using Xunit; + +public class StructWithGC_Zeroing +{ + [Fact] + public static void StructZeroingShouldNotUseMemset() + { + var largeStructWithGc = new LargeStructWithGC(); + Test(ref largeStructWithGc); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void Test(ref LargeStructWithGC s) + { + // X64-NOT: CORINFO_HELP_MEMSET + s = default; + } + + struct LargeStructWithGC + { + public byte x; + public string a; + public long b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, r, s, t, u, v, w, z; + } +} diff --git a/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj new file mode 100644 index 00000000000000..30458a4a8ca2d3 --- /dev/null +++ b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj @@ -0,0 +1,14 @@ + + + true + + + None + True + + + + true + + + From 9c33870540c8ef47018b0feae2520b0f93905eb9 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 3 Dec 2023 02:08:35 +0100 Subject: [PATCH 02/22] Add asserts --- src/coreclr/jit/codegenloongarch64.cpp | 2 +- src/coreclr/jit/codegenriscv64.cpp | 2 +- src/coreclr/jit/gentree.h | 11 ++++++++++- src/coreclr/jit/lower.cpp | 9 +++++++++ 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 73d4c4467efcdb..1bdeda84b5e93e 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6383,7 +6383,7 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - // TODO: + // TODO: LoongArch64 impl } // Generate code for a load from some address + offset diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 4e1e04c23e1358..70252f8d31225c 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -5966,7 +5966,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - // TODO: + // TODO: RISC-V impl } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 7e07e958478da2..679d5f0b1e7332 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -7255,9 +7255,14 @@ struct GenTreeBlk : public GenTreeIndir bool gtBlkOpGcUnsafe; #endif + bool ContainsReferences() + { + return (m_layout != nullptr) && m_layout->HasGCPtr(); + } + bool IsOnHeapAndContainsReferences() { - return (m_layout != nullptr) && m_layout->HasGCPtr() && !Addr()->OperIs(GT_LCL_ADDR); + return ContainsReferences() && !Addr()->OperIs(GT_LCL_ADDR); } bool IsZeroingGcPointersOnHeap() @@ -7274,6 +7279,10 @@ struct GenTreeBlk : public GenTreeIndir GenTreeBlk(genTreeOps oper, var_types type, GenTree* addr, GenTree* data, ClassLayout* layout) : GenTreeIndir(oper, type, addr, data) { + if (data->IsIntegralConst(0)) + { + data->gtFlags |= GTF_DONT_CSE; + } Initialize(layout); } diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 2e454e64c14eb1..2231a044b04cdb 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7973,6 +7973,15 @@ void Lowering::LowerBlockStoreCommon(GenTreeBlk* blkNode) { assert(blkNode->OperIs(GT_STORE_BLK, GT_STORE_DYN_BLK)); + if (blkNode->ContainsReferences() && !blkNode->OperIsCopyBlkOp()) + { + // Make sure we don't use GT_STORE_DYN_BLK + assert(blkNode->OperIs(GT_STORE_BLK)); + + // and we only zero it (and that zero is better to be not hoisted/CSE'd) + assert(blkNode->Data()->IsIntegralConst(0)); + } + // Lose the type information stored in the source - we no longer need it. if (blkNode->Data()->OperIs(GT_BLK)) { From ddc30062785488ee87cc319a6c5b88b949207a3f Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 3 Dec 2023 02:41:48 +0100 Subject: [PATCH 03/22] Add another test case --- src/coreclr/jit/gentree.h | 2 + .../JIT/opt/Structs/StructWithGC_Zeroing.cs | 40 +++++++++++++++---- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 679d5f0b1e7332..5a06113fa8d717 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -7242,7 +7242,9 @@ struct GenTreeBlk : public GenTreeIndir #ifdef TARGET_XARCH BlkOpKindCpObjRepInstr, #endif +#ifndef TARGET_X86 BlkOpKindHelper, +#endif #ifdef TARGET_XARCH BlkOpKindRepInstr, #endif diff --git a/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs index 8fcc19a6a502a5..e5a2db11ec09fe 100644 --- a/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs +++ b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs @@ -10,21 +10,47 @@ public class StructWithGC_Zeroing [Fact] public static void StructZeroingShouldNotUseMemset() { - var largeStructWithGc = new LargeStructWithGC(); - Test(ref largeStructWithGc); + LargeStructWithGC ls1 = default; + ls1.str = "hello1"; + ls1.z2 = long.MaxValue; + ZeroIt(ref ls1); + + LargeStructWithGC2 ls2 = default; + ls2.str = "hello2"; + ls2.z1 = long.MinValue; + ZeroIt(ref ls2); + + if (ls1.str != null || ls2.str != null || ls1.z2 != 0 || ls2.z1 != 0) + throw new InvalidOperationException("should be zeroed"); } [MethodImpl(MethodImplOptions.NoInlining)] - private static void Test(ref LargeStructWithGC s) + private static void ZeroIt(ref LargeStructWithGC s) { // X64-NOT: CORINFO_HELP_MEMSET + // ARM64-NOT: CORINFO_HELP_MEMSET s = default; } - struct LargeStructWithGC + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ZeroIt(ref LargeStructWithGC2 s) + { + // X64-NOT: CORINFO_HELP_MEMSET + // ARM64-NOT: CORINFO_HELP_MEMSET + s = default; + } + + struct LargeStructWithGC // 360 bytes (64-bit) + { + public string str; + public long b1, c1, d1, e1, f1, g1, h1, i1, j1, k1, l1, m1, n1, o1, p1, r1, s1, t1, u1, v1, w1, z1; + public long b2, c2, d2, e2, f2, g2, h2, i2, j2, k2, l2, m2, n2, o2, p2, r2, s2, t2, u2, v2, w2, z2; + } + + unsafe struct LargeStructWithGC2 // 4184 bytes (64-bit) { - public byte x; - public string a; - public long b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, r, s, t, u, v, w, z; + public fixed byte data[4000]; + public string str; + public long b1, c1, d1, e1, f1, g1, h1, i1, j1, k1, l1, m1, n1, o1, p1, r1, s1, t1, u1, v1, w1, z1; } } From 0a0c0f3b477934917782e580403edae2f56f1ec7 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 3 Dec 2023 03:22:54 +0100 Subject: [PATCH 04/22] Update StructWithGC_Zeroing.csproj --- src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj index 30458a4a8ca2d3..7aa59749804e49 100644 --- a/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj +++ b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.csproj @@ -1,8 +1,7 @@ true - - + true None True From 203dc0486c1681e781df4d4534387dacf16ff095 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 3 Dec 2023 11:35:57 +0100 Subject: [PATCH 05/22] Update StructWithGC_Zeroing.cs --- src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs index e5a2db11ec09fe..0a8719a2430a3b 100644 --- a/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs +++ b/src/tests/JIT/opt/Structs/StructWithGC_Zeroing.cs @@ -18,7 +18,7 @@ public static void StructZeroingShouldNotUseMemset() LargeStructWithGC2 ls2 = default; ls2.str = "hello2"; ls2.z1 = long.MinValue; - ZeroIt(ref ls2); + ZeroIt2(ref ls2); if (ls1.str != null || ls2.str != null || ls1.z2 != 0 || ls2.z1 != 0) throw new InvalidOperationException("should be zeroed"); @@ -33,7 +33,7 @@ private static void ZeroIt(ref LargeStructWithGC s) } [MethodImpl(MethodImplOptions.NoInlining)] - private static void ZeroIt(ref LargeStructWithGC2 s) + private static void ZeroIt2(ref LargeStructWithGC2 s) { // X64-NOT: CORINFO_HELP_MEMSET // ARM64-NOT: CORINFO_HELP_MEMSET From 87f3171a57e4134f49b7d776d20294507d6b2f10 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 3 Dec 2023 12:46:09 +0100 Subject: [PATCH 06/22] clean up --- src/coreclr/jit/codegenarmarch.cpp | 7 +++-- src/coreclr/jit/codegenloongarch64.cpp | 39 ++++++++++++++++++++++++-- src/coreclr/jit/codegenriscv64.cpp | 39 ++++++++++++++++++++++++-- src/coreclr/jit/codegenxarch.cpp | 7 +++-- 4 files changed, 84 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 6e7a4750a3f0ba..51c8913899ba77 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3115,7 +3115,9 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) } //------------------------------------------------------------------------ -// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop +// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop. +// It's needed for cases when size is too big to unroll and we're not allowed +// to use memset call due to atomicity requirements. // // Arguments: // initBlkNode - the GT_STORE_BLK node @@ -3150,6 +3152,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); + // The loop is reversed (it makes it smaller) + GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) { const regNumber offsetReg = initBlkNode->ExtractTempReg(); @@ -3168,7 +3172,6 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) #endif inst_JMP(EJ_ne, loop); } - GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 1bdeda84b5e93e..a559b5a0878383 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6376,14 +6376,49 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) } //------------------------------------------------------------------------ -// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop +// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop. +// It's needed for cases when size is too big to unroll and we're not allowed +// to use memset call due to atomicity requirements. // // Arguments: // initBlkNode - the GT_STORE_BLK node // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - // TODO: LoongArch64 impl + GenTree* const dstNode = initBlkNode->Addr(); + genConsumeReg(dstNode); + const regNumber dstReg = dstNode->GetRegNum(); + + GenTree* const zeroNode = initBlkNode->Data(); + genConsumeReg(zeroNode); + const regNumber zeroReg = zeroNode->GetRegNum(); + + if (initBlkNode->IsVolatile()) + { + // issue a full memory barrier before a volatile initBlock Operation + instGen_MemoryBarrier(); + } + + const unsigned size = initBlkNode->GetLayout()->GetSize(); + assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); + + // The loop is reversed (it makes it smaller) + //// GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); + if (size > TARGET_POINTER_SIZE) + { + const regNumber offsetReg = initBlkNode->ExtractTempReg(); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); + + BasicBlock* loop = genCreateTempLabel(); + genDefineTempLabel(loop); + + // TODO: LoongArch64: + // + //// GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); + //// GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); + //// GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, offsetReg); + //// inst_JMP(EJ_ne, loop); + } } // Generate code for a load from some address + offset diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 70252f8d31225c..e49444461dd82c 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -5959,14 +5959,49 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) } //------------------------------------------------------------------------ -// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop +// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop. +// It's needed for cases when size is too big to unroll and we're not allowed +// to use memset call due to atomicity requirements. // // Arguments: // initBlkNode - the GT_STORE_BLK node // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - // TODO: RISC-V impl + GenTree* const dstNode = initBlkNode->Addr(); + genConsumeReg(dstNode); + const regNumber dstReg = dstNode->GetRegNum(); + + GenTree* const zeroNode = initBlkNode->Data(); + genConsumeReg(zeroNode); + const regNumber zeroReg = zeroNode->GetRegNum(); + + if (initBlkNode->IsVolatile()) + { + // issue a full memory barrier before a volatile initBlock Operation + instGen_MemoryBarrier(); + } + + const unsigned size = initBlkNode->GetLayout()->GetSize(); + assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); + + // The loop is reversed (it makes it smaller) + //// GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); + if (size > TARGET_POINTER_SIZE) + { + const regNumber offsetReg = initBlkNode->ExtractTempReg(); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); + + BasicBlock* loop = genCreateTempLabel(); + genDefineTempLabel(loop); + + // TODO: RISC-V: + // + //// GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); + //// GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); + //// GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, offsetReg); + //// inst_JMP(EJ_ne, loop); + } } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 562c862e99cf89..432a7f4fe4faa0 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3318,7 +3318,9 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) } //------------------------------------------------------------------------ -// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop +// genCodeForInitBlkLoop - Generate code for an InitBlk using an inlined for-loop. +// It's needed for cases when size is too big to unroll and we're not allowed +// to use memset call due to atomicity requirements. // // Arguments: // initBlkNode - the GT_STORE_BLK node @@ -3345,6 +3347,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); + // The loop is reversed (it makes it smaller) + GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { const regNumber offsetReg = initBlkNode->ExtractTempReg(); @@ -3357,7 +3361,6 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, offsetReg, TARGET_POINTER_SIZE); inst_JMP(EJ_jne, loop); } - GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, 0); } #ifdef TARGET_AMD64 From 31a5686475f9c9723913c2b71fe17723a30f66bf Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 3 Dec 2023 14:25:38 +0100 Subject: [PATCH 07/22] Fix arm32 --- src/coreclr/jit/codegenarmarch.cpp | 22 +++++++++++----------- src/coreclr/jit/codegenloongarch64.cpp | 15 ++++++++------- src/coreclr/jit/codegenriscv64.cpp | 15 ++++++++------- src/coreclr/jit/codegenxarch.cpp | 2 +- 4 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 51c8913899ba77..8ed38ad63cc757 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3124,17 +3124,16 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - GenTree* const dstNode = initBlkNode->Addr(); - genConsumeReg(dstNode); - const regNumber dstReg = dstNode->GetRegNum(); - -#ifdef TARGET_ARM64 - const regNumber zeroReg = REG_ZR; -#else + GenTree* const dstNode = initBlkNode->Addr(); GenTree* const zeroNode = initBlkNode->Data(); + + genConsumeReg(dstNode); genConsumeReg(zeroNode); + + const regNumber dstReg = dstNode->GetRegNum(); const regNumber zeroReg = zeroNode->GetRegNum(); -#endif + + // TODO-ARM64: mark initBlkNode->Data() as contained and use WZR/XZR if (initBlkNode->IsVolatile()) { @@ -3142,12 +3141,13 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) instGen_MemoryBarrier(); } + // mov zeroReg, wzr + // str zeroReg, [dstReg] // mov offsetReg, //.LOOP: - // str xzr, [dstReg, offsetReg] + // str zeroReg, [dstReg, offsetReg] // subs offsetReg, offsetReg, #8 // bne .LOOP - // str xzr, [dstReg] const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); @@ -3168,7 +3168,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) #else // There is no subs (sets flags) instruction on ARM32, so we use sub and cmp instead. GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); - GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, offsetReg); + GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, zeroReg); #endif inst_JMP(EJ_ne, loop); } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index a559b5a0878383..c984241f454103 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6385,12 +6385,13 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - GenTree* const dstNode = initBlkNode->Addr(); - genConsumeReg(dstNode); - const regNumber dstReg = dstNode->GetRegNum(); - + GenTree* const dstNode = initBlkNode->Addr(); GenTree* const zeroNode = initBlkNode->Data(); + + genConsumeReg(dstNode); genConsumeReg(zeroNode); + + const regNumber dstReg = dstNode->GetRegNum(); const regNumber zeroReg = zeroNode->GetRegNum(); if (initBlkNode->IsVolatile()) @@ -6402,6 +6403,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); + //// TODO: implement on LoongArch64: + // The loop is reversed (it makes it smaller) //// GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) @@ -6412,11 +6415,9 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); - // TODO: LoongArch64: - // //// GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); //// GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); - //// GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, offsetReg); + //// GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, zeroReg); //// inst_JMP(EJ_ne, loop); } } diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index e49444461dd82c..3629cdb002760c 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -5968,12 +5968,13 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - GenTree* const dstNode = initBlkNode->Addr(); - genConsumeReg(dstNode); - const regNumber dstReg = dstNode->GetRegNum(); - + GenTree* const dstNode = initBlkNode->Addr(); GenTree* const zeroNode = initBlkNode->Data(); + + genConsumeReg(dstNode); genConsumeReg(zeroNode); + + const regNumber dstReg = dstNode->GetRegNum(); const regNumber zeroReg = zeroNode->GetRegNum(); if (initBlkNode->IsVolatile()) @@ -5985,6 +5986,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); + //// TODO: implement on RISC-V: + // The loop is reversed (it makes it smaller) //// GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) @@ -5995,11 +5998,9 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); - // TODO: RISC-V: - // //// GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); //// GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); - //// GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, offsetReg); + //// GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, zeroReg); //// inst_JMP(EJ_ne, loop); } } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 432a7f4fe4faa0..7bab48ea9350ca 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3337,12 +3337,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const regNumber zeroReg = zeroNode->GetRegNum(); // xor zeroReg, zeroReg + // mov qword ptr [dstReg], zeroReg // mov offsetReg, //.LOOP: // mov qword ptr [dstReg + offsetReg], zeroReg // sub offsetReg, 8 // jne .LOOP - // mov qword ptr [dstReg], zeroReg const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); From e4248d28c17341a144766483cc598516a8a4b238 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 5 Dec 2023 02:21:59 +0100 Subject: [PATCH 08/22] Address feedback --- src/coreclr/jit/codegenarmarch.cpp | 11 ++++++----- src/coreclr/jit/codegenloongarch64.cpp | 7 +++++-- src/coreclr/jit/codegenriscv64.cpp | 7 +++++-- src/coreclr/jit/codegenxarch.cpp | 7 +++++-- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 8ed38ad63cc757..374fb32e3893ed 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3152,11 +3152,14 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); - // The loop is reversed (it makes it smaller) + // The loop is reversed - it makes it smaller. + // Although, we zero the first pointer before the loop (the loop doesn't zero it) + // it works as a nullcheck, otherwise the first iteration would try to access + // "null + potentially large offset" and hit AV. GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) { - const regNumber offsetReg = initBlkNode->ExtractTempReg(); + const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); @@ -3166,9 +3169,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) #ifdef TARGET_ARM64 GetEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); #else - // There is no subs (sets flags) instruction on ARM32, so we use sub and cmp instead. - GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); - GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, zeroReg); + GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE, INS_FLAGS_SET); #endif inst_JMP(EJ_ne, loop); } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index c984241f454103..44619a72c38684 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6405,11 +6405,14 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) //// TODO: implement on LoongArch64: - // The loop is reversed (it makes it smaller) + // The loop is reversed - it makes it smaller. + // Although, we zero the first pointer before the loop (the loop doesn't zero it) + // it works as a nullcheck, otherwise the first iteration would try to access + // "null + potentially large offset" and hit AV. //// GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) { - const regNumber offsetReg = initBlkNode->ExtractTempReg(); + const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 3629cdb002760c..cbf817172bca77 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -5988,11 +5988,14 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) //// TODO: implement on RISC-V: - // The loop is reversed (it makes it smaller) + // The loop is reversed - it makes it smaller. + // Although, we zero the first pointer before the loop (the loop doesn't zero it) + // it works as a nullcheck, otherwise the first iteration would try to access + // "null + potentially large offset" and hit AV. //// GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) { - const regNumber offsetReg = initBlkNode->ExtractTempReg(); + const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 7bab48ea9350ca..1af92b2d9644dc 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3347,11 +3347,14 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); - // The loop is reversed (it makes it smaller) + // The loop is reversed - it makes it smaller. + // Although, we zero the first pointer before the loop (the loop doesn't zero it) + // it works as a nullcheck, otherwise the first iteration would try to access + // "null + potentially large offset" and hit AV. GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - const regNumber offsetReg = initBlkNode->ExtractTempReg(); + const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); From 548d5ed7fb1103225bf3b6f4a638715efd061044 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 5 Dec 2023 15:03:54 +0100 Subject: [PATCH 09/22] Add a small note --- docs/design/coreclr/botr/clr-abi.md | 44 +++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index 825ad5820a920e..9141613db0c0da 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ b/docs/design/coreclr/botr/clr-abi.md @@ -205,7 +205,7 @@ For non-rude thread abort, the VM walks the stack, running any catch handler tha For example: -``` +```cs try { // try 1 try { // try 2 System.Threading.Thread.CurrentThread.Abort(); @@ -221,7 +221,7 @@ L: In this case, if the address returned in catch 2 corresponding to label L is outside try 1, then the ThreadAbortException re-raised by the VM will not be caught by catch 1, as is expected. The JIT needs to insert a block such that this is the effective code generation: -``` +```cs try { // try 1 try { // try 2 System.Threading.Thread.CurrentThread.Abort(); @@ -238,7 +238,7 @@ L: Similarly, the automatic re-raise address for a ThreadAbortException can't be within a finally handler, or the VM will abort the re-raise and swallow the exception. This can happen due to call-to-finally thunks marked as "cloned finally", as described above. For example (this is pseudo-assembly-code, not C#): -``` +```cs try { // try 1 try { // try 2 System.Threading.Thread.CurrentThread.Abort(); @@ -254,7 +254,7 @@ L: This would generate something like: -``` +```asm // beginning of 'try 1' // beginning of 'try 2' System.Threading.Thread.CurrentThread.Abort(); @@ -279,7 +279,7 @@ Finally1: Note that the JIT must already insert a "step" block so the finally will be called. However, this isn't sufficient to support ThreadAbortException processing, because "L1" is marked as "cloned finally". In this case, the JIT must insert another step block that is within "try 1" but outside the cloned finally block, that will allow for correct re-raise semantics. For example: -``` +```asm // beginning of 'try 1' // beginning of 'try 2' System.Threading.Thread.CurrentThread.Abort(); @@ -397,7 +397,7 @@ To implement this requirement, for any function with EH, we create a frame-local Note that the since a slot on x86 is 4 bytes, the minimum size is 16 bytes. The idea is to have 1 slot for each handler that could be possibly be invoked at the same time. For example, for: -``` +```cs try { ... } catch { @@ -417,7 +417,7 @@ When calling a finally, we set the appropriate level to 0xFC (aka "finally call" Thus, calling a finally from JIT generated code looks like: -``` +```asm mov dword ptr [L_02+0x4 ebp-10H], 0 // This must happen before the 0xFC is written mov dword ptr [L_02+0x8 ebp-0CH], 252 // 0xFC push G_M52300_IG07 @@ -428,7 +428,7 @@ In this case, `G_M52300_IG07` is not the address after the 'jmp', so a simple 'c The code this finally returns to looks like this: -``` +```asm mov dword ptr [L_02+0x8 ebp-0CH], 0 jmp SHORT G_M52300_IG05 ``` @@ -477,7 +477,7 @@ Because a main function body will **always** be on the stack when one of its fun There is one "corner case" in the VM implementation of WantsReportOnlyLeaf model that has implications for the code the JIT is allowed to generate. Consider this function with nested exception handling: -``` +```cs public void runtest() { try { try { @@ -804,3 +804,29 @@ In addition to the usual registers it also preserves all float registers and `rc `CORINFO_HELP_DISPATCH_INDIRECT_CALL` takes the call address in `rax` and it reserves the right to use and trash `r10` and `r11`. The JIT uses the dispatch helper on x64 whenever possible as it is expected that the code size benefits outweighs the less accurate branch prediction. However, note that the use of `r11` in the dispatcher makes it incompatible with VSD calls where the JIT must fall back to the validator and a manual call. + +# Notes on Memset/Memcpy + +Generally, `memset` and `memcpy` do not provide any guarantees of atomicity. This implies that they should only be used when the memory being modified by `memset`/`memcpy` is not observable by any other thread (including GC), or when there are no atomicity requirements according to our [Memory Model](../../specs/Memory-model.md). It's especially important when we modify heap containing managed pointers - those must be updated atomically, e.g. using pointer-sized `mov` instruction (managed pointers are always aligned) - see [Atomic Memory Access](../../specs/Memory-model.md#Atomic-memory-accesses). It's worth noting that by "update" it's implied "set to zero", otherwise, we need a write barrier. + +Examples: + +```cs +struct MyStruct +{ + long a; + string b; +} + +void Test1(ref MyStruct m) +{ + // We're not allowed to use memset here + m = default; +} + +MyStruct Test2() +{ + // We can use memset here + return default; +} +``` \ No newline at end of file From d46d9dc6f3ab585266467d61fe375c7236b5f242 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 6 Dec 2023 02:23:39 +0100 Subject: [PATCH 10/22] Add RISC-V and LA64 --- src/coreclr/jit/codegenarmarch.cpp | 2 ++ src/coreclr/jit/codegenloongarch64.cpp | 21 +++++++++++++-------- src/coreclr/jit/codegenriscv64.cpp | 21 +++++++++++++-------- src/coreclr/jit/codegenxarch.cpp | 2 ++ src/coreclr/jit/lsrariscv64.cpp | 2 ++ 5 files changed, 32 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 374fb32e3893ed..a9178eea3ac267 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3164,6 +3164,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); + GetEmitter()->emitDisableGC(); GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); #ifdef TARGET_ARM64 @@ -3172,6 +3173,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE, INS_FLAGS_SET); #endif inst_JMP(EJ_ne, loop); + GetEmitter()->emitEnableGC(); } } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 44619a72c38684..aebbb05051d41d 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6403,25 +6403,30 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); - //// TODO: implement on LoongArch64: - // The loop is reversed - it makes it smaller. // Although, we zero the first pointer before the loop (the loop doesn't zero it) // it works as a nullcheck, otherwise the first iteration would try to access // "null + potentially large offset" and hit AV. - //// GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - const regNumber offsetReg = initBlkNode->GetSingleTempReg(); + const regNumber offsetReg = initBlkNode->ExtractTempReg(); + const regNumber tempReg = initBlkNode->ExtractTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); + GetEmitter()->emitDisableGC(); - //// GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); - //// GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); - //// GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, zeroReg); - //// inst_JMP(EJ_ne, loop); + // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) + GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, tempReg, dstReg, offsetReg); + // *tempReg = 0 + GetEmitter()->emitIns_R_R_R(INS_st_d, EA_PTRSIZE, zeroReg, tempReg, 0); + // offsetReg = offsetReg - 8 + GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, offsetReg, offsetReg, -8); + // if (offsetReg != 0) goto loop; + GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, zeroReg); + GetEmitter()->emitEnableGC(); } } diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index cbf817172bca77..394c668b6113d8 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -5986,25 +5986,30 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const unsigned size = initBlkNode->GetLayout()->GetSize(); assert((size >= TARGET_POINTER_SIZE) && ((size % TARGET_POINTER_SIZE) == 0)); - //// TODO: implement on RISC-V: - // The loop is reversed - it makes it smaller. // Although, we zero the first pointer before the loop (the loop doesn't zero it) // it works as a nullcheck, otherwise the first iteration would try to access // "null + potentially large offset" and hit AV. - //// GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - const regNumber offsetReg = initBlkNode->GetSingleTempReg(); + const regNumber offsetReg = initBlkNode->ExtractTempReg(); + const regNumber tempReg = initBlkNode->ExtractTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); + GetEmitter()->emitDisableGC(); - //// GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); - //// GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE); - //// GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, offsetReg, zeroReg); - //// inst_JMP(EJ_ne, loop); + // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) + GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, tempReg, dstReg, offsetReg); + // *tempReg = 0 + GetEmitter()->emitIns_R_R_R(INS_sd, EA_PTRSIZE, zeroReg, tempReg, 0); + // offsetReg = offsetReg - 8 + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, offsetReg, offsetReg, -8); + // if (offsetReg != 0) goto loop; + GetEmitter()->emitIns_J(INS_bnez, loop, offsetReg); + GetEmitter()->emitEnableGC(); } } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 1af92b2d9644dc..bb422886a221b7 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3359,10 +3359,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); + GetEmitter()->emitDisableGC(); GetEmitter()->emitIns_ARX_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, offsetReg, 1, 0); GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, offsetReg, TARGET_POINTER_SIZE); inst_JMP(EJ_jne, loop); + GetEmitter()->emitEnableGC(); } } diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index e8dcd633928b74..cfa574bee4d66e 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -1132,6 +1132,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) case GenTreeBlk::BlkOpKindLoop: // Needed for offsetReg buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); + // Needed for tempReg + buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); break; case GenTreeBlk::BlkOpKindHelper: From 259587c5c917d578c0d35f33c13f3234ac7295e6 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 6 Dec 2023 14:04:55 +0100 Subject: [PATCH 11/22] Address feedback --- src/coreclr/jit/codegenarmarch.cpp | 5 +++-- src/coreclr/jit/codegenloongarch64.cpp | 5 +++-- src/coreclr/jit/codegenriscv64.cpp | 5 +++-- src/coreclr/jit/codegenxarch.cpp | 5 +++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index a9178eea3ac267..58cff2744f1f9b 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3159,12 +3159,14 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) { + regSet.AddMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); - GetEmitter()->emitDisableGC(); GetEmitter()->emitIns_R_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg, offsetReg); #ifdef TARGET_ARM64 @@ -3173,7 +3175,6 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE, INS_FLAGS_SET); #endif inst_JMP(EJ_ne, loop); - GetEmitter()->emitEnableGC(); } } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index aebbb05051d41d..d32e90f2d31be5 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6410,13 +6410,15 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { + regSet.AddMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + const regNumber offsetReg = initBlkNode->ExtractTempReg(); const regNumber tempReg = initBlkNode->ExtractTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); - GetEmitter()->emitDisableGC(); // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, tempReg, dstReg, offsetReg); @@ -6426,7 +6428,6 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, zeroReg); - GetEmitter()->emitEnableGC(); } } diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 394c668b6113d8..12fb3474a244c4 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -5993,13 +5993,15 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { + regSet.AddMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + const regNumber offsetReg = initBlkNode->ExtractTempReg(); const regNumber tempReg = initBlkNode->ExtractTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); - GetEmitter()->emitDisableGC(); // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, tempReg, dstReg, offsetReg); @@ -6009,7 +6011,6 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J(INS_bnez, loop, offsetReg); - GetEmitter()->emitEnableGC(); } } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index bb422886a221b7..7d27de2021c0df 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3354,17 +3354,18 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { + regSet.AddMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); - GetEmitter()->emitDisableGC(); GetEmitter()->emitIns_ARX_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, offsetReg, 1, 0); GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, offsetReg, TARGET_POINTER_SIZE); inst_JMP(EJ_jne, loop); - GetEmitter()->emitEnableGC(); } } From 9314474f5fdd2c29ecf96255f97c299ad9866cb4 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 6 Dec 2023 15:36:48 +0100 Subject: [PATCH 12/22] fix build --- src/coreclr/jit/codegenarmarch.cpp | 14 ++++++++++++-- src/coreclr/jit/codegenloongarch64.cpp | 14 ++++++++++++-- src/coreclr/jit/codegenriscv64.cpp | 14 ++++++++++++-- src/coreclr/jit/codegenxarch.cpp | 14 ++++++++++++-- 4 files changed, 48 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 58cff2744f1f9b..dfc52781d09257 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3159,8 +3159,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) { - regSet.AddMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + const bool dstDies = (dstNode->gtFlags & GTF_VAR_DEATH) != 0; + if (dstDies) + { + regSet.AddMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + } const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); @@ -3175,6 +3179,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, offsetReg, offsetReg, TARGET_POINTER_SIZE, INS_FLAGS_SET); #endif inst_JMP(EJ_ne, loop); + + if (dstDies) + { + regSet.RemoveMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegSetNpt(dstReg); + } } } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index d32e90f2d31be5..7b105afd80d551 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6410,8 +6410,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - regSet.AddMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + const bool dstDies = (dstNode->gtFlags & GTF_VAR_DEATH) != 0; + if (dstDies) + { + regSet.AddMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + } const regNumber offsetReg = initBlkNode->ExtractTempReg(); const regNumber tempReg = initBlkNode->ExtractTempReg(); @@ -6428,6 +6432,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, zeroReg); + + if (dstDies) + { + regSet.RemoveMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegSetNpt(dstReg); + } } } diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 12fb3474a244c4..9d8b8947cc1b53 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -5993,8 +5993,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - regSet.AddMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + const bool dstDies = (dstNode->gtFlags & GTF_VAR_DEATH) != 0; + if (dstDies) + { + regSet.AddMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + } const regNumber offsetReg = initBlkNode->ExtractTempReg(); const regNumber tempReg = initBlkNode->ExtractTempReg(); @@ -6011,6 +6015,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J(INS_bnez, loop, offsetReg); + + if (dstDies) + { + regSet.RemoveMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegSetNpt(dstReg); + } } } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 7d27de2021c0df..9c859a50ca1326 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3354,8 +3354,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - regSet.AddMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + const bool dstDies = (dstNode->gtFlags & GTF_VAR_DEATH) != 0; + if (dstDies) + { + regSet.AddMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); + } const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); @@ -3366,6 +3370,12 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_ARX_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, offsetReg, 1, 0); GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, offsetReg, TARGET_POINTER_SIZE); inst_JMP(EJ_jne, loop); + + if (dstDies) + { + regSet.RemoveMaskVars(genRegMask(dstReg)); + gcInfo.gcMarkRegSetNpt(dstReg); + } } } From a966fe60635da50695bb675f8024015cf26a89d7 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 6 Dec 2023 17:16:16 +0100 Subject: [PATCH 13/22] fix build --- src/coreclr/jit/codegenarmarch.cpp | 14 +++----------- src/coreclr/jit/codegenloongarch64.cpp | 14 +++----------- src/coreclr/jit/codegenriscv64.cpp | 14 +++----------- src/coreclr/jit/codegenxarch.cpp | 14 +++----------- 4 files changed, 12 insertions(+), 44 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index dfc52781d09257..9ea6cded486589 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3159,12 +3159,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R(INS_str, EA_PTRSIZE, zeroReg, dstReg); if (size > TARGET_POINTER_SIZE) { - const bool dstDies = (dstNode->gtFlags & GTF_VAR_DEATH) != 0; - if (dstDies) - { - regSet.AddMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); - } + // Extend liveness of dstReg in case if it gets killed by the store. + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); @@ -3180,11 +3176,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) #endif inst_JMP(EJ_ne, loop); - if (dstDies) - { - regSet.RemoveMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegSetNpt(dstReg); - } + gcInfo.gcMarkRegSetNpt(dstNode->gtGetRegMask()); } } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 7b105afd80d551..97353aeb6e3fc3 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6410,12 +6410,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - const bool dstDies = (dstNode->gtFlags & GTF_VAR_DEATH) != 0; - if (dstDies) - { - regSet.AddMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); - } + // Extend liveness of dstReg in case if it gets killed by the store. + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); const regNumber offsetReg = initBlkNode->ExtractTempReg(); const regNumber tempReg = initBlkNode->ExtractTempReg(); @@ -6433,11 +6429,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, zeroReg); - if (dstDies) - { - regSet.RemoveMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegSetNpt(dstReg); - } + gcInfo.gcMarkRegSetNpt(dstNode->gtGetRegMask()); } } diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 9d8b8947cc1b53..74e01544cdf792 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -5993,12 +5993,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - const bool dstDies = (dstNode->gtFlags & GTF_VAR_DEATH) != 0; - if (dstDies) - { - regSet.AddMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); - } + // Extend liveness of dstReg in case if it gets killed by the store. + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); const regNumber offsetReg = initBlkNode->ExtractTempReg(); const regNumber tempReg = initBlkNode->ExtractTempReg(); @@ -6016,11 +6012,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J(INS_bnez, loop, offsetReg); - if (dstDies) - { - regSet.RemoveMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegSetNpt(dstReg); - } + gcInfo.gcMarkRegSetNpt(dstNode->gtGetRegMask()); } } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 9c859a50ca1326..8fdc43343c32c2 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3354,12 +3354,8 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, zeroReg, dstReg, 0); if (size > TARGET_POINTER_SIZE) { - const bool dstDies = (dstNode->gtFlags & GTF_VAR_DEATH) != 0; - if (dstDies) - { - regSet.AddMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); - } + // Extend liveness of dstReg in case if it gets killed by the store. + gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); @@ -3371,11 +3367,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, offsetReg, TARGET_POINTER_SIZE); inst_JMP(EJ_jne, loop); - if (dstDies) - { - regSet.RemoveMaskVars(genRegMask(dstReg)); - gcInfo.gcMarkRegSetNpt(dstReg); - } + gcInfo.gcMarkRegSetNpt(dstNode->gtGetRegMask()); } } From 5dd94c3b1b3f587b5f232e4312d310a2956d5525 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 6 Dec 2023 19:28:40 +0100 Subject: [PATCH 14/22] CI test --- src/coreclr/jit/codegenriscv64.cpp | 2 +- src/coreclr/jit/lowerarmarch.cpp | 8 ++++++++ src/coreclr/jit/lowerxarch.cpp | 8 ++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 74e01544cdf792..07a47d1cf045c1 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6006,7 +6006,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, tempReg, dstReg, offsetReg); // *tempReg = 0 - GetEmitter()->emitIns_R_R_R(INS_sd, EA_PTRSIZE, zeroReg, tempReg, 0); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, zeroReg, tempReg, 0); // offsetReg = offsetReg - 8 GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index e4afd313ac856d..17adfbf7f2999d 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -560,6 +560,14 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) src = src->AsUnOp()->gtGetOp1(); } + // CI Test - use BlkOpKindLoop for more cases + // TODO: enable only under jitstress + if (blkNode->OperIs(GT_STORE_BLK) && ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0)) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; + return; + } + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset)) && src->OperIs(GT_CNS_INT)) { diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 13681929f40793..e51f3efebf3ba4 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -316,6 +316,14 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) src = src->AsUnOp()->gtGetOp1(); } + // CI Test - use BlkOpKindLoop for more cases + // TODO: enable only under jitstress + if (blkNode->OperIs(GT_STORE_BLK) && ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0)) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; + return; + } + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset))) { if (!src->OperIs(GT_CNS_INT)) From cd8686438e17d53e6e7e1b75b0cb8e1d9b622bff Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 6 Dec 2023 20:53:36 +0100 Subject: [PATCH 15/22] Fix build --- src/coreclr/jit/lowerarmarch.cpp | 15 ++++++++------- src/coreclr/jit/lowerxarch.cpp | 15 ++++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 17adfbf7f2999d..23bf9b13a12794 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -554,20 +554,21 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) if (blkNode->OperIsInitBlkOp()) { - if (src->OperIs(GT_INIT_VAL)) - { - src->SetContained(); - src = src->AsUnOp()->gtGetOp1(); - } - // CI Test - use BlkOpKindLoop for more cases // TODO: enable only under jitstress - if (blkNode->OperIs(GT_STORE_BLK) && ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0)) + if (blkNode->OperIs(GT_STORE_BLK) && ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0) && + src->IsIntegralConst(0)) { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; return; } + if (src->OperIs(GT_INIT_VAL)) + { + src->SetContained(); + src = src->AsUnOp()->gtGetOp1(); + } + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset)) && src->OperIs(GT_CNS_INT)) { diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index e51f3efebf3ba4..eeb2986c3a99f1 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -310,20 +310,21 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) if (blkNode->OperIsInitBlkOp()) { - if (src->OperIs(GT_INIT_VAL)) - { - src->SetContained(); - src = src->AsUnOp()->gtGetOp1(); - } - // CI Test - use BlkOpKindLoop for more cases // TODO: enable only under jitstress - if (blkNode->OperIs(GT_STORE_BLK) && ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0)) + if (blkNode->OperIs(GT_STORE_BLK) && ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0) && + src->IsIntegralConst(0)) { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; return; } + if (src->OperIs(GT_INIT_VAL)) + { + src->SetContained(); + src = src->AsUnOp()->gtGetOp1(); + } + if (!blkNode->OperIs(GT_STORE_DYN_BLK) && (size <= comp->getUnrollThreshold(Compiler::UnrollKind::Memset))) { if (!src->OperIs(GT_CNS_INT)) From 0afee16ec028e3750562fb37b96165238c0918aa Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 6 Dec 2023 23:51:29 +0100 Subject: [PATCH 16/22] Clean up --- src/coreclr/jit/codegenarmarch.cpp | 2 +- src/coreclr/jit/codegenloongarch64.cpp | 4 +++- src/coreclr/jit/codegenriscv64.cpp | 4 +++- src/coreclr/jit/codegenxarch.cpp | 2 +- src/coreclr/jit/compiler.h | 1 + src/coreclr/jit/lowerarmarch.cpp | 9 +++++---- src/coreclr/jit/lowerxarch.cpp | 9 +++++---- 7 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 9ea6cded486589..2d4a529e32e2b1 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3176,7 +3176,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) #endif inst_JMP(EJ_ne, loop); - gcInfo.gcMarkRegSetNpt(dstNode->gtGetRegMask()); + gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); } } diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 97353aeb6e3fc3..55b8012a43be7c 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6419,6 +6419,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); + GetEmitter()->emitDisableGC(); // TODO: add gcinfo to tempReg and remove nogc // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, tempReg, dstReg, offsetReg); @@ -6428,8 +6429,9 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, zeroReg); + GetEmitter()->emitEnableGC(); - gcInfo.gcMarkRegSetNpt(dstNode->gtGetRegMask()); + gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); } } diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 07a47d1cf045c1..9937a4785095d0 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -6002,6 +6002,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); + GetEmitter()->emitDisableGC(); // TODO: add gcinfo to tempReg and remove nogc // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, tempReg, dstReg, offsetReg); @@ -6011,8 +6012,9 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J(INS_bnez, loop, offsetReg); + GetEmitter()->emitEnableGC(); - gcInfo.gcMarkRegSetNpt(dstNode->gtGetRegMask()); + gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); } } diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 8fdc43343c32c2..12764affdd92e4 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -3367,7 +3367,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, offsetReg, TARGET_POINTER_SIZE); inst_JMP(EJ_jne, loop); - gcInfo.gcMarkRegSetNpt(dstNode->gtGetRegMask()); + gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); } } diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 8339d6d274f4f5..dd23c482559155 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9905,6 +9905,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX STRESS_MODE(IF_CONVERSION_COST) \ STRESS_MODE(IF_CONVERSION_INNER_LOOPS) \ STRESS_MODE(POISON_IMPLICIT_BYREFS) \ + STRESS_MODE(STORE_BLOCK_UNROLLING) \ STRESS_MODE(COUNT) enum compStressArea diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 23bf9b13a12794..6f2828111ddeb6 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -554,14 +554,15 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) if (blkNode->OperIsInitBlkOp()) { - // CI Test - use BlkOpKindLoop for more cases - // TODO: enable only under jitstress - if (blkNode->OperIs(GT_STORE_BLK) && ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0) && - src->IsIntegralConst(0)) +#ifdef DEBUG + // Use BlkOpKindLoop for more cases under stress mode + if (comp->compStressCompile(Compiler::STRESS_STORE_BLOCK_UNROLLING, 50) && blkNode->OperIs(GT_STORE_BLK) && + ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0) && src->IsIntegralConst(0)) { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; return; } +#endif if (src->OperIs(GT_INIT_VAL)) { diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index eeb2986c3a99f1..f9766fe8e7b345 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -310,14 +310,15 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) if (blkNode->OperIsInitBlkOp()) { - // CI Test - use BlkOpKindLoop for more cases - // TODO: enable only under jitstress - if (blkNode->OperIs(GT_STORE_BLK) && ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0) && - src->IsIntegralConst(0)) +#ifdef DEBUG + // Use BlkOpKindLoop for more cases under stress mode + if (comp->compStressCompile(Compiler::STRESS_STORE_BLOCK_UNROLLING, 50) && blkNode->OperIs(GT_STORE_BLK) && + ((blkNode->GetLayout()->GetSize() % TARGET_POINTER_SIZE) == 0) && src->IsIntegralConst(0)) { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; return; } +#endif if (src->OperIs(GT_INIT_VAL)) { From c9510a9595daa0828b76776e0112f53b34b4c64d Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 7 Dec 2023 20:45:36 +0100 Subject: [PATCH 17/22] Apply suggestion --- src/coreclr/jit/codegenloongarch64.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 55b8012a43be7c..d5a76b4956e265 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6413,23 +6413,18 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // Extend liveness of dstReg in case if it gets killed by the store. gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); - const regNumber offsetReg = initBlkNode->ExtractTempReg(); - const regNumber tempReg = initBlkNode->ExtractTempReg(); + const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); - GetEmitter()->emitDisableGC(); // TODO: add gcinfo to tempReg and remove nogc - // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) - GetEmitter()->emitIns_R_R_R(INS_add_d, EA_PTRSIZE, tempReg, dstReg, offsetReg); - // *tempReg = 0 - GetEmitter()->emitIns_R_R_R(INS_st_d, EA_PTRSIZE, zeroReg, tempReg, 0); + // *(dstReg + offsetReg) = 0 + GetEmitter()->emitIns_R_R_R(INS_stx_d, EA_PTRSIZE, zeroReg, dstReg, offsetReg); // offsetReg = offsetReg - 8 GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, zeroReg); - GetEmitter()->emitEnableGC(); gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); } From 091320247d866cc0bbd7912a1c17b0e32f4f1496 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 8 Dec 2023 12:37:57 +0100 Subject: [PATCH 18/22] Use REG_R0 on RISC-V and LA64, use ZR on ARM64 --- src/coreclr/jit/codegenarmarch.cpp | 13 ++++++++----- src/coreclr/jit/codegenloongarch64.cpp | 15 +++++---------- src/coreclr/jit/codegenriscv64.cpp | 13 ++++--------- src/coreclr/jit/lowerarmarch.cpp | 12 ++++++++++-- src/coreclr/jit/lowerloongarch64.cpp | 9 +++++++-- src/coreclr/jit/lowerriscv64.cpp | 9 +++++++-- 6 files changed, 41 insertions(+), 30 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 2d4a529e32e2b1..e5dc029b6390f3 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3124,14 +3124,17 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - GenTree* const dstNode = initBlkNode->Addr(); - GenTree* const zeroNode = initBlkNode->Data(); - + GenTree* const dstNode = initBlkNode->Addr(); genConsumeReg(dstNode); - genConsumeReg(zeroNode); + const regNumber dstReg = dstNode->GetRegNum(); - const regNumber dstReg = dstNode->GetRegNum(); +#ifdef TARGET_ARM64 + GenTree* const zeroNode = initBlkNode->Data(); + genConsumeReg(zeroNode); const regNumber zeroReg = zeroNode->GetRegNum(); +#else + const regNumber zeroReg = REG_ZR; +#endif // TODO-ARM64: mark initBlkNode->Data() as contained and use WZR/XZR diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index d5a76b4956e265..b413b61172a923 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6385,14 +6385,9 @@ void CodeGen::genCodeForInitBlkHelper(GenTreeBlk* initBlkNode) // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - GenTree* const dstNode = initBlkNode->Addr(); - GenTree* const zeroNode = initBlkNode->Data(); - + GenTree* const dstNode = initBlkNode->Addr(); genConsumeReg(dstNode); - genConsumeReg(zeroNode); - - const regNumber dstReg = dstNode->GetRegNum(); - const regNumber zeroReg = zeroNode->GetRegNum(); + const regNumber dstReg = dstNode->GetRegNum(); if (initBlkNode->IsVolatile()) { @@ -6407,7 +6402,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // Although, we zero the first pointer before the loop (the loop doesn't zero it) // it works as a nullcheck, otherwise the first iteration would try to access // "null + potentially large offset" and hit AV. - GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, zeroReg, dstReg, 0); + GetEmitter()->emitIns_R_R_I(INS_st_d, EA_PTRSIZE, REG_R0, dstReg, 0); if (size > TARGET_POINTER_SIZE) { // Extend liveness of dstReg in case if it gets killed by the store. @@ -6420,11 +6415,11 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) genDefineTempLabel(loop); // *(dstReg + offsetReg) = 0 - GetEmitter()->emitIns_R_R_R(INS_stx_d, EA_PTRSIZE, zeroReg, dstReg, offsetReg); + GetEmitter()->emitIns_R_R_R(INS_stx_d, EA_PTRSIZE, REG_R0, dstReg, offsetReg); // offsetReg = offsetReg - 8 GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; - GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, zeroReg); + GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, REG_R0); gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); } diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 9937a4785095d0..6693eb387ded2f 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -5968,14 +5968,9 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode) // void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) { - GenTree* const dstNode = initBlkNode->Addr(); - GenTree* const zeroNode = initBlkNode->Data(); - + GenTree* const dstNode = initBlkNode->Addr(); genConsumeReg(dstNode); - genConsumeReg(zeroNode); - - const regNumber dstReg = dstNode->GetRegNum(); - const regNumber zeroReg = zeroNode->GetRegNum(); + const regNumber dstReg = dstNode->GetRegNum(); if (initBlkNode->IsVolatile()) { @@ -5990,7 +5985,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // Although, we zero the first pointer before the loop (the loop doesn't zero it) // it works as a nullcheck, otherwise the first iteration would try to access // "null + potentially large offset" and hit AV. - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, zeroReg, dstReg, 0); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, dstReg, 0); if (size > TARGET_POINTER_SIZE) { // Extend liveness of dstReg in case if it gets killed by the store. @@ -6007,7 +6002,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, tempReg, dstReg, offsetReg); // *tempReg = 0 - GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, zeroReg, tempReg, 0); + GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, tempReg, 0); // offsetReg = offsetReg - 8 GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 6f2828111ddeb6..c58456940bf5e1 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -608,10 +608,18 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) ContainBlockStoreAddress(blkNode, size, dstAddr, nullptr); } + else if (blkNode->IsZeroingGcPointersOnHeap()) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; +#ifdef TARGET_ARM64 + // On ARM64 we can just use REG_ZR instead of having to load + // the constant into a real register like on ARM32. + src->SetContained(); +#endif + } else { - blkNode->gtBlkOpKind = - blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindHelper; + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; } } else diff --git a/src/coreclr/jit/lowerloongarch64.cpp b/src/coreclr/jit/lowerloongarch64.cpp index 217dd69154778a..d522a85ab0f966 100644 --- a/src/coreclr/jit/lowerloongarch64.cpp +++ b/src/coreclr/jit/lowerloongarch64.cpp @@ -326,10 +326,15 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) ContainBlockStoreAddress(blkNode, size, dstAddr, nullptr); } + else if (blkNode->IsZeroingGcPointersOnHeap()) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; + // We're going to use REG_R0 for zero + src->SetContained(); + } else { - blkNode->gtBlkOpKind = - blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindHelper; + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; } } else diff --git a/src/coreclr/jit/lowerriscv64.cpp b/src/coreclr/jit/lowerriscv64.cpp index be95a9a41ce760..0c0f0aafc34bcc 100644 --- a/src/coreclr/jit/lowerriscv64.cpp +++ b/src/coreclr/jit/lowerriscv64.cpp @@ -280,10 +280,15 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) ContainBlockStoreAddress(blkNode, size, dstAddr, nullptr); } + else if (blkNode->IsZeroingGcPointersOnHeap()) + { + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindLoop; + // We're going to use REG_R0 for zero + src->SetContained(); + } else { - blkNode->gtBlkOpKind = - blkNode->IsZeroingGcPointersOnHeap() ? GenTreeBlk::BlkOpKindLoop : GenTreeBlk::BlkOpKindHelper; + blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper; } } else From ebadd9d48241fa3fcebea3f51463c99d3982d672 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 8 Dec 2023 15:01:38 +0100 Subject: [PATCH 19/22] fix build --- src/coreclr/jit/codegenarmarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index e5dc029b6390f3..36ed015cb6cb3b 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3128,7 +3128,7 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) genConsumeReg(dstNode); const regNumber dstReg = dstNode->GetRegNum(); -#ifdef TARGET_ARM64 +#ifndef TARGET_ARM64 GenTree* const zeroNode = initBlkNode->Data(); genConsumeReg(zeroNode); const regNumber zeroReg = zeroNode->GetRegNum(); From b59f27e3ec00830139f24a4cd3ae630be0005318 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Fri, 8 Dec 2023 19:54:28 +0100 Subject: [PATCH 20/22] Clean up --- src/coreclr/jit/codegenarmarch.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 36ed015cb6cb3b..562dad738bd3e1 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -3136,19 +3136,16 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const regNumber zeroReg = REG_ZR; #endif - // TODO-ARM64: mark initBlkNode->Data() as contained and use WZR/XZR - if (initBlkNode->IsVolatile()) { // issue a full memory barrier before a volatile initBlock Operation instGen_MemoryBarrier(); } - // mov zeroReg, wzr - // str zeroReg, [dstReg] + // str xzr, [dstReg] // mov offsetReg, //.LOOP: - // str zeroReg, [dstReg, offsetReg] + // str xzr, [dstReg, offsetReg] // subs offsetReg, offsetReg, #8 // bne .LOOP From 8425ccc04c691affb585ed8c9dc29b577953275d Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Thu, 4 Jan 2024 15:27:49 +0100 Subject: [PATCH 21/22] Update src/coreclr/jit/codegenloongarch64.cpp Co-authored-by: Qiao Pengcheng --- src/coreclr/jit/codegenloongarch64.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index b413b61172a923..2f7444e0825ae4 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -6411,15 +6411,13 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) const regNumber offsetReg = initBlkNode->GetSingleTempReg(); instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); - BasicBlock* loop = genCreateTempLabel(); - genDefineTempLabel(loop); - + // loop begin: // *(dstReg + offsetReg) = 0 GetEmitter()->emitIns_R_R_R(INS_stx_d, EA_PTRSIZE, REG_R0, dstReg, offsetReg); // offsetReg = offsetReg - 8 GetEmitter()->emitIns_R_R_I(INS_addi_d, EA_PTRSIZE, offsetReg, offsetReg, -8); // if (offsetReg != 0) goto loop; - GetEmitter()->emitIns_J_cond_la(INS_beq, loop, offsetReg, REG_R0); + GetEmitter()->emitIns_R_I(INS_bnez, EA_8BYTE, offsetReg, -2 << 2); gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); } From db079df7e6aa26794a125101c944e72095b7d70a Mon Sep 17 00:00:00 2001 From: EgorBo Date: Thu, 4 Jan 2024 20:07:24 +0100 Subject: [PATCH 22/22] Apply suggestions for risc-v --- src/coreclr/jit/codegenriscv64.cpp | 18 +++++++++--------- src/coreclr/jit/lsrariscv64.cpp | 2 -- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 6693eb387ded2f..df5b47554d8b95 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -5991,22 +5991,22 @@ void CodeGen::genCodeForInitBlkLoop(GenTreeBlk* initBlkNode) // Extend liveness of dstReg in case if it gets killed by the store. gcInfo.gcMarkRegPtrVal(dstReg, dstNode->TypeGet()); - const regNumber offsetReg = initBlkNode->ExtractTempReg(); - const regNumber tempReg = initBlkNode->ExtractTempReg(); - instGen_Set_Reg_To_Imm(EA_PTRSIZE, offsetReg, size - TARGET_POINTER_SIZE); + const regNumber tempReg = initBlkNode->GetSingleTempReg(); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, tempReg, size - TARGET_POINTER_SIZE); + + // tempReg = dstReg + tempReg (a new interior pointer, but in a nongc region) + GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, tempReg, dstReg, tempReg); BasicBlock* loop = genCreateTempLabel(); genDefineTempLabel(loop); GetEmitter()->emitDisableGC(); // TODO: add gcinfo to tempReg and remove nogc - // tempReg = dstReg + offset (a new interior pointer, but in a nongc region) - GetEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, tempReg, dstReg, offsetReg); // *tempReg = 0 GetEmitter()->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, tempReg, 0); - // offsetReg = offsetReg - 8 - GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, offsetReg, offsetReg, -8); - // if (offsetReg != 0) goto loop; - GetEmitter()->emitIns_J(INS_bnez, loop, offsetReg); + // tempReg = tempReg - 8 + GetEmitter()->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, tempReg, -8); + // if (tempReg != dstReg) goto loop; + GetEmitter()->emitIns_J(INS_bne, loop, (int)tempReg | ((int)dstReg << 5)); GetEmitter()->emitEnableGC(); gcInfo.gcMarkRegSetNpt(genRegMask(dstReg)); diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index cfa574bee4d66e..09d8a8daf905a8 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -1130,8 +1130,6 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) break; case GenTreeBlk::BlkOpKindLoop: - // Needed for offsetReg - buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); // Needed for tempReg buildInternalIntRegisterDefForNode(blkNode, availableIntRegs); break;