Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 91 additions & 63 deletions src/coreclr/jit/codegenwasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2789,6 +2789,7 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
switch (blkOp->gtBlkOpKind)
{
case GenTreeBlk::BlkOpKindCpObjUnroll:
case GenTreeBlk::BlkOpKindNativeOpcode:
genCodeForCpObj(blkOp->AsBlk());
break;

Expand All @@ -2797,13 +2798,6 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
genCodeForInitBlkLoop(blkOp);
break;

case GenTreeBlk::BlkOpKindNativeOpcode:
genConsumeOperands(blkOp);
// Emit the size constant expected by the memory.copy and memory.fill opcodes
GetEmitter()->emitIns_I(INS_i32_const, EA_4BYTE, blkOp->Size());
GetEmitter()->emitIns_I(isCopyBlk ? INS_memory_copy : INS_memory_fill, EA_8BYTE, LINEAR_MEMORY_INDEX);
break;

default:
unreached();
}
Expand All @@ -2812,74 +2806,111 @@ void CodeGen::genCodeForStoreBlk(GenTreeBlk* blkOp)
}

//------------------------------------------------------------------------
// genCodeForCpObj: Produce code for a GT_STORE_BLK node that represents a cpobj operation.
// genCodeForCpObj: Produce code for a GT_STORE_BLK node that represents a cpobj operation
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no specific need to overload genCodeForCpObj; we can just house all of this logic in genCodeForStoreBlk if that makes it easier to share. It actually looks like doing so would also make the logic more straightforward.

// or a native memory.copy or memory.fill opcode. We share this function for both types of
// block stores because they have a lot of common logic.
//
// Arguments:
// cpObjNode - the node
//
void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
{
GenTree* dstAddr = cpObjNode->Addr();
GenTree* source = cpObjNode->Data();
var_types srcAddrType = TYP_BYREF;
regNumber dstReg = GetMultiUseOperandReg(dstAddr);
unsigned dstOffset = 0;
regNumber srcReg;
unsigned srcOffset;

// Identify the register containing our source base address, either a multi-use
// reg representing the operand of a GT_IND, or the frame pointer for LCL_VAR/LCL_FLD.
if (source->OperIs(GT_IND))
{
bool doNullCheck = (source->gtFlags & GTF_IND_NONFAULTING) == 0;
source = source->gtGetOp1();
assert(!source->isContained());
srcAddrType = source->TypeGet();
srcReg = GetMultiUseOperandReg(source);
srcOffset = 0;

if (doNullCheck)
struct operandRec
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Naming nit: operandRec -> OperandRec, addrType -> AddrType.

(What does "rec" mean?)

{
var_types addrType;
unsigned offset;
regNumber reg;
bool isContained;
};

auto makeOperandRec = [&](GenTree* operand, bool isSource) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using this for the destination looks unnecessary, since we don't need to materialize the destination address manually. The pattern we had here before was simpler. More generally it feels like open-coding the various possibilities would lead to simpler code.

var_types addrType = TYP_BYREF;
regNumber reg;
unsigned offset;

// Identify the register containing our source base address, either a multi-use
// reg representing the operand of a GT_IND, or the frame pointer for LCL_VAR/LCL_FLD.
if (operand->OperIs(GT_IND))
{
genEmitNullCheck(srcReg);
bool doNullCheck = (operand->gtFlags & GTF_IND_NONFAULTING) == 0;
operand = operand->gtGetOp1();
assert(!operand->isContained());
addrType = operand->TypeGet();
reg = GetMultiUseOperandReg(operand);
offset = 0;

if (doNullCheck)
{
genEmitNullCheck(reg);
}
}
}
else
{
assert(source->OperIs(GT_LCL_FLD, GT_LCL_VAR));
GenTreeLclVarCommon* lclVar = source->AsLclVarCommon();
bool fpBased;
srcOffset = m_compiler->lvaFrameAddress(lclVar->GetLclNum(), &fpBased) + lclVar->GetLclOffs();
assert(fpBased);
srcReg = GetFramePointerReg();
}
else if (isSource && operand->OperIs(GT_CNS_INT))
{
addrType = TYP_INT;
offset = 0;
reg = REG_NA;
}
else
{
assert(operand->OperIs(GT_LCL_FLD, GT_LCL_VAR, GT_LCL_ADDR));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GT_LCL_ADDR is here for dests?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, when I expanded the set of cases this gets called in, this assert tripped on a LCL_ADDR at least once. Conceptually it seems reasonable to get an address here.

GenTreeLclVarCommon* lclVar = operand->AsLclVarCommon();
Comment on lines +2847 to +2856
Copy link

Copilot AI Mar 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

makeOperandRec doesn't handle initblk sources wrapped in GT_INIT_VAL (used to represent byte-pattern fills). For BlkOpKindNativeOpcode+initblk, cpObjNode->Data() can be GT_INIT_VAL, which will currently fall into the LCL_* branch and hit the assert / invalid AsLclVarCommon cast. Consider unwrapping GT_INIT_VAL to its operand before the GT_IND/GT_CNS_INT/LCL_* classification (similar to how other targets/codepaths drop GT_INIT_VAL).

Copilot uses AI. Check for mistakes.
bool fpBased;
reg = GetFramePointerReg();
offset = m_compiler->lvaFrameAddress(lclVar->GetLclNum(), &fpBased) + lclVar->GetLclOffs();
assert(fpBased);
Comment on lines +2857 to +2860
Copy link

Copilot AI Mar 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The LCL_* case in makeOperandRec computes an address using frame pointer + lvaFrameAddress, which is only valid when the local itself is the source buffer living on the stack. For destination address operands (and for byref locals that are enregistered), this computes the address of the local’s home/slot rather than the address value, and can also assert if the local isn’t FP-based. Consider using GetMultiUseOperandReg/operand->GetRegNum() for address-valued operands, and reserving the FP+offset computation only for stack-resident struct sources.

Suggested change
bool fpBased;
reg = GetFramePointerReg();
offset = m_compiler->lvaFrameAddress(lclVar->GetLclNum(), &fpBased) + lclVar->GetLclOffs();
assert(fpBased);
// For address-valued locals (e.g. GT_LCL_ADDR) or destination operands, the
// address is already materialized in a register; use that directly.
if (operand->OperIs(GT_LCL_ADDR) || !isSource)
{
addrType = operand->TypeGet();
reg = GetMultiUseOperandReg(operand);
offset = 0;
}
else
{
// For source locals that represent stack-resident buffers, compute
// the address as frame-pointer + frame offset.
bool fpBased;
reg = GetFramePointerReg();
offset = m_compiler->lvaFrameAddress(lclVar->GetLclNum(), &fpBased) + lclVar->GetLclOffs();
assert(fpBased);
}

Copilot uses AI. Check for mistakes.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this correct? I don't really understand it

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are quite a few different cases and it's hard to be sure without tracing through what happens upstream.

You might want to work through a set of examples just to see the variety of things that need to be handled. Something like:

struct S { ... }

S getS();
void useS(ref S s);

void foo(ref S src, ref S dst) { dst = src; }
void foo(S src, ref S dst) { dst = src; }
void foo(ref S src) { S dst = src; }
void foo(ref S src) { useS(src); }
void foo(ref S dst) { S src = new S(...); dst = src; }
void foo(ref S dst) { S src = new S; dst = src; }
void foo(ref S dst) { dst = getS(); }

and try cases where S is a single-field wrapper and also has multiple fields.

}

operandRec result = {addrType, offset, reg, operand->isContained()};
return result;
};

bool isCopyBlk = cpObjNode->OperIsCopyBlkOp();
bool isNativeOp = cpObjNode->gtBlkOpKind == GenTreeBlk::BlkOpKindNativeOpcode;

operandRec dest = makeOperandRec(cpObjNode->Addr(), false);
operandRec source = makeOperandRec(cpObjNode->Data(), true);

// If the destination is on the stack we don't need the write barrier.
bool dstOnStack = cpObjNode->IsAddressNotOnHeap(m_compiler);
// We should have generated a memory.copy for this scenario in lowering.
assert(!dstOnStack);
// If our destination is on the stack we should be handling it with a native memory.copy/fill,
// lowering should only select cpobj for cases where a write barrier is potentially necessary.
assert(!dstOnStack || isNativeOp);

#ifdef DEBUG
assert(!dstAddr->isContained());

// This GenTree node has data about GC pointers, this means we're dealing
// with CpObj.
assert(cpObjNode->GetLayout()->HasGCPtr());
// If we're not using the native memory.copy/fill opcodes and are doing cpobj, we should only
// see types that have GC pointers in them.
assert(isNativeOp || cpObjNode->GetLayout()->HasGCPtr());
#endif // DEBUG

genConsumeOperands(cpObjNode);

emitter* emit = GetEmitter();
emitter* emit = GetEmitter();
emitAttr attrSrcAddr = emitActualTypeSize(source.addrType);
emitAttr attrDstAddr = emitActualTypeSize(dest.addrType);

if ((cpObjNode->gtFlags & GTF_IND_NONFAULTING) == 0)
if (isNativeOp)
{
genEmitNullCheck(dstReg);
// The destination should already be on the stack.
// The src may not be on the evaluation stack if it was contained, in which case we need to manufacture it
if (source.isContained)
{
assert(isCopyBlk);
emit->emitIns_I(INS_local_get, attrSrcAddr, WasmRegToIndex(source.reg));
emit->emitIns_I(INS_I_const, attrSrcAddr, source.offset);
emit->emitIns(INS_I_add);
}
GetEmitter()->emitIns_I(INS_i32_const, EA_4BYTE, cpObjNode->Size());
GetEmitter()->emitIns_I(isCopyBlk ? INS_memory_copy : INS_memory_fill, EA_8BYTE, LINEAR_MEMORY_INDEX);
return;
}

// TODO-WASM: Remove the need to do this somehow
// The dst and src may be on the evaluation stack, but we can't reliably use them, so drop them.
emit->emitIns(INS_drop);
if (!source->isContained())
if (!source.isContained)
{
emit->emitIns(INS_drop);
}

if (cpObjNode->IsVolatile())
{
Expand All @@ -2889,9 +2920,6 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
ClassLayout* layout = cpObjNode->GetLayout();
unsigned slots = layout->GetSlotCount();

emitAttr attrSrcAddr = emitActualTypeSize(srcAddrType);
emitAttr attrDstAddr = emitActualTypeSize(dstAddr->TypeGet());

unsigned gcPtrCount = cpObjNode->GetLayout()->GetGCPtrCount();

unsigned i = 0;
Expand All @@ -2902,19 +2930,19 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
if (!layout->IsGCPtr(i))
{
// Do a pointer-sized load+store pair at the appropriate offset relative to dest and source
emit->emitIns_I(INS_local_get, attrDstAddr, WasmRegToIndex(dstReg));
emit->emitIns_I(INS_local_get, attrSrcAddr, WasmRegToIndex(srcReg));
emit->emitIns_I(INS_I_load, EA_PTRSIZE, srcOffset);
emit->emitIns_I(INS_I_store, EA_PTRSIZE, dstOffset);
emit->emitIns_I(INS_local_get, attrDstAddr, WasmRegToIndex(dest.reg));
emit->emitIns_I(INS_local_get, attrSrcAddr, WasmRegToIndex(source.reg));
emit->emitIns_I(INS_I_load, EA_PTRSIZE, source.offset);
emit->emitIns_I(INS_I_store, EA_PTRSIZE, dest.offset);
}
else
{
// Compute the actual dest/src of the slot being copied to pass to the helper.
emit->emitIns_I(INS_local_get, attrDstAddr, WasmRegToIndex(dstReg));
emit->emitIns_I(INS_I_const, attrDstAddr, dstOffset);
emit->emitIns_I(INS_local_get, attrDstAddr, WasmRegToIndex(dest.reg));
emit->emitIns_I(INS_I_const, attrDstAddr, dest.offset);
emit->emitIns(INS_I_add);
emit->emitIns_I(INS_local_get, attrSrcAddr, WasmRegToIndex(srcReg));
emit->emitIns_I(INS_I_const, attrSrcAddr, srcOffset);
emit->emitIns_I(INS_local_get, attrSrcAddr, WasmRegToIndex(source.reg));
emit->emitIns_I(INS_I_const, attrSrcAddr, source.offset);
emit->emitIns(INS_I_add);
// NOTE: This helper's signature omits SP/PEP so all we need on the stack is dst and src.
// TODO-WASM-CQ: add a version of CORINFO_HELP_ASSIGN_BYREF that returns the updated dest/src
Expand All @@ -2923,8 +2951,8 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode)
gcPtrCount--;
}
++i;
srcOffset += TARGET_POINTER_SIZE;
dstOffset += TARGET_POINTER_SIZE;
source.offset += TARGET_POINTER_SIZE;
dest.offset += TARGET_POINTER_SIZE;
}

assert(gcPtrCount == 0);
Expand Down
11 changes: 6 additions & 5 deletions src/coreclr/jit/lowerwasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,18 +275,19 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
}

blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindCpObjUnroll;
SetMultiplyUsed(dstAddr);
if (src->OperIs(GT_IND))
{
SetMultiplyUsed(src->gtGetOp1());
}
}
else
{
assert(blkNode->OperIs(GT_STORE_BLK));
// memory.copy
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindNativeOpcode;
}

SetMultiplyUsed(dstAddr);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is missing a check for whether the null check is actually needed (GTF_IND_NONFAULTING).

Also, since we're fixing this code, may as well fix it completely by adding the null check for init blocks too.

if (src->OperIs(GT_IND))
{
SetMultiplyUsed(src->gtGetOp1());
}
}
}

Expand Down
Loading