From e00ac4f87aa8dca798f65b07e8abce31b78ed54d Mon Sep 17 00:00:00 2001 From: Mike Danes Date: Thu, 19 Sep 2019 22:56:45 +0300 Subject: [PATCH] Contain block store addresses --- src/jit/codegenarmarch.cpp | 117 +++++++++++++++++++++------- src/jit/codegenxarch.cpp | 155 ++++++++++++++++++++++++++++++++----- src/jit/lower.h | 1 + src/jit/lowerarmarch.cpp | 73 ++++++++++++++--- src/jit/lowerxarch.cpp | 64 +++++++++++---- src/jit/lsraarmarch.cpp | 17 +++- src/jit/lsraxarch.cpp | 17 +++- 7 files changed, 368 insertions(+), 76 deletions(-) diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp index 416beceddee8..d2d97461cb0f 100644 --- a/src/jit/codegenarmarch.cpp +++ b/src/jit/codegenarmarch.cpp @@ -1931,8 +1931,33 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) { assert(node->OperIs(GT_STORE_BLK)); - regNumber dstAddrBaseReg = genConsumeReg(node->Addr()); - unsigned dstOffset = 0; + unsigned dstLclNum = BAD_VAR_NUM; + regNumber dstAddrBaseReg = REG_NA; + int dstOffset = 0; + GenTree* dstAddr = node->Addr(); + + if (!dstAddr->isContained()) + { + dstAddrBaseReg = genConsumeReg(dstAddr); + } + else if (dstAddr->OperIsAddrMode()) + { + assert(!dstAddr->AsAddrMode()->HasIndex()); + + dstAddrBaseReg = genConsumeReg(dstAddr->AsAddrMode()->Base()); + dstOffset = dstAddr->AsAddrMode()->Offset(); + } + else + { + assert(dstAddr->OperIsLocalAddr()); + dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum(); + + if (dstAddr->OperIs(GT_LCL_FLD_ADDR)) + { + assert(dstAddr->AsLclFld()->gtLclOffs <= INT32_MAX); + dstOffset = dstAddr->AsLclFld()->gtLclOffs; + } + } regNumber srcReg; GenTree* src = node->Data(); @@ -1965,10 +1990,20 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) emitter* emit = GetEmitter(); unsigned size = node->GetLayout()->GetSize(); + assert(size <= INT32_MAX); + assert(dstOffset < INT32_MAX - static_cast(size)); + #ifdef _TARGET_ARM64_ for (unsigned regSize = 2 * REGSIZE_BYTES; size >= regSize; size -= regSize, dstOffset += 
regSize) { - emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, srcReg, srcReg, dstAddrBaseReg, dstOffset); + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_S_R_R(INS_stp, EA_8BYTE, EA_8BYTE, srcReg, srcReg, dstLclNum, dstOffset); + } + else + { + emit->emitIns_R_R_R_I(INS_stp, EA_8BYTE, srcReg, srcReg, dstAddrBaseReg, dstOffset); + } } #endif @@ -1986,22 +2021,16 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) { case 1: storeIns = INS_strb; -#ifdef _TARGET_ARM64_ - attr = EA_1BYTE; -#else - attr = EA_4BYTE; -#endif + attr = EA_4BYTE; break; case 2: storeIns = INS_strh; -#ifdef _TARGET_ARM64_ - attr = EA_2BYTE; -#else - attr = EA_4BYTE; -#endif + attr = EA_4BYTE; break; case 4: +#ifdef _TARGET_ARM64_ case 8: +#endif storeIns = INS_str; attr = EA_ATTR(regSize); break; @@ -2009,7 +2038,14 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) unreached(); } - emit->emitIns_R_R_I(storeIns, attr, srcReg, dstAddrBaseReg, dstOffset); + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(storeIns, attr, srcReg, dstLclNum, dstOffset); + } + else + { + emit->emitIns_R_R_I(storeIns, attr, srcReg, dstAddrBaseReg, dstOffset); + } } } @@ -2025,13 +2061,20 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node) unsigned dstLclNum = BAD_VAR_NUM; regNumber dstAddrBaseReg = REG_NA; - unsigned dstOffset = 0; + int dstOffset = 0; GenTree* dstAddr = node->Addr(); if (!dstAddr->isContained()) { dstAddrBaseReg = genConsumeReg(dstAddr); } + else if (dstAddr->OperIsAddrMode()) + { + assert(!dstAddr->AsAddrMode()->HasIndex()); + + dstAddrBaseReg = genConsumeReg(dstAddr->AsAddrMode()->Base()); + dstOffset = dstAddr->AsAddrMode()->Offset(); + } else { // TODO-ARM-CQ: If the local frame offset is too large to be encoded, the emitter automatically @@ -2045,12 +2088,17 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node) assert(dstAddr->OperIsLocalAddr()); dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum(); - dstOffset = dstAddr->OperIs(GT_LCL_FLD_ADDR) ? 
dstAddr->AsLclFld()->gtLclOffs : 0; + + if (dstAddr->OperIs(GT_LCL_FLD_ADDR)) + { + assert(dstAddr->AsLclFld()->gtLclOffs <= INT32_MAX); + dstOffset = dstAddr->AsLclFld()->gtLclOffs; + } } unsigned srcLclNum = BAD_VAR_NUM; regNumber srcAddrBaseReg = REG_NA; - unsigned srcOffset = 0; + int srcOffset = 0; GenTree* src = node->Data(); assert(src->isContained()); if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD)) { srcLclNum = src->AsLclVarCommon()->GetLclNum(); - srcOffset = src->OperIs(GT_LCL_FLD) ? src->AsLclFld()->gtLclOffs : 0; + + if (src->OperIs(GT_LCL_FLD)) + { + assert(src->AsLclFld()->gtLclOffs <= INT32_MAX); + srcOffset = static_cast<int>(src->AsLclFld()->gtLclOffs); + } } else { @@ -2069,11 +2122,21 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node) { srcAddrBaseReg = genConsumeReg(srcAddr); } + else if (srcAddr->OperIsAddrMode()) + { + srcAddrBaseReg = genConsumeReg(srcAddr->AsAddrMode()->Base()); + srcOffset = srcAddr->AsAddrMode()->Offset(); + } else { assert(srcAddr->OperIsLocalAddr()); srcLclNum = srcAddr->AsLclVarCommon()->GetLclNum(); - srcOffset = srcAddr->OperIs(GT_LCL_FLD_ADDR) ? 
srcAddr->AsLclFld()->gtLclOffs : 0; + + if (srcAddr->OperIs(GT_LCL_FLD_ADDR)) + { + assert(srcAddr->AsLclFld()->gtLclOffs <= INT32_MAX); + srcOffset = static_cast<int>(srcAddr->AsLclFld()->gtLclOffs); + } } } @@ -2085,6 +2148,10 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node) emitter* emit = GetEmitter(); unsigned size = node->GetLayout()->GetSize(); + assert(size <= INT32_MAX); + assert(srcOffset < INT32_MAX - static_cast<int>(size)); + assert(dstOffset < INT32_MAX - static_cast<int>(size)); + regNumber tempReg = node->ExtractTempReg(RBM_ALLINT); #ifdef _TARGET_ARM64_ @@ -2132,20 +2199,12 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node) case 1: loadIns = INS_ldrb; storeIns = INS_strb; -#ifdef _TARGET_ARM64_ - attr = EA_1BYTE; -#else - attr = EA_4BYTE; -#endif + attr = EA_4BYTE; break; case 2: loadIns = INS_ldrh; storeIns = INS_strh; -#ifdef _TARGET_ARM64_ - attr = EA_2BYTE; -#else - attr = EA_4BYTE; -#endif + attr = EA_4BYTE; break; case 4: #ifdef _TARGET_ARM64_ diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 099d5bad3acf..abffd1c741dc 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -2927,8 +2927,45 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) { assert(node->OperIs(GT_STORE_BLK)); - regNumber dstAddrBaseReg = genConsumeReg(node->Addr()); - unsigned dstOffset = 0; + unsigned dstLclNum = BAD_VAR_NUM; + regNumber dstAddrBaseReg = REG_NA; + regNumber dstAddrIndexReg = REG_NA; + unsigned dstAddrIndexScale = 1; + int dstOffset = 0; + GenTree* dstAddr = node->Addr(); + + if (!dstAddr->isContained()) + { + dstAddrBaseReg = genConsumeReg(dstAddr); + } + else if (dstAddr->OperIsAddrMode()) + { + GenTreeAddrMode* addrMode = dstAddr->AsAddrMode(); + + if (addrMode->HasBase()) + { + dstAddrBaseReg = genConsumeReg(addrMode->Base()); + } + + if (addrMode->HasIndex()) + { + dstAddrIndexReg = genConsumeReg(addrMode->Index()); + dstAddrIndexScale = addrMode->GetScale(); + } + + dstOffset = addrMode->Offset(); + } + else 
+ { + assert(dstAddr->OperIsLocalAddr()); + dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum(); + + if (dstAddr->OperIs(GT_LCL_FLD_ADDR)) + { + assert(dstAddr->AsLclFld()->gtLclOffs <= INT32_MAX); + dstOffset = static_cast<int>(dstAddr->AsLclFld()->gtLclOffs); + } + } regNumber srcIntReg = REG_NA; GenTree* src = node->Data(); @@ -2954,6 +2991,9 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) emitter* emit = GetEmitter(); unsigned size = node->GetLayout()->GetSize(); + assert(size <= INT32_MAX); + assert(dstOffset < (INT32_MAX - static_cast<int>(size))); + // Fill as much as possible using SSE2 stores. if (size >= XMM_REGSIZE_BYTES) { @@ -2977,7 +3017,15 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) for (unsigned regSize = XMM_REGSIZE_BYTES; size >= regSize; size -= regSize, dstOffset += regSize) { - emit->emitIns_AR_R(INS_movdqu, EA_ATTR(regSize), srcXmmReg, dstAddrBaseReg, dstOffset); + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(INS_movdqu, EA_ATTR(regSize), srcXmmReg, dstLclNum, dstOffset); + } + else + { + emit->emitIns_ARX_R(INS_movdqu, EA_ATTR(regSize), srcXmmReg, dstAddrBaseReg, dstAddrIndexReg, + dstAddrIndexScale, dstOffset); + } } // TODO-CQ-XArch: On x86 we could initialize 8 byte at once by using MOVQ instead of two 4 byte MOV stores. 
@@ -2993,7 +3041,15 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node) regSize /= 2; } - emit->emitIns_AR_R(INS_mov, EA_ATTR(regSize), srcIntReg, dstAddrBaseReg, dstOffset); + if (dstLclNum != BAD_VAR_NUM) + { + emit->emitIns_S_R(INS_mov, EA_ATTR(regSize), srcIntReg, dstLclNum, dstOffset); + } + else + { + emit->emitIns_ARX_R(INS_mov, EA_ATTR(regSize), srcIntReg, dstAddrBaseReg, dstAddrIndexReg, + dstAddrIndexScale, dstOffset); + } } } @@ -3051,33 +3107,64 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node) { assert(node->OperIs(GT_STORE_BLK)); - unsigned dstLclNum = BAD_VAR_NUM; - regNumber dstAddrBaseReg = REG_NA; - unsigned dstOffset = 0; - GenTree* dstAddr = node->Addr(); + unsigned dstLclNum = BAD_VAR_NUM; + regNumber dstAddrBaseReg = REG_NA; + regNumber dstAddrIndexReg = REG_NA; + unsigned dstAddrIndexScale = 1; + int dstOffset = 0; + GenTree* dstAddr = node->Addr(); if (!dstAddr->isContained()) { dstAddrBaseReg = genConsumeReg(dstAddr); } + else if (dstAddr->OperIsAddrMode()) + { + GenTreeAddrMode* addrMode = dstAddr->AsAddrMode(); + + if (addrMode->HasBase()) + { + dstAddrBaseReg = genConsumeReg(addrMode->Base()); + } + + if (addrMode->HasIndex()) + { + dstAddrIndexReg = genConsumeReg(addrMode->Index()); + dstAddrIndexScale = addrMode->GetScale(); + } + + dstOffset = addrMode->Offset(); + } else { assert(dstAddr->OperIsLocalAddr()); dstLclNum = dstAddr->AsLclVarCommon()->GetLclNum(); - dstOffset = dstAddr->OperIs(GT_LCL_FLD_ADDR) ? 
dstAddr->AsLclFld()->gtLclOffs : 0; + + if (dstAddr->OperIs(GT_LCL_FLD_ADDR)) + { + assert(dstAddr->AsLclFld()->gtLclOffs <= INT32_MAX); + dstOffset = static_cast<int>(dstAddr->AsLclFld()->gtLclOffs); + } } - unsigned srcLclNum = BAD_VAR_NUM; - regNumber srcAddrBaseReg = REG_NA; - unsigned srcOffset = 0; - GenTree* src = node->Data(); + unsigned srcLclNum = BAD_VAR_NUM; + regNumber srcAddrBaseReg = REG_NA; + regNumber srcAddrIndexReg = REG_NA; + unsigned srcAddrIndexScale = 1; + int srcOffset = 0; + GenTree* src = node->Data(); assert(src->isContained()); if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD)) { srcLclNum = src->AsLclVarCommon()->GetLclNum(); - srcOffset = src->OperIs(GT_LCL_FLD) ? src->AsLclFld()->gtLclOffs : 0; + + if (src->OperIs(GT_LCL_FLD)) + { + assert(src->AsLclFld()->gtLclOffs <= INT32_MAX); + srcOffset = static_cast<int>(src->AsLclFld()->gtLclOffs); + } } else { @@ -3088,17 +3175,43 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node) { srcAddrBaseReg = genConsumeReg(srcAddr); } + else if (srcAddr->OperIsAddrMode()) + { + GenTreeAddrMode* addrMode = srcAddr->AsAddrMode(); + + if (addrMode->HasBase()) + { + srcAddrBaseReg = genConsumeReg(addrMode->Base()); + } + + if (addrMode->HasIndex()) + { + srcAddrIndexReg = genConsumeReg(addrMode->Index()); + srcAddrIndexScale = addrMode->GetScale(); + } + + srcOffset = addrMode->Offset(); + } else { assert(srcAddr->OperIsLocalAddr()); srcLclNum = srcAddr->AsLclVarCommon()->GetLclNum(); - srcOffset = srcAddr->OperIs(GT_LCL_FLD_ADDR) ? 
srcAddr->AsLclFld()->gtLclOffs : 0; + + if (srcAddr->OperIs(GT_LCL_FLD_ADDR)) + { + assert(srcAddr->AsLclFld()->gtLclOffs <= INT32_MAX); + srcOffset = static_cast<int>(srcAddr->AsLclFld()->gtLclOffs); + } } } emitter* emit = GetEmitter(); unsigned size = node->GetLayout()->GetSize(); + assert(size <= INT32_MAX); + assert(srcOffset < (INT32_MAX - static_cast<int>(size))); + assert(dstOffset < (INT32_MAX - static_cast<int>(size))); + if (size >= XMM_REGSIZE_BYTES) { regNumber tempReg = node->GetSingleTempReg(RBM_ALLFLOAT); @@ -3112,7 +3225,8 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node) } else { - emit->emitIns_R_AR(INS_movdqu, EA_ATTR(regSize), tempReg, srcAddrBaseReg, srcOffset); + emit->emitIns_R_ARX(INS_movdqu, EA_ATTR(regSize), tempReg, srcAddrBaseReg, srcAddrIndexReg, + srcAddrIndexScale, srcOffset); } if (dstLclNum != BAD_VAR_NUM) @@ -3121,7 +3235,8 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node) } else { - emit->emitIns_AR_R(INS_movdqu, EA_ATTR(regSize), tempReg, dstAddrBaseReg, dstOffset); + emit->emitIns_ARX_R(INS_movdqu, EA_ATTR(regSize), tempReg, dstAddrBaseReg, dstAddrIndexReg, + dstAddrIndexScale, dstOffset); } } @@ -3147,7 +3262,8 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node) } else { - emit->emitIns_R_AR(INS_mov, EA_ATTR(regSize), tempReg, srcAddrBaseReg, srcOffset); + emit->emitIns_R_ARX(INS_mov, EA_ATTR(regSize), tempReg, srcAddrBaseReg, srcAddrIndexReg, + srcAddrIndexScale, srcOffset); } if (dstLclNum != BAD_VAR_NUM) @@ -3156,7 +3272,8 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node) } else { - emit->emitIns_AR_R(INS_mov, EA_ATTR(regSize), tempReg, dstAddrBaseReg, dstOffset); + emit->emitIns_ARX_R(INS_mov, EA_ATTR(regSize), tempReg, dstAddrBaseReg, dstAddrIndexReg, + dstAddrIndexScale, dstOffset); } } } diff --git a/src/jit/lower.h b/src/jit/lower.h index 4fe552918728..2f03b1d228ba 100644 --- a/src/jit/lower.h +++ b/src/jit/lower.h @@ -282,6 +282,7 @@ class Lowering : public Phase GenTree* LowerConstIntDivOrMod(GenTree* 
node); GenTree* LowerSignedDivOrMod(GenTree* node); void LowerBlockStore(GenTreeBlk* blkNode); + void ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenTree* addr); void LowerPutArgStk(GenTreePutArgStk* tree); bool TryCreateAddrMode(GenTree* addr, bool isContainable); diff --git a/src/jit/lowerarmarch.cpp b/src/jit/lowerarmarch.cpp index 95ab71d67156..639db6114ec7 100644 --- a/src/jit/lowerarmarch.cpp +++ b/src/jit/lowerarmarch.cpp @@ -280,6 +280,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) } src->AsIntCon()->SetIconValue(fill); + + ContainBlockStoreAddress(blkNode, size, dstAddr); } else { @@ -327,17 +329,10 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) if (src->OperIs(GT_IND)) { - GenTree* srcAddr = src->AsIndir()->Addr(); - if (srcAddr->OperIsLocalAddr()) - { - srcAddr->SetContained(); - } + ContainBlockStoreAddress(blkNode, size, src->AsIndir()->Addr()); } - if (dstAddr->OperIsLocalAddr()) - { - dstAddr->SetContained(); - } + ContainBlockStoreAddress(blkNode, size, dstAddr); } else { @@ -348,6 +343,66 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) } } +//------------------------------------------------------------------------ +// ContainBlockStoreAddress: Attempt to contain an address used by an unrolled block store. 
+// +// Arguments: +// blkNode - the block store node +// size - the block size +// addr - the address node to try to contain +// +void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenTree* addr) +{ + assert(blkNode->OperIs(GT_STORE_BLK) && (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)); + assert(size < INT32_MAX); + + if (addr->OperIsLocalAddr()) + { + addr->SetContained(); + return; + } + + if (!addr->OperIs(GT_ADD) || addr->gtOverflow() || !addr->AsOp()->gtGetOp2()->OperIs(GT_CNS_INT)) + { + return; + } + + GenTreeIntCon* offsetNode = addr->AsOp()->gtGetOp2()->AsIntCon(); + ssize_t offset = offsetNode->IconValue(); + + // All integer load/store instructions on both ARM32 and ARM64 support + // offsets in range -255..255. Of course, this is a rather conservative + // check. For example, if the offset and size are a multiple of 8 we + // could allow a combined offset of up to 32760 on ARM64. + if ((offset < -255) || (offset > 255) || (offset + static_cast<int>(size) > 256)) + { + return; + } + +#ifdef _TARGET_ARM64_ + // If we're going to use LDP/STP we need to ensure that the offset is + // a multiple of 8 since these instructions do not have an unscaled + // offset variant. + if ((size >= 2 * REGSIZE_BYTES) && (offset % REGSIZE_BYTES != 0)) + { + return; + } +#endif + + if (!IsSafeToContainMem(blkNode, addr)) + { + return; + } + + BlockRange().Remove(offsetNode); + + addr->ChangeOper(GT_LEA); + addr->AsAddrMode()->SetIndex(nullptr); + addr->AsAddrMode()->SetScale(0); + addr->AsAddrMode()->SetOffset(static_cast<int>(offset)); + addr->SetContained(); +} + +//------------------------------------------------------------------------ // LowerCast: Lower GT_CAST(srcType, DstType) nodes. 
// diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp index 08838ee472ff..575b7afff7bf 100644 --- a/src/jit/lowerxarch.cpp +++ b/src/jit/lowerxarch.cpp @@ -205,6 +205,8 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) } src->AsIntCon()->SetIconValue(fill); + + ContainBlockStoreAddress(blkNode, size, dstAddr); } } else @@ -228,8 +230,6 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) // Sometimes the GT_IND type is a non-struct type and then GT_IND lowering may contain the // address, not knowing that GT_IND is part of a block op that has containment restrictions. src->AsIndir()->Addr()->ClearContained(); - - TryCreateAddrMode(src->AsIndir()->Addr(), false); } if (blkNode->OperIs(GT_STORE_OBJ)) @@ -303,21 +303,12 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) { blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll; - // If src or dst are on stack, we don't have to generate the address - // into a register because it's just some constant+SP. if (src->OperIs(GT_IND)) { - GenTree* srcAddr = src->AsIndir()->Addr(); - if (srcAddr->OperIsLocalAddr()) - { - srcAddr->SetContained(); - } + ContainBlockStoreAddress(blkNode, size, src->AsIndir()->Addr()); } - if (dstAddr->OperIsLocalAddr()) - { - dstAddr->SetContained(); - } + ContainBlockStoreAddress(blkNode, size, dstAddr); } else { @@ -333,6 +324,53 @@ void Lowering::LowerBlockStore(GenTreeBlk* blkNode) } } +//------------------------------------------------------------------------ +// ContainBlockStoreAddress: Attempt to contain an address used by an unrolled block store. 
+// +// Arguments: +// blkNode - the block store node +// size - the block size +// addr - the address node to try to contain +// +void Lowering::ContainBlockStoreAddress(GenTreeBlk* blkNode, unsigned size, GenTree* addr) +{ + assert(blkNode->OperIs(GT_STORE_BLK) && (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)); + assert(size < INT32_MAX); + + if (addr->OperIsLocalAddr()) + { + addr->SetContained(); + return; + } + + if (!addr->OperIsAddrMode() && !TryCreateAddrMode(addr, true)) + { + return; + } + + GenTreeAddrMode* addrMode = addr->AsAddrMode(); + + // On x64 the address mode displacement is signed so it must not exceed INT32_MAX. This check is + // an approximation since the last displacement we generate in an unrolled block operation can be + // up to 16 bytes lower than offset + size. But offsets large enough to hit this case are likely + // to be extremely rare for this to ever be a CQ issue. + // On x86 this shouldn't be needed but then again, offsets large enough to hit this are rare. + if (addrMode->Offset() > (INT32_MAX - static_cast<int>(size))) + { + return; + } + + // Note that the parentNode is always the block node, even if we're dealing with the source address. + // The source address is not directly used by the block node but by an IND node and that IND node is + // always contained. + if (!IsSafeToContainMem(blkNode, addrMode)) + { + return; + } + + addrMode->SetContained(); +} + +//------------------------------------------------------------------------ // LowerPutArgStk: Lower a GT_PUTARG_STK. 
// diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp index 2be762ea8f04..a1266ab6861b 100644 --- a/src/jit/lsraarmarch.cpp +++ b/src/jit/lsraarmarch.cpp @@ -679,11 +679,22 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) useCount++; BuildUse(dstAddr, dstAddrRegMask); } + else if (dstAddr->OperIsAddrMode()) + { + useCount += BuildAddrUses(dstAddr->AsAddrMode()->Base()); + } - if ((srcAddrOrFill != nullptr) && !srcAddrOrFill->isContained()) + if (srcAddrOrFill != nullptr) { - useCount++; - BuildUse(srcAddrOrFill, srcRegMask); + if (!srcAddrOrFill->isContained()) + { + useCount++; + BuildUse(srcAddrOrFill, srcRegMask); + } + else if (srcAddrOrFill->OperIsAddrMode()) + { + useCount += BuildAddrUses(srcAddrOrFill->AsAddrMode()->Base()); + } } if (blkNode->OperIs(GT_STORE_DYN_BLK)) diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp index 0d93bd21314f..19cd15710f77 100644 --- a/src/jit/lsraxarch.cpp +++ b/src/jit/lsraxarch.cpp @@ -1408,11 +1408,22 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) useCount++; BuildUse(dstAddr, dstAddrRegMask); } + else if (dstAddr->OperIsAddrMode()) + { + useCount += BuildAddrUses(dstAddr); + } - if ((srcAddrOrFill != nullptr) && !srcAddrOrFill->isContained()) + if (srcAddrOrFill != nullptr) { - useCount++; - BuildUse(srcAddrOrFill, srcRegMask); + if (!srcAddrOrFill->isContained()) + { + useCount++; + BuildUse(srcAddrOrFill, srcRegMask); + } + else if (srcAddrOrFill->OperIsAddrMode()) + { + useCount += BuildAddrUses(srcAddrOrFill); + } } if (blkNode->OperIs(GT_STORE_DYN_BLK))