diff --git a/src/coreclr/src/jit/codegenarmarch.cpp b/src/coreclr/src/jit/codegenarmarch.cpp index d51353f48aa30b..6fef0bbea88bb2 100644 --- a/src/coreclr/src/jit/codegenarmarch.cpp +++ b/src/coreclr/src/jit/codegenarmarch.cpp @@ -1405,7 +1405,7 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode) for (int i = regCount - 1; i >= 0; --i) { var_types type = op1->gtSkipReloadOrCopy()->GetRegTypeByIndex(i); - regNumber reg = op1->GetRegByIndex(i); + regNumber reg = actualOp1->GetRegByIndex(i); if (op1->IsCopyOrReload()) { // GT_COPY/GT_RELOAD will have valid reg for those positions diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index bd94968c20376e..83f010226e3d3e 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -273,6 +273,96 @@ regMaskTP LinearScan::allSIMDRegs() return availableFloatRegs; } +void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition) +{ + LsraLocation nextLocation; + + if (nextRefPosition == nullptr) + { + nextLocation = MaxLocation; + fixedRegs &= ~genRegMask(regRecord->regNum); + } + else + { + nextLocation = nextRefPosition->nodeLocation; + fixedRegs |= genRegMask(regRecord->regNum); + } + nextFixedRef[regRecord->regNum] = nextLocation; +} + +regMaskTP LinearScan::getMatchingConstants(regMaskTP mask, Interval* currentInterval, RefPosition* refPosition) +{ + assert(currentInterval->isConstant && RefTypeIsDef(refPosition->refType)); + regMaskTP candidates = (mask & m_RegistersWithConstants); + regMaskTP result = RBM_NONE; + while (candidates != RBM_NONE) + { + regMaskTP candidateBit = genFindLowestBit(candidates); + candidates &= ~candidateBit; + regNumber regNum = genRegNumFromMask(candidateBit); + RegRecord* physRegRecord = getRegisterRecord(regNum); + if (isMatchingConstant(physRegRecord, refPosition)) + { + result |= candidateBit; + } + } + return result; +} + +void LinearScan::clearNextIntervalRef(regNumber reg, var_types regType) +{ + nextIntervalRef[reg] = MaxLocation; +#ifdef TARGET_ARM + if (regType == TYP_DOUBLE) + { + assert(genIsValidDoubleReg(reg)); + regNumber otherReg = REG_NEXT(reg); + nextIntervalRef[otherReg] = MaxLocation; + } +#endif +} + +void LinearScan::clearSpillCost(regNumber reg, var_types regType) +{ + spillCost[reg] = 0; +#ifdef TARGET_ARM + if (regType == TYP_DOUBLE) + { + assert(genIsValidDoubleReg(reg)); + regNumber otherReg = REG_NEXT(reg); + spillCost[otherReg] = 0; + } +#endif +} + +void LinearScan::updateNextIntervalRef(regNumber reg, Interval* interval) +{ + LsraLocation nextRefLocation = interval->getNextRefLocation(); + nextIntervalRef[reg] = nextRefLocation; +#ifdef TARGET_ARM + if (interval->registerType == TYP_DOUBLE) + { + regNumber otherReg = REG_NEXT(reg); + nextIntervalRef[otherReg] = nextRefLocation; + } +#endif +} + +void LinearScan::updateSpillCost(regNumber reg, Interval* interval) +{ + // An interval can have no recentRefPosition if this is the initial assignment + // of a parameter to its home register. + float cost = (interval->recentRefPosition != nullptr) ? getWeight(interval->recentRefPosition) : 0; + spillCost[reg] = cost; +#ifdef TARGET_ARM + if (interval->registerType == TYP_DOUBLE) + { + regNumber otherReg = REG_NEXT(reg); + spillCost[otherReg] = cost; + } +#endif +} + //------------------------------------------------------------------------ // internalFloatRegCandidates: Return the set of registers that are appropriate // for use as internal float registers. @@ -299,18 +389,12 @@ regMaskTP LinearScan::internalFloatRegCandidates() } } -/***************************************************************************** - * Inline functions for RegRecord - *****************************************************************************/ - -bool RegRecord::isFree() +bool LinearScan::isFree(RegRecord* regRecord) { - return ((assignedInterval == nullptr || !assignedInterval->isActive) && !isBusyUntilNextKill); + return ((regRecord->assignedInterval == nullptr || !regRecord->assignedInterval->isActive) && + !isRegBusy(regRecord->regNum, regRecord->registerType)); } -/***************************************************************************** - * Inline functions for LinearScan - *****************************************************************************/ RegRecord* LinearScan::getRegisterRecord(regNumber regNum) { assert((unsigned)regNum < ArrLen(physRegs)); @@ -408,10 +492,11 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) #endif // DEBUG //------------------------------------------------------------------------ -// conflictingFixedRegReference: Determine whether the current RegRecord has a +// conflictingFixedRegReference: Determine whether the 'reg' has a // fixed register use that conflicts with 'refPosition' // // Arguments: +// regNum - The register of interest // refPosition - The RefPosition of interest // // Return Value: @@ -424,28 +509,29 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) // Assumptions: // 'refPosition is non-null. -bool RegRecord::conflictingFixedRegReference(RefPosition* refPosition) +bool LinearScan::conflictingFixedRegReference(regNumber regNum, RefPosition* refPosition) { // Is this a fixed reference of this register? If so, there is no conflict. if (refPosition->isFixedRefOfRegMask(genRegMask(regNum))) { return false; } + // Otherwise, check for conflicts. // There is a conflict if: - // 1. There is a recent RefPosition on this RegRecord that is at this location, - // except in the case where it is a special "putarg" that is associated with this interval, OR + // 1. There is a recent RefPosition on this RegRecord that is at this location, OR // 2. There is an upcoming RefPosition at this location, or at the next location // if refPosition is a delayed use (i.e. must be kept live through the next/def location). LsraLocation refLocation = refPosition->nodeLocation; - if (recentRefPosition != nullptr && recentRefPosition->refType != RefTypeKill && - recentRefPosition->nodeLocation == refLocation && - (!isBusyUntilNextKill || assignedInterval != refPosition->getInterval())) + RegRecord* regRecord = getRegisterRecord(regNum); + if (isRegInUse(regNum, refPosition->getInterval()->registerType) && + (regRecord->assignedInterval != refPosition->getInterval())) { return true; } - LsraLocation nextPhysRefLocation = getNextRefLocation(); + + LsraLocation nextPhysRefLocation = nextFixedRef[regNum]; if (nextPhysRefLocation == refLocation || (refPosition->delayRegFree && nextPhysRefLocation == (refLocation + 1))) { return true; @@ -637,6 +723,7 @@ LinearScan::LinearScan(Compiler* theCompiler) // Get the value of the environment variable that controls stress for register allocation lsraStressMask = JitConfig.JitStressRegs(); + #if 0 if (lsraStressMask != 0) { @@ -705,7 +792,7 @@ LinearScan::LinearScan(Compiler* theCompiler) #ifdef TARGET_ARM64 availableIntRegs = (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd); #else - availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); + availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); #endif #if ETW_EBP_FRAMED @@ -2571,112 +2658,6 @@ bool copyOrMoveRegInUse(RefPosition* ref, LsraLocation loc) return false; } -// Determine whether the register represented by "physRegRecord" is available at least -// at the "currentLoc", and if so, return the next location at which it is in use in -// "nextRefLocationPtr" -// -bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, - LsraLocation currentLoc, - LsraLocation* nextRefLocationPtr, - RegisterType regType) -{ - LsraLocation nextRefLocation = MaxLocation; - if (physRegRecord->isBusyUntilNextKill) - { - return false; - } - - RefPosition* nextPhysReference = physRegRecord->getNextRefPosition(); - if (nextPhysReference != nullptr) - { - nextRefLocation = nextPhysReference->nodeLocation; - } - else if (!physRegRecord->isCalleeSave) - { - nextRefLocation = MaxLocation - 1; - } - *nextRefLocationPtr = nextRefLocation; - - Interval* assignedInterval = physRegRecord->assignedInterval; - - if (assignedInterval != nullptr) - { - RefPosition* recentReference = assignedInterval->recentRefPosition; - - // The only case where we have an assignedInterval, but recentReference is null - // is where this interval is live at procedure entry (i.e. an arg register), in which - // case it's still live and its assigned register is not available - // (Note that the ParamDef will be recorded as a recentReference when we encounter - // it, but we will be allocating registers, potentially to other incoming parameters, - // as we process the ParamDefs.) - - if (recentReference == nullptr) - { - return false; - } - - // Is this a copyReg/moveReg? It is if the register assignment doesn't match. - // (the recentReference may not be a copyReg/moveReg, because we could have seen another - // reference since the copyReg/moveReg) - - if (!assignedInterval->isAssignedTo(physRegRecord->regNum)) - { - // If the recentReference is for a different register, it can be reassigned, but - // otherwise don't reassign it if it's still in use. - // (Note that it is unlikely that we have a recent copy or move to a different register, - // where this physRegRecord is still pointing at an earlier copy or move, but it is possible, - // especially in stress modes.) - if ((recentReference->registerAssignment == genRegMask(physRegRecord->regNum)) && - copyOrMoveRegInUse(recentReference, currentLoc)) - { - return false; - } - } - else if (!assignedInterval->isActive && assignedInterval->isConstant) - { - // Treat this as unassigned, i.e. do nothing. - // TODO-CQ: Consider adjusting the heuristics (probably in the caller of this method) - // to avoid reusing these registers. - } - // If this interval isn't active, it's available if it isn't referenced - // at this location (or the previous location, if the recent RefPosition - // is a delayRegFree). - else if (!assignedInterval->isActive && - (recentReference->refType == RefTypeExpUse || recentReference->getRefEndLocation() < currentLoc)) - { - // This interval must have a next reference (otherwise it wouldn't be assigned to this register) - RefPosition* nextReference = recentReference->nextRefPosition; - if (nextReference != nullptr) - { - if (nextReference->nodeLocation < nextRefLocation) - { - *nextRefLocationPtr = nextReference->nodeLocation; - } - } - else - { - assert(recentReference->copyReg && - (recentReference->registerAssignment != genRegMask(physRegRecord->regNum))); - } - } - else - { - return false; - } - } - -#ifdef TARGET_ARM - if (regType == TYP_DOUBLE) - { - // Recurse, but check the other half this time (TYP_FLOAT) - if (!registerIsAvailable(findAnotherHalfRegRec(physRegRecord), currentLoc, nextRefLocationPtr, TYP_FLOAT)) - return false; - } -#endif // TARGET_ARM - - return true; -} - //------------------------------------------------------------------------ // getRegisterType: Get the RegisterType to use for the given RefPosition // @@ -2719,7 +2700,13 @@ RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* // bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition) { - if ((physRegRecord->assignedInterval == nullptr) || !physRegRecord->assignedInterval->isConstant) + if ((physRegRecord->assignedInterval == nullptr) || !physRegRecord->assignedInterval->isConstant || + (refPosition->refType != RefTypeDef)) + { + return false; + } + Interval* interval = refPosition->getInterval(); + if (!interval->isConstant || !isRegConstant(physRegRecord->regNum, interval->registerType)) { return false; } @@ -2727,90 +2714,94 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo GenTree* otherTreeNode = physRegRecord->assignedInterval->firstRefPosition->treeNode; noway_assert(otherTreeNode != nullptr); - if (refPosition->treeNode->OperGet() == otherTreeNode->OperGet()) + if (refPosition->treeNode->OperGet() != otherTreeNode->OperGet()) + { + return false; + } + + switch (otherTreeNode->OperGet()) { - switch (otherTreeNode->OperGet()) + case GT_CNS_INT: { - case GT_CNS_INT: + ssize_t v1 = refPosition->treeNode->AsIntCon()->IconValue(); + ssize_t v2 = otherTreeNode->AsIntCon()->IconValue(); + if ((v1 == v2) && (varTypeGCtype(refPosition->treeNode) == varTypeGCtype(otherTreeNode) || v1 == 0)) { - ssize_t v1 = refPosition->treeNode->AsIntCon()->IconValue(); - ssize_t v2 = otherTreeNode->AsIntCon()->IconValue(); - if ((v1 == v2) && (varTypeGCtype(refPosition->treeNode) == varTypeGCtype(otherTreeNode) || v1 == 0)) - { #ifdef TARGET_64BIT - // If the constant is negative, only reuse registers of the same type. - // This is because, on a 64-bit system, we do not sign-extend immediates in registers to - // 64-bits unless they are actually longs, as this requires a longer instruction. - // This doesn't apply to a 32-bit system, on which long values occupy multiple registers. - // (We could sign-extend, but we would have to always sign-extend, because if we reuse more - // than once, we won't have access to the instruction that originally defines the constant). - if ((refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet()) || (v1 >= 0)) + // If the constant is negative, only reuse registers of the same type. + // This is because, on a 64-bit system, we do not sign-extend immediates in registers to + // 64-bits unless they are actually longs, as this requires a longer instruction. + // This doesn't apply to a 32-bit system, on which long values occupy multiple registers. + // (We could sign-extend, but we would have to always sign-extend, because if we reuse more + // than once, we won't have access to the instruction that originally defines the constant). + if ((refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet()) || (v1 >= 0)) #endif // TARGET_64BIT - { - return true; - } - } - break; - } - case GT_CNS_DBL: - { - // For floating point constants, the values must be identical, not simply compare - // equal. So we compare the bits. - if (refPosition->treeNode->AsDblCon()->isBitwiseEqual(otherTreeNode->AsDblCon()) && - (refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet())) { return true; } - break; } - default: - break; + break; } + case GT_CNS_DBL: + { + // For floating point constants, the values must be identical, not simply compare + // equal. So we compare the bits. + if (refPosition->treeNode->AsDblCon()->isBitwiseEqual(otherTreeNode->AsDblCon()) && + (refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet())) + { + return true; + } + break; + } + default: + break; } + return false; } //------------------------------------------------------------------------ -// tryAllocateFreeReg: Find a free register that satisfies the requirements for refPosition, -// and takes into account the preferences for the given Interval +// allocateReg: Find a register that satisfies the requirements for refPosition, +// taking into account the preferences for the given Interval, +// and possibly spilling a lower weight Interval. // // Arguments: // currentInterval: The interval for the current allocation // refPosition: The RefPosition of the current Interval for which a register is being allocated -// // Return Value: -// The regNumber, if any, allocated to the RefPositon. Returns REG_NA if no free register is found. +// The regNumber, if any, allocated to the RefPosition. +// Returns REG_NA only if 'refPosition->RegOptional()' is true, and there are +// no free registers and no registers containing lower-weight Intervals that can be spilled. // // Notes: -// TODO-CQ: Consider whether we need to use a different order for tree temps than for vars, as -// reg predict does - -static const regNumber lsraRegOrder[] = {REG_VAR_ORDER}; -const unsigned lsraRegOrderSize = ArrLen(lsraRegOrder); -static const regNumber lsraRegOrderFlt[] = {REG_VAR_ORDER_FLT}; -const unsigned lsraRegOrderFltSize = ArrLen(lsraRegOrderFlt); +// This method will prefer to allocate a free register, but if none are available, +// it will look for a lower-weight Interval to spill. +// Weight and farthest distance of next reference are used to determine whether an Interval +// currently occupying a register should be spilled. It will be spilled either: +// - At its most recent RefPosition, if that is within the current block, OR +// - At the boundary between the previous block and this one +// +// To select a ref position for spilling. +// - If refPosition->RegOptional() == false +// The RefPosition chosen for spilling will be the lowest weight +// of all and if there is is more than one ref position with the +// same lowest weight, among them choses the one with farthest +// distance to its next reference. +// +// - If refPosition->RegOptional() == true +// The ref position chosen for spilling will not only be lowest weight +// of all but also has a weight lower than 'refPosition'. If there is +// no such ref position, no register will be allocated. +// -regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* refPosition) +regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPosition) { regNumber foundReg = REG_NA; - RegisterType regType = getRegisterType(currentInterval, refPosition); - const regNumber* regOrder; - unsigned regOrderSize; - if (useFloatReg(regType)) - { - regOrder = lsraRegOrderFlt; - regOrderSize = lsraRegOrderFltSize; - } - else - { - regOrder = lsraRegOrder; - regOrderSize = lsraRegOrderSize; - } + RegisterType regType = getRegisterType(currentInterval, refPosition); LsraLocation currentLocation = refPosition->nodeLocation; RefPosition* nextRefPos = refPosition->nextRefPosition; - LsraLocation nextLocation = (nextRefPos == nullptr) ? currentLocation : nextRefPos->nodeLocation; regMaskTP candidates = refPosition->registerAssignment; regMaskTP preferences = currentInterval->registerPreferences; @@ -2838,8 +2829,9 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* // If there is another fixed reference to this register before the use, change the candidates // on this RefPosition to include that of nextRefPos. - if (currFixedRegRefPosition->nextRefPosition != nullptr && - currFixedRegRefPosition->nextRefPosition->nodeLocation <= nextRefPos->getRefEndLocation()) + RefPosition* nextFixedRegRefPosition = defRegRecord->getNextRefPosition(); + if (nextFixedRegRefPosition != nullptr && + nextFixedRegRefPosition->nodeLocation <= nextRefPos->getRefEndLocation()) { candidates |= nextRefPos->registerAssignment; if (preferences == refPosition->registerAssignment) @@ -2909,9 +2901,15 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* regMaskTP newRelatedPreferences = thisRelatedPreferences & relatedPreferences; if (newRelatedPreferences != RBM_NONE && (!avoidByteRegs || thisRelatedPreferences != RBM_BYTE_REGS)) { + // TODO-CQ: The following isFree() check doesn't account for the possibility that there's an + // assignedInterval whose recentRefPosition was delayFree. It also fails to account for + // the TYP_DOUBLE case on ARM. It would be better to replace the call to isFree with + // isRegAvailable(genRegNumFromMask(newRelatedPreferences), regType)), but this is retained + // to achieve zero diffs. + // bool thisIsSingleReg = isSingleRegister(newRelatedPreferences); if (!thisIsSingleReg || (finalRelatedInterval->isLocalVar && - getRegisterRecord(genRegNumFromMask(newRelatedPreferences))->isFree())) + isFree(getRegisterRecord(genRegNumFromMask(newRelatedPreferences))))) { relatedPreferences = newRelatedPreferences; // If this Interval has a downstream def without a single-register preference, continue to iterate. @@ -2994,291 +2992,579 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* // position, which is one location past the use (getRefEndLocation() takes care of this). rangeEndLocation = rangeEndRefPosition->getRefEndLocation(); LsraLocation lastLocation = lastRefPosition->getRefEndLocation(); - regNumber prevReg = REG_NA; - - if (currentInterval->assignedReg) - { - bool useAssignedReg = false; - // This was an interval that was previously allocated to the given - // physical register, and we should try to allocate it to that register - // again, if possible and reasonable. - // Use it preemptively (i.e. before checking other available regs) - // only if it is preferred and available. - - RegRecord* regRec = currentInterval->assignedReg; - prevReg = regRec->regNum; - regMaskTP prevRegBit = genRegMask(prevReg); - - // Is it in the preferred set of regs? - if ((prevRegBit & preferences) != RBM_NONE) - { - // Is it currently available? - LsraLocation nextPhysRefLoc; - if (registerIsAvailable(regRec, currentLocation, &nextPhysRefLoc, currentInterval->registerType)) - { - // If the register is next referenced at this location, only use it if - // this has a fixed reg requirement (i.e. this is the reference that caused - // the FixedReg ref to be created) - - if (!regRec->conflictingFixedRegReference(refPosition)) - { - useAssignedReg = true; - } - } - } - if (useAssignedReg) - { - regNumber foundReg = prevReg; - assignPhysReg(regRec, currentInterval); - refPosition->registerAssignment = genRegMask(foundReg); - return foundReg; - } - else - { - // Don't keep trying to allocate to this register - currentInterval->assignedReg = nullptr; - } - } + RegRecord* prevRegRec = currentInterval->assignedReg; //------------------------------------------------------------------------- // Register Selection - RegRecord* availablePhysRegInterval = nullptr; - bool unassignInterval = false; + RegRecord* availablePhysRegRecord = nullptr; - // Each register will receive a score which is the sum of the scoring criteria below. + // Each register will receive a score which takes into account the scoring criteria below. // These were selected on the assumption that they will have an impact on the "goodness" // of a register selection, and have been tuned to a certain extent by observing the impact - // of the ordering on asmDiffs. However, there is probably much more room for tuning, + // of the ordering on asmDiffs. However, there is much more room for tuning, // and perhaps additional criteria. // - // These are FLAGS (bits) so that we can easily order them and add them together. - // If the scores are equal, but one covers more of the current interval's range, - // then it wins. Otherwise, the one encountered earlier in the regOrder wins. - enum RegisterScore { - VALUE_AVAILABLE = 0x40, // It is a constant value that is already in an acceptable register. - COVERS = 0x20, // It is in the interval's preference set and it covers the entire lifetime. - OWN_PREFERENCE = 0x10, // It is in the preference set of this interval. - COVERS_RELATED = 0x08, // It is in the preference set of the related interval and covers the entire lifetime. - RELATED_PREFERENCE = 0x04, // It is in the preference set of the related interval. - CALLER_CALLEE = 0x02, // It is in the right "set" for the interval (caller or callee-save). - UNASSIGNED = 0x01, // It is not currently assigned to an inactive interval. + FREE = 0x10000, // It is not currently assigned to an *active* interval + + // These are the original criteria for comparing registers that are free. + CONST_AVAILABLE = 0x8000, // It is a constant value that is already in an acceptable register. + THIS_ASSIGNED = 0x4000, // It is in the interval's preference set and it is already assigned to this interval. + COVERS = 0x2000, // It is in the interval's preference set and it covers the current range. + OWN_PREFERENCE = 0x1000, // It is in the preference set of this interval. + COVERS_RELATED = 0x0800, // It is in the preference set of the related interval and covers its entire lifetime. + RELATED_PREFERENCE = 0x0400, // It is in the preference set of the related interval. + CALLER_CALLEE = 0x0200, // It is in the right "set" for the interval (caller or callee-save). + UNASSIGNED = 0x0100, // It is not currently assigned to any (active or inactive) interval + COVERS_FULL = 0x0080, // It covers the full range of the interval from current position to the end. + BEST_FIT = 0x0040, // The available range is the closest match to the full range of the interval. + IS_PREV_REG = 0x0020, // This register was previously assigned to the interval. + REG_ORDER = 0x0010, // Tie-breaker + + // These are the original criteria for comparing registers that are in use. + SPILL_COST = 0x0008, // It has the lowest cost of all the candidates. + FAR_NEXT_REF = 0x0004, // It has a farther next reference than the best candidate thus far. + PREV_REG_OPT = 0x0002, // The previous RefPosition of its current assigned interval is RegOptional. + + // TODO-CQ: Consider using REG_ORDER as a tie-breaker even for busy registers. + REG_NUM = 0x0001, // It has a lower register number. }; - int bestScore = 0; - - // Compute the best possible score so we can stop looping early if we find it. - // TODO-Throughput: At some point we may want to short-circuit the computation of each score, but - // probably not until we've tuned the order of these criteria. At that point, - // we'll need to avoid the short-circuit if we've got a stress option to reverse - // the selection. - int bestPossibleScore = COVERS + UNASSIGNED + OWN_PREFERENCE + CALLER_CALLEE; - if (relatedPreferences != RBM_NONE) - { - bestPossibleScore |= RELATED_PREFERENCE + COVERS_RELATED; - } - - LsraLocation bestLocation = MinLocation; + LsraLocation bestLocation = MinLocation; + LsraLocation farRefLocation = MinLocation; - // In non-debug builds, this will simply get optimized away - bool reverseSelect = false; -#ifdef DEBUG - reverseSelect = doReverseSelect(); -#endif // DEBUG + // These are used in the post-selection updates, and must be set for any selection. + regMaskTP freeCandidates = RBM_NONE; + regMaskTP matchingConstants = RBM_NONE; + regMaskTP unassignedSet = RBM_NONE; - // An optimization for the common case where there is only one candidate - - // avoid looping over all the other registers + // These must be set prior to their use in the associated heuristics. + unsigned int thisSpillWeight = 0; - regNumber singleReg = REG_NA; + // We'll set this to short-circuit remaining heuristics when we have a single candidate. + bool found = false; - if (genMaxOneBit(candidates)) + // Is this a fixedReg? + regMaskTP fixedRegMask = RBM_NONE; + if (refPosition->isFixedRegRef) { - regOrderSize = 1; - singleReg = genRegNumFromMask(candidates); - regOrder = &singleReg; + assert(genMaxOneBit(refPosition->registerAssignment)); + fixedRegMask = refPosition->registerAssignment; + if (candidates == refPosition->registerAssignment) + { + found = true; + if (nextIntervalRef[genRegNumFromMask(candidates)] > lastLocation) + { + unassignedSet = candidates; + } + } } - for (unsigned i = 0; i < regOrderSize && (candidates != RBM_NONE); i++) + // Eliminate candidates that are in-use or busy. + if (!found) { - regNumber regNum = regOrder[i]; - regMaskTP candidateBit = genRegMask(regNum); + regMaskTP busyRegs = regsBusyUntilKill | regsInUseThisLocation; + candidates &= ~busyRegs; - if (!(candidates & candidateBit)) + // Also eliminate as busy any register with a conflicting fixed reference at this or + // the next location. + // Note that this will eliminate the fixedReg, if any, but we'll add it back below. + regMaskTP checkConflictMask = candidates & fixedRegs; + while (checkConflictMask != RBM_NONE) { - continue; - } - - candidates &= ~candidateBit; + regMaskTP checkConflictBit = genFindLowestBit(checkConflictMask); + checkConflictMask &= ~checkConflictBit; + regNumber checkConflictReg = genRegNumFromMask(checkConflictBit); + LsraLocation checkConflictLocation = nextFixedRef[checkConflictReg]; - RegRecord* physRegRecord = getRegisterRecord(regNum); - - int score = 0; - LsraLocation nextPhysRefLocation = MaxLocation; - - // By chance, is this register already holding this interval, as a copyReg or having - // been restored as inactive after a kill? - if (physRegRecord->assignedInterval == currentInterval) - { - availablePhysRegInterval = physRegRecord; - unassignInterval = false; - break; + if ((checkConflictLocation == currentLocation) || + (refPosition->delayRegFree && (checkConflictLocation == (currentLocation + 1)))) + { + candidates &= ~checkConflictBit; + } } + candidates |= fixedRegMask; + found = isSingleRegister(candidates); + } - // Find the next RefPosition of the physical register - if (!registerIsAvailable(physRegRecord, currentLocation, &nextPhysRefLocation, regType)) + // By chance, is prevRegRec already holding this interval, as a copyReg or having + // been restored as inactive after a kill? + // NOTE: this is not currently considered one of the selection criteria - it always wins + // if it is the assignedInterval of 'prevRegRec'. + regMaskTP prevRegBit = RBM_NONE; + if (!found && (prevRegRec != nullptr)) + { + prevRegBit = genRegMask(prevRegRec->regNum); + if ((prevRegRec->assignedInterval == currentInterval) && ((candidates & prevRegBit) != RBM_NONE)) { - continue; + candidates = prevRegBit; + found = true; } + } - // If the register is next referenced at this location, only use it if - // this has a fixed reg requirement (i.e. this is the reference that caused - // the FixedReg ref to be created) + if (!found && (candidates == RBM_NONE)) + { + assert(refPosition->RegOptional()); + currentInterval->assignedReg = nullptr; + return REG_NA; + } - if (physRegRecord->conflictingFixedRegReference(refPosition)) - { - continue; - } + // TODO-Cleanup: Previously, the "reverseSelect" stress mode reversed the order of the heuristics. + // It needs to be re-engineered with this refactoring. + // In non-debug builds, this will simply get optimized away + bool reverseSelect = false; +#ifdef DEBUG + reverseSelect = doReverseSelect(); +#endif // DEBUG - // If this is a definition of a constant interval, check to see if its value is already in this register. - if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType) && - isMatchingConstant(physRegRecord, refPosition)) - { - score |= VALUE_AVAILABLE; - } + // At this point we have determined all the candidates that CAN be considered, + // and we'll begin to apply the heuristics in order. + registerSelector selector; + selector.candidates = candidates; + selector.score = 0; +#ifdef TARGET_ARM + selector.regType = regType; +#endif // TARGET_ARM - // If the nextPhysRefLocation is a fixedRef for the rangeEndRefPosition, increment it so that - // we don't think it isn't covering the live range. - // This doesn't handle the case where earlier RefPositions for this Interval are also - // FixedRefs of this regNum, but at least those are only interesting in the case where those - // are "local last uses" of the Interval - otherwise the liveRange would interfere with the reg. - if (nextPhysRefLocation == rangeEndLocation && rangeEndRefPosition->isFixedRefOfReg(regNum)) - { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_INCREMENT_RANGE_END, currentInterval)); - nextPhysRefLocation++; - } + freeCandidates = getFreeCandidates(candidates, regType); - if ((candidateBit & preferences) != RBM_NONE) - { - score |= OWN_PREFERENCE; - if (nextPhysRefLocation > rangeEndLocation) - { - score |= COVERS; - } - } - if ((candidateBit & relatedPreferences) != RBM_NONE) + // Apply the FREE heuristic. + if (freeCandidates == RBM_NONE) + { + // We won't spill if this refPosition is not an actual ref. + if (!refPosition->IsActualRef()) { - score |= RELATED_PREFERENCE; - if (nextPhysRefLocation > relatedLastLocation) - { - score |= COVERS_RELATED; - } + currentInterval->assignedReg = nullptr; + return REG_NA; } + } + else if (!found) + { + found = selector.applySelection(FREE, freeCandidates); + } - // If we had a fixed-reg def of a reg that will be killed before the use, prefer it to any other registers - // with the same score. (Note that we haven't changed the original registerAssignment on the RefPosition). - // Overload the RELATED_PREFERENCE value. - else if (candidateBit == refPosition->registerAssignment) + // Apply the CONST_AVAILABLE (matching constant) heuristic. Only applies if we have freeCandidates. + // Note that we always need to define the 'matchingConstants' set. + if (freeCandidates != RBM_NONE) + { + if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType)) { - score |= RELATED_PREFERENCE; + matchingConstants = getMatchingConstants(selector.candidates, currentInterval, refPosition); + found = selector.applySelection(CONST_AVAILABLE, matchingConstants); } + } - if ((candidateBit & callerCalleePrefs) != RBM_NONE) - { - score |= CALLER_CALLEE; - } + // Apply the THIS_ASSIGNED heuristic. Only applies if we have freeCandidates. + if (!found && (prevRegRec != nullptr) && (freeCandidates != RBM_NONE)) + { + found = selector.applySelection(THIS_ASSIGNED, freeCandidates & preferences & prevRegBit); + } - // The register is considered unassigned if it has no assignedInterval, OR - // if its next reference is beyond the range of this interval. - if (!isAssigned(physRegRecord, lastLocation ARM_ARG(currentInterval->registerType))) + // Compute the sets for COVERS, OWN_PREFERENCE, COVERS_RELATED, COVERS_FULL and UNASSIGNED together, + // as they all require similar computation. + regMaskTP coversSet = RBM_NONE; + regMaskTP preferenceSet = RBM_NONE; + regMaskTP coversRelatedSet = RBM_NONE; + regMaskTP coversFullSet = RBM_NONE; + if (freeCandidates != RBM_NONE) + { + preferenceSet = (selector.candidates & preferences); + regMaskTP coversCandidates = (preferenceSet == RBM_NONE) ? selector.candidates : preferenceSet; + for (; coversCandidates != RBM_NONE;) { - score |= UNASSIGNED; - } - - bool foundBetterCandidate = false; + regMaskTP coversCandidateBit = genFindLowestBit(coversCandidates); + coversCandidates &= ~coversCandidateBit; + regNumber coversCandidateRegNum = genRegNumFromMask(coversCandidateBit); - if (score > bestScore) - { - foundBetterCandidate = true; - } - else if (score == bestScore) - { - // Prefer a register that covers the range. - if (bestLocation <= lastLocation) + // If we have a single candidate we don't need to compute the preference-related sets, but we + // do need to compute the unassignedSet. + if (!found) { - if (nextPhysRefLocation > bestLocation) + // Find the next RefPosition of the register. + LsraLocation nextIntervalLocation = getNextIntervalRef(coversCandidateRegNum, regType); + LsraLocation nextPhysRefLocation = getNextFixedRef(coversCandidateRegNum, regType); + LsraLocation coversCandidateLocation = Min(nextPhysRefLocation, nextIntervalLocation); + + // If the nextPhysRefLocation is a fixedRef for the rangeEndRefPosition, increment it so that + // we don't think it isn't covering the live range. + // This doesn't handle the case where earlier RefPositions for this Interval are also + // FixedRefs of this regNum, but at least those are only interesting in the case where those + // are "local last uses" of the Interval - otherwise the liveRange would interfere with the reg. + if (coversCandidateLocation == rangeEndLocation && + rangeEndRefPosition->isFixedRefOfReg(coversCandidateRegNum)) { - foundBetterCandidate = true; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_INCREMENT_RANGE_END, currentInterval)); + coversCandidateLocation++; } - } - // If both cover the range, prefer a register that is killed sooner (leaving the longer range register - // available). If both cover the range and also getting killed at the same location, prefer the one which - // is same as previous assignment. - else if (nextPhysRefLocation > lastLocation) - { - if (nextPhysRefLocation < bestLocation) + if (coversCandidateLocation > rangeEndLocation) + { + coversSet |= coversCandidateBit; + } + if ((coversCandidateBit & relatedPreferences) != RBM_NONE) + { + if (coversCandidateLocation > relatedInterval->lastRefPosition->nodeLocation) + { + coversRelatedSet |= coversCandidateBit; + } + } + else if (coversCandidateBit == refPosition->registerAssignment) { - foundBetterCandidate = true; + // If we had a fixed-reg def of a reg that will be killed before the use, prefer it to any other + // registers with the same score. (Note that we haven't changed the original registerAssignment + // on the RefPosition). + // Overload the RELATED_PREFERENCE value. + // TODO-CQ: Consider if this should be split out. + coversRelatedSet |= coversCandidateBit; } - else if (nextPhysRefLocation == bestLocation && prevReg == regNum) + // Does this cover the full range of the interval? + if (coversCandidateLocation > lastLocation) { - foundBetterCandidate = true; + coversFullSet |= coversCandidateBit; } } + // The register is considered unassigned if it has no assignedInterval, OR + // if its next reference is beyond the range of this interval. + if (nextIntervalRef[coversCandidateRegNum] > lastLocation) + { + unassignedSet |= coversCandidateBit; + } } + } -#ifdef DEBUG - if (doReverseSelect() && bestScore != 0) - { - foundBetterCandidate = !foundBetterCandidate; - } -#endif // DEBUG - - if (foundBetterCandidate) + // Apply the COVERS heuristic. + if (!found) + { + found = selector.applySelection(COVERS, coversSet & preferenceSet); + } + + // Apply the OWN_PREFERENCE heuristic. + // Note that 'preferenceSet' already includes only freeCandidates. + if (!found) + { + assert((preferenceSet & freeCandidates) == preferenceSet); + found = selector.applySelection(OWN_PREFERENCE, preferenceSet); + } + + // Apply the COVERS_RELATED heuristic. + if (!found) + { + assert((coversRelatedSet & freeCandidates) == coversRelatedSet); + found = selector.applySelection(COVERS_RELATED, coversRelatedSet); + } + + // Apply the RELATED_PREFERENCE heuristic. + if (!found) + { + found = selector.applySelection(RELATED_PREFERENCE, relatedPreferences & freeCandidates); + } + + // Apply the CALLER_CALLEE heuristic. + if (!found) + { + found = selector.applySelection(CALLER_CALLEE, callerCalleePrefs & freeCandidates); + } + + // Apply the UNASSIGNED heuristic. + if (!found) + { + found = selector.applySelection(UNASSIGNED, unassignedSet); + } + + // Apply the COVERS_FULL heuristic. + if (!found) + { + assert((coversFullSet & freeCandidates) == coversFullSet); + found = selector.applySelection(COVERS_FULL, coversFullSet); + } + + // Apply the BEST_FIT heuristic. Only applies if we have freeCandidates. + if (!found && (freeCandidates != RBM_NONE)) + { + regMaskTP bestFitSet = RBM_NONE; + // If the best score includes COVERS_FULL, pick the one that's killed soonest. + // If none cover the full range, the BEST_FIT is the one that's killed later. + bool earliestIsBest = ((selector.score & COVERS_FULL) != 0); + LsraLocation bestFitLocation = earliestIsBest ? MaxLocation : MinLocation; + for (regMaskTP bestFitCandidates = selector.candidates; bestFitCandidates != RBM_NONE;) { - bestLocation = nextPhysRefLocation; - availablePhysRegInterval = physRegRecord; - unassignInterval = true; - bestScore = score; + regMaskTP bestFitCandidateBit = genFindLowestBit(bestFitCandidates); + bestFitCandidates &= ~bestFitCandidateBit; + regNumber bestFitCandidateRegNum = genRegNumFromMask(bestFitCandidateBit); + + // Find the next RefPosition of the register. + LsraLocation nextIntervalLocation = getNextIntervalRef(bestFitCandidateRegNum, regType); + LsraLocation nextPhysRefLocation = getNextFixedRef(bestFitCandidateRegNum, regType); + nextPhysRefLocation = Min(nextPhysRefLocation, nextIntervalLocation); + // If the nextPhysRefLocation is a fixedRef for the rangeEndRefPosition, increment it so that + // we don't think it isn't covering the live range. + // This doesn't handle the case where earlier RefPositions for this Interval are also + // FixedRefs of this regNum, but at least those are only interesting in the case where those + // are "local last uses" of the Interval - otherwise the liveRange would interfere with the reg. + // TODO: This duplicates code in an earlier loop, and is basically here to duplicate previous + // behavior; see if we can avoid this. + if (nextPhysRefLocation == rangeEndLocation && rangeEndRefPosition->isFixedRefOfReg(bestFitCandidateRegNum)) + { + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_INCREMENT_RANGE_END, currentInterval)); + nextPhysRefLocation++; + } + + if (nextPhysRefLocation == bestFitLocation) + { + bestFitSet |= bestFitCandidateBit; + } + else + { + bool isBetter = false; + if (nextPhysRefLocation > lastLocation) + { + // This covers the full range; favor it if the other doesn't, or if it's a closer match. + if ((bestFitLocation <= lastLocation) || (nextPhysRefLocation < bestFitLocation)) + { + isBetter = true; + } + } + else + { + // This doesn't cover the full range; favor it if the other doesn't either, but this ends later. + if ((bestFitLocation <= lastLocation) && (nextPhysRefLocation > bestFitLocation)) + { + isBetter = true; + } + } + if (isBetter) + { + bestFitSet = bestFitCandidateBit; + bestFitLocation = nextPhysRefLocation; + } + } } + assert(bestFitSet != RBM_NONE); + found = selector.applySelection(BEST_FIT, bestFitSet); + } - // there is no way we can get a better score so break out - if (!reverseSelect && score == bestPossibleScore && bestLocation == rangeEndLocation + 1) + // Apply the IS_PREV_REG heuristic. TODO: Check if Only applies if we have freeCandidates. + // Oddly, the previous heuristics only considered this if it covered the range. + if ((prevRegRec != nullptr) && ((selector.score & COVERS_FULL) != 0)) + { + found = selector.applySingleRegSelection(IS_PREV_REG, prevRegBit); + } + + // Apply the REG_ORDER heuristic. Only applies if we have freeCandidates. + if (!found && (freeCandidates != RBM_NONE)) + { + // This will always result in a single candidate. That is, it is the tie-breaker + // for free candidates, and doesn't make sense as anything other than the last + // heuristic for free registers. + unsigned lowestRegOrder = UINT_MAX; + regMaskTP lowestRegOrderBit = RBM_NONE; + for (regMaskTP regOrderCandidates = selector.candidates; regOrderCandidates != RBM_NONE;) { - break; + regMaskTP regOrderCandidateBit = genFindLowestBit(regOrderCandidates); + regOrderCandidates &= ~regOrderCandidateBit; + regNumber regOrderCandidateRegNum = genRegNumFromMask(regOrderCandidateBit); + unsigned thisRegOrder = getRegisterRecord(regOrderCandidateRegNum)->regOrder; + if (thisRegOrder < lowestRegOrder) + { + lowestRegOrder = thisRegOrder; + lowestRegOrderBit = regOrderCandidateBit; + } } + assert(lowestRegOrderBit != RBM_NONE); + found = selector.applySingleRegSelection(REG_ORDER, lowestRegOrderBit); } - if (availablePhysRegInterval != nullptr) + // The set of registers with the lowest spill weight. + regMaskTP lowestCostSpillSet = RBM_NONE; + // Apply the SPILL_COST heuristic and eliminate regs that can't be spilled. + if (!found) { - if (unassignInterval && isAssigned(availablePhysRegInterval ARM_ARG(currentInterval->registerType))) + // The spill weight for 'refPosition' (the one we're allocating now). + float thisSpillWeight = getWeight(refPosition); + // The spill weight for the best candidate we've found so far. + float bestSpillWeight = FloatingPointUtils::infinite_float(); + // True if we found registers with lower spill weight than this refPosition. + bool foundLowerSpillWeight = false; + + for (regMaskTP spillCandidates = selector.candidates; spillCandidates != RBM_NONE;) { - Interval* const intervalToUnassign = availablePhysRegInterval->assignedInterval; - unassignPhysReg(availablePhysRegInterval ARM_ARG(currentInterval->registerType)); + regMaskTP spillCandidateBit = genFindLowestBit(spillCandidates); + spillCandidates &= ~spillCandidateBit; + regNumber spillCandidateRegNum = genRegNumFromMask(spillCandidateBit); + RegRecord* spillCandidateRegRecord = &physRegs[spillCandidateRegNum]; - if ((bestScore & VALUE_AVAILABLE) != 0 && intervalToUnassign != nullptr) + // Can and should the interval in this register be spilled for this one, + // if we don't find a better alternative? + if ((getNextIntervalRef(spillCandidateRegNum, regType) == currentLocation) && + !spillCandidateRegRecord->assignedInterval->getNextRefPosition()->RegOptional()) { - assert(intervalToUnassign->isConstant); - refPosition->treeNode->SetReuseRegVal(); + continue; } - // If we considered this "unassigned" because this interval's lifetime ends before - // the next ref, remember it. - else if ((bestScore & UNASSIGNED) != 0 && intervalToUnassign != nullptr) + if (!isSpillCandidate(currentInterval, refPosition, spillCandidateRegRecord)) + { + continue; + } + + float currentSpillWeight = spillCost[spillCandidateRegNum]; +#ifdef TARGET_ARM + if (currentInterval->registerType == TYP_DOUBLE) { - updatePreviousInterval(availablePhysRegInterval, intervalToUnassign, intervalToUnassign->registerType); + currentSpillWeight = max(currentSpillWeight, spillCost[REG_NEXT(spillCandidateRegNum)]); + } +#endif + if (currentSpillWeight < bestSpillWeight) + { + bestSpillWeight = currentSpillWeight; + lowestCostSpillSet = spillCandidateBit; + } + else if (currentSpillWeight == bestSpillWeight) + { + lowestCostSpillSet |= spillCandidateBit; } } - else + + // We won't spill if this refPosition is RegOptional() and we have no candidates + // with a lower spill cost. + if ((bestSpillWeight >= thisSpillWeight) && refPosition->RegOptional()) + { + currentInterval->assignedReg = nullptr; + return REG_NA; + } + } + + if (!found) + { + // We must have at least one with the lowest spill cost. + assert(lowestCostSpillSet != RBM_NONE); + found = selector.applySelection(SPILL_COST, lowestCostSpillSet); + } + + // Apply the FAR_NEXT_REF heuristic. + if (!found) + { + LsraLocation farthestLocation = MinLocation; + regMaskTP farthestSet = RBM_NONE; + for (regMaskTP farthestCandidates = selector.candidates; farthestCandidates != RBM_NONE;) + { + regMaskTP farthestCandidateBit = genFindLowestBit(farthestCandidates); + farthestCandidates &= ~farthestCandidateBit; + regNumber farthestCandidateRegNum = genRegNumFromMask(farthestCandidateBit); + + // Find the next RefPosition of the register. + LsraLocation nextIntervalLocation = nextIntervalRef[farthestCandidateRegNum]; + LsraLocation nextPhysRefLocation = Min(nextFixedRef[farthestCandidateRegNum], nextIntervalLocation); + if (nextPhysRefLocation == farthestLocation) + { + farthestSet |= farthestCandidateBit; + } + else if (nextPhysRefLocation > farthestLocation) + { + farthestSet = farthestCandidateBit; + farthestLocation = nextPhysRefLocation; + } + } + // We must have at least one with the lowest spill cost. + assert(farthestSet != RBM_NONE); + found = selector.applySelection(FAR_NEXT_REF, farthestSet); + } + + // Apply the PREV_REG_OPT heuristic. + if (!found) + { + regMaskTP prevRegOptSet = RBM_NONE; + for (regMaskTP prevRegOptCandidates = selector.candidates; prevRegOptCandidates != RBM_NONE;) { - assert((bestScore & VALUE_AVAILABLE) == 0); + regMaskTP prevRegOptCandidateBit = genFindLowestBit(prevRegOptCandidates); + prevRegOptCandidates &= ~prevRegOptCandidateBit; + regNumber prevRegOptCandidateRegNum = genRegNumFromMask(prevRegOptCandidateBit); + Interval* assignedInterval = physRegs[prevRegOptCandidateRegNum].assignedInterval; + // The assigned should be non-null, and should have a recentRefPosition, however since + // this is a heuristic, we don't want a fatal error, so we just assert (not noway_assert). + if ((assignedInterval != nullptr) && (assignedInterval->recentRefPosition != nullptr)) + { + if (assignedInterval->recentRefPosition->reload && assignedInterval->recentRefPosition->RegOptional()) + { + // TODO-Cleanup: Previously, we always used the highest regNum with a previous regOptional + // RefPosition, which is not really consistent with the way other selection criteria are applied. + // should probably be: prevRegOptSet |= prevRegOptCandidateBit; + prevRegOptSet = prevRegOptCandidateBit; + } + } + else + { + assert(!"Spill candidate has no assignedInterval recentRefPosition"); + } } - assignPhysReg(availablePhysRegInterval, currentInterval); - foundReg = availablePhysRegInterval->regNum; - regMaskTP foundRegMask = genRegMask(foundReg); - refPosition->registerAssignment = foundRegMask; + found = selector.applySelection(PREV_REG_OPT, prevRegOptSet); + } + + // Apply the REG_NUM heuristic. + if (!found) + { + found = selector.applySingleRegSelection(REG_NUM, genFindLowestBit(selector.candidates)); } + assert(found && isSingleRegister(selector.candidates)); + regMaskTP foundRegBit = selector.candidates; + foundReg = genRegNumFromMask(foundRegBit); + availablePhysRegRecord = getRegisterRecord(foundReg); + Interval* assignedInterval = availablePhysRegRecord->assignedInterval; + if ((assignedInterval != currentInterval) && isAssigned(availablePhysRegRecord ARM_ARG(regType))) + { + if ((foundRegBit & freeCandidates) == RBM_NONE) + { + // We're spilling. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef TARGET_ARM + if (currentInterval->registerType == TYP_DOUBLE) + { + assert(genIsValidDoubleReg(availablePhysRegRecord->regNum)); + unassignDoublePhysReg(availablePhysRegRecord); + } + else if (assignedInterval->registerType == TYP_DOUBLE) + { + // Make sure we spill both halves of the double register. + assert(genIsValidDoubleReg(assignedInterval->assignedReg->regNum)); + unassignPhysReg(assignedInterval->assignedReg, assignedInterval->recentRefPosition); + } + else +#endif + { + unassignPhysReg(availablePhysRegRecord, assignedInterval->recentRefPosition); + } + } + else + { + // If we considered this "unassigned" because this interval's lifetime ends before + // the next ref, remember it. + // For historical reasons (due to former short-circuiting of this case), if we're reassigning + // the current interval to a previous assignment, we don't remember the previous interval. + // Note that we need to compute this condition before calling unassignPhysReg, which wil reset + // assignedInterval->physReg. + regMaskTP freePrefCandidates = (selector.candidates & preferences & freeCandidates); + bool wasThisAssigned = ((prevRegBit & preferences) == foundRegBit); + bool wasAssigned = (((foundRegBit & unassignedSet) != RBM_NONE) && !wasThisAssigned && + (assignedInterval != nullptr) && (assignedInterval->physReg == foundReg)); + unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); + if ((matchingConstants & foundRegBit) != RBM_NONE) + { + assert(assignedInterval->isConstant); + refPosition->treeNode->SetReuseRegVal(); + } + else if (wasAssigned) + { + updatePreviousInterval(availablePhysRegRecord, assignedInterval, assignedInterval->registerType); + } + else + { + assert((selector.score & CONST_AVAILABLE) == 0); + } + } + } + assignPhysReg(availablePhysRegRecord, currentInterval); + refPosition->registerAssignment = foundRegBit; return foundReg; } @@ -3288,36 +3574,52 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* // Arguments: // physRegRecord - reg to spill // refLocation - Location of RefPosition where this register will be spilled -// recentAssignedRefWeight - Weight of recent assigned RefPosition which will be determined in this function -// farthestRefPosWeight - Current farthestRefPosWeight at allocateBusyReg() // // Return Value: // True - if we can spill physRegRecord // False - otherwise // -// Note: This helper is designed to be used only from allocateBusyReg() and canSpillDoubleReg() -// -bool LinearScan::canSpillReg(RegRecord* physRegRecord, - LsraLocation refLocation, - BasicBlock::weight_t* recentAssignedRefWeight) +bool LinearScan::canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation) { assert(physRegRecord->assignedInterval != nullptr); RefPosition* recentAssignedRef = physRegRecord->assignedInterval->recentRefPosition; if (recentAssignedRef != nullptr) { - if (isRefPositionActive(recentAssignedRef, refLocation)) - { - // We can't spill a register that's active at the current location - return false; - } - - // We don't prefer to spill a register if the weight of recentAssignedRef > weight - // of the spill candidate found so far. We would consider spilling a greater weight - // ref position only if the refPosition being allocated must need a reg. - *recentAssignedRefWeight = getWeight(recentAssignedRef); + // We can't spill a register that's active at the current location. + // We should already have determined this with isRegBusy before calling this method. + assert(!isRefPositionActive(recentAssignedRef, refLocation)); + return true; } - return true; + // recentAssignedRef can only be null if this is a parameter that has not yet been + // moved to a register (or stack), in which case we can't spill it yet. + assert(physRegRecord->assignedInterval->getLocalVar(compiler)->lvIsParam); + return false; +} + +//------------------------------------------------------------------------ +// getSpillWeight: Get the weight associated with spilling the given register +// +// Arguments: +// physRegRecord - reg to spill +// +// Return Value: +// The weight associated with the location at which we will spill. +// +// Note: This helper is designed to be used only from allocateReg() and getDoubleSpillWeight() +// +float LinearScan::getSpillWeight(RegRecord* physRegRecord) +{ + assert(physRegRecord->assignedInterval != nullptr); + RefPosition* recentAssignedRef = physRegRecord->assignedInterval->recentRefPosition; + float weight = BB_ZERO_WEIGHT; + + // We shouldn't call this method if there is no recentAssignedRef. + assert(recentAssignedRef != nullptr); + // We shouldn't call this method if the register is active at this location. + assert(!isRefPositionActive(recentAssignedRef, currentLoc)); + weight = getWeight(recentAssignedRef); + return weight; } #ifdef TARGET_ARM @@ -3327,48 +3629,27 @@ bool LinearScan::canSpillReg(RegRecord* physRegRecord, // Arguments: // physRegRecord - reg to spill (must be a valid double register) // refLocation - Location of RefPosition where this register will be spilled -// recentAssignedRefWeight - Weight of recent assigned RefPosition which will be determined in this function // // Return Value: // True - if we can spill physRegRecord // False - otherwise // -// Notes: -// This helper is designed to be used only from allocateBusyReg() and canSpillDoubleReg(). -// The recentAssignedRefWeight is not updated if either register cannot be spilled. -// -bool LinearScan::canSpillDoubleReg(RegRecord* physRegRecord, - LsraLocation refLocation, - BasicBlock::weight_t* recentAssignedRefWeight) +bool LinearScan::canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation) { assert(genIsValidDoubleReg(physRegRecord->regNum)); - bool retVal = true; - BasicBlock::weight_t weight = BB_ZERO_WEIGHT; - BasicBlock::weight_t weight2 = BB_ZERO_WEIGHT; - - RegRecord* physRegRecord2 = findAnotherHalfRegRec(physRegRecord); + RegRecord* physRegRecord2 = getSecondHalfRegRec(physRegRecord); - if ((physRegRecord->assignedInterval != nullptr) && !canSpillReg(physRegRecord, refLocation, &weight)) + if ((physRegRecord->assignedInterval != nullptr) && !canSpillReg(physRegRecord, refLocation)) { return false; } - if (physRegRecord2->assignedInterval != nullptr) + if ((physRegRecord2->assignedInterval != nullptr) && !canSpillReg(physRegRecord2, refLocation)) { - if (!canSpillReg(physRegRecord2, refLocation, &weight2)) - { - return false; - } - if (weight2 > weight) - { - weight = weight2; - } + return false; } - *recentAssignedRefWeight = weight; return true; } -#endif -#ifdef TARGET_ARM //------------------------------------------------------------------------ // unassignDoublePhysReg: unassign a double register (pair) // @@ -3383,7 +3664,7 @@ void LinearScan::unassignDoublePhysReg(RegRecord* doubleRegRecord) assert(genIsValidDoubleReg(doubleRegRecord->regNum)); RegRecord* doubleRegRecordLo = doubleRegRecord; - RegRecord* doubleRegRecordHi = findAnotherHalfRegRec(doubleRegRecordLo); + RegRecord* doubleRegRecordHi = getSecondHalfRegRec(doubleRegRecordLo); // For a double register, we has following four cases. // Case 1: doubleRegRecLo is assigned to TYP_DOUBLE interval // Case 2: doubleRegRecLo and doubleRegRecHi are assigned to different TYP_FLOAT intervals @@ -3442,86 +3723,6 @@ bool LinearScan::isRefPositionActive(RefPosition* refPosition, LsraLocation refL ((refPosition->nodeLocation + 1 == refLocation) && refPosition->delayRegFree)); } -//---------------------------------------------------------------------------------------- -// isRegInUse: Test whether regRec is being used at the refPosition -// -// Arguments: -// regRec - A register to be tested -// refPosition - RefPosition where regRec is tested -// -// Return Value: -// True - if regRec is being used -// False - otherwise -// -// Notes: -// This helper is designed to be used only from allocateBusyReg(), where: -// - This register was *not* found when looking for a free register, and -// - The caller must have already checked for the case where 'refPosition' is a fixed ref -// (asserted at the beginning of this method). -// -bool LinearScan::isRegInUse(RegRecord* regRec, RefPosition* refPosition) -{ - // We shouldn't reach this check if 'refPosition' is a FixedReg of this register. - assert(!refPosition->isFixedRefOfReg(regRec->regNum)); - Interval* assignedInterval = regRec->assignedInterval; - if (assignedInterval != nullptr) - { - if (!assignedInterval->isActive) - { - // This can only happen if we have a recentRefPosition active at this location that hasn't yet been freed. - CLANG_FORMAT_COMMENT_ANCHOR; - - if (isRefPositionActive(assignedInterval->recentRefPosition, refPosition->nodeLocation)) - { - return true; - } - else - { -#ifdef TARGET_ARM - // In the case of TYP_DOUBLE, we may have the case where 'assignedInterval' is inactive, - // but the other half register is active. If so, it must be have an active recentRefPosition, - // as above. - if (refPosition->getInterval()->registerType == TYP_DOUBLE) - { - RegRecord* otherHalfRegRec = findAnotherHalfRegRec(regRec); - if (!otherHalfRegRec->assignedInterval->isActive) - { - if (isRefPositionActive(otherHalfRegRec->assignedInterval->recentRefPosition, - refPosition->nodeLocation)) - { - return true; - } - else - { - assert(!"Unexpected inactive assigned interval in isRegInUse"); - return true; - } - } - } - else -#endif - { - assert(!"Unexpected inactive assigned interval in isRegInUse"); - return true; - } - } - } - RefPosition* nextAssignedRef = assignedInterval->getNextRefPosition(); - - // We should never spill a register that's occupied by an Interval with its next use at the current - // location. - // Normally this won't occur (unless we actually had more uses in a single node than there are registers), - // because we'll always find something with a later nextLocation, but it can happen in stress when - // we have LSRA_SELECT_NEAREST. - if ((nextAssignedRef != nullptr) && isRefPositionActive(nextAssignedRef, refPosition->nodeLocation) && - !nextAssignedRef->RegOptional()) - { - return true; - } - } - return false; -} - //------------------------------------------------------------------------ // isSpillCandidate: Determine if a register is a spill candidate for a given RefPosition. // @@ -3529,406 +3730,43 @@ bool LinearScan::isRegInUse(RegRecord* regRec, RefPosition* refPosition) // current The interval for the current allocation // refPosition The RefPosition of the current Interval for which a register is being allocated // physRegRecord The RegRecord for the register we're considering for spill -// nextLocation An out (reference) parameter in which the next use location of the -// given RegRecord will be returned. // // Return Value: // True iff the given register can be spilled to accommodate the given RefPosition. // -bool LinearScan::isSpillCandidate(Interval* current, - RefPosition* refPosition, - RegRecord* physRegRecord, - LsraLocation& nextLocation) +bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, RegRecord* physRegRecord) { regMaskTP candidateBit = genRegMask(physRegRecord->regNum); LsraLocation refLocation = refPosition->nodeLocation; - if (physRegRecord->isBusyUntilNextKill) - { - return false; - } - Interval* assignedInterval = physRegRecord->assignedInterval; - if (assignedInterval != nullptr) - { - nextLocation = assignedInterval->getNextRefLocation(); - } + // We shouldn't be calling this if we haven't already determined that the register is not + // busy until the next kill. + assert(!isRegBusy(physRegRecord->regNum, current->registerType)); + // We should already have determined that the register isn't actively in use. + assert(!isRegInUse(physRegRecord->regNum, current->registerType)); + // We shouldn't be calling this if 'refPosition' is a fixed reference to this register. + assert(!refPosition->isFixedRefOfRegMask(candidateBit)); + // We shouldn't be calling this if there is a fixed reference at the same location + // (and it's not due to this reference), as checked above. + assert(!conflictingFixedRegReference(physRegRecord->regNum, refPosition)); + + bool canSpill; #ifdef TARGET_ARM - RegRecord* physRegRecord2 = nullptr; - Interval* assignedInterval2 = nullptr; - - // For ARM32, a double occupies a consecutive even/odd pair of float registers. if (current->registerType == TYP_DOUBLE) { - assert(genIsValidDoubleReg(physRegRecord->regNum)); - physRegRecord2 = findAnotherHalfRegRec(physRegRecord); - if (physRegRecord2->isBusyUntilNextKill) - { - return false; - } - assignedInterval2 = physRegRecord2->assignedInterval; - if ((assignedInterval2 != nullptr) && (assignedInterval2->getNextRefLocation() > nextLocation)) - { - nextLocation = assignedInterval2->getNextRefLocation(); - } - } -#endif - - // If there is a fixed reference at the same location (and it's not due to this reference), - // don't use it. - if (physRegRecord->conflictingFixedRegReference(refPosition)) - { - return false; - } - - if (refPosition->isFixedRefOfRegMask(candidateBit)) - { - // Either: - // - there is a fixed reference due to this node, OR - // - or there is a fixed use fed by a def at this node, OR - // - or we have restricted the set of registers for stress. - // In any case, we must use this register as it's the only candidate - // TODO-CQ: At the time we allocate a register to a fixed-reg def, if it's not going - // to remain live until the use, we should set the candidates to allRegs(regType) - // to avoid a spill - codegen can then insert the copy. - // If this is marked as allocateIfProfitable, the caller will compare the weights - // of this RefPosition and the RefPosition to which it is currently assigned. - assert(refPosition->isFixedRegRef || - (refPosition->nextRefPosition != nullptr && refPosition->nextRefPosition->isFixedRegRef) || - candidatesAreStressLimited()); - return true; + canSpill = canSpillDoubleReg(physRegRecord, refLocation); } - - // If this register is not assigned to an interval, either - // - it has a FixedReg reference at the current location that is not this reference, OR - // - this is the special case of a fixed loReg, where this interval has a use at the same location - // In either case, we cannot use it - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef TARGET_ARM - if (assignedInterval == nullptr && assignedInterval2 == nullptr) -#else - if (assignedInterval == nullptr) -#endif + else +#endif // TARGET_ARM { - RefPosition* nextPhysRegPosition = physRegRecord->getNextRefPosition(); - assert((nextPhysRegPosition != nullptr) && (nextPhysRegPosition->nodeLocation == refLocation) && - (candidateBit != refPosition->registerAssignment)); - return false; + canSpill = canSpillReg(physRegRecord, refLocation); } - - if (isRegInUse(physRegRecord, refPosition)) + if (!canSpill) { return false; } - -#ifdef TARGET_ARM - if (current->registerType == TYP_DOUBLE) - { - if (isRegInUse(physRegRecord2, refPosition)) - { - return false; - } - } -#endif return true; } -//------------------------------------------------------------------------ -// allocateBusyReg: Find a busy register that satisfies the requirements for refPosition, -// and that can be spilled. -// -// Arguments: -// current The interval for the current allocation -// refPosition The RefPosition of the current Interval for which a register is being allocated -// allocateIfProfitable If true, a reg may not be allocated if all other ref positions currently -// occupying registers are more important than the 'refPosition'. -// -// Return Value: -// The regNumber allocated to the RefPositon. Returns REG_NA if no free register is found. -// -// Note: Currently this routine uses weight and farthest distance of next reference -// to select a ref position for spilling. -// a) if allocateIfProfitable = false -// The ref position chosen for spilling will be the lowest weight -// of all and if there is is more than one ref position with the -// same lowest weight, among them choses the one with farthest -// distance to its next reference. -// -// b) if allocateIfProfitable = true -// The ref position chosen for spilling will not only be lowest weight -// of all but also has a weight lower than 'refPosition'. If there is -// no such ref position, reg will not be allocated. -// -regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable) -{ - regNumber foundReg = REG_NA; - - RegisterType regType = getRegisterType(current, refPosition); - regMaskTP candidates = refPosition->registerAssignment; - regMaskTP preferences = (current->registerPreferences & candidates); - if (preferences == RBM_NONE) - { - preferences = candidates; - } - if (candidates == RBM_NONE) - { - // This assumes only integer and floating point register types - // if we target a processor with additional register types, - // this would have to change - candidates = allRegs(regType); - } - -#ifdef DEBUG - candidates = stressLimitRegs(refPosition, candidates); -#endif // DEBUG - - // TODO-CQ: Determine whether/how to take preferences into account in addition to - // prefering the one with the furthest ref position when considering - // a candidate to spill - RegRecord* farthestRefPhysRegRecord = nullptr; -#ifdef TARGET_ARM - RegRecord* farthestRefPhysRegRecord2 = nullptr; -#endif - LsraLocation farthestLocation = MinLocation; - LsraLocation refLocation = refPosition->nodeLocation; - BasicBlock::weight_t farthestRefPosWeight; - if (allocateIfProfitable) - { - // If allocating a reg is optional, we will consider those ref positions - // whose weight is less than 'refPosition' for spilling. - farthestRefPosWeight = getWeight(refPosition); - } - else - { - // If allocating a reg is a must, we start off with max weight so - // that the first spill candidate will be selected based on - // farthest distance alone. Since we start off with farthestLocation - // initialized to MinLocation, the first available ref position - // will be selected as spill candidate and its weight as the - // fathestRefPosWeight. - farthestRefPosWeight = FloatingPointUtils::infinite_float(); - } - - for (regNumber regNum : Registers(regType)) - { - regMaskTP candidateBit = genRegMask(regNum); - if (!(candidates & candidateBit)) - { - continue; - } - RegRecord* physRegRecord = getRegisterRecord(regNum); - RegRecord* physRegRecord2 = nullptr; // only used for TARGET_ARM - LsraLocation nextLocation = MinLocation; - LsraLocation physRegNextLocation; - if (!isSpillCandidate(current, refPosition, physRegRecord, nextLocation)) - { - assert(candidates != candidateBit); - continue; - } - - // We've passed the preliminary checks for a spill candidate. - // Now, if we have a recentAssignedRef, check that it is going to be OK to spill it. - Interval* assignedInterval = physRegRecord->assignedInterval; - BasicBlock::weight_t recentAssignedRefWeight = BB_ZERO_WEIGHT; - RefPosition* recentAssignedRef = nullptr; - RefPosition* recentAssignedRef2 = nullptr; -#ifdef TARGET_ARM - if (current->registerType == TYP_DOUBLE) - { - recentAssignedRef = (assignedInterval == nullptr) ? nullptr : assignedInterval->recentRefPosition; - physRegRecord2 = findAnotherHalfRegRec(physRegRecord); - Interval* assignedInterval2 = physRegRecord2->assignedInterval; - recentAssignedRef2 = (assignedInterval2 == nullptr) ? nullptr : assignedInterval2->recentRefPosition; - if (!canSpillDoubleReg(physRegRecord, refLocation, &recentAssignedRefWeight)) - { - continue; - } - } - else -#endif - { - recentAssignedRef = assignedInterval->recentRefPosition; - if (!canSpillReg(physRegRecord, refLocation, &recentAssignedRefWeight)) - { - continue; - } - } - if (recentAssignedRefWeight > farthestRefPosWeight) - { - continue; - } - - physRegNextLocation = physRegRecord->getNextRefLocation(); - if (nextLocation > physRegNextLocation) - { - nextLocation = physRegNextLocation; - } - - bool isBetterLocation; - -#ifdef DEBUG - if (doSelectNearest() && farthestRefPhysRegRecord != nullptr) - { - isBetterLocation = (nextLocation <= farthestLocation); - } - else -#endif - // This if-stmt is associated with the above else - if (recentAssignedRefWeight < farthestRefPosWeight) - { - isBetterLocation = true; - } - else - { - // This would mean the weight of spill ref position we found so far is equal - // to the weight of the ref position that is being evaluated. In this case - // we prefer to spill ref position whose distance to its next reference is - // the farthest. - assert(recentAssignedRefWeight == farthestRefPosWeight); - - // If allocateIfProfitable=true, the first spill candidate selected - // will be based on weight alone. After we have found a spill - // candidate whose weight is less than the 'refPosition', we will - // consider farthest distance when there is a tie in weights. - // This is to ensure that we don't spill a ref position whose - // weight is equal to weight of 'refPosition'. - if (allocateIfProfitable && farthestRefPhysRegRecord == nullptr) - { - isBetterLocation = false; - } - else - { - isBetterLocation = (nextLocation > farthestLocation); - - if (nextLocation > farthestLocation) - { - isBetterLocation = true; - } - else if (nextLocation == farthestLocation) - { - // Both weight and distance are equal. - // Prefer that ref position which is marked both reload and - // allocate if profitable. These ref positions don't need - // need to be spilled as they are already in memory and - // codegen considers them as contained memory operands. - CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef TARGET_ARM - // TODO-CQ-ARM: Just conservatively "and" two conditions. We may implement a better condition later. - isBetterLocation = true; - if (recentAssignedRef != nullptr) - isBetterLocation &= (recentAssignedRef->reload && recentAssignedRef->RegOptional()); - - if (recentAssignedRef2 != nullptr) - isBetterLocation &= (recentAssignedRef2->reload && recentAssignedRef2->RegOptional()); -#else - isBetterLocation = - (recentAssignedRef != nullptr) && recentAssignedRef->reload && recentAssignedRef->RegOptional(); -#endif - } - else - { - isBetterLocation = false; - } - } - } - - if (isBetterLocation) - { - farthestLocation = nextLocation; - farthestRefPhysRegRecord = physRegRecord; -#ifdef TARGET_ARM - farthestRefPhysRegRecord2 = physRegRecord2; -#endif - farthestRefPosWeight = recentAssignedRefWeight; - } - } - -#if DEBUG - if (allocateIfProfitable) - { - // There may not be a spill candidate or if one is found - // its weight must be less than the weight of 'refPosition' - assert((farthestRefPhysRegRecord == nullptr) || (farthestRefPosWeight < getWeight(refPosition))); - } - else - { - // Must have found a spill candidate. - assert(farthestRefPhysRegRecord != nullptr); - - if (farthestLocation == refLocation) - { - // This must be a RefPosition that is constrained to use a single register, either directly, - // or at the use, or by stress. - bool isConstrained = (refPosition->isFixedRegRef || (refPosition->nextRefPosition != nullptr && - refPosition->nextRefPosition->isFixedRegRef) || - candidatesAreStressLimited()); - if (!isConstrained) - { -#ifdef TARGET_ARM - Interval* assignedInterval = - (farthestRefPhysRegRecord == nullptr) ? nullptr : farthestRefPhysRegRecord->assignedInterval; - Interval* assignedInterval2 = - (farthestRefPhysRegRecord2 == nullptr) ? nullptr : farthestRefPhysRegRecord2->assignedInterval; - RefPosition* nextRefPosition = - (assignedInterval == nullptr) ? nullptr : assignedInterval->getNextRefPosition(); - RefPosition* nextRefPosition2 = - (assignedInterval2 == nullptr) ? nullptr : assignedInterval2->getNextRefPosition(); - if (nextRefPosition != nullptr) - { - if (nextRefPosition2 != nullptr) - { - assert(nextRefPosition->RegOptional() || nextRefPosition2->RegOptional()); - } - else - { - assert(nextRefPosition->RegOptional()); - } - } - else - { - assert(nextRefPosition2 != nullptr && nextRefPosition2->RegOptional()); - } -#else // !TARGET_ARM - Interval* assignedInterval = farthestRefPhysRegRecord->assignedInterval; - RefPosition* nextRefPosition = assignedInterval->getNextRefPosition(); - assert(nextRefPosition->RegOptional()); -#endif // !TARGET_ARM - } - } - else - { - assert(farthestLocation > refLocation); - } - } -#endif // DEBUG - - if (farthestRefPhysRegRecord != nullptr) - { - foundReg = farthestRefPhysRegRecord->regNum; - -#ifdef TARGET_ARM - if (current->registerType == TYP_DOUBLE) - { - assert(genIsValidDoubleReg(foundReg)); - unassignDoublePhysReg(farthestRefPhysRegRecord); - } - else -#endif - { - unassignPhysReg(farthestRefPhysRegRecord, farthestRefPhysRegRecord->assignedInterval->recentRefPosition); - } - - assignPhysReg(farthestRefPhysRegRecord, current); - refPosition->registerAssignment = genRegMask(foundReg); - } - else - { - foundReg = REG_NA; - refPosition->registerAssignment = RBM_NONE; - } - - return foundReg; -} - // Grab a register to use to copy and then immediately use. // This is called only for localVar intervals that already have a register // assignment that is not compatible with the current RefPosition. @@ -3959,11 +3797,11 @@ regNumber LinearScan::assignCopyReg(RefPosition* refPosition) assert(oldRegRecord->regNum == oldPhysReg); currentInterval->isActive = false; - regNumber allocatedReg = tryAllocateFreeReg(currentInterval, refPosition); - if (allocatedReg == REG_NA) - { - allocatedReg = allocateBusyReg(currentInterval, refPosition, false); - } + // We *must* allocate a register, and it will be a copyReg. Set that field now, so that + // refPosition->RegOptional() will return false. + refPosition->copyReg = true; + + regNumber allocatedReg = allocateReg(currentInterval, refPosition); // Now restore the old info currentInterval->relatedInterval = savedRelatedInterval; @@ -3971,14 +3809,11 @@ regNumber LinearScan::assignCopyReg(RefPosition* refPosition) currentInterval->assignedReg = oldRegRecord; currentInterval->isActive = true; - refPosition->copyReg = true; return allocatedReg; } //------------------------------------------------------------------------ -// isAssigned: This is the function to check if the given RegRecord has an assignedInterval -// regardless of lastLocation. -// So it would be call isAssigned() with Maxlocation value. +// isAssigned: This is the function to check if the given RegRecord has an assignedInterval. // // Arguments: // regRec - The RegRecord to check that it is assigned. @@ -3987,63 +3822,34 @@ regNumber LinearScan::assignCopyReg(RefPosition* refPosition) // Return Value: // Returns true if the given RegRecord has an assignedInterval. // -// Notes: -// There is the case to check if the RegRecord has an assignedInterval regardless of Lastlocation. -// bool LinearScan::isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType)) { - return isAssigned(regRec, MaxLocation ARM_ARG(newRegType)); -} - -//------------------------------------------------------------------------ -// isAssigned: Check whether the given RegRecord has an assignedInterval -// that has a reference prior to the given location. -// -// Arguments: -// regRec - The RegRecord of interest -// lastLocation - The LsraLocation up to which we want to check -// newRegType - The `RegisterType` of interval we want to check -// (this is for the purposes of checking the other half of a TYP_DOUBLE RegRecord) -// -// Return value: -// Returns true if the given RegRecord (and its other half, if TYP_DOUBLE) has an assignedInterval -// that is referenced prior to the given location -// -// Notes: -// The register is not considered to be assigned if it has no assignedInterval, or that Interval's -// next reference is beyond lastLocation -// -bool LinearScan::isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType)) -{ - Interval* assignedInterval = regRec->assignedInterval; - - if ((assignedInterval == nullptr) || assignedInterval->getNextRefLocation() > lastLocation) + if (regRec->assignedInterval != nullptr) { + return true; + } #ifdef TARGET_ARM - if (newRegType == TYP_DOUBLE) - { - RegRecord* anotherRegRec = findAnotherHalfRegRec(regRec); + if (newRegType == TYP_DOUBLE) + { + RegRecord* otherRegRecord = getSecondHalfRegRec(regRec); - if ((anotherRegRec->assignedInterval == nullptr) || - (anotherRegRec->assignedInterval->getNextRefLocation() > lastLocation)) - { - // In case the newRegType is a double register, - // the score would be set UNASSIGNED if another register is also not set. - return false; - } - } - else -#endif + if (otherRegRecord->assignedInterval != nullptr) { - return false; + return true; } } - - return true; +#endif + return false; } -// Check if the interval is already assigned and if it is then unassign the physical record -// then set the assignedInterval to 'interval' +//------------------------------------------------------------------------ +// checkAndAssignInterval: Check if the interval is already assigned and +// if it is then unassign the physical record +// and set the assignedInterval to 'interval' +// +// Arguments: +// regRec - The RegRecord of interest +// interval - The Interval that we're going to assign to 'regRec' // void LinearScan::checkAndAssignInterval(RegRecord* regRec, Interval* interval) { @@ -4094,8 +3900,8 @@ void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval) regMaskTP assignedRegMask = genRegMask(regRec->regNum); compiler->codeGen->regSet.rsSetRegsModified(assignedRegMask DEBUGARG(true)); - checkAndAssignInterval(regRec, interval); interval->assignedReg = regRec; + checkAndAssignInterval(regRec, interval); interval->physReg = regRec->regNum; interval->isActive = true; @@ -4163,6 +3969,15 @@ void LinearScan::setIntervalAsSpilled(Interval* interval) // Now we need to mark the local as spilled also, even if the lower half is never spilled, // as this will use the upper part of its home location. interval = interval->relatedInterval; + // We'll now mark this as spilled, so it changes the spillCost. + RefPosition* recentRefPos = interval->recentRefPosition; + if (!interval->isSpilled && interval->isActive && (recentRefPos != nullptr)) + { + VarSetOps::AddElemD(compiler, splitOrSpilledVars, interval->getVarIndex(compiler)); + interval->isSpilled = true; + regNumber reg = interval->physReg; + spillCost[reg] = getSpillWeight(getRegisterRecord(reg)); + } } #endif if (interval->isLocalVar) @@ -4337,7 +4152,7 @@ void LinearScan::unassignPhysReg(RegRecord* regRec ARM_ARG(RegisterType newRegTy { if (newRegType == TYP_DOUBLE) { - anotherRegRec = findAnotherHalfRegRec(regRecToUnassign); + anotherRegRec = getSecondHalfRegRec(regRecToUnassign); } } #endif @@ -4379,7 +4194,8 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio regNumber thisRegNum = regRec->regNum; // Is assignedInterval actually still assigned to this register? - bool intervalIsAssigned = (assignedInterval->physReg == thisRegNum); + bool intervalIsAssigned = (assignedInterval->physReg == thisRegNum); + regNumber regToUnassign = thisRegNum; #ifdef TARGET_ARM RegRecord* anotherRegRec = nullptr; @@ -4388,28 +4204,42 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio if (assignedInterval->registerType == TYP_DOUBLE) { assert(isFloatRegType(regRec->registerType)); + RegRecord* doubleRegRec; + if (genIsValidDoubleReg(thisRegNum)) + { + anotherRegRec = getSecondHalfRegRec(regRec); + doubleRegRec = regRec; + } + else + { + regToUnassign = REG_PREV(thisRegNum); + anotherRegRec = getRegisterRecord(regToUnassign); + doubleRegRec = anotherRegRec; + } - anotherRegRec = findAnotherHalfRegRec(regRec); - - // Both two RegRecords should have been assigned to the same interval. + // Both RegRecords should have been assigned to the same interval. assert(assignedInterval == anotherRegRec->assignedInterval); if (!intervalIsAssigned && (assignedInterval->physReg == anotherRegRec->regNum)) { intervalIsAssigned = true; } - } -#endif // TARGET_ARM - checkAndClearInterval(regRec, spillRefPosition); + clearNextIntervalRef(regToUnassign, TYP_DOUBLE); + clearSpillCost(regToUnassign, TYP_DOUBLE); + checkAndClearInterval(doubleRegRec, spillRefPosition); -#ifdef TARGET_ARM - if (assignedInterval->registerType == TYP_DOUBLE) - { - // Both two RegRecords should have been unassigned together. + // Both RegRecords should have been unassigned together. assert(regRec->assignedInterval == nullptr); assert(anotherRegRec->assignedInterval == nullptr); } + else #endif // TARGET_ARM + { + clearNextIntervalRef(thisRegNum, assignedInterval->registerType); + clearSpillCost(thisRegNum, assignedInterval->registerType); + checkAndClearInterval(regRec, spillRefPosition); + } + makeRegAvailable(regToUnassign, assignedInterval->registerType); RefPosition* nextRefPosition = nullptr; if (spillRefPosition != nullptr) @@ -4505,6 +4335,14 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio { regRec->assignedInterval = regRec->previousInterval; regRec->previousInterval = nullptr; + if (regRec->assignedInterval->physReg != thisRegNum) + { + clearNextIntervalRef(thisRegNum, regRec->assignedInterval->registerType); + } + else + { + updateNextIntervalRef(thisRegNum, regRec->assignedInterval); + } #ifdef TARGET_ARM // Note: @@ -4588,6 +4426,7 @@ void LinearScan::spillGCRefs(RefPosition* killRefPosition) { INDEBUG(killedRegs = true); unassignPhysReg(regRecord, assignedInterval->recentRefPosition); + makeRegAvailable(nextReg, assignedInterval->registerType); } } INDEBUG(dumpLsraAllocationEvent(killedRegs ? LSRA_EVENT_DONE_KILL_GC_REFS : LSRA_EVENT_NO_GC_KILLS, nullptr, REG_NA, @@ -4876,11 +4715,13 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) if (!enregisterLocalVars) { // Just clear any constant registers and return. + resetAvailableRegs(); for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) { RegRecord* physRegRecord = getRegisterRecord(reg); Interval* assignedInterval = physRegRecord->assignedInterval; - + clearNextIntervalRef(reg, physRegRecord->registerType); + clearSpillCost(reg, physRegRecord->registerType); if (assignedInterval != nullptr) { assert(assignedInterval->isConstant); @@ -5024,7 +4865,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) assert(targetReg != REG_STK); assert(interval->assignedReg != nullptr && interval->assignedReg->regNum == targetReg && interval->assignedReg->assignedInterval == interval); - liveRegs |= genRegMask(targetReg); + liveRegs |= getRegMask(targetReg, interval->registerType); continue; } } @@ -5052,10 +4893,11 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) // Keep the register assignment - if another var has it, it will get unassigned. // Otherwise, resolution will fix it up later, and it will be more // likely to match other assignments this way. + targetReg = interval->physReg; interval->isActive = true; - liveRegs |= genRegMask(interval->physReg); - INDEBUG(inactiveRegs |= genRegMask(interval->physReg)); - setVarReg(inVarToRegMap, varIndex, interval->physReg); + liveRegs |= getRegMask(targetReg, interval->registerType); + INDEBUG(inactiveRegs |= genRegMask(targetReg)); + setVarReg(inVarToRegMap, varIndex, targetReg); } else { @@ -5065,7 +4907,12 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) if (targetReg != REG_STK) { RegRecord* targetRegRecord = getRegisterRecord(targetReg); - liveRegs |= genRegMask(targetReg); + liveRegs |= getRegMask(targetReg, interval->registerType); + if (!allocationPassComplete) + { + updateNextIntervalRef(targetReg, interval); + updateSpillCost(targetReg, interval); + } if (!interval->isActive) { interval->isActive = true; @@ -5083,7 +4930,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) (targetRegRecord->assignedInterval->registerType == TYP_FLOAT))) { assert(genIsValidDoubleReg(targetReg)); - unassignIntervalBlockStart(findAnotherHalfRegRec(targetRegRecord), + unassignIntervalBlockStart(getSecondHalfRegRec(targetRegRecord), allocationPassComplete ? nullptr : inVarToRegMap); } #endif // TARGET_ARM @@ -5098,13 +4945,19 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) } } - // Unassign any registers that are no longer live. + // Unassign any registers that are no longer live, and set register state, if allocating. + if (!allocationPassComplete) + { + resetRegState(); + setRegsInUse(liveRegs); + } for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) { + RegRecord* physRegRecord = getRegisterRecord(reg); if ((liveRegs & genRegMask(reg)) == 0) { - RegRecord* physRegRecord = getRegisterRecord(reg); - Interval* assignedInterval = physRegRecord->assignedInterval; + makeRegAvailable(reg, physRegRecord->registerType); + Interval* assignedInterval = physRegRecord->assignedInterval; if (assignedInterval != nullptr) { @@ -5142,6 +4995,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) // Skip next float register, because we already addressed a double register assert(genIsValidDoubleReg(reg)); reg = REG_NEXT(reg); + makeRegAvailable(reg, physRegRecord->registerType); } #endif // TARGET_ARM } @@ -5149,8 +5003,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) #ifdef TARGET_ARM else { - RegRecord* physRegRecord = getRegisterRecord(reg); - Interval* assignedInterval = physRegRecord->assignedInterval; + Interval* assignedInterval = physRegRecord->assignedInterval; if (assignedInterval != nullptr && assignedInterval->registerType == TYP_DOUBLE) { @@ -5227,20 +5080,33 @@ void LinearScan::dumpRefPositions(const char* str) } #endif // DEBUG -bool LinearScan::registerIsFree(regNumber regNum, RegisterType regType) +//------------------------------------------------------------------------ +// LinearScan::makeRegisterInactive: Make the interval currently assigned to +// a register inactive. +// +// Arguments: +// physRegRecord - the RegRecord for the register +// +// Return Value: +// None. +// +// Notes: +// It may be that the RegRecord has already been freed, e.g. due to a kill, +// or it may be that the register was a copyReg, so is not the assigned register +// of the Interval currently occupying the register, in which case this method has no effect. +// +void LinearScan::makeRegisterInactive(RegRecord* physRegRecord) { - RegRecord* physRegRecord = getRegisterRecord(regNum); - - bool isFree = physRegRecord->isFree(); - -#ifdef TARGET_ARM - if (isFree && regType == TYP_DOUBLE) + Interval* assignedInterval = physRegRecord->assignedInterval; + // It may have already been freed by a "Kill" + if ((assignedInterval != nullptr) && (assignedInterval->physReg == physRegRecord->regNum)) { - isFree = getSecondHalfRegRec(physRegRecord)->isFree(); + assignedInterval->isActive = false; + if (assignedInterval->isConstant) + { + clearNextIntervalRef(physRegRecord->regNum, assignedInterval->registerType); + } } -#endif // TARGET_ARM - - return isFree; } //------------------------------------------------------------------------ @@ -5263,31 +5129,36 @@ bool LinearScan::registerIsFree(regNumber regNum, RegisterType regType) // defs remain), it will remain assigned to the physRegRecord. However, since // it is marked inactive, the register will be available, albeit less desirable // to allocate. +// void LinearScan::freeRegister(RegRecord* physRegRecord) { Interval* assignedInterval = physRegRecord->assignedInterval; - // It may have already been freed by a "Kill" + makeRegAvailable(physRegRecord->regNum, physRegRecord->registerType); + clearSpillCost(physRegRecord->regNum, physRegRecord->registerType); + makeRegisterInactive(physRegRecord); + if (assignedInterval != nullptr) { - assignedInterval->isActive = false; - // If this is a constant node, that we may encounter again (e.g. constant), - // don't unassign it until we need the register. - if (!assignedInterval->isConstant) - { - RefPosition* nextRefPosition = assignedInterval->getNextRefPosition(); - // Unassign the register only if there are no more RefPositions, or the next - // one is a def. Note that the latter condition doesn't actually ensure that - // there aren't subsequent uses that could be reached by a def in the assigned - // register, but is merely a heuristic to avoid tying up the register (or using - // it when it's non-optimal). A better alternative would be to use SSA, so that - // we wouldn't unnecessarily link separate live ranges to the same register. - if (nextRefPosition == nullptr || RefTypeIsDef(nextRefPosition->refType)) - { + // TODO: Under the following conditions we should be just putting it in regsToMakeInactive + // not regsToFree. + // + // We don't unassign in the following conditions: + // - If this is a constant node, that we may encounter again, OR + // - If its recent RefPosition is not a last-use and its next RefPosition is non-null. + // - If there are no more RefPositions, or the next + // one is a def. Note that the latter condition doesn't actually ensure that + // there aren't subsequent uses that could be reached by a value in the assigned + // register, but is merely a heuristic to avoid tying up the register (or using + // it when it's non-optimal). A better alternative would be to use SSA, so that + // we wouldn't unnecessarily link separate live ranges to the same register. + // + RefPosition* nextRefPosition = assignedInterval->getNextRefPosition(); + if (!assignedInterval->isConstant && (nextRefPosition == nullptr || RefTypeIsDef(nextRefPosition->refType))) + { #ifdef TARGET_ARM - assert((assignedInterval->registerType != TYP_DOUBLE) || genIsValidDoubleReg(physRegRecord->regNum)); + assert((assignedInterval->registerType != TYP_DOUBLE) || genIsValidDoubleReg(physRegRecord->regNum)); #endif // TARGET_ARM - unassignPhysReg(physRegRecord, nullptr); - } + unassignPhysReg(physRegRecord, nullptr); } } } @@ -5306,11 +5177,20 @@ void LinearScan::freeRegisters(regMaskTP regsToFree) } INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FREE_REGS)); + makeRegsAvailable(regsToFree); while (regsToFree != RBM_NONE) { regMaskTP nextRegBit = genFindLowestBit(regsToFree); regsToFree &= ~nextRegBit; - regNumber nextReg = genRegNumFromMask(nextRegBit); + regNumber nextReg = genRegNumFromMask(nextRegBit); + RegRecord* regRecord = getRegisterRecord(nextReg); +#ifdef TARGET_ARM + if (regRecord->assignedInterval != nullptr && (regRecord->assignedInterval->registerType == TYP_DOUBLE)) + { + assert(genIsValidDoubleReg(nextReg)); + regsToFree &= ~(nextRegBit << 1); + } +#endif freeRegister(getRegisterRecord(nextReg)); } } @@ -5353,9 +5233,33 @@ void LinearScan::allocateRegisters() #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE } + resetRegState(); for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) { - getRegisterRecord(reg)->recentRefPosition = nullptr; + RegRecord* physRegRecord = getRegisterRecord(reg); + physRegRecord->recentRefPosition = nullptr; + updateNextFixedRef(physRegRecord, physRegRecord->firstRefPosition); + + // Is this an incoming arg register? (Note that we don't, currently, consider reassigning + // an incoming arg register as having spill cost.) + Interval* interval = physRegRecord->assignedInterval; + if (interval != nullptr) + { +#ifdef TARGET_ARM + if ((interval->registerType != TYP_DOUBLE) || genIsValidDoubleReg(reg)) +#endif // TARGET_ARM + { + updateNextIntervalRef(reg, interval); + updateSpillCost(reg, interval); + setRegInUse(reg, interval->registerType); + INDEBUG(registersToDump |= getRegMask(reg, interval->registerType)); + } + } + else + { + clearNextIntervalRef(reg, physRegRecord->registerType); + clearSpillCost(reg, physRegRecord->registerType); + } } #ifdef DEBUG @@ -5378,9 +5282,14 @@ void LinearScan::allocateRegisters() BasicBlock* currentBlock = nullptr; - LsraLocation prevLocation = MinLocation; - regMaskTP regsToFree = RBM_NONE; - regMaskTP delayRegsToFree = RBM_NONE; + LsraLocation prevLocation = MinLocation; + regMaskTP regsToFree = RBM_NONE; + regMaskTP delayRegsToFree = RBM_NONE; + regMaskTP regsToMakeInactive = RBM_NONE; + regMaskTP delayRegsToMakeInactive = RBM_NONE; + regMaskTP copyRegsToFree = RBM_NONE; + regsInUseThisLocation = RBM_NONE; + regsInUseNextLocation = RBM_NONE; // This is the most recent RefPosition for which a register was allocated // - currently only used for DEBUG but maintained in non-debug, for clarity of code @@ -5394,6 +5303,29 @@ void LinearScan::allocateRegisters() RefPosition* currentRefPosition = &refPositionIterator; RefPosition* nextRefPosition = currentRefPosition->nextRefPosition; + // TODO: Can we combine this with the freeing of registers below? It might + // mess with the dump, since this was previously being done before the call below + // to dumpRegRecords. + regMaskTP tempRegsToMakeInactive = (regsToMakeInactive | delayRegsToMakeInactive); + while (tempRegsToMakeInactive != RBM_NONE) + { + regMaskTP nextRegBit = genFindLowestBit(tempRegsToMakeInactive); + tempRegsToMakeInactive &= ~nextRegBit; + regNumber nextReg = genRegNumFromMask(nextRegBit); + RegRecord* regRecord = getRegisterRecord(nextReg); + clearSpillCost(regRecord->regNum, regRecord->registerType); + makeRegisterInactive(regRecord); + } + if (currentRefPosition->nodeLocation > prevLocation) + { + makeRegsAvailable(regsToMakeInactive); + // TODO: Clean this up. We need to make the delayRegs inactive as well, but don't want + // to mark them as free yet. + regsToMakeInactive |= delayRegsToMakeInactive; + regsToMakeInactive = delayRegsToMakeInactive; + delayRegsToMakeInactive = RBM_NONE; + } + #ifdef DEBUG // Set the activeRefPosition to null until we're done with any boundary handling. activeRefPosition = nullptr; @@ -5437,14 +5369,15 @@ void LinearScan::allocateRegisters() LsraLocation currentLocation = currentRefPosition->nodeLocation; - if ((regsToFree | delayRegsToFree) != RBM_NONE) + // Free at a new location. + if (currentLocation > prevLocation) { - // Free at a new location, or at a basic block boundary - if (refType == RefTypeBB) - { - assert(currentLocation > prevLocation); - } - if (currentLocation > prevLocation) + // CopyRegs are simply made available - we don't want to make the associated interval inactive. + makeRegsAvailable(copyRegsToFree); + copyRegsToFree = RBM_NONE; + regsInUseThisLocation = regsInUseNextLocation; + regsInUseNextLocation = RBM_NONE; + if ((regsToFree | delayRegsToFree) != RBM_NONE) { freeRegisters(regsToFree); if ((currentLocation > (prevLocation + 1)) && (delayRegsToFree != RBM_NONE)) @@ -5454,10 +5387,105 @@ void LinearScan::allocateRegisters() assert(!"Found a delayRegFree associated with Location with no reference"); // However, to be cautious for the Release build case, we will free them. freeRegisters(delayRegsToFree); - delayRegsToFree = RBM_NONE; + delayRegsToFree = RBM_NONE; + regsInUseThisLocation = RBM_NONE; } regsToFree = delayRegsToFree; delayRegsToFree = RBM_NONE; + +#ifdef DEBUG + // Validate the current state just after we've freed the registers. This ensures that any pending + // freed registers will have had their state updated to reflect the intervals they were holding. + for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) + { + regMaskTP regMask = genRegMask(reg); + // If this isn't available or if it's still waiting to be freed (i.e. it was in + // delayRegsToFree and so now it's in regsToFree), then skip it. + if ((regMask & (availableIntRegs | availableFloatRegs) & ~regsToFree) == RBM_NONE) + { + continue; + } + RegRecord* physRegRecord = getRegisterRecord(reg); + Interval* assignedInterval = physRegRecord->assignedInterval; + if (assignedInterval != nullptr) + { + bool isAssignedReg = (assignedInterval->physReg == reg); + RefPosition* recentRefPosition = assignedInterval->recentRefPosition; + // If we have a copyReg or a moveReg, we might have assigned this register to an Interval, + // but that isn't considered its assignedReg. + if (recentRefPosition != nullptr) + { + if (recentRefPosition->refType == RefTypeExpUse) + { + // We don't update anything on these, as they're just placeholders to extend the + // lifetime. + continue; + } + // For copyReg or moveReg, we don't have anything further to assert. + if (recentRefPosition->copyReg || recentRefPosition->moveReg) + { + continue; + } + assert(assignedInterval->isConstant == isRegConstant(reg, assignedInterval->registerType)); + if (assignedInterval->isActive) + { + // If this is not the register most recently allocated, it must be from a copyReg, + // or it was placed there by the inVarToRegMap. In either case it must be a lclVar. + + if (!isAssignedToInterval(assignedInterval, physRegRecord)) + { + assert(assignedInterval->isLocalVar); + // We'd like to assert that this was either set by the inVarToRegMap, or by + // a copyReg, but we can't traverse backward to check for a copyReg, because + // we only have recentRefPosition, and there may be a previous RefPosition + // at the same Location with a copyReg. + } + if (isAssignedReg) + { + assert(nextIntervalRef[reg] == assignedInterval->getNextRefLocation()); + assert(!isRegAvailable(reg, assignedInterval->registerType)); + assert((recentRefPosition == nullptr) || + (spillCost[reg] == getSpillWeight(physRegRecord))); + } + else + { + assert((nextIntervalRef[reg] == MaxLocation) || + isRegBusy(reg, assignedInterval->registerType)); + } + } + else + { + if ((assignedInterval->physReg == reg) && !assignedInterval->isConstant) + { + assert(nextIntervalRef[reg] == assignedInterval->getNextRefLocation()); + } + else + { + assert(nextIntervalRef[reg] == MaxLocation); + assert(isRegAvailable(reg, assignedInterval->registerType)); + assert(spillCost[reg] == 0); + } + } + } + } + else + { + assert(isRegAvailable(reg, physRegRecord->registerType)); + assert(!isRegConstant(reg, physRegRecord->registerType)); + assert(nextIntervalRef[reg] == MaxLocation); + assert(spillCost[reg] == 0); + } + LsraLocation thisNextFixedRef = physRegRecord->getNextRefLocation(); + assert(nextFixedRef[reg] == thisNextFixedRef); +#ifdef TARGET_ARM + // If this is occupied by a double interval, skip the corresponding float reg. + if ((assignedInterval != nullptr) && (assignedInterval->registerType == TYP_DOUBLE)) + { + reg = REG_NEXT(reg); + } +#endif + } +#endif // DEBUG } } prevLocation = currentLocation; @@ -5495,9 +5523,11 @@ void LinearScan::allocateRegisters() { // Free any delayed regs (now in regsToFree) before processing the block boundary freeRegisters(regsToFree); - regsToFree = RBM_NONE; - handledBlockEnd = true; - curBBStartLocation = currentRefPosition->nodeLocation; + regsToFree = RBM_NONE; + regsInUseThisLocation = RBM_NONE; + regsInUseNextLocation = RBM_NONE; + handledBlockEnd = true; + curBBStartLocation = currentRefPosition->nodeLocation; if (currentBlock == nullptr) { currentBlock = startBlockSequence(); @@ -5523,42 +5553,50 @@ void LinearScan::allocateRegisters() continue; } - // If this is a FixedReg, disassociate any inactive constant interval from this register. - // Otherwise, do nothing. - if (refType == RefTypeFixedReg) + if (currentRefPosition->isPhysRegRef) { RegRecord* regRecord = currentRefPosition->getReg(); Interval* assignedInterval = regRecord->assignedInterval; - if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) + updateNextFixedRef(regRecord, currentRefPosition->nextRefPosition); + + // If this is a FixedReg, disassociate any inactive constant interval from this register. + // Otherwise, do nothing. + if (refType == RefTypeFixedReg) { - regRecord->assignedInterval = nullptr; + if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) + { + clearConstantReg(regRecord->regNum, assignedInterval->registerType); + regRecord->assignedInterval = nullptr; + spillCost[regRecord->regNum] = 0; #ifdef TARGET_ARM - // Update overlapping floating point register for TYP_DOUBLE - if (assignedInterval->registerType == TYP_DOUBLE) - { - regRecord = findAnotherHalfRegRec(regRecord); - assert(regRecord->assignedInterval == assignedInterval); - regRecord->assignedInterval = nullptr; + // Update overlapping floating point register for TYP_DOUBLE + if (assignedInterval->registerType == TYP_DOUBLE) + { + RegRecord* otherRegRecord = findAnotherHalfRegRec(regRecord); + assert(otherRegRecord->assignedInterval == assignedInterval); + otherRegRecord->assignedInterval = nullptr; + spillCost[otherRegRecord->regNum] = 0; + } +#endif // TARGET_ARM } -#endif + regsInUseThisLocation |= currentRefPosition->registerAssignment; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition->assignedReg())); + continue; } - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition->assignedReg())); - continue; - } - if (refType == RefTypeKill) - { - RegRecord* currentReg = currentRefPosition->getReg(); - Interval* assignedInterval = currentReg->assignedInterval; - - if (assignedInterval != nullptr) + if (refType == RefTypeKill) { - unassignPhysReg(currentReg, assignedInterval->recentRefPosition); + if (assignedInterval != nullptr) + { + unassignPhysReg(regRecord, assignedInterval->recentRefPosition); + clearConstantReg(regRecord->regNum, assignedInterval->registerType); + makeRegAvailable(regRecord->regNum, assignedInterval->registerType); + } + clearRegBusyUntilKill(regRecord->regNum); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum)); + continue; } - currentReg->isBusyUntilNextKill = false; - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, currentReg->regNum)); - continue; } // If this is an exposed use, do nothing - this is merely a placeholder to attempt to @@ -5568,6 +5606,11 @@ void LinearScan::allocateRegisters() if (refType == RefTypeExpUse) { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_EXP_USE)); + currentInterval = currentRefPosition->getInterval(); + if (currentInterval->physReg != REG_NA) + { + updateNextIntervalRef(currentInterval->physReg, currentInterval); + } continue; } @@ -5620,6 +5663,12 @@ void LinearScan::allocateRegisters() INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval)); didDump = true; setIntervalAsSpilled(currentInterval); + if (assignedRegister != REG_NA) + { + clearNextIntervalRef(assignedRegister, currentInterval->registerType); + clearSpillCost(assignedRegister, currentInterval->registerType); + makeRegAvailable(assignedRegister, currentInterval->registerType); + } } } #ifdef FEATURE_SIMD @@ -5704,7 +5753,7 @@ void LinearScan::allocateRegisters() // as special. if (srcInterval->isActive && genRegMask(srcInterval->physReg) == currentRefPosition->registerAssignment && - currentInterval->getNextRefLocation() == physRegRecord->getNextRefLocation()) + currentInterval->getNextRefLocation() == nextFixedRef[srcInterval->physReg]) { assert(physRegRecord->regNum == srcInterval->physReg); @@ -5720,7 +5769,7 @@ void LinearScan::allocateRegisters() // special putarg_reg doesn't get spilled and re-allocated prior to // its use at the call node. This is ensured by marking physical reg // record as busy until next kill. - physRegRecord->isBusyUntilNextKill = true; + setRegBusyUntilKill(srcInterval->physReg, srcInterval->registerType); } else { @@ -5762,7 +5811,10 @@ void LinearScan::allocateRegisters() else { currentInterval->isActive = true; + setRegInUse(assignedRegister, currentInterval->registerType); + updateSpillCost(assignedRegister, currentInterval); } + updateNextIntervalRef(assignedRegister, currentInterval); } assert(currentInterval->assignedReg != nullptr && currentInterval->assignedReg->regNum == assignedRegister && @@ -5792,18 +5844,20 @@ void LinearScan::allocateRegisters() // Will the assigned register cover the lifetime? If not, does it at least // meet the preferences for the next RefPosition? - RegRecord* physRegRecord = getRegisterRecord(currentInterval->physReg); - RefPosition* nextPhysRegRefPos = physRegRecord->getNextRefPosition(); - if (nextPhysRegRefPos != nullptr && - nextPhysRegRefPos->nodeLocation <= currentInterval->lastRefPosition->nodeLocation) + LsraLocation nextPhysRegLocation = nextFixedRef[assignedRegister]; + if (nextPhysRegLocation <= currentInterval->lastRefPosition->nodeLocation) { // Check to see if the existing assignment matches the preferences (e.g. callee save registers) // and ensure that the next use of this localVar does not occur after the nextPhysRegRefPos // There must be a next RefPosition, because we know that the Interval extends beyond the // nextPhysRegRefPos. assert(nextRefPosition != nullptr); - if (!matchesPreferences || nextPhysRegRefPos->nodeLocation < nextRefPosition->nodeLocation || - physRegRecord->conflictingFixedRegReference(nextRefPosition)) + if (!matchesPreferences || nextPhysRegLocation < nextRefPosition->nodeLocation) + { + keepAssignment = false; + } + else if ((nextRefPosition->registerAssignment != assignedRegBit) && + (nextPhysRegLocation <= nextRefPosition->getRefEndLocation())) { keepAssignment = false; } @@ -5821,7 +5875,9 @@ void LinearScan::allocateRegisters() if (keepAssignment == false) { + RegRecord* physRegRecord = getRegisterRecord(currentInterval->physReg); currentRefPosition->registerAssignment = allRegs(currentInterval->registerType); + currentRefPosition->isFixedRegRef = false; unassignPhysRegNoSpill(physRegRecord); // If the preferences are currently set to just this register, reset them to allRegs @@ -5847,18 +5903,22 @@ void LinearScan::allocateRegisters() if (assignedRegister != REG_NA) { RegRecord* physRegRecord = getRegisterRecord(assignedRegister); - - // If there is a conflicting fixed reference, insert a copy. - if (physRegRecord->conflictingFixedRegReference(currentRefPosition)) + assert((assignedRegBit == currentRefPosition->registerAssignment) || + (physRegRecord->assignedInterval == currentInterval) || + !isRegInUse(assignedRegister, currentInterval->registerType)); + if (conflictingFixedRegReference(assignedRegister, currentRefPosition)) { // We may have already reassigned the register to the conflicting reference. // If not, we need to unassign this interval. if (physRegRecord->assignedInterval == currentInterval) { unassignPhysRegNoSpill(physRegRecord); + physRegRecord->assignedInterval = nullptr; + clearConstantReg(assignedRegister, currentInterval->registerType); } currentRefPosition->moveReg = true; assignedRegister = REG_NA; + currentRefPosition->registerAssignment &= ~assignedRegBit; setIntervalAsSplit(currentInterval); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister)); } @@ -5907,37 +5967,52 @@ void LinearScan::allocateRegisters() } } } + regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); + regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); + regsInUseThisLocation |= copyRegMask | assignedRegMask; if (currentRefPosition->lastUse) - { - assert(currentRefPosition->isIntervalRef()); - unassign = true; - } - if (unassign) { if (currentRefPosition->delayRegFree) { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval, assignedRegister)); - delayRegsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment); + delayRegsToFree |= copyRegMask | assignedRegMask; + regsInUseNextLocation |= copyRegMask | assignedRegMask; } else { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister)); - regsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment); + regsToFree |= copyRegMask | assignedRegMask; + } + } + else + { + copyRegsToFree |= copyRegMask; + if (currentRefPosition->delayRegFree) + { + regsInUseNextLocation |= copyRegMask | assignedRegMask; } } + // If this is a tree temp (non-localVar) interval, we will need an explicit move. + // Note: In theory a moveReg should cause the Interval to now have the new reg as its + // assigned register. However, that's not currently how this works. + // If we ever actually move lclVar intervals instead of copying, this will need to change. if (!currentInterval->isLocalVar) { currentRefPosition->moveReg = true; currentRefPosition->copyReg = false; } + clearNextIntervalRef(copyReg, currentInterval->registerType); + clearSpillCost(copyReg, currentInterval->registerType); + updateNextIntervalRef(assignedRegister, currentInterval); + updateSpillCost(assignedRegister, currentInterval); continue; } else { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister)); - regsToFree |= genRegMask(assignedRegister); + regsToFree |= getRegMask(assignedRegister, currentInterval->registerType); // We want a new register, but we don't want this to be considered a spill. assignedRegister = REG_NA; if (physRegRecord->assignedInterval == currentInterval) @@ -5950,21 +6025,19 @@ void LinearScan::allocateRegisters() if (assignedRegister == REG_NA) { - bool allocateReg = true; - if (currentRefPosition->RegOptional()) { - // We can avoid allocating a register if it is a the last use requiring a reload. + // We can avoid allocating a register if it is a last use requiring a reload. if (currentRefPosition->lastUse && currentRefPosition->reload) { - allocateReg = false; + allocate = false; } else if (currentInterval->isWriteThru) { // Don't allocate if the next reference is in a cold block. if (nextRefPosition == nullptr || (nextRefPosition->nodeLocation >= firstColdLoc)) { - allocateReg = false; + allocate = false; } } @@ -5974,71 +6047,43 @@ void LinearScan::allocateRegisters() if ((currentRefPosition->refType == RefTypeUpperVectorRestore) && (currentInterval->physReg == REG_NA)) { assert(currentRefPosition->regOptional); - allocateReg = false; + allocate = false; } #endif #ifdef DEBUG // Under stress mode, don't allocate registers to RegOptional RefPositions. - if (allocateReg && regOptionalNoAlloc()) + if (allocate && regOptionalNoAlloc()) { - allocateReg = false; + allocate = false; } #endif } - if (allocateReg) + if (allocate) { - // Try to allocate a register - assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition); + // Allocate a register, if we must, or if it is profitable to do so. + // If we have a fixed reg requirement, and the interval is inactive in another register, + // unassign that register. + if (currentRefPosition->isFixedRegRef && !currentInterval->isActive && + (currentInterval->assignedReg != nullptr) && + (currentInterval->assignedReg->assignedInterval == currentInterval) && + (genRegMask(currentInterval->assignedReg->regNum) != currentRefPosition->registerAssignment)) + { + unassignPhysReg(currentInterval->assignedReg, nullptr); + } + assignedRegister = allocateReg(currentInterval, currentRefPosition); } - // If no register was found, and if the currentRefPosition must have a register, - // then find a register to spill + // If no register was found, this RefPosition must not require a register. if (assignedRegister == REG_NA) { - bool isAllocatable = currentRefPosition->IsActualRef(); -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE && defined(TARGET_ARM64) - if (currentInterval->isUpperVector) - { - // On Arm64, we can't save the upper half to memory without a register. - isAllocatable = true; - assert(!currentRefPosition->RegOptional()); - } -#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE && TARGET_ARM64 - if (isAllocatable) - { - if (allocateReg) - { - assignedRegister = - allocateBusyReg(currentInterval, currentRefPosition, currentRefPosition->RegOptional()); - } - - if (assignedRegister != REG_NA) - { - INDEBUG( - dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_SPILLED_REG, currentInterval, assignedRegister)); - } - else - { - // This can happen only for those ref positions that are to be allocated - // only if profitable. - noway_assert(currentRefPosition->RegOptional()); - - currentRefPosition->registerAssignment = RBM_NONE; - currentRefPosition->reload = false; - setIntervalAsSpilled(currentInterval); - - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); - } - } - else - { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); - currentRefPosition->registerAssignment = RBM_NONE; - currentInterval->isActive = false; - setIntervalAsSpilled(currentInterval); - } + assert(currentRefPosition->RegOptional()); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); + currentRefPosition->registerAssignment = RBM_NONE; + currentRefPosition->reload = false; + currentInterval->isActive = false; + setIntervalAsSpilled(currentInterval); } #ifdef DEBUG else @@ -6072,12 +6117,19 @@ void LinearScan::allocateRegisters() } // If we allocated a register, record it - if (currentInterval != nullptr && assignedRegister != REG_NA) + if (assignedRegister != REG_NA) { - assignedRegBit = genRegMask(assignedRegister); + assignedRegBit = genRegMask(assignedRegister); + regMaskTP regMask = getRegMask(assignedRegister, currentInterval->registerType); + regsInUseThisLocation |= regMask; + if (currentRefPosition->delayRegFree) + { + regsInUseNextLocation |= regMask; + } currentRefPosition->registerAssignment = assignedRegBit; - currentInterval->physReg = assignedRegister; - regsToFree &= ~assignedRegBit; // we'll set it again later if it's dead + + currentInterval->physReg = assignedRegister; + regsToFree &= ~regMask; // we'll set it again later if it's dead // If this interval is dead, free the register. // The interval could be dead if this is a user variable, or if the @@ -6085,9 +6137,10 @@ void LinearScan::allocateRegisters() // is not used, etc. // If this is an UpperVector we'll neither free it nor preference it // (it will be freed when it is used). + bool stillInReg = true; + bool unassign = false; if (!currentInterval->IsUpperVector()) { - bool unassign = false; if (currentInterval->isWriteThru) { if (currentRefPosition->refType == RefTypeDef) @@ -6105,44 +6158,57 @@ void LinearScan::allocateRegisters() if (currentRefPosition->lastUse || currentRefPosition->nextRefPosition == nullptr) { assert(currentRefPosition->isIntervalRef()); - - if (refType != RefTypeExpUse && currentRefPosition->nextRefPosition == nullptr) + // If this isn't a final use, we'll mark the register as available, but keep the association. + if ((refType != RefTypeExpUse) && (currentRefPosition->nextRefPosition == nullptr)) { unassign = true; } else { + if (currentRefPosition->delayRegFree) + { + delayRegsToMakeInactive |= regMask; + } + else + { + regsToMakeInactive |= regMask; + } + // TODO-Cleanup: this makes things consistent with previous, and will enable preferences + // to be propagated, but it seems less than ideal. currentInterval->isActive = false; } + // Update the register preferences for the relatedInterval, if this is 'preferencedToDef'. + // Don't propagate to subsequent relatedIntervals; that will happen as they are allocated, and we + // don't know yet whether the register will be retained. + if (currentInterval->relatedInterval != nullptr) + { + currentInterval->relatedInterval->updateRegisterPreferences(assignedRegBit); + } } + if (unassign) { if (currentRefPosition->delayRegFree) { - delayRegsToFree |= assignedRegBit; + delayRegsToFree |= regMask; INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED)); } else { - regsToFree |= assignedRegBit; + regsToFree |= regMask; INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE)); } } - - // Update the register preferences for the relatedInterval, if this is 'preferencedToDef'. - // Don't propagate to subsequent relatedIntervals; that will happen as they are allocated, and we - // don't know yet whether the register will be retained. - if ((currentRefPosition->lastUse || nextRefPosition == nullptr) && - (currentInterval->relatedInterval != nullptr)) - { - currentInterval->relatedInterval->updateRegisterPreferences(assignedRegBit); - } } - - lastAllocatedRefPosition = currentRefPosition; + if (!unassign) + { + updateNextIntervalRef(assignedRegister, currentInterval); + updateSpillCost(assignedRegister, currentInterval); + } } + lastAllocatedRefPosition = currentRefPosition; } #ifdef JIT32_GCENCODER @@ -6284,20 +6350,46 @@ void LinearScan::updateAssignedInterval(RegRecord* reg, Interval* interval, Regi #ifdef TARGET_ARM // Update overlapping floating point register for TYP_DOUBLE. Interval* oldAssignedInterval = reg->assignedInterval; + regNumber doubleReg = REG_NA; if (regType == TYP_DOUBLE) { - RegRecord* anotherHalfReg = findAnotherHalfRegRec(reg); - + doubleReg = reg->regNum; + RegRecord* anotherHalfReg = getSecondHalfRegRec(reg); anotherHalfReg->assignedInterval = interval; } else if ((oldAssignedInterval != nullptr) && (oldAssignedInterval->registerType == TYP_DOUBLE)) { - RegRecord* anotherHalfReg = findAnotherHalfRegRec(reg); - + RegRecord* anotherHalfReg = findAnotherHalfRegRec(reg); + doubleReg = genIsValidDoubleReg(reg->regNum) ? reg->regNum : anotherHalfReg->regNum; anotherHalfReg->assignedInterval = nullptr; } + if (doubleReg != REG_NA) + { + clearNextIntervalRef(doubleReg, TYP_DOUBLE); + clearSpillCost(doubleReg, TYP_DOUBLE); + clearConstantReg(doubleReg, TYP_DOUBLE); + } #endif reg->assignedInterval = interval; + if (interval != nullptr) + { + setRegInUse(reg->regNum, interval->registerType); + if (interval->isConstant) + { + setConstantReg(reg->regNum, interval->registerType); + } + else + { + clearConstantReg(reg->regNum, interval->registerType); + } + updateNextIntervalRef(reg->regNum, interval); + updateSpillCost(reg->regNum, interval); + } + else + { + clearNextIntervalRef(reg->regNum, reg->registerType); + clearSpillCost(reg->regNum, reg->registerType); + } } //----------------------------------------------------------------------------- @@ -10018,7 +10110,7 @@ void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event, } if ((interval != nullptr) && (reg != REG_NA) && (reg != REG_STK)) { - registersToDump |= genRegMask(reg); + registersToDump |= getRegMask(reg, interval->registerType); dumpRegRecordTitleIfNeeded(); } @@ -10074,16 +10166,10 @@ void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event, break; // Restoring the previous register - case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL: - assert(interval != nullptr); - dumpRefPositionShort(activeRefPosition, currentBlock); - printf("SRstr %-4s ", getRegName(reg)); - dumpRegRecords(); - break; - case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL: + case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL: assert(interval != nullptr); - if (activeRefPosition == nullptr) + if ((activeRefPosition == nullptr) || (activeRefPosition->refType == RefTypeBB)) { printf(emptyRefPositionFormat, ""); } @@ -10091,7 +10177,7 @@ void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event, { dumpRefPositionShort(activeRefPosition, currentBlock); } - printf("Restr %-4s ", getRegName(reg)); + printf((event == LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL) ? "Restr %-4s " : "SRstr %-4s ", getRegName(reg)); dumpRegRecords(); break; @@ -10166,11 +10252,6 @@ void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event, printf("Reuse %-4s ", getRegName(reg)); break; - case LSRA_EVENT_ALLOC_SPILLED_REG: - dumpRefPositionShort(activeRefPosition, currentBlock); - printf("Steal %-4s ", getRegName(reg)); - break; - case LSRA_EVENT_NO_ENTRY_REG_ALLOCATED: assert(interval != nullptr && interval->isLocalVar); dumpRefPositionShort(activeRefPosition, currentBlock); @@ -10412,12 +10493,12 @@ void LinearScan::dumpRegRecords() { static char columnFormatArray[18]; - for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++) + for (regNumber regNum = REG_FIRST; regNum <= (regNumber)lastUsedRegNumIndex; regNum = REG_NEXT(regNum)) { - if (shouldDumpReg((regNumber)regNumIndex)) + if (shouldDumpReg(regNum)) { printf("%s", columnSeparator); - RegRecord& regRecord = physRegs[regNumIndex]; + RegRecord& regRecord = physRegs[regNum]; Interval* interval = regRecord.assignedInterval; if (interval != nullptr) { @@ -10431,7 +10512,7 @@ void LinearScan::dumpRegRecords() #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE printf("%c", activeChar); } - else if (regRecord.isBusyUntilNextKill) + else if ((genRegMask(regNum) & regsBusyUntilKill) != RBM_NONE) { printf(columnFormatArray, "Busy"); } @@ -10957,8 +11038,11 @@ void LinearScan::verifyFinalAllocation() } else { - interval->physReg = regNum; - interval->assignedReg = regRecord; + if (!currentRefPosition->copyReg) + { + interval->physReg = regNum; + interval->assignedReg = regRecord; + } regRecord->assignedInterval = interval; } } diff --git a/src/coreclr/src/jit/lsra.h b/src/coreclr/src/jit/lsra.h index 0d443f900c6cd8..a89a008dd60261 100644 --- a/src/coreclr/src/jit/lsra.h +++ b/src/coreclr/src/jit/lsra.h @@ -471,12 +471,11 @@ class RegRecord : public Referenceable public: RegRecord() { - assignedInterval = nullptr; - previousInterval = nullptr; - regNum = REG_NA; - isCalleeSave = false; - registerType = IntRegisterType; - isBusyUntilNextKill = false; + assignedInterval = nullptr; + previousInterval = nullptr; + regNum = REG_NA; + isCalleeSave = false; + registerType = IntRegisterType; } void init(regNumber reg) @@ -511,8 +510,6 @@ class RegRecord : public Referenceable void tinyDump(); #endif // DEBUG - bool isFree(); - // RefPosition * getNextRefPosition(); // LsraLocation getNextRefLocation(); @@ -528,15 +525,10 @@ class RegRecord : public Referenceable // assignedInterval becomes inactive. Interval* previousInterval; - regNumber regNum; - bool isCalleeSave; - RegisterType registerType; - // This register must be considered busy until the next time it is explicitly killed. - // This is used so that putarg_reg can avoid killing its lclVar source, while avoiding - // the problem with the reg becoming free if the last-use is encountered before the call. - bool isBusyUntilNextKill; - - bool conflictingFixedRegReference(RefPosition* refPosition); + regNumber regNum; + bool isCalleeSave; + RegisterType registerType; + unsigned char regOrder; }; inline bool leafInRange(GenTree* leaf, int lower, int upper) @@ -976,9 +968,7 @@ class LinearScan : public LinearScanInterface bool isSecondHalfReg(RegRecord* regRec, Interval* interval); RegRecord* getSecondHalfRegRec(RegRecord* regRec); RegRecord* findAnotherHalfRegRec(RegRecord* regRec); - bool canSpillDoubleReg(RegRecord* physRegRecord, - LsraLocation refLocation, - BasicBlock::weight_t* recentAssignedRefWeight); + bool canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation); void unassignDoublePhysReg(RegRecord* doubleRegRecord); #endif void updateAssignedInterval(RegRecord* reg, Interval* interval, RegisterType regType); @@ -986,8 +976,8 @@ class LinearScan : public LinearScanInterface bool canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval); bool isAssignedToInterval(Interval* interval, RegRecord* regRec); bool isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation); - bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, BasicBlock::weight_t* recentAssignedRefWeight); - bool isRegInUse(RegRecord* regRec, RefPosition* refPosition); + bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation); + float getSpillWeight(RegRecord* physRegRecord); // insert refpositions representing prolog zero-inits which will be added later void insertZeroInitRefPositions(); @@ -1056,11 +1046,7 @@ class LinearScan : public LinearScanInterface regMaskTP allSIMDRegs(); regMaskTP internalFloatRegCandidates(); - bool registerIsFree(regNumber regNum, RegisterType regType); - bool registerIsAvailable(RegRecord* physRegRecord, - LsraLocation currentLoc, - LsraLocation* nextRefLocationPtr, - RegisterType regType); + void makeRegisterInactive(RegRecord* physRegRecord); void freeRegister(RegRecord* physRegRecord); void freeRegisters(regMaskTP regsToFree); @@ -1143,15 +1129,12 @@ class LinearScan : public LinearScanInterface * Register management ****************************************************************************/ RegisterType getRegisterType(Interval* currentInterval, RefPosition* refPosition); - regNumber tryAllocateFreeReg(Interval* current, RefPosition* refPosition); - regNumber allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable); + + regNumber allocateReg(Interval* current, RefPosition* refPosition); regNumber assignCopyReg(RefPosition* refPosition); bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition); - bool isSpillCandidate(Interval* current, - RefPosition* refPosition, - RegRecord* physRegRecord, - LsraLocation& nextLocation); + bool isSpillCandidate(Interval* current, RefPosition* refPosition, RegRecord* physRegRecord); void checkAndAssignInterval(RegRecord* regRec, Interval* interval); void assignPhysReg(RegRecord* regRec, Interval* interval); void assignPhysReg(regNumber reg, Interval* interval) @@ -1160,7 +1143,6 @@ class LinearScan : public LinearScanInterface } bool isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType)); - bool isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType)); void checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition); void unassignPhysReg(RegRecord* regRec ARM_ARG(RegisterType newRegType)); void unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition); @@ -1176,6 +1158,59 @@ class LinearScan : public LinearScanInterface void spillGCRefs(RefPosition* killRefPosition); + /***************************************************************************** + * Register selection + ****************************************************************************/ + regMaskTP getFreeCandidates(regMaskTP candidates, var_types regType) + { + regMaskTP result = candidates & m_AvailableRegs; +#ifdef TARGET_ARM + // For TYP_DOUBLE on ARM, we can only use register for which the odd half is + // also available. + if (regType == TYP_DOUBLE) + { + result &= (m_AvailableRegs >> 1); + } +#endif // TARGET_ARM + return result; + } + + struct registerSelector + { + regMaskTP candidates; + int score; +#ifdef TARGET_ARM + var_types regType; +#endif // TARGET_ARM + + // Apply a simple mask-based selection heuristic, and return 'true' if we now have a single candidate. + bool applySelection(int selectionScore, regMaskTP selectionCandidates) + { + regMaskTP newCandidates = candidates & selectionCandidates; + if (newCandidates != RBM_NONE) + { + score += selectionScore; + candidates = newCandidates; + return isSingleRegister(candidates); + } + return false; + } + + // Select a single register, if it is in the candidate set. + // Return true if so. + bool applySingleRegSelection(int selectionScore, regMaskTP selectionCandidate) + { + assert(isSingleRegister(selectionCandidate)); + regMaskTP newCandidates = candidates & selectionCandidate; + if (newCandidates != RBM_NONE) + { + candidates = newCandidates; + return true; + } + return false; + } + }; + /***************************************************************************** * For Resolution phase ****************************************************************************/ @@ -1316,8 +1351,7 @@ class LinearScan : public LinearScanInterface // Allocation decisions LSRA_EVENT_FIXED_REG, LSRA_EVENT_EXP_USE, LSRA_EVENT_ZERO_REF, LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, LSRA_EVENT_KEPT_ALLOCATION, LSRA_EVENT_COPY_REG, LSRA_EVENT_MOVE_REG, LSRA_EVENT_ALLOC_REG, - LSRA_EVENT_ALLOC_SPILLED_REG, LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG, - LSRA_EVENT_REUSE_REG, + LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG, LSRA_EVENT_REUSE_REG, }; void dumpLsraAllocationEvent(LsraDumpEvent event, Interval* interval = nullptr, @@ -1488,6 +1522,154 @@ class LinearScan : public LinearScanInterface VARSET_TP largeVectorCalleeSaveCandidateVars; #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + //----------------------------------------------------------------------- + // Register status + //----------------------------------------------------------------------- + + regMaskTP m_AvailableRegs; + regNumber getRegForType(regNumber reg, var_types regType) + { +#ifdef TARGET_ARM + if ((regType == TYP_DOUBLE) && !genIsValidDoubleReg(reg)) + { + reg = REG_PREV(reg); + } +#endif // TARGET_ARM + return reg; + } + + regMaskTP getRegMask(regNumber reg, var_types regType) + { + reg = getRegForType(reg, regType); + regMaskTP regMask = genRegMask(reg); +#ifdef TARGET_ARM + if (regType == TYP_DOUBLE) + { + assert(genIsValidDoubleReg(reg)); + regMask |= (regMask << 1); + } +#endif // TARGET_ARM + return regMask; + } + + void resetAvailableRegs() + { + m_AvailableRegs = (availableIntRegs | availableFloatRegs); + m_RegistersWithConstants = RBM_NONE; + } + + bool isRegAvailable(regNumber reg, var_types regType) + { + regMaskTP regMask = getRegMask(reg, regType); + return (m_AvailableRegs & regMask) == regMask; + } + void setRegsInUse(regMaskTP regMask) + { + m_AvailableRegs &= ~regMask; + } + void setRegInUse(regNumber reg, var_types regType) + { + regMaskTP regMask = getRegMask(reg, regType); + setRegsInUse(regMask); + } + void makeRegsAvailable(regMaskTP regMask) + { + m_AvailableRegs |= regMask; + } + void makeRegAvailable(regNumber reg, var_types regType) + { + regMaskTP regMask = getRegMask(reg, regType); + makeRegsAvailable(regMask); + } + + void clearNextIntervalRef(regNumber reg, var_types regType); + void updateNextIntervalRef(regNumber reg, Interval* interval); + + void clearSpillCost(regNumber reg, var_types regType); + void updateSpillCost(regNumber reg, Interval* interval); + + regMaskTP m_RegistersWithConstants; + void clearConstantReg(regNumber reg, var_types regType) + { + m_RegistersWithConstants &= ~getRegMask(reg, regType); + } + void setConstantReg(regNumber reg, var_types regType) + { + m_RegistersWithConstants |= getRegMask(reg, regType); + } + bool isRegConstant(regNumber reg, var_types regType) + { + reg = getRegForType(reg, regType); + regMaskTP regMask = getRegMask(reg, regType); + return (m_RegistersWithConstants & regMask) == regMask; + } + regMaskTP getMatchingConstants(regMaskTP mask, Interval* currentInterval, RefPosition* refPosition); + + regMaskTP fixedRegs; + LsraLocation nextFixedRef[REG_COUNT]; + void updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition); + LsraLocation getNextFixedRef(regNumber regNum, var_types regType) + { + LsraLocation loc = nextFixedRef[regNum]; +#ifdef TARGET_ARM + if (regType == TYP_DOUBLE) + { + loc = Min(loc, nextFixedRef[regNum + 1]); + } +#endif + return loc; + } + + LsraLocation nextIntervalRef[REG_COUNT]; + LsraLocation getNextIntervalRef(regNumber regNum, var_types regType) + { + LsraLocation loc = nextIntervalRef[regNum]; +#ifdef TARGET_ARM + if (regType == TYP_DOUBLE) + { + loc = Min(loc, nextIntervalRef[regNum + 1]); + } +#endif + return loc; + } + float spillCost[REG_COUNT]; + + regMaskTP regsBusyUntilKill; + regMaskTP regsInUseThisLocation; + regMaskTP regsInUseNextLocation; + bool isRegBusy(regNumber reg, var_types regType) + { + regMaskTP regMask = getRegMask(reg, regType); + return (regsBusyUntilKill & regMask) != RBM_NONE; + } + void setRegBusyUntilKill(regNumber reg, var_types regType) + { + regsBusyUntilKill |= getRegMask(reg, regType); + } + void clearRegBusyUntilKill(regNumber reg) + { + regsBusyUntilKill &= ~genRegMask(reg); + } + + bool isRegInUse(regNumber reg, var_types regType) + { + regMaskTP regMask = getRegMask(reg, regType); + return (regsInUseThisLocation & regMask) != RBM_NONE; + } + + void resetRegState() + { + resetAvailableRegs(); + regsBusyUntilKill = RBM_NONE; + } + + bool conflictingFixedRegReference(regNumber regNum, RefPosition* refPosition); + + // This method should not be used and is here to retain old behavior. + // It should be replaced by isRegAvailable(). + // See comment in allocateReg(); + bool isFree(RegRecord* regRecord); + //----------------------------------------------------------------------- // Build methods //----------------------------------------------------------------------- @@ -1551,7 +1733,7 @@ class LinearScan : public LinearScanInterface int BuildSimple(GenTree* tree); int BuildOperandUses(GenTree* node, regMaskTP candidates = RBM_NONE); - int BuildDelayFreeUses(GenTree* node, regMaskTP candidates = RBM_NONE); + int BuildDelayFreeUses(GenTree* node, GenTree* rmwNode = nullptr, regMaskTP candidates = RBM_NONE); int BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE); int BuildAddrUses(GenTree* addr, regMaskTP candidates = RBM_NONE); void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs); @@ -1728,7 +1910,7 @@ class Interval : public Referenceable // True if this interval is defined by a putArg, whose source is a non-last-use lclVar. // During allocation, this flag will be cleared if the source is not already in the required register. // Othewise, we will leave the register allocated to the lclVar, but mark the RegRecord as - // isBusyUntilNextKill, so that it won't be reused if the lclVar goes dead before the call. + // isBusyUntilKill, so that it won't be reused if the lclVar goes dead before the call. bool isSpecialPutArg : 1; // True if this interval interferes with a call. diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index 9c67a05cc55657..9463b1f4820870 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -1098,8 +1098,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { if (isRMW) { - srcCount += BuildDelayFreeUses(intrin.op2); - srcCount += BuildDelayFreeUses(intrin.op3, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS); + srcCount += BuildDelayFreeUses(intrin.op2, nullptr); + srcCount += BuildDelayFreeUses(intrin.op3, nullptr, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS); } else { diff --git a/src/coreclr/src/jit/lsrabuild.cpp b/src/coreclr/src/jit/lsrabuild.cpp index bdcd8dcd3999cb..a0cbf2d5f24a9d 100644 --- a/src/coreclr/src/jit/lsrabuild.cpp +++ b/src/coreclr/src/jit/lsrabuild.cpp @@ -345,6 +345,7 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de (getRegisterType(interval, useRefPosition) == regType)); regMaskTP candidates = allRegs(regType); defRefPosition->registerAssignment = candidates; + defRefPosition->isFixedRegRef = false; return; } INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE6, interval)); @@ -1791,17 +1792,33 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, Lsra JITDUMP("\n"); } +static const regNumber lsraRegOrder[] = {REG_VAR_ORDER}; +const unsigned lsraRegOrderSize = ArrLen(lsraRegOrder); +static const regNumber lsraRegOrderFlt[] = {REG_VAR_ORDER_FLT}; +const unsigned lsraRegOrderFltSize = ArrLen(lsraRegOrderFlt); + //------------------------------------------------------------------------ // buildPhysRegRecords: Make an interval for each physical register // void LinearScan::buildPhysRegRecords() { - RegisterType regType = IntRegisterType; for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) { RegRecord* curr = &physRegs[reg]; curr->init(reg); } + for (unsigned int i = 0; i < lsraRegOrderSize; i++) + { + regNumber reg = lsraRegOrder[i]; + RegRecord* curr = &physRegs[reg]; + curr->regOrder = (unsigned char)i; + } + for (unsigned int i = 0; i < lsraRegOrderFltSize; i++) + { + regNumber reg = lsraRegOrderFlt[i]; + RegRecord* curr = &physRegs[reg]; + curr->regOrder = (unsigned char)i; + } } //------------------------------------------------------------------------ @@ -2139,6 +2156,7 @@ void LinearScan::buildIntervals() assert(inArgReg < REG_COUNT); mask = genRegMask(inArgReg); assignPhysReg(inArgReg, interval); + INDEBUG(registersToDump |= getRegMask(inArgReg, interval->registerType)); } RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask); pos->setRegOptional(true); @@ -3029,59 +3047,85 @@ void LinearScan::setDelayFree(RefPosition* use) //------------------------------------------------------------------------ // BuildDelayFreeUses: Build Use RefPositions for an operand that might be contained, -// and which need to be marked delayRegFree +// and which may need to be marked delayRegFree // // Arguments: -// node - The node of interest +// node - The node of interest +// rmwNode - The node that has RMW semantics (if applicable) +// candidates - The set of candidates for the uses // // Return Value: // The number of source registers used by the *parent* of this node. // -int LinearScan::BuildDelayFreeUses(GenTree* node, regMaskTP candidates) +int LinearScan::BuildDelayFreeUses(GenTree* node, GenTree* rmwNode, regMaskTP candidates) { - RefPosition* use; + RefPosition* use = nullptr; + Interval* rmwInterval = nullptr; + bool rmwIsLastUse = false; + GenTree* addr = nullptr; + if ((rmwNode != nullptr) && isCandidateLocalRef(rmwNode)) + { + rmwInterval = getIntervalForLocalVarNode(rmwNode->AsLclVar()); + // Note: we don't handle multi-reg vars here. It's not clear that there are any cases + // where we'd encounter a multi-reg var in an RMW context. + assert(!rmwNode->AsLclVar()->IsMultiReg()); + rmwIsLastUse = rmwNode->AsLclVar()->IsLastUse(0); + } if (!node->isContained()) { use = BuildUse(node, candidates); - setDelayFree(use); - return 1; } - if (node->OperIsHWIntrinsic()) + else if (node->OperIsHWIntrinsic()) { use = BuildUse(node->gtGetOp1(), candidates); - setDelayFree(use); - return 1; } - if (!node->OperIsIndir()) + else if (!node->OperIsIndir()) { return 0; } - GenTreeIndir* indirTree = node->AsIndir(); - GenTree* addr = indirTree->gtOp1; - if (!addr->isContained()) + else { - use = BuildUse(addr, candidates); - setDelayFree(use); - return 1; + GenTreeIndir* indirTree = node->AsIndir(); + addr = indirTree->gtOp1; + if (!addr->isContained()) + { + use = BuildUse(addr, candidates); + } + else if (!addr->OperIs(GT_LEA)) + { + return 0; + } } - if (!addr->OperIs(GT_LEA)) + if (use != nullptr) { - return 0; + if ((use->getInterval() != rmwInterval) || (!rmwIsLastUse && !use->lastUse)) + { + setDelayFree(use); + } + return 1; } + // If we reach here we have a contained LEA in 'addr'. + GenTreeAddrMode* const addrMode = addr->AsAddrMode(); unsigned srcCount = 0; if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained()) { use = BuildUse(addrMode->Base(), candidates); - setDelayFree(use); + if ((use->getInterval() != rmwInterval) || (!rmwIsLastUse && !use->lastUse)) + { + setDelayFree(use); + } srcCount++; } if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained()) { use = BuildUse(addrMode->Index(), candidates); - setDelayFree(use); + if ((use->getInterval() != rmwInterval) || (!rmwIsLastUse && !use->lastUse)) + { + setDelayFree(use); + } srcCount++; } return srcCount; diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index db787c55fdf282..133bcf28acf5a5 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -876,7 +876,7 @@ int LinearScan::BuildRMWUses(GenTreeOp* node, regMaskTP candidates) } else if (delayUseOperand == op1) { - srcCount += BuildDelayFreeUses(op1, op1Candidates); + srcCount += BuildDelayFreeUses(op1, op2, op1Candidates); } else { @@ -893,7 +893,7 @@ int LinearScan::BuildRMWUses(GenTreeOp* node, regMaskTP candidates) } else if (delayUseOperand == op2) { - srcCount += BuildDelayFreeUses(op2, op2Candidates); + srcCount += BuildDelayFreeUses(op2, op1, op2Candidates); } else { @@ -987,7 +987,7 @@ int LinearScan::BuildShiftRotate(GenTree* tree) { if (!shiftBy->isContained()) { - srcCount += BuildDelayFreeUses(shiftBy, RBM_RCX); + srcCount += BuildDelayFreeUses(shiftBy, source, RBM_RCX); buildKillPositionsForNode(tree, currentLoc + 1, RBM_RCX); } BuildDef(tree, dstCandidates); @@ -1778,7 +1778,7 @@ int LinearScan::BuildModDiv(GenTree* tree) srcCount = 1; } - srcCount += BuildDelayFreeUses(op2, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX)); + srcCount += BuildDelayFreeUses(op2, op1, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX)); buildInternalRegisterUses(); @@ -2341,8 +2341,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) tgtPrefUse = BuildUse(op1); srcCount += 1; - srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2); - srcCount += BuildDelayFreeUses(op3, RBM_XMM0); + srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1); + srcCount += BuildDelayFreeUses(op3, op1, RBM_XMM0); buildUses = false; } @@ -2378,7 +2378,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) tgtPrefUse = BuildUse(op1); srcCount += 1; - srcCount += BuildDelayFreeUses(op2, varTypeIsByte(baseType) ? allByteRegs() : RBM_NONE); + srcCount += BuildDelayFreeUses(op2, op1, varTypeIsByte(baseType) ? allByteRegs() : RBM_NONE); buildUses = false; break; @@ -2395,7 +2395,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { // op3 reg should be different from target reg to // store the lower half result after executing the instruction - srcCount += BuildDelayFreeUses(op3); + srcCount += BuildDelayFreeUses(op3, op1); // Need a internal register different from the dst to take the lower half result buildInternalIntRegisterDefForNode(intrinsicTree); setInternalRegsDelayFree = true; @@ -2431,7 +2431,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += 1; srcCount += BuildOperandUses(op2); - srcCount += BuildDelayFreeUses(op3); + srcCount += BuildDelayFreeUses(op3, op1); } else if (op1->isContained()) { @@ -2440,7 +2440,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) tgtPrefUse = BuildUse(op3); srcCount += BuildOperandUses(op1); - srcCount += BuildDelayFreeUses(op2); + srcCount += BuildDelayFreeUses(op2, op1); srcCount += 1; } else @@ -2452,7 +2452,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) if (copiesUpperBits) { - srcCount += BuildDelayFreeUses(op2); + srcCount += BuildDelayFreeUses(op2, op1); } else { @@ -2460,7 +2460,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += 1; } - srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3); + srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); } buildUses = false; @@ -2475,7 +2475,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) // Any pair of the index, mask, or destination registers should be different srcCount += BuildOperandUses(op1); - srcCount += BuildDelayFreeUses(op2); + srcCount += BuildDelayFreeUses(op2, op1); // op3 should always be contained assert(op3->isContained()); diff --git a/src/coreclr/src/jit/target.h b/src/coreclr/src/jit/target.h index a2b5f93387662e..5d3e7ad96d14c3 100644 --- a/src/coreclr/src/jit/target.h +++ b/src/coreclr/src/jit/target.h @@ -1288,7 +1288,7 @@ typedef unsigned char regNumberSmall; REG_R6, REG_R7, REG_R8, REG_R9, REG_R10, \ REG_R11, REG_R13, REG_R14, \ REG_R12, REG_R15, REG_IP0, REG_IP1, \ - REG_CALLEE_SAVED_ORDER + REG_CALLEE_SAVED_ORDER, REG_LR #define REG_VAR_ORDER_FLT REG_V16, REG_V17, REG_V18, REG_V19, \ REG_V20, REG_V21, REG_V22, REG_V23, \ diff --git a/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.cs b/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.cs new file mode 100644 index 00000000000000..53081061bb0775 --- /dev/null +++ b/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.cs @@ -0,0 +1,60 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + + +using System; +using System.Runtime.CompilerServices; + + +class GitHub_13735 +{ + [MethodImpl(MethodImplOptions.NoInlining)] + static int GetRandom() { + return 0; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void Print(int x) { } + + static void SampleA() + { + var a = GetRandom(); + var b = GetRandom(); + var c = GetRandom(); + var d = GetRandom(); + var e = GetRandom(); + var f = GetRandom(); + var g = GetRandom(); + var h = GetRandom(); + for (var x = 0; x < 100; ++x) + { + var xa = GetRandom(); + var xb = GetRandom(); + var xc = GetRandom(); + var xd = GetRandom(); + var xe = GetRandom(); + var xf = GetRandom(); + Print(xa); + Print(xb); + Print(xc); + Print(xd); + Print(xe); + Print(xf); + } + Print(a); + Print(b); + Print(c); + Print(d); + Print(e); + Print(f); + Print(g); + Print(h); + } + + public static int Main() + { + SampleA(); + return 100; + } +} diff --git a/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.csproj b/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.csproj new file mode 100644 index 00000000000000..986494e092b5aa --- /dev/null +++ b/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.csproj @@ -0,0 +1,12 @@ + + + Exe + + + + True + + + + +