diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 757491b95034df..5e05109dbded53 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -8841,7 +8841,7 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize // -saved bool for synchronized methods // slots for ret address + FP + EnC callee-saves - int preservedAreaSize = (2 + genCountBits((uint64_t)RBM_ENC_CALLEE_SAVED)) * REGSIZE_BYTES; + int preservedAreaSize = (2 + genCountBits(RBM_ENC_CALLEE_SAVED)) * REGSIZE_BYTES; if (compiler->info.compFlags & CORINFO_FLG_SYNCH) { diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 9281f55471ec78..3d2979e7389355 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8317,14 +8317,14 @@ class Compiler return reg; } - _regMask_enum GetRegMask() const + regMaskTP GetRegMask() const { return regMask; } private: - regNumber reg; - _regMask_enum regMask; + regNumber reg; + regMaskTP regMask; }; VirtualStubParamInfo* virtualStubParamInfo; @@ -9851,7 +9851,8 @@ class Compiler // On these platforms we assume the register that the target is // passed in is preserved by the validator and take care to get the // target from the register for the call (even in debug mode). - static_assert_no_msg((RBM_VALIDATE_INDIRECT_CALL_TRASH & (1 << REG_VALIDATE_INDIRECT_CALL_ADDR)) == 0); + static_assert_no_msg((RBM_VALIDATE_INDIRECT_CALL_TRASH & regMaskTP(1 << REG_VALIDATE_INDIRECT_CALL_ADDR)) == + RBM_NONE); if (JitConfig.JitForceControlFlowGuard()) return true; diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 022e3aa492a0c0..5faaae35ebdd63 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -8787,7 +8787,7 @@ void emitter::emitRecordGCcall(BYTE* codePos, unsigned char callInstrSize) callDsc* call; #ifdef JIT32_GCENCODER - unsigned regs = (unsigned)(emitThisGCrefRegs | emitThisByrefRegs) & ~RBM_INTRET; + unsigned regs = (unsigned)((emitThisGCrefRegs | emitThisByrefRegs) & ~RBM_INTRET).GetIntRegSet(); // The JIT32 GCInfo encoder allows us to (as the comment previously here said): // "Bail if this is a totally boring call", but the GCInfoEncoder/Decoder interface @@ -10062,7 +10062,7 @@ void emitter::emitStackPopLargeStk(BYTE* addr, bool isCall, unsigned char callIn // of callee-saved registers only). 
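// Illustrative sketch, not part of the change: the recurring pattern in this diff is that
// RBM_* constants and mask expressions are now regMaskTP values (which may carry more than
// 64 registers), so call sites that need a plain 64-bit set for one register file narrow
// the mask explicitly before handing it to older interfaces such as the GC encoder. Names
// below are taken from the hunks in this diff; the snippet mirrors emitRecordGCcall.
regMaskTP        liveGCRegs  = (emitThisGCrefRegs | emitThisByrefRegs) & ~RBM_INTRET; // wide mask
unsigned         encoderRegs = (unsigned)liveGCRegs.GetIntRegSet();                   // GPR-only view
SingleTypeRegSet fltTrash    = RBM_FLT_CALLEE_TRASH.GetFloatRegSet();                 // float-only view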
for (unsigned calleeSavedRegIdx = 0; calleeSavedRegIdx < CNT_CALL_GC_REGS; calleeSavedRegIdx++) { - regMaskSmall calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx]; + regMaskTP calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx]; if (emitThisGCrefRegs & calleeSavedRbm) { gcrefRegs |= (1 << calleeSavedRegIdx); diff --git a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp index f714e49bf4e3c6..fd0a27f61818e1 100644 --- a/src/coreclr/jit/emitarm.cpp +++ b/src/coreclr/jit/emitarm.cpp @@ -6867,9 +6867,9 @@ void emitter::emitDispRegmask(int imm, bool encodedPC_LR) } else { - hasPC = (imm & RBM_PC) != 0; - hasLR = (imm & RBM_LR) != 0; - imm &= ~(RBM_PC | RBM_LR); + hasPC = (imm & SRBM_PC) != 0; + hasLR = (imm & SRBM_LR) != 0; + imm &= ~(SRBM_PC | SRBM_LR); } regNumber reg = REG_R0; diff --git a/src/coreclr/jit/emitinl.h b/src/coreclr/jit/emitinl.h index 66a33b813d58fa..26c48e604830b5 100644 --- a/src/coreclr/jit/emitinl.h +++ b/src/coreclr/jit/emitinl.h @@ -403,8 +403,8 @@ inline ssize_t emitter::emitGetInsAmdAny(const instrDesc* id) const /*static*/ inline unsigned emitter::emitDecodeCallGCregs(instrDesc* id) { - unsigned regmask = 0; - unsigned encodeMask; + regMaskTP regmask = RBM_NONE; + unsigned encodeMask; #ifdef TARGET_X86 assert(REGNUM_BITS >= 3); @@ -568,7 +568,7 @@ inline ssize_t emitter::emitGetInsAmdAny(const instrDesc* id) const NYI("unknown target"); #endif - return regmask; + return (unsigned int)regmask.getLow(); } #ifdef TARGET_XARCH diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index 902029791f20c0..a8194fb26c532e 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -4471,8 +4471,8 @@ void GCInfo::gcMakeRegPtrTable( assert(call->u1.cdArgMask == 0 && call->cdArgCnt == 0); // Other than that, we just have to deal with the regmasks. - regMaskSmall gcrefRegMask = call->cdGCrefRegs & RBM_CALL_GC_REGS; - regMaskSmall byrefRegMask = call->cdByrefRegs & RBM_CALL_GC_REGS; + regMaskSmall gcrefRegMask = call->cdGCrefRegs & RBM_CALL_GC_REGS.GetIntRegSet(); + regMaskSmall byrefRegMask = call->cdByrefRegs & RBM_CALL_GC_REGS.GetIntRegSet(); assert((gcrefRegMask & byrefRegMask) == 0); @@ -4558,9 +4558,11 @@ void GCInfo::gcMakeRegPtrTable( { // This is a true call site. 
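// Illustrative sketch, not part of the change: after this renaming, SRBM_<reg> is the raw
// _regMask_enum bit (a plain integer usable in SingleTypeRegSet arithmetic), while
// RBM_<reg> names a constexpr regMaskTP wrapper built from that bit (see the REGDEF
// expansion in target.h further down). Raw bits convert implicitly into regMaskTP via its
// regMaskSmall constructor; going the other way is always explicit. Arm register names are
// used here purely for illustration.
int  encodedRegs = 0;                          // e.g. the arm register-list immediate
bool hasLR = (encodedRegs & SRBM_LR) != 0;     // integer math on the raw bit
regMaskTP        wide = RBM_LR | RBM_PC;       // wrapper math via the overloaded operators
SingleTypeRegSet low  = wide.getLow();         // explicit narrowing back to a 64-bit set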
- regMaskSmall gcrefRegMask = genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallGCrefRegs); + regMaskSmall gcrefRegMask = + genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallGCrefRegs).GetIntRegSet(); - regMaskSmall byrefRegMask = genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallByrefRegs); + regMaskSmall byrefRegMask = + genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallByrefRegs).GetIntRegSet(); assert((gcrefRegMask & byrefRegMask) == 0); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 39c9251c74b6eb..b47ffb394b80bb 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -243,7 +243,7 @@ SingleTypeRegSet LinearScan::allRegs(RegisterType rt) SingleTypeRegSet LinearScan::allByteRegs() { #ifdef TARGET_X86 - return availableIntRegs & RBM_BYTE_REGS; + return availableIntRegs & RBM_BYTE_REGS.GetIntRegSet(); #else return availableIntRegs; #endif @@ -265,7 +265,7 @@ SingleTypeRegSet LinearScan::allSIMDRegs() SingleTypeRegSet LinearScan::lowSIMDRegs() { #if defined(TARGET_AMD64) - return (availableFloatRegs & RBM_LOWFLOAT); + return (availableFloatRegs & RBM_LOWFLOAT.GetFloatRegSet()); #else return availableFloatRegs; #endif @@ -278,7 +278,11 @@ void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPo RefPosition* kill = nextKill; while ((kill != nullptr) && (kill->nodeLocation < nextLocation)) { +#ifdef HAS_MORE_THAN_64_REGISTERS + if (kill->killRegisterAssignment.IsRegNumInMask(regRecord->regNum)) +#else if ((kill->registerAssignment & genSingleTypeRegMask(regRecord->regNum)) != RBM_NONE) +#endif { nextLocation = kill->nodeLocation; break; @@ -449,11 +453,7 @@ SingleTypeRegSet LinearScan::internalFloatRegCandidates() } else { -#ifdef TARGET_AMD64 return RBM_FLT_CALLEE_TRASH.GetFloatRegSet(); -#else - return RBM_FLT_CALLEE_TRASH; -#endif // TARGET_AMD64 } } @@ -526,34 +526,32 @@ SingleTypeRegSet LinearScan::getConstrainedRegMask(RefPosition* refPosition, #if defined(TARGET_AMD64) #ifdef UNIX_AMD64_ABI // On System V the RDI and RSI are not callee saved. Use R12 ans R13 as callee saved registers. -static const SingleTypeRegSet LsraLimitSmallIntSet = - (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13); +static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13); #else // !UNIX_AMD64_ABI // On Windows Amd64 use the RDI and RSI as callee saved registers. -static const SingleTypeRegSet LsraLimitSmallIntSet = - (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI); +static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI); #endif // !UNIX_AMD64_ABI -static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); -static const SingleTypeRegSet LsraLimitUpperSimdSet = +static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); +static const regMaskTP LsraLimitUpperSimdSet = (RBM_XMM16 | RBM_XMM17 | RBM_XMM18 | RBM_XMM19 | RBM_XMM20 | RBM_XMM21 | RBM_XMM22 | RBM_XMM23 | RBM_XMM24 | RBM_XMM25 | RBM_XMM26 | RBM_XMM27 | RBM_XMM28 | RBM_XMM29 | RBM_XMM30 | RBM_XMM31); #elif defined(TARGET_ARM) // On ARM, we may need two registers to set up the target register for a virtual call, so we need // to have at least the maximum number of arg registers, plus 2. 
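// Illustrative sketch, not part of the change: the LsraLimit* stress constants in this
// hunk are built by OR-ing RBM_* wrappers, so they are now regMaskTP. Each use in
// stressLimitRegs then narrows them to the register file of the interval being allocated,
// instead of keeping per-target SingleTypeRegSet definitions.
regMaskTP        limit   = RBM_CALLEE_SAVED;                        // wide, all register files
SingleTypeRegSet intOnly = limit.GetRegSetForType(IntRegisterType);   // just the GPR bits
SingleTypeRegSet fltOnly = limit.GetRegSetForType(FloatRegisterType); // just the float bits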
-static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); -static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); +static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); +static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); #elif defined(TARGET_ARM64) -static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); -static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); +static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); +static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); #elif defined(TARGET_X86) -static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); -static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); +static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); +static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); #elif defined(TARGET_LOONGARCH64) -static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); -static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); +static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); +static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); #elif defined(TARGET_RISCV64) -static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); -static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); +static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); +static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); #else #error Unsupported or unset target architecture #endif // target @@ -596,29 +594,28 @@ SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, RegisterT case LSRA_LIMIT_CALLEE: if (!compiler->opts.compDbgEnC) { - mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_SAVED, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_SAVED.GetRegSetForType(regType), + minRegCount); } break; case LSRA_LIMIT_CALLER: { -#ifdef TARGET_XARCH mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_TRASH.GetRegSetForType(regType), minRegCount); -#else - mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_TRASH, minRegCount); -#endif // TARGET_AMD64 } break; case LSRA_LIMIT_SMALL_SET: if ((mask & LsraLimitSmallIntSet) != RBM_NONE) { - mask = getConstrainedRegMask(refPosition, regType, mask, LsraLimitSmallIntSet, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, + LsraLimitSmallIntSet.GetRegSetForType(regType), minRegCount); } else if ((mask & LsraLimitSmallFPSet) != RBM_NONE) { - mask = getConstrainedRegMask(refPosition, regType, mask, LsraLimitSmallFPSet, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, + LsraLimitSmallFPSet.GetRegSetForType(regType), minRegCount); } break; @@ -626,7 +623,8 @@ SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, RegisterT case LSRA_LIMIT_UPPER_SIMD_SET: if ((mask & LsraLimitUpperSimdSet) != RBM_NONE) { 
- mask = getConstrainedRegMask(refPosition, regType, mask, LsraLimitUpperSimdSet, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, + LsraLimitUpperSimdSet.GetRegSetForType(regType), minRegCount); } break; #endif @@ -847,29 +845,28 @@ LinearScan::LinearScan(Compiler* theCompiler) // Note: one known reason why we exclude LR is because NativeAOT has dependency on not // using LR as a GPR. See: https://github.com/dotnet/runtime/issues/101932 // Once that is addressed, we may consider allowing LR in availableIntRegs. - availableIntRegs = ((RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd.getLow())); + availableIntRegs = + (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd).GetIntRegSet(); #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd.getLow()); + availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd).GetIntRegSet(); #else - availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd.getLow()); + availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd).GetIntRegSet(); #endif #if ETW_EBP_FRAMED - availableIntRegs &= ~RBM_FPBASE; + availableIntRegs &= ~RBM_FPBASE.GetIntRegSet(); #endif // ETW_EBP_FRAMED #ifdef TARGET_AMD64 availableFloatRegs = RBM_ALLFLOAT.GetFloatRegSet(); availableDoubleRegs = RBM_ALLDOUBLE.GetFloatRegSet(); #else - availableFloatRegs = RBM_ALLFLOAT; - availableDoubleRegs = RBM_ALLDOUBLE; + availableFloatRegs = RBM_ALLFLOAT.GetFloatRegSet(); + availableDoubleRegs = RBM_ALLDOUBLE.GetFloatRegSet(); #endif -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) availableMaskRegs = RBM_ALLMASK.GetPredicateRegSet(); -#elif defined(TARGET_ARM64) - availableMaskRegs = RBM_ALLMASK; #endif #if defined(TARGET_AMD64) || defined(TARGET_ARM64) @@ -877,9 +874,14 @@ LinearScan::LinearScan(Compiler* theCompiler) { // When the EnC option is set we have an exact set of registers that we always save // that are also available in future versions. 
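// Illustrative sketch, not part of the change: in the EnC clamp that follows, the
// complement is taken on the wrapper itself (RBM_INT_CALLEE_SAVED is a regMaskTP
// expression, and a matching operator~ overload presumably lives outside this hunk),
// and only the combined result is narrowed to the GPR file:
regMaskTP        keep        = ~RBM_INT_CALLEE_SAVED | RBM_ENC_CALLEE_SAVED; // wide set
SingleTypeRegSet keepGprBits = keep.GetIntRegSet();                          // what EnC may use
availableIntRegs &= keepGprBits;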
- availableIntRegs &= ~RBM_INT_CALLEE_SAVED | RBM_ENC_CALLEE_SAVED; + availableIntRegs &= (~RBM_INT_CALLEE_SAVED | RBM_ENC_CALLEE_SAVED).GetIntRegSet(); +#if defined(UNIX_AMD64_ABI) availableFloatRegs &= ~RBM_FLT_CALLEE_SAVED; availableDoubleRegs &= ~RBM_FLT_CALLEE_SAVED; +#else + availableFloatRegs &= ~RBM_FLT_CALLEE_SAVED.GetFloatRegSet(); + availableDoubleRegs &= ~RBM_FLT_CALLEE_SAVED.GetFloatRegSet(); +#endif // UNIX_AMD64_ABI #if defined(TARGET_XARCH) availableMaskRegs &= ~RBM_MSK_CALLEE_SAVED; #endif // TARGET_XARCH @@ -889,8 +891,8 @@ LinearScan::LinearScan(Compiler* theCompiler) #if defined(TARGET_AMD64) if (compiler->canUseEvexEncoding()) { - availableFloatRegs |= RBM_HIGHFLOAT; - availableDoubleRegs |= RBM_HIGHFLOAT; + availableFloatRegs |= RBM_HIGHFLOAT.GetFloatRegSet(); + availableDoubleRegs |= RBM_HIGHFLOAT.GetFloatRegSet(); } #endif @@ -2813,7 +2815,7 @@ void LinearScan::setFrameType() SingleTypeRegSet removeMask = RBM_NONE; if (frameType == FT_EBP_FRAME) { - removeMask |= RBM_FPBASE; + removeMask |= RBM_FPBASE.GetIntRegSet(); } compiler->rpFrameType = frameType; @@ -2826,7 +2828,7 @@ void LinearScan::setFrameType() compiler->codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD; assert(REG_OPT_RSVD != REG_FP); JITDUMP(" Reserved REG_OPT_RSVD (%s) due to large frame\n", getRegName(REG_OPT_RSVD)); - removeMask |= RBM_OPT_RSVD; + removeMask |= RBM_OPT_RSVD.GetIntRegSet(); } #endif // TARGET_ARMARCH || TARGET_RISCV64 @@ -4039,7 +4041,7 @@ void LinearScan::processKills(RefPosition* killRefPosition) { RefPosition* nextKill = killRefPosition->nextRefPosition; - regMaskTP killedRegs = killRefPosition->registerAssignment; + regMaskTP killedRegs = killRefPosition->getKillRegisterAssignment(); while (killedRegs.IsNonEmpty()) { regNumber killedReg = genFirstRegNumFromMaskAndToggle(killedRegs); @@ -4059,9 +4061,9 @@ void LinearScan::processKills(RefPosition* killRefPosition) updateNextFixedRef(regRecord, regNextRefPos, nextKill); } - regsBusyUntilKill &= ~killRefPosition->registerAssignment; + regsBusyUntilKill &= ~killRefPosition->getKillRegisterAssignment(); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, nullptr, NONE, - killRefPosition->registerAssignment)); + killRefPosition->getKillRegisterAssignment())); } //------------------------------------------------------------------------ @@ -8811,7 +8813,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, { // Exclude any doubles for which the odd half isn't in freeRegs, // and restrict down to just the even part of the even/odd pair. - freeRegs &= (freeRegs & RBM_ALLDOUBLE_HIGH) >> 1; + freeRegs &= (freeRegs & RBM_ALLDOUBLE_HIGH.GetFloatRegSet()) >> 1; } #endif @@ -8822,13 +8824,9 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, else { // Prefer a callee-trashed register if possible to prevent new prolog/epilog saves/restores. - if ((freeRegs & RBM_CALLEE_TRASH) != 0) + if ((freeRegs & RBM_CALLEE_TRASH.GetRegSetForType(type)) != 0) { -#ifdef TARGET_XARCH freeRegs &= RBM_CALLEE_TRASH.GetRegSetForType(type); -#else - freeRegs &= RBM_CALLEE_TRASH; -#endif } regNumber tempReg = genRegNumFromMask(genFindLowestBit(freeRegs), type); @@ -13595,7 +13593,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* // clause below creates a mask to do this. 
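// Illustrative sketch, not part of the change: on arm32 a TYP_DOUBLE occupies an even/odd
// float-register pair. RBM_ALLDOUBLE_HIGH is the set of odd halves, so a busy odd half
// shifted right by one bit lands on its even partner, the register a double candidate
// would be rooted at. The clause below (and its twin in selectMinimal) applies exactly
// this, now narrowing the wrapper to the float file first.
SingleTypeRegSet busyOddHalves  = busyRegs & RBM_ALLDOUBLE_HIGH.GetFloatRegSet();
SingleTypeRegSet blockedDoubles = busyOddHalves >> 1; // odd half -> even root of the pair
candidates &= ~blockedDoubles;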
if (currentInterval->registerType == TYP_DOUBLE) { - candidates &= ~((busyRegs & RBM_ALLDOUBLE_HIGH) >> 1); + candidates &= ~((busyRegs & RBM_ALLDOUBLE_HIGH.GetFloatRegSet()) >> 1); } #endif // TARGET_ARM @@ -13918,7 +13916,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( // clause below creates a mask to do this. if (currentInterval->registerType == TYP_DOUBLE) { - candidates &= ~((busyRegs & RBM_ALLDOUBLE_HIGH) >> 1); + candidates &= ~((busyRegs & RBM_ALLDOUBLE_HIGH.GetFloatRegSet()) >> 1); } #endif // TARGET_ARM diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 62186a3d976ce7..d0a34f6ddcb53b 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2466,16 +2466,25 @@ class RefPosition RefPosition* nextRefPosition; // The remaining fields are common to both options - GenTree* treeNode; + union + { + struct + { + GenTree* treeNode; + + // Prior to the allocation pass, registerAssignment captures the valid registers + // for this RefPosition. + // After the allocation pass, this contains the actual assignment + SingleTypeRegSet registerAssignment; + }; +#ifdef HAS_MORE_THAN_64_REGISTERS + regMaskTP killRegisterAssignment; +#endif + }; unsigned int bbNum; LsraLocation nodeLocation; - // Prior to the allocation pass, registerAssignment captures the valid registers - // for this RefPosition. - // After the allocation pass, this contains the actual assignment - SingleTypeRegSet registerAssignment; - RefType refType; // NOTE: C++ only packs bitfields if the base type is the same. So make all the base @@ -2586,9 +2595,9 @@ class RefPosition : referent(nullptr) , nextRefPosition(nullptr) , treeNode(treeNode) + , registerAssignment(RBM_NONE) , bbNum(bbNum) , nodeLocation(nodeLocation) - , registerAssignment(RBM_NONE) , refType(refType) , multiRegIdx(0) #ifdef TARGET_ARM64 @@ -2653,6 +2662,16 @@ class RefPosition return referent->registerType; } + regMaskTP getKillRegisterAssignment() + { + assert(refType == RefTypeKill); +#ifdef HAS_MORE_THAN_64_REGISTERS + return killRegisterAssignment; +#else + return registerAssignment; +#endif + } + // Returns true if it is a reference on a GenTree node. 
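// Illustrative sketch, not part of the change, mirroring processKills/addKillForRegs in
// lsra.cpp and lsrabuild.cpp: a RefTypeKill position never refers to a tree node, so on
// targets with HAS_MORE_THAN_64_REGISTERS the full-width kill mask overlays the
// treeNode/registerAssignment storage; readers go through getKillRegisterAssignment()
// so its assert catches use on the wrong kind of RefPosition.
void sketchProcessKill(RefPosition* killRefPosition)
{
    regMaskTP killedRegs = killRefPosition->getKillRegisterAssignment(); // asserts RefTypeKill
    while (killedRegs.IsNonEmpty())
    {
        regNumber killedReg = genFirstRegNumFromMaskAndToggle(killedRegs); // peel one register
        // ... free the RegRecord for killedReg here ...
    }
}
// The producer stores the mask the same way (see addKillForRegs):
//   pos->killRegisterAssignment = mask;   // only under HAS_MORE_THAN_64_REGISTERS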
bool IsActualRef() { diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index fc77279eabb75e..a0ed0524b0e4e5 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -482,7 +482,7 @@ int LinearScan::BuildNode(GenTree* tree) { assert(tree->TypeGet() == TYP_INT); srcCount = 1; - BuildUse(tree->gtGetOp1(), RBM_INTRET); + BuildUse(tree->gtGetOp1(), RBM_INTRET.GetIntRegSet()); } break; @@ -628,7 +628,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CATCH_ARG: srcCount = 0; assert(dstCount == 1); - BuildDef(tree, RBM_EXCEPTION_OBJECT); + BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; case GT_COPY: diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 53c543291671ec..ea444c5cea320c 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -238,7 +238,7 @@ SingleTypeRegSet LinearScan::filterConsecutiveCandidates(SingleTypeRegSet float currAvailableRegs &= ~endMask; } while (currAvailableRegs != RBM_NONE); - SingleTypeRegSet v0_v31_mask = RBM_V0 | RBM_V31; + SingleTypeRegSet v0_v31_mask = SRBM_V0 | SRBM_V31; if ((floatCandidates & v0_v31_mask) == v0_v31_mask) { // Finally, check for round robin case where sequence of last register @@ -255,48 +255,48 @@ SingleTypeRegSet LinearScan::filterConsecutiveCandidates(SingleTypeRegSet float { if ((floatCandidates & v0_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V31; + consecutiveResult |= SRBM_V31; overallResult |= v0_v31_mask; } break; } case 3: { - SingleTypeRegSet v0_v30_v31_mask = RBM_V0 | RBM_V30 | RBM_V31; + SingleTypeRegSet v0_v30_v31_mask = SRBM_V0 | SRBM_V30 | SRBM_V31; if ((floatCandidates & v0_v30_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V30; + consecutiveResult |= SRBM_V30; overallResult |= v0_v30_v31_mask; } - SingleTypeRegSet v0_v1_v31_mask = RBM_V0 | RBM_V1 | RBM_V31; + SingleTypeRegSet v0_v1_v31_mask = SRBM_V0 | SRBM_V1 | SRBM_V31; if ((floatCandidates & v0_v1_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V31; + consecutiveResult |= SRBM_V31; overallResult |= v0_v1_v31_mask; } break; } case 4: { - SingleTypeRegSet v0_v29_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; + SingleTypeRegSet v0_v29_v30_v31_mask = SRBM_V0 | SRBM_V29 | SRBM_V30 | SRBM_V31; if ((floatCandidates & v0_v29_v30_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V29; + consecutiveResult |= SRBM_V29; overallResult |= v0_v29_v30_v31_mask; } - SingleTypeRegSet v0_v1_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; + SingleTypeRegSet v0_v1_v30_v31_mask = SRBM_V0 | SRBM_V29 | SRBM_V30 | SRBM_V31; if ((floatCandidates & v0_v1_v30_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V30; + consecutiveResult |= SRBM_V30; overallResult |= v0_v1_v30_v31_mask; } - SingleTypeRegSet v0_v1_v2_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; + SingleTypeRegSet v0_v1_v2_v31_mask = SRBM_V0 | SRBM_V29 | SRBM_V30 | SRBM_V31; if ((floatCandidates & v0_v1_v2_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V31; + consecutiveResult |= SRBM_V31; overallResult |= v0_v1_v2_v31_mask; } break; @@ -429,8 +429,8 @@ SingleTypeRegSet LinearScan::getConsecutiveCandidates(SingleTypeRegSet allCandi if (getStressLimitRegs() != LSRA_LIMIT_NONE) { // For stress, make only alternate registers available so we can stress the selection of free/busy registers. 
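// Illustrative sketch, not part of the change: the arm64 consecutive-register search in
// filterConsecutiveCandidates works on a plain SingleTypeRegSet of float registers, so the
// raw SRBM_V* bits are combined directly. For example, a two-register sequence that wraps
// around from V31 to V0 is recognised by requiring both end bits at once:
SingleTypeRegSet wrapPair      = SRBM_V0 | SRBM_V31;
bool             canWrapAround = (floatCandidates & wrapPair) == wrapPair;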
- floatFreeCandidates &= (RBM_V0 | RBM_V2 | RBM_V4 | RBM_V6 | RBM_V8 | RBM_V10 | RBM_V12 | RBM_V14 | RBM_V16 | - RBM_V18 | RBM_V20 | RBM_V22 | RBM_V24 | RBM_V26 | RBM_V28 | RBM_V30); + floatFreeCandidates &= SRBM_V0 | SRBM_V2 | SRBM_V4 | SRBM_V6 | SRBM_V8 | SRBM_V10 | SRBM_V12 | SRBM_V14 | + SRBM_V16 | SRBM_V18 | SRBM_V20 | SRBM_V22 | SRBM_V24 | SRBM_V26 | SRBM_V28 | SRBM_V30; } #endif @@ -744,7 +744,7 @@ int LinearScan::BuildNode(GenTree* tree) #ifdef SWIFT_SUPPORT case GT_SWIFT_ERROR_RET: - BuildUse(tree->gtGetOp1(), RBM_SWIFT_ERROR); + BuildUse(tree->gtGetOp1(), RBM_SWIFT_ERROR.GetIntRegSet()); // Plus one for error register srcCount = BuildReturn(tree) + 1; killMask = getKillSetForReturn(); @@ -762,7 +762,7 @@ int LinearScan::BuildNode(GenTree* tree) { assert(tree->TypeGet() == TYP_INT); srcCount = 1; - BuildUse(tree->gtGetOp1(), RBM_INTRET); + BuildUse(tree->gtGetOp1(), RBM_INTRET.GetIntRegSet()); } break; @@ -1266,7 +1266,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CATCH_ARG: srcCount = 0; assert(dstCount == 1); - BuildDef(tree, RBM_EXCEPTION_OBJECT); + BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; case GT_INDEX_ADDR: @@ -1302,7 +1302,7 @@ int LinearScan::BuildNode(GenTree* tree) // and we know REG_SWIFT_ERROR should be busy up to this point, anyway. // By forcing LSRA to use REG_SWIFT_ERROR as both the source and destination register, // we can ensure the redundant move is elided. - BuildDef(tree, RBM_SWIFT_ERROR); + BuildDef(tree, RBM_SWIFT_ERROR.GetIntRegSet()); break; #endif // SWIFT_SUPPORT @@ -1583,7 +1583,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else if (HWIntrinsicInfo::IsMaskedOperation(intrin.id)) { - SingleTypeRegSet predMask = RBM_ALLMASK; + SingleTypeRegSet predMask = RBM_ALLMASK.GetPredicateRegSet(); if (intrin.id == NI_Sve_ConditionalSelect) { // If this is conditional select, make sure to check the embedded @@ -1597,13 +1597,13 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou const HWIntrinsic intrinEmb(embOp2Node); if (HWIntrinsicInfo::IsLowMaskedOperation(intrinEmb.id)) { - predMask = RBM_LOWMASK; + predMask = RBM_LOWMASK.GetPredicateRegSet(); } } } else if (HWIntrinsicInfo::IsLowMaskedOperation(intrin.id)) { - predMask = RBM_LOWMASK; + predMask = RBM_LOWMASK.GetPredicateRegSet(); } srcCount += BuildOperandUses(intrin.op1, predMask); @@ -1639,12 +1639,13 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (isRMW) { srcCount += BuildDelayFreeUses(intrin.op2, nullptr); - srcCount += BuildDelayFreeUses(intrin.op3, nullptr, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS); + srcCount += + BuildDelayFreeUses(intrin.op3, nullptr, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS.GetFloatRegSet()); } else { srcCount += BuildOperandUses(intrin.op2); - srcCount += BuildOperandUses(intrin.op3, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS); + srcCount += BuildOperandUses(intrin.op3, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS.GetFloatRegSet()); } if (intrin.op4 != nullptr) @@ -1659,7 +1660,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { assert(!isRMW); - srcCount += BuildOperandUses(intrin.op2, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS); + srcCount += BuildOperandUses(intrin.op2, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS.GetFloatRegSet()); if (intrin.op3 != nullptr) { @@ -1976,7 +1977,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (intrin.op2->gtType == TYP_MASK) { 
assert(lowVectorOperandNum != 2); - candidates = RBM_ALLMASK; + candidates = RBM_ALLMASK.GetPredicateRegSet(); } if (forceOp2DelayFree) @@ -2309,12 +2310,12 @@ void LinearScan::getLowVectorOperandAndCandidates(HWIntrinsic intrin, size_t* op if (baseElementSize == 8) { - *candidates = RBM_SVE_INDEXED_D_ELEMENT_ALLOWED_REGS; + *candidates = RBM_SVE_INDEXED_D_ELEMENT_ALLOWED_REGS.GetFloatRegSet(); } else { assert(baseElementSize == 4); - *candidates = RBM_SVE_INDEXED_S_ELEMENT_ALLOWED_REGS; + *candidates = RBM_SVE_INDEXED_S_ELEMENT_ALLOWED_REGS.GetFloatRegSet(); } switch (intrin.id) diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 323dea8d4809a9..a99ef06cc3e578 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -171,7 +171,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { // Fast tail call - make sure that call target is always computed in volatile registers // that will not be overridden by epilog sequence. - ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH & ~RBM_LR; + ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH.GetIntRegSet() & ~SRBM_LR; if (compiler->getNeedsGSSecurityCookie()) { ctrlExprCandidates &= @@ -186,7 +186,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { // For R2R and VSD we have stub address in REG_R2R_INDIRECT_PARAM // and will load call address into the temp register from this register. - SingleTypeRegSet candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + SingleTypeRegSet candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH.getLow(); assert(candidates != RBM_NONE); buildInternalIntRegisterDefForNode(call, candidates); } @@ -214,7 +214,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // the target. We do not handle these constraints on the same // refposition too well so we help ourselves a bit here by forcing the // null check with LR. - SingleTypeRegSet candidates = call->IsFastTailCall() ? RBM_LR : RBM_NONE; + SingleTypeRegSet candidates = call->IsFastTailCall() ? SRBM_LR : RBM_NONE; buildInternalIntRegisterDefForNode(call, candidates); } #endif // TARGET_ARM @@ -228,7 +228,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { // The ARM CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers. - singleDstCandidates = RBM_PINVOKE_TCB; + singleDstCandidates = RBM_PINVOKE_TCB.GetIntRegSet(); } else #endif // TARGET_ARM @@ -236,15 +236,15 @@ int LinearScan::BuildCall(GenTreeCall* call) { if (varTypeUsesFloatArgReg(registerType)) { - singleDstCandidates = RBM_FLOATRET; + singleDstCandidates = RBM_FLOATRET.GetFloatRegSet(); } else if (registerType == TYP_LONG) { - singleDstCandidates = RBM_LNGRET; + singleDstCandidates = RBM_LNGRET.GetIntRegSet(); } else { - singleDstCandidates = RBM_INTRET; + singleDstCandidates = RBM_INTRET.GetIntRegSet(); } } @@ -370,7 +370,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // Don't assign the call target to any of the argument registers because // we will use them to also pass floating point arguments as required // by Arm64 ABI. - ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS); + ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS.GetIntRegSet()); } if (ctrlExpr != nullptr) @@ -698,7 +698,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // a temporary register to perform the sequence of loads and stores. 
// We can't use the special Write Barrier registers, so exclude them from the mask SingleTypeRegSet internalIntCandidates = - allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); + allRegs(TYP_INT) & + ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF).GetRegSetForType(IntRegisterType); buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); if (size >= 2 * REGSIZE_BYTES) @@ -716,7 +717,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) } // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF. - dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF; + dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF.GetIntRegSet(); // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, @@ -724,7 +725,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (srcAddrOrFill != nullptr) { assert(!srcAddrOrFill->isContained()); - srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF; + srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF.GetIntRegSet(); } } break; @@ -881,7 +882,7 @@ int LinearScan::BuildCast(GenTreeCast* cast) // Floating point to integer casts requires a temporary register. if (varTypeIsFloating(srcType) && !varTypeIsFloating(castType)) { - buildInternalFloatRegisterDefForNode(cast, RBM_ALLFLOAT); + buildInternalFloatRegisterDefForNode(cast, RBM_ALLFLOAT.GetFloatRegSet()); setInternalRegsDelayFree = true; } #endif diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index e130d9fc600cf6..d711922718d81a 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -626,7 +626,7 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, if (theInterval != nullptr && theInterval->isLocalVar && compiler->compMethodRequiresPInvokeFrame() && theInterval->varNum == compiler->genReturnLocal) { - mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME); + mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME).GetRegSetForType(theInterval->registerType); noway_assert(mask != RBM_NONE); } #endif // !TARGET_AMD64 @@ -710,6 +710,10 @@ void LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask.getLow()); +#ifdef HAS_MORE_THAN_64_REGISTERS + pos->killRegisterAssignment = mask; +#endif + *killTail = pos; killTail = &pos->nextRefPosition; } @@ -853,21 +857,21 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) #if defined(TARGET_XARCH) #ifdef TARGET_AMD64 - killMask.RemoveRegsetForType(RBM_FLT_CALLEE_TRASH.getLow(), FloatRegisterType); - killMask.RemoveRegsetForType(RBM_MSK_CALLEE_TRASH.getLow(), MaskRegisterType); + killMask.RemoveRegsetForType(RBM_FLT_CALLEE_TRASH.GetFloatRegSet(), FloatRegisterType); + killMask.RemoveRegsetForType(RBM_MSK_CALLEE_TRASH.GetPredicateRegSet(), MaskRegisterType); #else - killMask.RemoveRegsetForType(RBM_FLT_CALLEE_TRASH, FloatRegisterType); + killMask.RemoveRegsetForType(RBM_FLT_CALLEE_TRASH.GetFloatRegSet(), FloatRegisterType); killMask &= ~RBM_MSK_CALLEE_TRASH; #endif // TARGET_AMD64 #else - killMask.RemoveRegsetForType(RBM_FLT_CALLEE_TRASH, FloatRegisterType); + killMask.RemoveRegsetForType(RBM_FLT_CALLEE_TRASH.GetFloatRegSet(), FloatRegisterType); #endif // TARGET_XARCH } #ifdef TARGET_ARM if (call->IsVirtualStub()) { - killMask.AddGprRegs(compiler->virtualStubParamInfo->GetRegMask()); + killMask.AddGprRegs(compiler->virtualStubParamInfo->GetRegMask().GetIntRegSet()); } #else // 
!TARGET_ARM // Verify that the special virtual stub call registers are in the kill mask. @@ -883,7 +887,7 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) // so don't use the register post-call until it is consumed by SwiftError. if (call->HasSwiftErrorHandling()) { - killMask.AddGprRegs(RBM_SWIFT_ERROR); + killMask.AddGprRegs(RBM_SWIFT_ERROR.GetIntRegSet()); } #endif // SWIFT_SUPPORT @@ -919,7 +923,7 @@ regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode) if (isCopyBlk) { // rep movs kills RCX, RDI and RSI - killMask.AddGprRegs(RBM_RCX | RBM_RDI | RBM_RSI); + killMask.AddGprRegs(SRBM_RCX | SRBM_RDI | SRBM_RSI); } else { @@ -927,7 +931,7 @@ regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode) // (Note that the Data() node, if not constant, will be assigned to // RCX, but it's find that this kills it, as the value is not available // after this node in any case.) - killMask.AddGprRegs(RBM_RDI | RBM_RCX); + killMask.AddGprRegs(SRBM_RDI | SRBM_RCX); } break; #endif @@ -1199,9 +1203,9 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo if (compiler->killGCRefs(tree)) { - RefPosition* pos = - newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree, (availableIntRegs & ~RBM_ARG_REGS)); - insertedKills = true; + RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree, + (availableIntRegs & ~RBM_ARG_REGS.GetIntRegSet())); + insertedKills = true; } return insertedKills; @@ -1517,8 +1521,8 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, if (!varInterval->isPartiallySpilled) { Interval* upperVectorInterval = getUpperVectorInterval(varIndex); - RefPosition* pos = - newRefPosition(upperVectorInterval, currentLoc, RefTypeUpperVectorSave, tree, RBM_FLT_CALLEE_SAVED); + RefPosition* pos = newRefPosition(upperVectorInterval, currentLoc, RefTypeUpperVectorSave, tree, + RBM_FLT_CALLEE_SAVED.GetFloatRegSet()); varInterval->isPartiallySpilled = true; pos->skipSaveRestore = blockAlwaysReturn; pos->liveVarUpperSave = VarSetOps::IsMember(compiler, liveLargeVectors, varIndex); @@ -1575,7 +1579,7 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, if (listNode->ref->getInterval()->recentRefPosition->refType != RefTypeUpperVectorSave) { RefPosition* pos = newRefPosition(listNode->ref->getInterval(), currentLoc, RefTypeUpperVectorSave, - tree, RBM_FLT_CALLEE_SAVED); + tree, RBM_FLT_CALLEE_SAVED.GetFloatRegSet()); } } } @@ -2393,7 +2397,7 @@ void LinearScan::buildIntervals() // If there is a secret stub param, it is also live in if (compiler->info.compPublishStubParam) { - intRegState->rsCalleeRegArgMaskLiveIn.AddGprRegs(RBM_SECRET_STUB_PARAM); + intRegState->rsCalleeRegArgMaskLiveIn.AddGprRegs(RBM_SECRET_STUB_PARAM.GetIntRegSet()); LclVarDsc* stubParamDsc = compiler->lvaGetDesc(compiler->lvaStubArgumentVar); if (isCandidateVar(stubParamDsc)) @@ -3121,7 +3125,7 @@ RefPosition* LinearScan::BuildDef(GenTree* tree, SingleTypeRegSet dstCandidates, { dstCandidates = availableIntRegs; } - dstCandidates &= ~RBM_NON_BYTE_REGS; + dstCandidates &= ~RBM_NON_BYTE_REGS.GetIntRegSet(); assert(dstCandidates != RBM_NONE); } #endif // TARGET_X86 @@ -4200,8 +4204,8 @@ int LinearScan::BuildReturn(GenTree* tree) assert((op1->OperGet() == GT_LONG) && op1->isContained()); GenTree* loVal = op1->gtGetOp1(); GenTree* hiVal = op1->gtGetOp2(); - BuildUse(loVal, RBM_LNGRET_LO); - BuildUse(hiVal, RBM_LNGRET_HI); + BuildUse(loVal, RBM_LNGRET_LO.GetIntRegSet()); + BuildUse(hiVal, 
RBM_LNGRET_HI.GetIntRegSet()); return 2; } else @@ -4214,7 +4218,7 @@ int LinearScan::BuildReturn(GenTree* tree) #ifdef TARGET_ARM64 if (varTypeIsSIMD(tree) && !op1->IsMultiRegLclVar()) { - BuildUse(op1, RBM_DOUBLERET); + BuildUse(op1, RBM_DOUBLERET.GetFloatRegSet()); return 1; } #endif // TARGET_ARM64 @@ -4300,21 +4304,25 @@ int LinearScan::BuildReturn(GenTree* tree) useCandidates = RBM_NONE; break; case TYP_FLOAT: +#ifdef TARGET_X86 useCandidates = RBM_FLOATRET; +#else + useCandidates = RBM_FLOATRET.GetFloatRegSet(); +#endif break; case TYP_DOUBLE: // We ONLY want the valid double register in the RBM_DOUBLERET mask. #ifdef TARGET_AMD64 useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE).GetFloatRegSet(); #else - useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE); + useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE).GetFloatRegSet(); #endif // TARGET_AMD64 break; case TYP_LONG: - useCandidates = RBM_LNGRET; + useCandidates = RBM_LNGRET.GetIntRegSet(); break; default: - useCandidates = RBM_INTRET; + useCandidates = RBM_INTRET.GetIntRegSet(); break; } BuildUse(op1, useCandidates); @@ -4504,8 +4512,8 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree) // is an indir through an lea, we need to actually instantiate the // lea in a register assert(!addr->isContained() && !src->isContained()); - SingleTypeRegSet addrCandidates = RBM_WRITE_BARRIER_DST; - SingleTypeRegSet srcCandidates = RBM_WRITE_BARRIER_SRC; + SingleTypeRegSet addrCandidates = RBM_WRITE_BARRIER_DST.GetIntRegSet(); + SingleTypeRegSet srcCandidates = RBM_WRITE_BARRIER_SRC.GetIntRegSet(); #if defined(TARGET_X86) && NOGC_WRITE_BARRIERS @@ -4515,8 +4523,8 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree) // Special write barrier: // op1 (addr) goes into REG_OPTIMIZED_WRITE_BARRIER_DST (rdx) and // op2 (src) goes into any int register. 
- addrCandidates = RBM_OPTIMIZED_WRITE_BARRIER_DST; - srcCandidates = RBM_OPTIMIZED_WRITE_BARRIER_SRC; + addrCandidates = RBM_OPTIMIZED_WRITE_BARRIER_DST.GetIntRegSet(); + srcCandidates = RBM_OPTIMIZED_WRITE_BARRIER_SRC.GetIntRegSet(); } #endif // defined(TARGET_X86) && NOGC_WRITE_BARRIERS diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index dd2805be4bcbe1..0d5c629b7843c6 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -183,7 +183,7 @@ int LinearScan::BuildNode(GenTree* tree) { assert(tree->TypeGet() == TYP_INT); srcCount = 1; - BuildUse(tree->gtGetOp1(), RBM_INTRET); + BuildUse(tree->gtGetOp1(), RBM_INTRET.GetIntRegSet()); } break; @@ -560,7 +560,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CATCH_ARG: srcCount = 0; assert(dstCount == 1); - BuildDef(tree, RBM_EXCEPTION_OBJECT); + BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; case GT_INDEX_ADDR: @@ -734,7 +734,7 @@ int LinearScan::BuildCall(GenTreeCall* call) SingleTypeRegSet candidates = RBM_NONE; if (call->IsFastTailCall()) { - candidates = (allRegs(TYP_INT) & (RBM_INT_CALLEE_TRASH & ~RBM_GSCOOKIE_TMP)); + candidates = (allRegs(TYP_INT) & (RBM_INT_CALLEE_TRASH & ~RBM_GSCOOKIE_TMP).GetIntRegSet()); assert(candidates != RBM_NONE); } @@ -749,15 +749,15 @@ int LinearScan::BuildCall(GenTreeCall* call) { if (varTypeUsesFloatArgReg(registerType)) { - singleDstCandidates = RBM_FLOATRET; + singleDstCandidates = RBM_FLOATRET.GetFloatRegSet(); } else if (registerType == TYP_LONG) { - singleDstCandidates = RBM_LNGRET; + singleDstCandidates = RBM_LNGRET.GetIntRegSet(); } else { - singleDstCandidates = RBM_INTRET; + singleDstCandidates = RBM_INTRET.GetIntRegSet(); } } @@ -857,7 +857,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // Don't assign the call target to any of the argument registers because // we will use them to also pass floating point arguments as required // by LOONGARCH64 ABI. - ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS); + ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS.GetIntRegSet()); } if (ctrlExpr != nullptr) @@ -1141,7 +1141,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // a temporary register to perform the sequence of loads and stores. // We can't use the special Write Barrier registers, so exclude them from the mask SingleTypeRegSet internalIntCandidates = - allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); + allRegs(TYP_INT) & + ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF).GetRegSetForType(IntRegisterType); buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); if (size >= 2 * REGSIZE_BYTES) @@ -1152,7 +1153,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) } // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF. - dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF; + dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF.GetIntRegSet(); // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. 
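// Illustrative sketch, not part of the change: in the block-store hunks above, member
// access binds tighter than ~, so the exclusion is computed on the already narrowed
// 64-bit set rather than on the wide wrapper:
SingleTypeRegSet excluded =
    (RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF).GetRegSetForType(IntRegisterType);
SingleTypeRegSet internalIntCandidates = allRegs(TYP_INT) & ~excluded; // same as the one-liner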
// Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, @@ -1160,7 +1161,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (srcAddrOrFill != nullptr) { assert(!srcAddrOrFill->isContained()); - srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF; + srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF.GetIntRegSet(); } } break; diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index 2983c035fe8d1a..2ea90408f41342 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -184,7 +184,7 @@ int LinearScan::BuildNode(GenTree* tree) { assert(tree->TypeGet() == TYP_INT); srcCount = 1; - BuildUse(tree->gtGetOp1(), RBM_INTRET); + BuildUse(tree->gtGetOp1(), RBM_INTRET.GetIntRegSet()); } break; @@ -702,7 +702,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CATCH_ARG: srcCount = 0; assert(dstCount == 1); - BuildDef(tree, RBM_EXCEPTION_OBJECT); + BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; case GT_INDEX_ADDR: @@ -882,7 +882,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { // Fast tail call - make sure that call target is always computed in volatile registers // that will not be overridden by epilog sequence. - ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH.GetIntRegSet(); if (compiler->getNeedsGSSecurityCookie()) { ctrlExprCandidates &= @@ -898,7 +898,7 @@ int LinearScan::BuildCall(GenTreeCall* call) SingleTypeRegSet candidates = RBM_NONE; if (call->IsFastTailCall()) { - candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH.GetIntRegSet(); assert(candidates != RBM_NONE); } @@ -913,15 +913,15 @@ int LinearScan::BuildCall(GenTreeCall* call) { if (varTypeUsesFloatArgReg(registerType)) { - singleDstCandidates = RBM_FLOATRET; + singleDstCandidates = RBM_FLOATRET.GetFloatRegSet(); } else if (registerType == TYP_LONG) { - singleDstCandidates = RBM_LNGRET; + singleDstCandidates = RBM_LNGRET.GetIntRegSet(); } else { - singleDstCandidates = RBM_INTRET; + singleDstCandidates = RBM_INTRET.GetIntRegSet(); } } @@ -1021,7 +1021,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // Don't assign the call target to any of the argument registers because // we will use them to also pass floating point arguments as required // by RISCV64 ABI. - ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS); + ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS.GetIntRegSet()); } if (ctrlExpr != nullptr) @@ -1298,7 +1298,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // a temporary register to perform the sequence of loads and stores. // We can't use the special Write Barrier registers, so exclude them from the mask SingleTypeRegSet internalIntCandidates = - allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); + allRegs(TYP_INT) & + ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF).GetRegSetForType(IntRegisterType); buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); if (size >= 2 * REGSIZE_BYTES) @@ -1309,7 +1310,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) } // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF. - dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF; + dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF.GetIntRegSet(); // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. 
// Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, @@ -1317,7 +1318,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (srcAddrOrFill != nullptr) { assert(!srcAddrOrFill->isContained()); - srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF; + srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF.GetIntRegSet(); } } break; diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index d538f43ed3b83e..1bec0beebe6b69 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -195,7 +195,7 @@ int LinearScan::BuildNode(GenTree* tree) #ifdef SWIFT_SUPPORT case GT_SWIFT_ERROR_RET: - BuildUse(tree->gtGetOp1(), RBM_SWIFT_ERROR); + BuildUse(tree->gtGetOp1(), RBM_SWIFT_ERROR.GetIntRegSet()); // Plus one for error register srcCount = BuildReturn(tree) + 1; killMask = getKillSetForReturn(); @@ -213,7 +213,7 @@ int LinearScan::BuildNode(GenTree* tree) { assert(tree->TypeGet() == TYP_INT); srcCount = 1; - BuildUse(tree->gtGetOp1(), RBM_INTRET); + BuildUse(tree->gtGetOp1(), RBM_INTRET.GetIntRegSet()); } break; @@ -442,11 +442,11 @@ int LinearScan::BuildNode(GenTree* tree) // Comparand is preferenced to RAX. // The remaining two operands can be in any reg other than RAX. - const SingleTypeRegSet nonRaxCandidates = availableIntRegs & ~RBM_RAX; + SingleTypeRegSet nonRaxCandidates = availableIntRegs & ~SRBM_RAX; BuildUse(addr, nonRaxCandidates); - BuildUse(data, varTypeIsByte(tree) ? (nonRaxCandidates & RBM_BYTE_REGS) : nonRaxCandidates); - BuildUse(comparand, RBM_RAX); - BuildDef(tree, RBM_RAX); + BuildUse(data, varTypeIsByte(tree) ? (nonRaxCandidates & RBM_BYTE_REGS.GetIntRegSet()) : nonRaxCandidates); + BuildUse(comparand, SRBM_RAX); + BuildDef(tree, SRBM_RAX); } break; @@ -461,10 +461,10 @@ int LinearScan::BuildNode(GenTree* tree) assert(!varTypeIsByte(data)); // if tree's value is used, we'll emit a cmpxchg-loop idiom (requires RAX) - buildInternalIntRegisterDefForNode(tree, availableIntRegs & ~RBM_RAX); - BuildUse(addr, availableIntRegs & ~RBM_RAX); - BuildUse(data, availableIntRegs & ~RBM_RAX); - BuildDef(tree, RBM_RAX); + buildInternalIntRegisterDefForNode(tree, availableIntRegs & ~SRBM_RAX); + BuildUse(addr, availableIntRegs & ~SRBM_RAX); + BuildUse(data, availableIntRegs & ~SRBM_RAX); + BuildDef(tree, SRBM_RAX); buildInternalRegisterUses(); srcCount = 2; assert(dstCount == 1); @@ -485,7 +485,7 @@ int LinearScan::BuildNode(GenTree* tree) setDelayFree(addrUse); tgtPrefUse = addrUse; assert(!data->isContained()); - BuildUse(data, varTypeIsByte(tree) ? RBM_BYTE_REGS : RBM_NONE); + BuildUse(data, varTypeIsByte(tree) ? RBM_BYTE_REGS.GetIntRegSet() : RBM_NONE); srcCount = 2; assert(dstCount == 1); BuildDef(tree); @@ -577,7 +577,7 @@ int LinearScan::BuildNode(GenTree* tree) if (varTypeIsByte(tree)) { // on X86 we have to use byte-able regs for byte-wide loads - BuildUse(tree->gtGetOp1(), RBM_BYTE_REGS); + BuildUse(tree->gtGetOp1(), RBM_BYTE_REGS.GetIntRegSet()); srcCount = 1; break; } @@ -597,7 +597,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CATCH_ARG: srcCount = 0; assert(dstCount == 1); - BuildDef(tree, RBM_EXCEPTION_OBJECT); + BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; #if defined(FEATURE_EH_WINDOWS_X86) @@ -652,7 +652,7 @@ int LinearScan::BuildNode(GenTree* tree) // and we know REG_SWIFT_ERROR should be busy up to this point, anyway. // By forcing LSRA to use REG_SWIFT_ERROR as both the source and destination register, // we can ensure the redundant move is elided. 
- BuildDef(tree, RBM_SWIFT_ERROR); + BuildDef(tree, RBM_SWIFT_ERROR.GetIntRegSet()); break; #endif // SWIFT_SUPPORT @@ -1065,8 +1065,8 @@ int LinearScan::BuildShiftRotate(GenTree* tree) #endif else { - srcCandidates = availableIntRegs & ~RBM_RCX; - dstCandidates = availableIntRegs & ~RBM_RCX; + srcCandidates = availableIntRegs & ~SRBM_RCX; + dstCandidates = availableIntRegs & ~SRBM_RCX; } // Note that Rotate Left/Right instructions don't set ZF and SF flags. @@ -1120,8 +1120,8 @@ int LinearScan::BuildShiftRotate(GenTree* tree) { if (!shiftBy->isContained()) { - srcCount += BuildDelayFreeUses(shiftBy, source, RBM_RCX); - buildKillPositionsForNode(tree, currentLoc + 1, RBM_RCX); + srcCount += BuildDelayFreeUses(shiftBy, source, SRBM_RCX); + buildKillPositionsForNode(tree, currentLoc + 1, SRBM_RCX); } BuildDef(tree, dstCandidates); } @@ -1129,8 +1129,8 @@ int LinearScan::BuildShiftRotate(GenTree* tree) { if (!shiftBy->isContained()) { - srcCount += BuildOperandUses(shiftBy, RBM_RCX); - buildKillPositionsForNode(tree, currentLoc + 1, RBM_RCX); + srcCount += BuildOperandUses(shiftBy, SRBM_RCX); + buildKillPositionsForNode(tree, currentLoc + 1, SRBM_RCX); } } return srcCount; @@ -1185,7 +1185,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the // correct argument registers. - singleDstCandidates = RBM_PINVOKE_TCB; + singleDstCandidates = RBM_PINVOKE_TCB.GetIntRegSet(); } else #endif // TARGET_X86 @@ -1197,7 +1197,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // The return value will be on the X87 stack, and we will need to move it. singleDstCandidates = allRegs(registerType); #else // !TARGET_X86 - singleDstCandidates = RBM_FLOATRET; + singleDstCandidates = RBM_FLOATRET.GetFloatRegSet(); #endif // !TARGET_X86 } else @@ -1206,11 +1206,11 @@ int LinearScan::BuildCall(GenTreeCall* call) if (registerType == TYP_LONG) { - singleDstCandidates = RBM_LNGRET; + singleDstCandidates = RBM_LNGRET.GetIntRegSet(); } else { - singleDstCandidates = RBM_INTRET; + singleDstCandidates = RBM_INTRET.GetIntRegSet(); } } } @@ -1329,7 +1329,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { // Fast tail call - make sure that call target is always computed in volatile registers // that will not be restored in the epilog sequence. - ctrlExprCandidates = RBM_INT_CALLEE_TRASH; + ctrlExprCandidates = RBM_INT_CALLEE_TRASH.GetIntRegSet(); } #ifdef TARGET_X86 else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT)) @@ -1342,7 +1342,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // Where EAX is also used as an argument to the stub dispatch helper. Make // sure that the call target address is computed into EAX in this case. assert(ctrlExpr->isIndir() && ctrlExpr->isContained()); - ctrlExprCandidates = RBM_VIRTUAL_STUB_TARGET; + ctrlExprCandidates = RBM_VIRTUAL_STUB_TARGET.GetIntRegSet(); } #endif // TARGET_X86 @@ -1353,7 +1353,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // Don't assign the call target to any of the argument registers because // we will use them to also pass floating point arguments as required // by Amd64 ABI. 
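// Illustrative sketch, not part of the change: where LSRA already holds a
// SingleTypeRegSet (availableIntRegs, candidate masks), the raw SRBM_* bits are used
// directly; the RBM_* wrappers only appear when a wide mask has to be narrowed first.
// The variable-shift constraint above is the typical shape:
SingleTypeRegSet srcCandidates = availableIntRegs & ~SRBM_RCX; // keep the value out of RCX
srcCount += BuildOperandUses(shiftBy, SRBM_RCX);               // the shift count must be in RCX
buildKillPositionsForNode(tree, currentLoc + 1, SRBM_RCX);     // and RCX is killed afterwards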
- ctrlExprCandidates = availableIntRegs & ~(RBM_ARG_REGS); + ctrlExprCandidates = availableIntRegs & ~(RBM_ARG_REGS.GetIntRegSet()); } srcCount += BuildOperandUses(ctrlExpr, ctrlExprCandidates); } @@ -1487,9 +1487,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) break; case GenTreeBlk::BlkOpKindRepInstr: - dstAddrRegMask = RBM_RDI; - srcRegMask = RBM_RAX; - sizeRegMask = RBM_RCX; + dstAddrRegMask = SRBM_RDI; + srcRegMask = SRBM_RAX; + sizeRegMask = SRBM_RCX; break; case GenTreeBlk::BlkOpKindLoop: @@ -1513,13 +1513,13 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) { case GenTreeBlk::BlkOpKindCpObjRepInstr: // We need the size of the contiguous Non-GC-region to be in RCX to call rep movsq. - sizeRegMask = RBM_RCX; + sizeRegMask = SRBM_RCX; FALLTHROUGH; case GenTreeBlk::BlkOpKindCpObjUnroll: // The srcAddr must be in a register. If it was under a GT_IND, we need to subsume all of its sources. - dstAddrRegMask = RBM_RDI; - srcRegMask = RBM_RSI; + dstAddrRegMask = SRBM_RDI; + srcRegMask = SRBM_RSI; break; case GenTreeBlk::BlkOpKindUnroll: @@ -1612,9 +1612,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) break; case GenTreeBlk::BlkOpKindRepInstr: - dstAddrRegMask = RBM_RDI; - srcRegMask = RBM_RSI; - sizeRegMask = RBM_RCX; + dstAddrRegMask = SRBM_RDI; + srcRegMask = SRBM_RSI; + sizeRegMask = SRBM_RCX; break; default: @@ -1676,7 +1676,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (internalIsByte && (useCount >= BYTE_REG_COUNT)) { noway_assert(internalIntDef != nullptr); - internalIntDef->registerAssignment = RBM_RAX; + internalIntDef->registerAssignment = SRBM_RAX; } #endif @@ -1834,9 +1834,9 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk) #ifndef TARGET_X86 case GenTreePutArgStk::Kind::PartialRepInstr: #endif - buildInternalIntRegisterDefForNode(putArgStk, RBM_RDI); - buildInternalIntRegisterDefForNode(putArgStk, RBM_RCX); - buildInternalIntRegisterDefForNode(putArgStk, RBM_RSI); + buildInternalIntRegisterDefForNode(putArgStk, SRBM_RDI); + buildInternalIntRegisterDefForNode(putArgStk, SRBM_RCX); + buildInternalIntRegisterDefForNode(putArgStk, SRBM_RSI); break; #ifdef TARGET_X86 @@ -1931,13 +1931,13 @@ int LinearScan::BuildModDiv(GenTree* tree) { // We are interested in just the remainder. // RAX is used as a trashable register during computation of remainder. - dstCandidates = RBM_RDX; + dstCandidates = SRBM_RDX; } else { // We are interested in just the quotient. // RDX gets used as trashable register during computation of quotient - dstCandidates = RBM_RAX; + dstCandidates = SRBM_RAX; } #ifdef TARGET_X86 @@ -1956,20 +1956,20 @@ int LinearScan::BuildModDiv(GenTree* tree) // This situation also requires an internal register. buildInternalIntRegisterDefForNode(tree); - BuildUse(loVal, RBM_EAX); - BuildUse(hiVal, RBM_EDX); + BuildUse(loVal, SRBM_EAX); + BuildUse(hiVal, SRBM_EDX); srcCount = 2; } else #endif { // If possible would like to have op1 in RAX to avoid a register move. 
- RefPosition* op1Use = BuildUse(op1, RBM_EAX); + RefPosition* op1Use = BuildUse(op1, SRBM_EAX); tgtPrefUse = op1Use; srcCount = 1; } - srcCount += BuildDelayFreeUses(op2, op1, availableIntRegs & ~(RBM_RAX | RBM_RDX)); + srcCount += BuildDelayFreeUses(op2, op1, availableIntRegs & ~(SRBM_RAX | SRBM_RDX)); buildInternalRegisterUses(); @@ -2312,7 +2312,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // MaskMove hardcodes the destination (op3) in DI/EDI/RDI srcCount += BuildOperandUses(op1, BuildEvexIncompatibleMask(op1)); srcCount += BuildOperandUses(op2, BuildEvexIncompatibleMask(op2)); - srcCount += BuildOperandUses(op3, RBM_EDI); + srcCount += BuildOperandUses(op3, SRBM_EDI); buildUses = false; break; @@ -2332,7 +2332,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou srcCount += 1; srcCount += op2->isContained() ? BuildOperandUses(op2, BuildEvexIncompatibleMask(op2)) : BuildDelayFreeUses(op2, op1, BuildEvexIncompatibleMask(op2)); - srcCount += BuildDelayFreeUses(op3, op1, RBM_XMM0); + srcCount += BuildDelayFreeUses(op3, op1, SRBM_XMM0); buildUses = false; } @@ -2381,8 +2381,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou assert(isRMW); // DIV implicitly put op1(lower) to EAX and op2(upper) to EDX - srcCount += BuildOperandUses(op1, RBM_EAX); - srcCount += BuildOperandUses(op2, RBM_EDX); + srcCount += BuildOperandUses(op1, SRBM_EAX); + srcCount += BuildOperandUses(op2, SRBM_EDX); if (!op3->isContained()) { @@ -2405,8 +2405,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } // result put in EAX and EDX - BuildDef(intrinsicTree, RBM_EAX, 0); - BuildDef(intrinsicTree, RBM_EDX, 1); + BuildDef(intrinsicTree, SRBM_EAX, 0); + BuildDef(intrinsicTree, SRBM_EDX, 1); buildUses = false; break; @@ -2416,7 +2416,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou case NI_BMI2_X64_MultiplyNoFlags: { assert(numArgs == 2 || numArgs == 3); - srcCount += BuildOperandUses(op1, RBM_EDX); + srcCount += BuildOperandUses(op1, SRBM_EDX); srcCount += BuildOperandUses(op2); if (numArgs == 3) { @@ -2988,7 +2988,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) } if ((nonMemSource != nullptr) && !nonMemSource->isContained() && varTypeIsByte(indirTree)) { - srcCandidates = RBM_BYTE_REGS; + srcCandidates = RBM_BYTE_REGS.GetIntRegSet(); } if (otherIndir != nullptr) { @@ -3098,19 +3098,19 @@ int LinearScan::BuildMul(GenTree* tree) // Here we set RAX as the only destination candidate // In LSRA we set the kill set for this operation to RBM_RAX|RBM_RDX // - dstCandidates = RBM_RAX; + dstCandidates = SRBM_RAX; } else if (tree->OperGet() == GT_MULHI) { // Have to use the encoding:RDX:RAX = RAX * rm. Since we only care about the // upper 32 bits of the result set the destination candidate to REG_RDX. - dstCandidates = RBM_RDX; + dstCandidates = SRBM_RDX; } #if defined(TARGET_X86) else if (tree->OperGet() == GT_MUL_LONG) { // have to use the encoding:RDX:RAX = RAX * rm - dstCandidates = RBM_RAX | RBM_RDX; + dstCandidates = SRBM_RAX | SRBM_RDX; dstCount = 2; } #endif diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index a033e49fcad1fd..9589527c5cac65 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -950,11 +950,11 @@ regNumber genRegArgNext(regNumber argReg) * are encoded in GC information at call sites. 
  */
-const regMaskSmall raRbmCalleeSaveOrder[] = {RBM_CALL_GC_REGS_ORDER};
+const regMaskTP raRbmCalleeSaveOrder[] = {RBM_CALL_GC_REGS_ORDER};
 
-regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short calleeSaveMask)
+regMaskTP genRegMaskFromCalleeSavedMask(unsigned short calleeSaveMask)
 {
-    regMaskSmall res = 0;
+    regMaskTP res = 0;
     for (int i = 0; i < CNT_CALL_GC_REGS; i++)
     {
         if ((calleeSaveMask & (1 << i)) != 0)
diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h
index d04f93acafaa43..d5073aa9ce7f4e 100644
--- a/src/coreclr/jit/target.h
+++ b/src/coreclr/jit/target.h
@@ -121,8 +121,8 @@ enum _regNumber_enum : unsigned
 enum _regMask_enum : uint64_t
 {
     RBM_NONE = 0,
-#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
-#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#define REGDEF(name, rnum, mask, sname) SRBM_##name = mask,
+#define REGALIAS(alias, realname) SRBM_##alias = SRBM_##realname,
 #include "register.h"
 };
 
@@ -142,8 +142,8 @@ enum _regNumber_enum : unsigned
 enum _regMask_enum : uint64_t
 {
     RBM_NONE = 0,
-#define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask,
-#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#define REGDEF(name, rnum, mask, xname, wname) SRBM_##name = mask,
+#define REGALIAS(alias, realname) SRBM_##alias = SRBM_##realname,
 #include "register.h"
 };
 
@@ -164,8 +164,8 @@ enum _regMask_enum : uint64_t
 {
     RBM_NONE = 0,
-#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
-#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#define REGDEF(name, rnum, mask, sname) SRBM_##name = mask,
+#define REGALIAS(alias, realname) SRBM_##alias = SRBM_##realname,
 #include "register.h"
 };
 
@@ -186,8 +186,8 @@ enum _regMask_enum : unsigned
 {
     RBM_NONE = 0,
-#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
-#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#define REGDEF(name, rnum, mask, sname) SRBM_##name = mask,
+#define REGALIAS(alias, realname) SRBM_##alias = SRBM_##realname,
 #include "register.h"
 };
 
@@ -233,12 +233,6 @@ typedef uint64_t regMaskSmall;
 // #define HAS_MORE_THAN_64_REGISTERS 1
 #endif // TARGET_ARM64
 
-#ifdef HAS_MORE_THAN_64_REGISTERS
-#define MORE_THAN_64_REGISTERS_ARG(x) , x
-#else
-#define MORE_THAN_64_REGISTERS_ARG(x)
-#endif
-
 // TODO: Rename regMaskSmall as RegSet64 (at least for 64-bit)
 typedef regMaskSmall SingleTypeRegSet;
 inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg);
@@ -260,18 +254,33 @@ struct regMaskTP
     void RemoveRegNumFromMask(regNumber reg, var_types type);
     bool IsRegNumInMask(regNumber reg, var_types type) const;
 #endif
-    void AddGprRegs(SingleTypeRegSet gprRegs);
-    void AddRegNum(regNumber reg, var_types type);
-    void AddRegNumInMask(regNumber reg);
-    void AddRegsetForType(SingleTypeRegSet regsToAdd, var_types type);
-    SingleTypeRegSet GetRegSetForType(var_types type) const;
-    bool IsRegNumInMask(regNumber reg) const;
-    bool IsRegNumPresent(regNumber reg, var_types type) const;
-    void RemoveRegNum(regNumber reg, var_types type);
-    void RemoveRegNumFromMask(regNumber reg);
-    void RemoveRegsetForType(SingleTypeRegSet regsToRemove, var_types type);
-
-    regMaskTP(regMaskSmall lowMask, regMaskSmall highMask)
+    void             AddGprRegs(SingleTypeRegSet gprRegs);
+    void             AddRegNum(regNumber reg, var_types type);
+    void             AddRegNumInMask(regNumber reg);
+    void             AddRegsetForType(SingleTypeRegSet regsToAdd, var_types type);
+    SingleTypeRegSet GetRegSetForType(var_types type) const;
+    bool             IsRegNumInMask(regNumber reg) const;
+    bool             IsRegNumPresent(regNumber reg, var_types type) const;
+    void             RemoveRegNum(regNumber reg, var_types type);
+    void             RemoveRegNumFromMask(regNumber reg);
+    void             RemoveRegsetForType(SingleTypeRegSet regsToRemove, var_types type);
+    static constexpr regMaskTP CreateFromRegNum(regNumber reg, regMaskSmall mask)
+    {
+#ifdef HAS_MORE_THAN_64_REGISTERS
+        if (reg < 64)
+        {
+            return regMaskTP(mask, RBM_NONE);
+        }
+        else
+        {
+            return regMaskTP(RBM_NONE, mask);
+        }
+#else
+        return regMaskTP(mask, RBM_NONE);
+#endif
+    }
+
+    constexpr regMaskTP(regMaskSmall lowMask, regMaskSmall highMask)
         : low(lowMask)
 #ifdef HAS_MORE_THAN_64_REGISTERS
         , high(highMask)
@@ -279,7 +288,7 @@ struct regMaskTP
     {
     }
 
-    regMaskTP(regMaskSmall regMask)
+    constexpr regMaskTP(regMaskSmall regMask)
         : low(regMask)
 #ifdef HAS_MORE_THAN_64_REGISTERS
         , high(RBM_NONE)
@@ -326,13 +335,13 @@ struct regMaskTP
     }
 #endif
 
-    regMaskSmall getLow() const
+    constexpr regMaskSmall getLow() const
     {
         return low;
     }
 
 #ifdef HAS_MORE_THAN_64_REGISTERS
-    regMaskSmall getHigh() const
+    constexpr regMaskSmall getHigh() const
     {
         return high;
     }
@@ -396,25 +405,70 @@ struct regMaskTP
     }
 };
 
+#if defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
+
+#define REGDEF(name, rnum, mask, sname) \
+    static constexpr regMaskTP RBM_##name = \
+        regMaskTP::CreateFromRegNum(static_cast<regNumber>(rnum), static_cast<regMaskSmall>(mask));
+#define REGALIAS(alias, realname) static constexpr regMaskTP RBM_##alias = RBM_##realname;
+#include "register.h"
+
+#elif defined(TARGET_ARM64)
+
+#define REGDEF(name, rnum, mask, xname, wname) \
+    static constexpr regMaskTP RBM_##name = \
+        regMaskTP::CreateFromRegNum(static_cast<regNumber>(rnum), static_cast<regMaskSmall>(mask));
+#define REGALIAS(alias, realname) static constexpr regMaskTP RBM_##alias = RBM_##realname;
+#include "register.h"
+
+#elif defined(TARGET_AMD64)
+
+#define REGDEF(name, rnum, mask, sname) \
+    static constexpr regMaskTP RBM_##name = \
+        regMaskTP::CreateFromRegNum(static_cast<regNumber>(rnum), static_cast<regMaskSmall>(mask));
+#define REGALIAS(alias, realname) static constexpr regMaskTP RBM_##alias = RBM_##realname;
+#include "register.h"
+
+#elif defined(TARGET_X86)
+
+#define REGDEF(name, rnum, mask, sname) \
+    static constexpr regMaskTP RBM_##name = \
+        regMaskTP::CreateFromRegNum(static_cast<regNumber>(rnum), static_cast<regMaskSmall>(mask));
+#define REGALIAS(alias, realname) static constexpr regMaskTP RBM_##alias = RBM_##realname;
+#include "register.h"
+
+#else
+#error Unsupported target architecture
+#endif
+
 static regMaskTP operator^(const regMaskTP& first, const regMaskTP& second)
 {
-    regMaskTP result(first.getLow() ^ second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() ^ second.getHigh()));
-    return result;
+#ifdef HAS_MORE_THAN_64_REGISTERS
+    return regMaskTP(first.getLow() ^ second.getLow(), first.getHigh() ^ second.getHigh());
+#else
+    return regMaskTP(first.getLow() ^ second.getLow());
+#endif
 }
 
-static regMaskTP operator&(const regMaskTP& first, const regMaskTP& second)
+static constexpr regMaskTP operator&(const regMaskTP& first, const regMaskTP& second)
 {
-    regMaskTP result(first.getLow() & second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() & second.getHigh()));
-    return result;
+#ifdef HAS_MORE_THAN_64_REGISTERS
+    return regMaskTP(first.getLow() & second.getLow(), first.getHigh() & second.getHigh());
+#else
+    return regMaskTP(first.getLow() & second.getLow());
+#endif
 }
 
-static regMaskTP operator|(const regMaskTP& first, const regMaskTP& second)
+static constexpr regMaskTP operator|(const regMaskTP& first, const regMaskTP& second)
 {
-    regMaskTP result(first.getLow() | second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() | second.getHigh()));
-    return result;
+#ifdef HAS_MORE_THAN_64_REGISTERS
+    return regMaskTP(first.getLow() | second.getLow(), first.getHigh() | second.getHigh());
+#else
+    return regMaskTP(first.getLow() | second.getLow());
+#endif
 }
 
-static bool operator==(const regMaskTP& first, const regMaskTP& second)
+static constexpr bool operator==(const regMaskTP& first, const regMaskTP& second)
 {
     return (first.getLow() == second.getLow())
 #ifdef HAS_MORE_THAN_64_REGISTERS
@@ -423,7 +477,7 @@ static bool operator==(const regMaskTP& first, const regMaskTP& second)
            ;
 }
 
-static bool operator!=(const regMaskTP& first, const regMaskTP& second)
+static constexpr bool operator!=(const regMaskTP& first, const regMaskTP& second)
 {
     return !(first == second);
 }
@@ -453,10 +507,9 @@ static regMaskTP& operator<<=(regMaskTP& first, const int b)
 }
 #endif
 
-static regMaskTP operator>>(regMaskTP first, const int b)
+static constexpr regMaskTP operator>>(regMaskTP first, const int b)
 {
-    regMaskTP result(first.getLow() >> b);
-    return result;
+    return regMaskTP(first.getLow() >> b);
 }
 
 static regMaskTP& operator>>=(regMaskTP& first, const int b)
@@ -465,10 +518,13 @@ static regMaskTP& operator>>=(regMaskTP& first, const int b)
     return first;
 }
 
-static regMaskTP operator~(const regMaskTP& first)
+static constexpr regMaskTP operator~(const regMaskTP first)
 {
-    regMaskTP result(~first.getLow() MORE_THAN_64_REGISTERS_ARG(~first.getHigh()));
-    return result;
+#ifdef HAS_MORE_THAN_64_REGISTERS
+    return regMaskTP(~first.getLow(), ~first.getHigh());
+#else
+    return regMaskTP(~first.getLow());
+#endif
 }
 
 static uint32_t PopCount(SingleTypeRegSet value)
@@ -478,11 +534,11 @@ static uint32_t PopCount(SingleTypeRegSet value)
 
 static uint32_t PopCount(const regMaskTP& value)
 {
-    return BitOperations::PopCount(value.getLow())
+    uint32_t result = BitOperations::PopCount(value.getLow());
 #ifdef HAS_MORE_THAN_64_REGISTERS
-           + BitOperations::PopCount(value.getHigh())
+    result += BitOperations::PopCount(value.getHigh());
 #endif
-        ;
+    return result;
 }
 
 static uint32_t BitScanForward(SingleTypeRegSet value)
@@ -722,7 +778,7 @@ inline regNumber theFixedRetBuffReg(CorInfoCallConvExtension callConv)
 // theFixedRetBuffMask:
 // Returns the regNumber to use for the fixed return buffer
 //
-inline SingleTypeRegSet theFixedRetBuffMask(CorInfoCallConvExtension callConv)
+inline regMaskTP theFixedRetBuffMask(CorInfoCallConvExtension callConv)
 {
     assert(hasFixedRetBuffReg(callConv)); // This predicate should be checked before calling this method
 #if defined(TARGET_ARM64)
@@ -757,9 +813,9 @@ inline unsigned theFixedRetBuffArgNum(CorInfoCallConvExtension callConv)
 // Returns the full mask of all possible integer registers
 // Note this includes the fixed return buffer register on Arm64
 //
-inline SingleTypeRegSet fullIntArgRegMask(CorInfoCallConvExtension callConv)
+inline regMaskTP fullIntArgRegMask(CorInfoCallConvExtension callConv)
 {
-    SingleTypeRegSet result = RBM_ARG_REGS;
+    regMaskTP result = RBM_ARG_REGS;
     if (hasFixedRetBuffReg(callConv))
     {
         result |= theFixedRetBuffMask(callConv);
@@ -1016,10 +1072,10 @@ inline SingleTypeRegSet getSingleTypeRegMask(regNumber reg, var_types regType)
  * These arrays list the callee-saved register numbers (and bitmaps, respectively) for
  * the current architecture.
  */
-extern const regMaskSmall raRbmCalleeSaveOrder[CNT_CALL_GC_REGS];
+extern const regMaskTP raRbmCalleeSaveOrder[CNT_CALL_GC_REGS];
 
 // This method takes a "compact" bitset of the callee-saved registers, and "expands" it to a full register mask.
-regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short);
+regMaskTP genRegMaskFromCalleeSavedMask(unsigned short);
 
 /*****************************************************************************
  *
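To make the shape of the new target.h block easier to follow, here is a small standalone C++ sketch (an illustration added alongside the diff, not part of the change; the mask type, register indices, and names are made up): the RBM_* definitions become constexpr values of a two-word mask type, built by a factory that routes the raw bit into the low or high word depending on the register index, so bitwise combinations of those constants can fold at compile time.

    #include <cstdint>
    #include <cstdio>

    // Simplified stand-in for regMaskTP: two 64-bit words, constexpr-friendly.
    struct Mask
    {
        uint64_t low;
        uint64_t high;

        // Mirrors the CreateFromRegNum idea: registers 0..63 live in 'low',
        // higher register indices live in 'high'.
        static constexpr Mask FromRegNum(unsigned reg)
        {
            return (reg < 64) ? Mask{uint64_t(1) << reg, 0} : Mask{0, uint64_t(1) << (reg - 64)};
        }
    };

    constexpr Mask operator|(Mask a, Mask b)
    {
        return Mask{a.low | b.low, a.high | b.high};
    }

    // Constants analogous to the generated RBM_* values (register indices made up here).
    constexpr Mask RBM_R0  = Mask::FromRegNum(0);
    constexpr Mask RBM_V70 = Mask::FromRegNum(70); // lands in the high word

    int main()
    {
        constexpr Mask both = RBM_R0 | RBM_V70; // folded at compile time
        static_assert(both.low == 1, "low word holds register 0");
        printf("low=0x%llx high=0x%llx\n", (unsigned long long)both.low, (unsigned long long)both.high);
        return 0;
    }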
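The regset.cpp hunk shows the new signature and loop head of genRegMaskFromCalleeSavedMask but not its loop body, which falls outside the context lines. Presumably each set bit of the compact callee-saved mask selects the corresponding raRbmCalleeSaveOrder entry and ORs it into the result; the standalone sketch below (simplified, with a made-up table, count, and function name) illustrates that expansion.

    #include <cstdint>
    #include <cstdio>

    // Simplified model: bit i of the compact mask means "callee-saved register i
    // (in GC encoding order) is live"; each index maps to a full register mask.
    constexpr int      kNumGcCalleeSaves = 4;                                        // made-up count
    constexpr uint64_t kCalleeSaveOrder[kNumGcCalleeSaves] = {0x8, 0x20, 0x40, 0x80}; // made-up masks

    // Presumed shape of the expansion: OR in the table entry for every set bit.
    uint64_t expandCalleeSavedMask(unsigned short compactMask)
    {
        uint64_t res = 0;
        for (int i = 0; i < kNumGcCalleeSaves; i++)
        {
            if ((compactMask & (1 << i)) != 0)
            {
                res |= kCalleeSaveOrder[i];
            }
        }
        return res;
    }

    int main()
    {
        // Bits 0 and 2 set -> masks 0x8 and 0x40 -> 0x48.
        printf("0x%llx\n", (unsigned long long)expandCalleeSavedMask(0x5));
        return 0;
    }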