From 4f81f693a322abf6a44c99d64c704c9dd5547c5b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 6 Jun 2024 18:38:09 -0700 Subject: [PATCH 01/20] first cut --- src/coreclr/jit/compiler.h | 6 +- src/coreclr/jit/emit.cpp | 2 +- src/coreclr/jit/emitinl.h | 4 +- src/coreclr/jit/gcencode.cpp | 8 +-- src/coreclr/jit/lsra.cpp | 77 ++++++++++------------ src/coreclr/jit/lsraarm64.cpp | 61 ++++++++++-------- src/coreclr/jit/lsraarmarch.cpp | 21 +++--- src/coreclr/jit/lsrabuild.cpp | 29 +++++---- src/coreclr/jit/regset.cpp | 6 +- src/coreclr/jit/target.h | 110 +++++++++++++++++++++++++------- 10 files changed, 194 insertions(+), 130 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 9281f55471ec78..b79b8042e3b604 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8317,14 +8317,14 @@ class Compiler return reg; } - _regMask_enum GetRegMask() const + regMaskTP GetRegMask() const { return regMask; } private: regNumber reg; - _regMask_enum regMask; + regMaskTP regMask; }; VirtualStubParamInfo* virtualStubParamInfo; @@ -9851,7 +9851,7 @@ class Compiler // On these platforms we assume the register that the target is // passed in is preserved by the validator and take care to get the // target from the register for the call (even in debug mode). - static_assert_no_msg((RBM_VALIDATE_INDIRECT_CALL_TRASH & (1 << REG_VALIDATE_INDIRECT_CALL_ADDR)) == 0); + //static_assert_no_msg((RBM_VALIDATE_INDIRECT_CALL_TRASH & (1 << REG_VALIDATE_INDIRECT_CALL_ADDR)) == 0); if (JitConfig.JitForceControlFlowGuard()) return true; diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 022e3aa492a0c0..9e68a57805e42a 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -10062,7 +10062,7 @@ void emitter::emitStackPopLargeStk(BYTE* addr, bool isCall, unsigned char callIn // of callee-saved registers only). for (unsigned calleeSavedRegIdx = 0; calleeSavedRegIdx < CNT_CALL_GC_REGS; calleeSavedRegIdx++) { - regMaskSmall calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx]; + regMaskTP calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx]; if (emitThisGCrefRegs & calleeSavedRbm) { gcrefRegs |= (1 << calleeSavedRegIdx); diff --git a/src/coreclr/jit/emitinl.h b/src/coreclr/jit/emitinl.h index 66a33b813d58fa..2d8c1cde28226a 100644 --- a/src/coreclr/jit/emitinl.h +++ b/src/coreclr/jit/emitinl.h @@ -403,7 +403,7 @@ inline ssize_t emitter::emitGetInsAmdAny(const instrDesc* id) const /*static*/ inline unsigned emitter::emitDecodeCallGCregs(instrDesc* id) { - unsigned regmask = 0; + regMaskTP regmask = RBM_NONE; unsigned encodeMask; #ifdef TARGET_X86 @@ -568,7 +568,7 @@ inline ssize_t emitter::emitGetInsAmdAny(const instrDesc* id) const NYI("unknown target"); #endif - return regmask; + return (unsigned int)regmask.getLow(); } #ifdef TARGET_XARCH diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index 902029791f20c0..68592d175da340 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -4471,8 +4471,8 @@ void GCInfo::gcMakeRegPtrTable( assert(call->u1.cdArgMask == 0 && call->cdArgCnt == 0); // Other than that, we just have to deal with the regmasks. - regMaskSmall gcrefRegMask = call->cdGCrefRegs & RBM_CALL_GC_REGS; - regMaskSmall byrefRegMask = call->cdByrefRegs & RBM_CALL_GC_REGS; + regMaskSmall gcrefRegMask = call->cdGCrefRegs & RBM_CALL_GC_REGS.GetIntRegSet(); + regMaskSmall byrefRegMask = call->cdByrefRegs & RBM_CALL_GC_REGS.GetIntRegSet(); assert((gcrefRegMask & byrefRegMask) == 0); @@ -4558,9 +4558,9 @@ void GCInfo::gcMakeRegPtrTable( { // This is a true call site. - regMaskSmall gcrefRegMask = genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallGCrefRegs); + regMaskSmall gcrefRegMask = genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallGCrefRegs).GetIntRegSet(); - regMaskSmall byrefRegMask = genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallByrefRegs); + regMaskSmall byrefRegMask = genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallByrefRegs).GetIntRegSet(); assert((gcrefRegMask & byrefRegMask) == 0); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 39c9251c74b6eb..6273da615d2e81 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -449,11 +449,7 @@ SingleTypeRegSet LinearScan::internalFloatRegCandidates() } else { -#ifdef TARGET_AMD64 return RBM_FLT_CALLEE_TRASH.GetFloatRegSet(); -#else - return RBM_FLT_CALLEE_TRASH; -#endif // TARGET_AMD64 } } @@ -526,34 +522,34 @@ SingleTypeRegSet LinearScan::getConstrainedRegMask(RefPosition* refPosition, #if defined(TARGET_AMD64) #ifdef UNIX_AMD64_ABI // On System V the RDI and RSI are not callee saved. Use R12 ans R13 as callee saved registers. -static const SingleTypeRegSet LsraLimitSmallIntSet = +static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13); #else // !UNIX_AMD64_ABI // On Windows Amd64 use the RDI and RSI as callee saved registers. -static const SingleTypeRegSet LsraLimitSmallIntSet = +static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI); #endif // !UNIX_AMD64_ABI -static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); -static const SingleTypeRegSet LsraLimitUpperSimdSet = +static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); +static const regMaskTP LsraLimitUpperSimdSet = (RBM_XMM16 | RBM_XMM17 | RBM_XMM18 | RBM_XMM19 | RBM_XMM20 | RBM_XMM21 | RBM_XMM22 | RBM_XMM23 | RBM_XMM24 | RBM_XMM25 | RBM_XMM26 | RBM_XMM27 | RBM_XMM28 | RBM_XMM29 | RBM_XMM30 | RBM_XMM31); #elif defined(TARGET_ARM) // On ARM, we may need two registers to set up the target register for a virtual call, so we need // to have at least the maximum number of arg registers, plus 2. -static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); -static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); +static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); +static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); #elif defined(TARGET_ARM64) -static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); -static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); +static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); +static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); #elif defined(TARGET_X86) -static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); -static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); +static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); +static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); #elif defined(TARGET_LOONGARCH64) -static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); -static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); +static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); +static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); #elif defined(TARGET_RISCV64) -static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); -static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); +static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); +static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); #else #error Unsupported or unset target architecture #endif // target @@ -596,29 +592,26 @@ SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, RegisterT case LSRA_LIMIT_CALLEE: if (!compiler->opts.compDbgEnC) { - mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_SAVED, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_SAVED.GetRegSetForType(regType), minRegCount); } break; case LSRA_LIMIT_CALLER: { -#ifdef TARGET_XARCH mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_TRASH.GetRegSetForType(regType), minRegCount); -#else - mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_TRASH, minRegCount); -#endif // TARGET_AMD64 } break; case LSRA_LIMIT_SMALL_SET: if ((mask & LsraLimitSmallIntSet) != RBM_NONE) { - mask = getConstrainedRegMask(refPosition, regType, mask, LsraLimitSmallIntSet, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, LsraLimitSmallIntSet.GetRegSetForType(regType), minRegCount); } else if ((mask & LsraLimitSmallFPSet) != RBM_NONE) { - mask = getConstrainedRegMask(refPosition, regType, mask, LsraLimitSmallFPSet, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, + LsraLimitSmallFPSet.GetRegSetForType(regType), minRegCount); } break; @@ -847,29 +840,27 @@ LinearScan::LinearScan(Compiler* theCompiler) // Note: one known reason why we exclude LR is because NativeAOT has dependency on not // using LR as a GPR. See: https://github.com/dotnet/runtime/issues/101932 // Once that is addressed, we may consider allowing LR in availableIntRegs. - availableIntRegs = ((RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd.getLow())); + availableIntRegs = (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd).GetIntRegSet(); #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd.getLow()); + availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd).getIntRegSet(); #else - availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd.getLow()); + availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd).getIntRegSet(); #endif #if ETW_EBP_FRAMED - availableIntRegs &= ~RBM_FPBASE; + availableIntRegs &= ~RBM_FPBASE.GetIntRegSet(); #endif // ETW_EBP_FRAMED #ifdef TARGET_AMD64 - availableFloatRegs = RBM_ALLFLOAT.GetFloatRegSet(); - availableDoubleRegs = RBM_ALLDOUBLE.GetFloatRegSet(); -#else availableFloatRegs = RBM_ALLFLOAT; availableDoubleRegs = RBM_ALLDOUBLE; +#else + availableFloatRegs = RBM_ALLFLOAT.GetFloatRegSet(); + availableDoubleRegs = RBM_ALLDOUBLE.GetFloatRegSet(); #endif -#if defined(TARGET_XARCH) +#if defined(TARGET_XARCH) || defined(TARGET_ARM64) availableMaskRegs = RBM_ALLMASK.GetPredicateRegSet(); -#elif defined(TARGET_ARM64) - availableMaskRegs = RBM_ALLMASK; #endif #if defined(TARGET_AMD64) || defined(TARGET_ARM64) @@ -877,9 +868,9 @@ LinearScan::LinearScan(Compiler* theCompiler) { // When the EnC option is set we have an exact set of registers that we always save // that are also available in future versions. - availableIntRegs &= ~RBM_INT_CALLEE_SAVED | RBM_ENC_CALLEE_SAVED; - availableFloatRegs &= ~RBM_FLT_CALLEE_SAVED; - availableDoubleRegs &= ~RBM_FLT_CALLEE_SAVED; + availableIntRegs &= ~(RBM_INT_CALLEE_SAVED | RBM_ENC_CALLEE_SAVED).GetIntRegSet(); + availableFloatRegs &= ~(RBM_FLT_CALLEE_SAVED).GetFloatRegSet(); + availableDoubleRegs &= ~RBM_FLT_CALLEE_SAVED.GetFloatRegSet(); #if defined(TARGET_XARCH) availableMaskRegs &= ~RBM_MSK_CALLEE_SAVED; #endif // TARGET_XARCH @@ -2813,7 +2804,7 @@ void LinearScan::setFrameType() SingleTypeRegSet removeMask = RBM_NONE; if (frameType == FT_EBP_FRAME) { - removeMask |= RBM_FPBASE; + removeMask |= RBM_FPBASE.GetIntRegSet(); } compiler->rpFrameType = frameType; @@ -2826,7 +2817,7 @@ void LinearScan::setFrameType() compiler->codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD; assert(REG_OPT_RSVD != REG_FP); JITDUMP(" Reserved REG_OPT_RSVD (%s) due to large frame\n", getRegName(REG_OPT_RSVD)); - removeMask |= RBM_OPT_RSVD; + removeMask |= RBM_OPT_RSVD.GetIntRegSet(); } #endif // TARGET_ARMARCH || TARGET_RISCV64 @@ -8822,13 +8813,9 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, else { // Prefer a callee-trashed register if possible to prevent new prolog/epilog saves/restores. - if ((freeRegs & RBM_CALLEE_TRASH) != 0) + if ((freeRegs & RBM_CALLEE_TRASH.GetRegSetForType(type)) != 0) { -#ifdef TARGET_XARCH freeRegs &= RBM_CALLEE_TRASH.GetRegSetForType(type); -#else - freeRegs &= RBM_CALLEE_TRASH; -#endif } regNumber tempReg = genRegNumFromMask(genFindLowestBit(freeRegs), type); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 53c543291671ec..88a249baf30ea5 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -238,7 +238,7 @@ SingleTypeRegSet LinearScan::filterConsecutiveCandidates(SingleTypeRegSet float currAvailableRegs &= ~endMask; } while (currAvailableRegs != RBM_NONE); - SingleTypeRegSet v0_v31_mask = RBM_V0 | RBM_V31; + SingleTypeRegSet v0_v31_mask = (RBM_V0 | RBM_V31).GetFloatRegSet(); if ((floatCandidates & v0_v31_mask) == v0_v31_mask) { // Finally, check for round robin case where sequence of last register @@ -255,48 +255,56 @@ SingleTypeRegSet LinearScan::filterConsecutiveCandidates(SingleTypeRegSet float { if ((floatCandidates & v0_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V31; + consecutiveResult |= RBM_V31.GetFloatRegSet(); overallResult |= v0_v31_mask; } break; } case 3: { - SingleTypeRegSet v0_v30_v31_mask = RBM_V0 | RBM_V30 | RBM_V31; + SingleTypeRegSet v0_v30_v31_mask = + (RBM_V0 | RBM_V30 | RBM_V31).GetFloatRegSet(); if ((floatCandidates & v0_v30_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V30; + consecutiveResult |= RBM_V30.GetFloatRegSet(); overallResult |= v0_v30_v31_mask; } - SingleTypeRegSet v0_v1_v31_mask = RBM_V0 | RBM_V1 | RBM_V31; + SingleTypeRegSet v0_v1_v31_mask = + (RBM_V0 | RBM_V1 | RBM_V31).GetFloatRegSet(); if ((floatCandidates & v0_v1_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V31; + consecutiveResult |= RBM_V31.GetFloatRegSet(); overallResult |= v0_v1_v31_mask; } break; } case 4: { - SingleTypeRegSet v0_v29_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; + SingleTypeRegSet v0_v29_v30_v31_mask = + (RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31) + .GetFloatRegSet(); if ((floatCandidates & v0_v29_v30_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V29; + consecutiveResult |= RBM_V29.GetFloatRegSet(); overallResult |= v0_v29_v30_v31_mask; } - SingleTypeRegSet v0_v1_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; + SingleTypeRegSet v0_v1_v30_v31_mask = + (RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31) + .GetFloatRegSet(); if ((floatCandidates & v0_v1_v30_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V30; + consecutiveResult |= RBM_V30.GetFloatRegSet(); overallResult |= v0_v1_v30_v31_mask; } - SingleTypeRegSet v0_v1_v2_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; + SingleTypeRegSet v0_v1_v2_v31_mask = + (RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31) + .GetFloatRegSet(); if ((floatCandidates & v0_v1_v2_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V31; + consecutiveResult |= RBM_V31.GetFloatRegSet(); overallResult |= v0_v1_v2_v31_mask; } break; @@ -430,7 +438,8 @@ SingleTypeRegSet LinearScan::getConsecutiveCandidates(SingleTypeRegSet allCandi { // For stress, make only alternate registers available so we can stress the selection of free/busy registers. floatFreeCandidates &= (RBM_V0 | RBM_V2 | RBM_V4 | RBM_V6 | RBM_V8 | RBM_V10 | RBM_V12 | RBM_V14 | RBM_V16 | - RBM_V18 | RBM_V20 | RBM_V22 | RBM_V24 | RBM_V26 | RBM_V28 | RBM_V30); + RBM_V18 | RBM_V20 | RBM_V22 | RBM_V24 | RBM_V26 | RBM_V28 | RBM_V30) + .GetFloatRegSet(); } #endif @@ -744,7 +753,7 @@ int LinearScan::BuildNode(GenTree* tree) #ifdef SWIFT_SUPPORT case GT_SWIFT_ERROR_RET: - BuildUse(tree->gtGetOp1(), RBM_SWIFT_ERROR); + BuildUse(tree->gtGetOp1(), RBM_SWIFT_ERROR.GetIntRegSet()); // Plus one for error register srcCount = BuildReturn(tree) + 1; killMask = getKillSetForReturn(); @@ -762,7 +771,7 @@ int LinearScan::BuildNode(GenTree* tree) { assert(tree->TypeGet() == TYP_INT); srcCount = 1; - BuildUse(tree->gtGetOp1(), RBM_INTRET); + BuildUse(tree->gtGetOp1(), RBM_INTRET.GetIntRegSet()); } break; @@ -1266,7 +1275,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CATCH_ARG: srcCount = 0; assert(dstCount == 1); - BuildDef(tree, RBM_EXCEPTION_OBJECT); + BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; case GT_INDEX_ADDR: @@ -1302,7 +1311,7 @@ int LinearScan::BuildNode(GenTree* tree) // and we know REG_SWIFT_ERROR should be busy up to this point, anyway. // By forcing LSRA to use REG_SWIFT_ERROR as both the source and destination register, // we can ensure the redundant move is elided. - BuildDef(tree, RBM_SWIFT_ERROR); + BuildDef(tree, RBM_SWIFT_ERROR.GetIntRegSet()); break; #endif // SWIFT_SUPPORT @@ -1583,7 +1592,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else if (HWIntrinsicInfo::IsMaskedOperation(intrin.id)) { - SingleTypeRegSet predMask = RBM_ALLMASK; + SingleTypeRegSet predMask = RBM_ALLMASK.GetPredicateRegSet(); if (intrin.id == NI_Sve_ConditionalSelect) { // If this is conditional select, make sure to check the embedded @@ -1597,13 +1606,13 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou const HWIntrinsic intrinEmb(embOp2Node); if (HWIntrinsicInfo::IsLowMaskedOperation(intrinEmb.id)) { - predMask = RBM_LOWMASK; + predMask = RBM_LOWMASK.GetPredicateRegSet(); } } } else if (HWIntrinsicInfo::IsLowMaskedOperation(intrin.id)) { - predMask = RBM_LOWMASK; + predMask = RBM_LOWMASK.GetPredicateRegSet(); } srcCount += BuildOperandUses(intrin.op1, predMask); @@ -1639,12 +1648,12 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (isRMW) { srcCount += BuildDelayFreeUses(intrin.op2, nullptr); - srcCount += BuildDelayFreeUses(intrin.op3, nullptr, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS); + srcCount += BuildDelayFreeUses(intrin.op3, nullptr, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS.GetFloatRegSet()); } else { srcCount += BuildOperandUses(intrin.op2); - srcCount += BuildOperandUses(intrin.op3, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS); + srcCount += BuildOperandUses(intrin.op3, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS.GetFloatRegSet()); } if (intrin.op4 != nullptr) @@ -1659,7 +1668,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { assert(!isRMW); - srcCount += BuildOperandUses(intrin.op2, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS); + srcCount += BuildOperandUses(intrin.op2, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS.GetFloatRegSet()); if (intrin.op3 != nullptr) { @@ -1976,7 +1985,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (intrin.op2->gtType == TYP_MASK) { assert(lowVectorOperandNum != 2); - candidates = RBM_ALLMASK; + candidates = RBM_ALLMASK.GetPredicateRegSet(); } if (forceOp2DelayFree) @@ -2309,12 +2318,12 @@ void LinearScan::getLowVectorOperandAndCandidates(HWIntrinsic intrin, size_t* op if (baseElementSize == 8) { - *candidates = RBM_SVE_INDEXED_D_ELEMENT_ALLOWED_REGS; + *candidates = RBM_SVE_INDEXED_D_ELEMENT_ALLOWED_REGS.GetFloatRegSet(); } else { assert(baseElementSize == 4); - *candidates = RBM_SVE_INDEXED_S_ELEMENT_ALLOWED_REGS; + *candidates = RBM_SVE_INDEXED_S_ELEMENT_ALLOWED_REGS.GetFloatRegSet(); } switch (intrin.id) diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 323dea8d4809a9..bd54be8fd961c9 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -171,7 +171,8 @@ int LinearScan::BuildCall(GenTreeCall* call) { // Fast tail call - make sure that call target is always computed in volatile registers // that will not be overridden by epilog sequence. - ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH & ~RBM_LR; + ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH.GetIntRegSet() & + ~RBM_LR.GetIntRegSet(); if (compiler->getNeedsGSSecurityCookie()) { ctrlExprCandidates &= @@ -186,7 +187,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { // For R2R and VSD we have stub address in REG_R2R_INDIRECT_PARAM // and will load call address into the temp register from this register. - SingleTypeRegSet candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + SingleTypeRegSet candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH.getLow(); assert(candidates != RBM_NONE); buildInternalIntRegisterDefForNode(call, candidates); } @@ -236,15 +237,15 @@ int LinearScan::BuildCall(GenTreeCall* call) { if (varTypeUsesFloatArgReg(registerType)) { - singleDstCandidates = RBM_FLOATRET; + singleDstCandidates = RBM_FLOATRET.GetFloatRegSet(); } else if (registerType == TYP_LONG) { - singleDstCandidates = RBM_LNGRET; + singleDstCandidates = RBM_LNGRET.GetIntRegSet(); } else { - singleDstCandidates = RBM_INTRET; + singleDstCandidates = RBM_INTRET.GetIntRegSet(); } } @@ -370,7 +371,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // Don't assign the call target to any of the argument registers because // we will use them to also pass floating point arguments as required // by Arm64 ABI. - ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS); + ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS.GetIntRegSet()); } if (ctrlExpr != nullptr) @@ -698,7 +699,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // a temporary register to perform the sequence of loads and stores. // We can't use the special Write Barrier registers, so exclude them from the mask SingleTypeRegSet internalIntCandidates = - allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); + allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF) + .GetRegSetForType(IntRegisterType); buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); if (size >= 2 * REGSIZE_BYTES) @@ -716,7 +718,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) } // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF. - dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF; + dstAddrRegMask = + RBM_WRITE_BARRIER_DST_BYREF.GetIntRegSet(); // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, @@ -724,7 +727,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (srcAddrOrFill != nullptr) { assert(!srcAddrOrFill->isContained()); - srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF; + srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF.GetIntRegSet(); } } break; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index e130d9fc600cf6..117f7edfcfcacf 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -626,7 +626,7 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, if (theInterval != nullptr && theInterval->isLocalVar && compiler->compMethodRequiresPInvokeFrame() && theInterval->varNum == compiler->genReturnLocal) { - mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME); + mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME).GetRegSetForType(theInterval->registerType); noway_assert(mask != RBM_NONE); } #endif // !TARGET_AMD64 @@ -861,7 +861,7 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) #endif // TARGET_AMD64 #else - killMask.RemoveRegsetForType(RBM_FLT_CALLEE_TRASH, FloatRegisterType); + killMask.RemoveRegsetForType(RBM_FLT_CALLEE_TRASH.GetFloatRegSet(), FloatRegisterType); #endif // TARGET_XARCH } #ifdef TARGET_ARM @@ -883,7 +883,7 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) // so don't use the register post-call until it is consumed by SwiftError. if (call->HasSwiftErrorHandling()) { - killMask.AddGprRegs(RBM_SWIFT_ERROR); + killMask.AddGprRegs(RBM_SWIFT_ERROR.GetIntRegSet()); } #endif // SWIFT_SUPPORT @@ -1200,7 +1200,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo if (compiler->killGCRefs(tree)) { RefPosition* pos = - newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree, (availableIntRegs & ~RBM_ARG_REGS)); + newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree, (availableIntRegs & ~RBM_ARG_REGS.GetIntRegSet())); insertedKills = true; } @@ -1518,7 +1518,7 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, { Interval* upperVectorInterval = getUpperVectorInterval(varIndex); RefPosition* pos = - newRefPosition(upperVectorInterval, currentLoc, RefTypeUpperVectorSave, tree, RBM_FLT_CALLEE_SAVED); + newRefPosition(upperVectorInterval, currentLoc, RefTypeUpperVectorSave, tree, RBM_FLT_CALLEE_SAVED.GetFloatRegSet()); varInterval->isPartiallySpilled = true; pos->skipSaveRestore = blockAlwaysReturn; pos->liveVarUpperSave = VarSetOps::IsMember(compiler, liveLargeVectors, varIndex); @@ -1575,7 +1575,7 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, if (listNode->ref->getInterval()->recentRefPosition->refType != RefTypeUpperVectorSave) { RefPosition* pos = newRefPosition(listNode->ref->getInterval(), currentLoc, RefTypeUpperVectorSave, - tree, RBM_FLT_CALLEE_SAVED); + tree, RBM_FLT_CALLEE_SAVED.GetFloatRegSet()); } } } @@ -2393,7 +2393,7 @@ void LinearScan::buildIntervals() // If there is a secret stub param, it is also live in if (compiler->info.compPublishStubParam) { - intRegState->rsCalleeRegArgMaskLiveIn.AddGprRegs(RBM_SECRET_STUB_PARAM); + intRegState->rsCalleeRegArgMaskLiveIn.AddGprRegs(RBM_SECRET_STUB_PARAM.GetIntRegSet()); LclVarDsc* stubParamDsc = compiler->lvaGetDesc(compiler->lvaStubArgumentVar); if (isCandidateVar(stubParamDsc)) @@ -4214,7 +4214,7 @@ int LinearScan::BuildReturn(GenTree* tree) #ifdef TARGET_ARM64 if (varTypeIsSIMD(tree) && !op1->IsMultiRegLclVar()) { - BuildUse(op1, RBM_DOUBLERET); + BuildUse(op1, RBM_DOUBLERET.GetFloatRegSet()); return 1; } #endif // TARGET_ARM64 @@ -4300,21 +4300,22 @@ int LinearScan::BuildReturn(GenTree* tree) useCandidates = RBM_NONE; break; case TYP_FLOAT: - useCandidates = RBM_FLOATRET; + useCandidates = RBM_FLOATRET.GetFloatRegSet(); break; case TYP_DOUBLE: // We ONLY want the valid double register in the RBM_DOUBLERET mask. #ifdef TARGET_AMD64 useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE).GetFloatRegSet(); #else - useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE); + useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE) + .GetFloatRegSet(); #endif // TARGET_AMD64 break; case TYP_LONG: - useCandidates = RBM_LNGRET; + useCandidates = RBM_LNGRET.GetIntRegSet(); break; default: - useCandidates = RBM_INTRET; + useCandidates = RBM_INTRET.GetIntRegSet(); break; } BuildUse(op1, useCandidates); @@ -4504,8 +4505,8 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree) // is an indir through an lea, we need to actually instantiate the // lea in a register assert(!addr->isContained() && !src->isContained()); - SingleTypeRegSet addrCandidates = RBM_WRITE_BARRIER_DST; - SingleTypeRegSet srcCandidates = RBM_WRITE_BARRIER_SRC; + SingleTypeRegSet addrCandidates = RBM_WRITE_BARRIER_DST.GetIntRegSet(); + SingleTypeRegSet srcCandidates = RBM_WRITE_BARRIER_SRC.GetIntRegSet(); #if defined(TARGET_X86) && NOGC_WRITE_BARRIERS diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index a033e49fcad1fd..9589527c5cac65 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -950,11 +950,11 @@ regNumber genRegArgNext(regNumber argReg) * are encoded in GC information at call sites. */ -const regMaskSmall raRbmCalleeSaveOrder[] = {RBM_CALL_GC_REGS_ORDER}; +const regMaskTP raRbmCalleeSaveOrder[] = {RBM_CALL_GC_REGS_ORDER}; -regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short calleeSaveMask) +regMaskTP genRegMaskFromCalleeSavedMask(unsigned short calleeSaveMask) { - regMaskSmall res = 0; + regMaskTP res = 0; for (int i = 0; i < CNT_CALL_GC_REGS; i++) { if ((calleeSaveMask & (1 << i)) != 0) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 64a0a6864bbb1d..9574c1f8ba28e5 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -139,13 +139,27 @@ enum _regNumber_enum : unsigned ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs) }; -enum _regMask_enum : uint64_t -{ - RBM_NONE = 0, -#define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask, -#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, -#include "register.h" -}; +typedef uint64_t _regMask_enum; +typedef _regNumber_enum regNumber; +typedef unsigned char regNumberSmall; +typedef uint64_t regMaskSmall; + + +//enum _regMask_enum : uint64_t{ +// RBM_NONE = 0, +//#define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask, +//#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, +//#include "register.h" +//}; + +//struct regMaskTP; +// +//#define REGDEF(name, index, value, x_name, w_name) \ +// static constexpr regMaskTP RBM_##name = regMaskTP::CreateFromRegNum(static_cast(index), static_cast(value)); +// +// #include "register.h" +static constexpr uint64_t RBM_NONE = 0ULL; + #elif defined(TARGET_AMD64) @@ -209,7 +223,8 @@ enum _regMask_enum : unsigned // be lost. typedef _regNumber_enum regNumber; -typedef unsigned char regNumberSmall; + + #if REGMASK_BITS == 8 typedef unsigned char regMaskSmall; @@ -270,8 +285,39 @@ struct regMaskTP void RemoveRegNum(regNumber reg, var_types type); void RemoveRegNumFromMask(regNumber reg); void RemoveRegsetForType(SingleTypeRegSet regsToRemove, var_types type); + static constexpr regMaskTP CreateFromRegNum(regNumber reg, regMaskSmall mask) + { +#ifdef HAS_MORE_THAN_64_REGISTERS + if (reg < 64) + { + return regMaskTP(mask, RBM_NONE); + } + else + { + return regMaskTP(RBM_NONE, mask); + } +#else + return regMaskTP(mask, RBM_NONE); +#endif + } - regMaskTP(regMaskSmall lowMask, regMaskSmall highMask) + static regMaskTP CreateFromRegSet(SingleTypeRegSet regSet, var_types type) + { +#ifdef HAS_MORE_THAN_64_REGISTERS + if (!varTypeIsMask(type)) + { + return regMaskTP(regSet, RBM_NONE); + } + else + { + return regMaskTP(RBM_NONE, regSet); + } +#else + return regMaskTP(regSet, RBM_NONE); +#endif + } + + constexpr regMaskTP(regMaskSmall lowMask, regMaskSmall highMask) : low(lowMask) #ifdef HAS_MORE_THAN_64_REGISTERS , high(highMask) @@ -326,13 +372,13 @@ struct regMaskTP } #endif - regMaskSmall getLow() const + constexpr regMaskSmall getLow() const { return low; } #ifdef HAS_MORE_THAN_64_REGISTERS - regMaskSmall getHigh() const + constexpr regMaskSmall getHigh() const { return high; } @@ -396,13 +442,31 @@ struct regMaskTP } }; +#ifdef TARGET_ARM64 + +static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); +#define REGDEF(name, index, value, x_name, w_name) \ + static constexpr regMaskTP RBM_##name = regMaskTP::CreateFromRegNum(static_cast(index), static_cast(value)); +#define REGALIAS(alias, realname) static constexpr regMaskTP RBM_##alias = RBM_##realname; + #include "register.h" +#endif + + +// enum _regMask_enum : uint64_t{ +// RBM_NONE = 0, +// #define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask, +// #define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, +// #include "register.h" +// }; + + static regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) { regMaskTP result(first.getLow() ^ second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() ^ second.getHigh())); return result; } -static regMaskTP operator&(const regMaskTP& first, const regMaskTP& second) +static constexpr regMaskTP operator&(const regMaskTP& first, const regMaskTP& second) { regMaskTP result(first.getLow() & second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() & second.getHigh())); return result; @@ -414,7 +478,7 @@ static regMaskTP operator|(const regMaskTP& first, const regMaskTP& second) return result; } -static bool operator==(const regMaskTP& first, const regMaskTP& second) +static constexpr bool operator==(const regMaskTP& first, const regMaskTP& second) { return (first.getLow() == second.getLow()) #ifdef HAS_MORE_THAN_64_REGISTERS @@ -423,7 +487,7 @@ static bool operator==(const regMaskTP& first, const regMaskTP& second) ; } -static bool operator!=(const regMaskTP& first, const regMaskTP& second) +static constexpr bool operator!=(const regMaskTP& first, const regMaskTP& second) { return !(first == second); } @@ -722,7 +786,7 @@ inline regNumber theFixedRetBuffReg(CorInfoCallConvExtension callConv) // theFixedRetBuffMask: // Returns the regNumber to use for the fixed return buffer // -inline SingleTypeRegSet theFixedRetBuffMask(CorInfoCallConvExtension callConv) +inline regMaskTP theFixedRetBuffMask(CorInfoCallConvExtension callConv) { assert(hasFixedRetBuffReg(callConv)); // This predicate should be checked before calling this method #if defined(TARGET_ARM64) @@ -757,9 +821,9 @@ inline unsigned theFixedRetBuffArgNum(CorInfoCallConvExtension callConv) // Returns the full mask of all possible integer registers // Note this includes the fixed return buffer register on Arm64 // -inline SingleTypeRegSet fullIntArgRegMask(CorInfoCallConvExtension callConv) +inline regMaskTP fullIntArgRegMask(CorInfoCallConvExtension callConv) { - SingleTypeRegSet result = RBM_ARG_REGS; + regMaskTP result = RBM_ARG_REGS; if (hasFixedRetBuffReg(callConv)) { result |= theFixedRetBuffMask(callConv); @@ -1016,10 +1080,10 @@ inline SingleTypeRegSet getSingleTypeRegMask(regNumber reg, var_types regType) * These arrays list the callee-saved register numbers (and bitmaps, respectively) for * the current architecture. */ -extern const regMaskSmall raRbmCalleeSaveOrder[CNT_CALL_GC_REGS]; +extern const regMaskTP raRbmCalleeSaveOrder[CNT_CALL_GC_REGS]; // This method takes a "compact" bitset of the callee-saved registers, and "expands" it to a full register mask. -regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short); +regMaskTP genRegMaskFromCalleeSavedMask(unsigned short); /***************************************************************************** * @@ -1095,13 +1159,13 @@ inline bool isFloatRegType(var_types type) /*****************************************************************************/ // Some sanity checks on some of the register masks // Stack pointer is never part of RBM_ALLINT -C_ASSERT((RBM_ALLINT & RBM_SPBASE) == RBM_NONE); -C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_SPBASE) == RBM_NONE); +//C_ASSERT((RBM_ALLINT & RBM_SPBASE) == RegMaskTP_NONE); +//C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_SPBASE) == RegMaskTP_NONE); #if ETW_EBP_FRAMED // Frame pointer isn't either if we're supporting ETW frame chaining -C_ASSERT((RBM_ALLINT & RBM_FPBASE) == RBM_NONE); -C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_FPBASE) == RBM_NONE); +//C_ASSERT((RBM_ALLINT & RBM_FPBASE) == RegMaskTP_NONE); +//C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_FPBASE) == RegMaskTP_NONE); #endif /*****************************************************************************/ From c8f9b15141f2f7f49b7d8138244582d0aa60b012 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 7 Jun 2024 14:03:34 -0700 Subject: [PATCH 02/20] make clrjit/clrjit_universal_arm64_x64 working, introduce SRBM_* --- src/coreclr/jit/codegenxarch.cpp | 8 +-- src/coreclr/jit/lsra.cpp | 17 +++--- src/coreclr/jit/lsraarm64.cpp | 37 +++++--------- src/coreclr/jit/lsrabuild.cpp | 4 +- src/coreclr/jit/lsraxarch.cpp | 88 ++++++++++++++++---------------- src/coreclr/jit/target.h | 80 ++++++++++++++++++----------- 6 files changed, 122 insertions(+), 112 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 757491b95034df..aa6e9ab94319c7 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2449,8 +2449,8 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG, /* canSkip */ false); } #else // !TARGET_X86 - static_assert_no_msg((RBM_STACK_PROBE_HELPER_ARG & (RBM_SECRET_STUB_PARAM | RBM_DEFAULT_HELPER_CALL_TARGET)) == - RBM_NONE); + //static_assert_no_msg((RBM_STACK_PROBE_HELPER_ARG & (RBM_SECRET_STUB_PARAM | RBM_DEFAULT_HELPER_CALL_TARGET)) == + // RegMaskTP_NONE); GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, -(int)frameSize); regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG); @@ -2462,7 +2462,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni *pInitRegZeroed = false; } - static_assert_no_msg((RBM_STACK_PROBE_HELPER_TRASH & RBM_STACK_PROBE_HELPER_ARG) == RBM_NONE); + //static_assert_no_msg((RBM_STACK_PROBE_HELPER_TRASH & RBM_STACK_PROBE_HELPER_ARG) == RegMaskTP_NONE); GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG, /* canSkip */ false); #endif // !TARGET_X86 @@ -8841,7 +8841,7 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize // -saved bool for synchronized methods // slots for ret address + FP + EnC callee-saves - int preservedAreaSize = (2 + genCountBits((uint64_t)RBM_ENC_CALLEE_SAVED)) * REGSIZE_BYTES; + int preservedAreaSize = (2 + genCountBits(RBM_ENC_CALLEE_SAVED)) * REGSIZE_BYTES; if (compiler->info.compFlags & CORINFO_FLG_SYNCH) { diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 6273da615d2e81..d1bf29e88d928d 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -265,7 +265,7 @@ SingleTypeRegSet LinearScan::allSIMDRegs() SingleTypeRegSet LinearScan::lowSIMDRegs() { #if defined(TARGET_AMD64) - return (availableFloatRegs & RBM_LOWFLOAT); + return (availableFloatRegs & RBM_LOWFLOAT.GetFloatRegSet()); #else return availableFloatRegs; #endif @@ -619,7 +619,8 @@ SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, RegisterT case LSRA_LIMIT_UPPER_SIMD_SET: if ((mask & LsraLimitUpperSimdSet) != RBM_NONE) { - mask = getConstrainedRegMask(refPosition, regType, mask, LsraLimitUpperSimdSet, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, + LsraLimitUpperSimdSet.GetRegSetForType(regType), minRegCount); } break; #endif @@ -842,9 +843,9 @@ LinearScan::LinearScan(Compiler* theCompiler) // Once that is addressed, we may consider allowing LR in availableIntRegs. availableIntRegs = (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd).GetIntRegSet(); #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd).getIntRegSet(); + availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd).GetIntRegSet(); #else - availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd).getIntRegSet(); + availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd).GetIntRegSet(); #endif #if ETW_EBP_FRAMED @@ -852,8 +853,8 @@ LinearScan::LinearScan(Compiler* theCompiler) #endif // ETW_EBP_FRAMED #ifdef TARGET_AMD64 - availableFloatRegs = RBM_ALLFLOAT; - availableDoubleRegs = RBM_ALLDOUBLE; + availableFloatRegs = RBM_ALLFLOAT.GetFloatRegSet(); + availableDoubleRegs = RBM_ALLDOUBLE.GetFloatRegSet(); #else availableFloatRegs = RBM_ALLFLOAT.GetFloatRegSet(); availableDoubleRegs = RBM_ALLDOUBLE.GetFloatRegSet(); @@ -880,8 +881,8 @@ LinearScan::LinearScan(Compiler* theCompiler) #if defined(TARGET_AMD64) if (compiler->canUseEvexEncoding()) { - availableFloatRegs |= RBM_HIGHFLOAT; - availableDoubleRegs |= RBM_HIGHFLOAT; + availableFloatRegs |= RBM_HIGHFLOAT.GetFloatRegSet(); + availableDoubleRegs |= RBM_HIGHFLOAT.GetFloatRegSet(); } #endif diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 88a249baf30ea5..de76729139070c 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -238,7 +238,7 @@ SingleTypeRegSet LinearScan::filterConsecutiveCandidates(SingleTypeRegSet float currAvailableRegs &= ~endMask; } while (currAvailableRegs != RBM_NONE); - SingleTypeRegSet v0_v31_mask = (RBM_V0 | RBM_V31).GetFloatRegSet(); + SingleTypeRegSet v0_v31_mask = SRBM_V0 | SRBM_V31; if ((floatCandidates & v0_v31_mask) == v0_v31_mask) { // Finally, check for round robin case where sequence of last register @@ -255,56 +255,48 @@ SingleTypeRegSet LinearScan::filterConsecutiveCandidates(SingleTypeRegSet float { if ((floatCandidates & v0_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V31.GetFloatRegSet(); + consecutiveResult |= SRBM_V31; overallResult |= v0_v31_mask; } break; } case 3: { - SingleTypeRegSet v0_v30_v31_mask = - (RBM_V0 | RBM_V30 | RBM_V31).GetFloatRegSet(); + SingleTypeRegSet v0_v30_v31_mask = SRBM_V0 | SRBM_V30 | SRBM_V31; if ((floatCandidates & v0_v30_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V30.GetFloatRegSet(); + consecutiveResult |= SRBM_V30; overallResult |= v0_v30_v31_mask; } - SingleTypeRegSet v0_v1_v31_mask = - (RBM_V0 | RBM_V1 | RBM_V31).GetFloatRegSet(); + SingleTypeRegSet v0_v1_v31_mask = SRBM_V0 | SRBM_V1 | SRBM_V31; if ((floatCandidates & v0_v1_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V31.GetFloatRegSet(); + consecutiveResult |= SRBM_V31; overallResult |= v0_v1_v31_mask; } break; } case 4: { - SingleTypeRegSet v0_v29_v30_v31_mask = - (RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31) - .GetFloatRegSet(); + SingleTypeRegSet v0_v29_v30_v31_mask = SRBM_V0 | SRBM_V29 | SRBM_V30 | SRBM_V31; if ((floatCandidates & v0_v29_v30_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V29.GetFloatRegSet(); + consecutiveResult |= SRBM_V29; overallResult |= v0_v29_v30_v31_mask; } - SingleTypeRegSet v0_v1_v30_v31_mask = - (RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31) - .GetFloatRegSet(); + SingleTypeRegSet v0_v1_v30_v31_mask = SRBM_V0 | SRBM_V29 | SRBM_V30 | SRBM_V31; if ((floatCandidates & v0_v1_v30_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V30.GetFloatRegSet(); + consecutiveResult |= SRBM_V30; overallResult |= v0_v1_v30_v31_mask; } - SingleTypeRegSet v0_v1_v2_v31_mask = - (RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31) - .GetFloatRegSet(); + SingleTypeRegSet v0_v1_v2_v31_mask = SRBM_V0 | SRBM_V29 | SRBM_V30 | SRBM_V31; if ((floatCandidates & v0_v1_v2_v31_mask) != RBM_NONE) { - consecutiveResult |= RBM_V31.GetFloatRegSet(); + consecutiveResult |= SRBM_V31; overallResult |= v0_v1_v2_v31_mask; } break; @@ -437,9 +429,8 @@ SingleTypeRegSet LinearScan::getConsecutiveCandidates(SingleTypeRegSet allCandi if (getStressLimitRegs() != LSRA_LIMIT_NONE) { // For stress, make only alternate registers available so we can stress the selection of free/busy registers. - floatFreeCandidates &= (RBM_V0 | RBM_V2 | RBM_V4 | RBM_V6 | RBM_V8 | RBM_V10 | RBM_V12 | RBM_V14 | RBM_V16 | - RBM_V18 | RBM_V20 | RBM_V22 | RBM_V24 | RBM_V26 | RBM_V28 | RBM_V30) - .GetFloatRegSet(); + floatFreeCandidates &= SRBM_V0 | SRBM_V2 | SRBM_V4 | SRBM_V6 | SRBM_V8 | SRBM_V10 | SRBM_V12 | SRBM_V14 | SRBM_V16 | + SRBM_V18 | SRBM_V20 | SRBM_V22 | SRBM_V24 | SRBM_V26 | SRBM_V28 | SRBM_V30; } #endif diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 117f7edfcfcacf..4669b0f297ae7e 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -919,7 +919,7 @@ regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode) if (isCopyBlk) { // rep movs kills RCX, RDI and RSI - killMask.AddGprRegs(RBM_RCX | RBM_RDI | RBM_RSI); + killMask.AddGprRegs(SRBM_RCX | SRBM_RDI | SRBM_RSI); } else { @@ -927,7 +927,7 @@ regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode) // (Note that the Data() node, if not constant, will be assigned to // RCX, but it's find that this kills it, as the value is not available // after this node in any case.) - killMask.AddGprRegs(RBM_RDI | RBM_RCX); + killMask.AddGprRegs(SRBM_RDI | SRBM_RCX); } break; #endif diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index d3a7f075fdd08c..4d2e0fd1b2bb5d 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -213,7 +213,7 @@ int LinearScan::BuildNode(GenTree* tree) { assert(tree->TypeGet() == TYP_INT); srcCount = 1; - BuildUse(tree->gtGetOp1(), RBM_INTRET); + BuildUse(tree->gtGetOp1(), RBM_INTRET.GetIntRegSet()); } break; @@ -442,11 +442,11 @@ int LinearScan::BuildNode(GenTree* tree) // Comparand is preferenced to RAX. // The remaining two operands can be in any reg other than RAX. - const SingleTypeRegSet nonRaxCandidates = availableIntRegs & ~RBM_RAX; + SingleTypeRegSet nonRaxCandidates = availableIntRegs & ~RBM_RAX.GetIntRegSet(); BuildUse(addr, nonRaxCandidates); - BuildUse(data, varTypeIsByte(tree) ? (nonRaxCandidates & RBM_BYTE_REGS) : nonRaxCandidates); - BuildUse(comparand, RBM_RAX); - BuildDef(tree, RBM_RAX); + BuildUse(data, varTypeIsByte(tree) ? (nonRaxCandidates & RBM_BYTE_REGS.GetIntRegSet()) : nonRaxCandidates); + BuildUse(comparand, RBM_RAX.GetIntRegSet()); + BuildDef(tree, RBM_RAX.GetIntRegSet()); } break; @@ -461,10 +461,10 @@ int LinearScan::BuildNode(GenTree* tree) assert(!varTypeIsByte(data)); // if tree's value is used, we'll emit a cmpxchg-loop idiom (requires RAX) - buildInternalIntRegisterDefForNode(tree, availableIntRegs & ~RBM_RAX); - BuildUse(addr, availableIntRegs & ~RBM_RAX); - BuildUse(data, availableIntRegs & ~RBM_RAX); - BuildDef(tree, RBM_RAX); + buildInternalIntRegisterDefForNode(tree, availableIntRegs & ~RBM_RAX.GetIntRegSet()); + BuildUse(addr, availableIntRegs & ~RBM_RAX.GetIntRegSet()); + BuildUse(data, availableIntRegs & ~RBM_RAX.GetIntRegSet()); + BuildDef(tree, RBM_RAX.GetIntRegSet()); buildInternalRegisterUses(); srcCount = 2; assert(dstCount == 1); @@ -485,7 +485,7 @@ int LinearScan::BuildNode(GenTree* tree) setDelayFree(addrUse); tgtPrefUse = addrUse; assert(!data->isContained()); - BuildUse(data, varTypeIsByte(tree) ? RBM_BYTE_REGS : RBM_NONE); + BuildUse(data, varTypeIsByte(tree) ? RBM_BYTE_REGS.GetIntRegSet() : RBM_NONE); srcCount = 2; assert(dstCount == 1); BuildDef(tree); @@ -597,7 +597,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CATCH_ARG: srcCount = 0; assert(dstCount == 1); - BuildDef(tree, RBM_EXCEPTION_OBJECT); + BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; #if defined(FEATURE_EH_WINDOWS_X86) @@ -1065,8 +1065,8 @@ int LinearScan::BuildShiftRotate(GenTree* tree) #endif else { - srcCandidates = availableIntRegs & ~RBM_RCX; - dstCandidates = availableIntRegs & ~RBM_RCX; + srcCandidates = availableIntRegs & ~SRBM_RCX; + dstCandidates = availableIntRegs & ~SRBM_RCX; } // Note that Rotate Left/Right instructions don't set ZF and SF flags. @@ -1120,8 +1120,8 @@ int LinearScan::BuildShiftRotate(GenTree* tree) { if (!shiftBy->isContained()) { - srcCount += BuildDelayFreeUses(shiftBy, source, RBM_RCX); - buildKillPositionsForNode(tree, currentLoc + 1, RBM_RCX); + srcCount += BuildDelayFreeUses(shiftBy, source, SRBM_RCX); + buildKillPositionsForNode(tree, currentLoc + 1, SRBM_RCX); } BuildDef(tree, dstCandidates); } @@ -1129,8 +1129,8 @@ int LinearScan::BuildShiftRotate(GenTree* tree) { if (!shiftBy->isContained()) { - srcCount += BuildOperandUses(shiftBy, RBM_RCX); - buildKillPositionsForNode(tree, currentLoc + 1, RBM_RCX); + srcCount += BuildOperandUses(shiftBy, SRBM_RCX); + buildKillPositionsForNode(tree, currentLoc + 1, SRBM_RCX); } } return srcCount; @@ -1197,7 +1197,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // The return value will be on the X87 stack, and we will need to move it. singleDstCandidates = allRegs(registerType); #else // !TARGET_X86 - singleDstCandidates = RBM_FLOATRET; + singleDstCandidates = RBM_FLOATRET.GetFloatRegSet(); #endif // !TARGET_X86 } else @@ -1206,11 +1206,11 @@ int LinearScan::BuildCall(GenTreeCall* call) if (registerType == TYP_LONG) { - singleDstCandidates = RBM_LNGRET; + singleDstCandidates = RBM_LNGRET.GetIntRegSet(); } else { - singleDstCandidates = RBM_INTRET; + singleDstCandidates = RBM_INTRET.GetIntRegSet(); } } } @@ -1329,7 +1329,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { // Fast tail call - make sure that call target is always computed in volatile registers // that will not be restored in the epilog sequence. - ctrlExprCandidates = RBM_INT_CALLEE_TRASH; + ctrlExprCandidates = RBM_INT_CALLEE_TRASH.GetIntRegSet(); } #ifdef TARGET_X86 else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT)) @@ -1353,7 +1353,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // Don't assign the call target to any of the argument registers because // we will use them to also pass floating point arguments as required // by Amd64 ABI. - ctrlExprCandidates = availableIntRegs & ~(RBM_ARG_REGS); + ctrlExprCandidates = availableIntRegs & ~(RBM_ARG_REGS.GetIntRegSet()); } srcCount += BuildOperandUses(ctrlExpr, ctrlExprCandidates); } @@ -1487,9 +1487,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) break; case GenTreeBlk::BlkOpKindRepInstr: - dstAddrRegMask = RBM_RDI; - srcRegMask = RBM_RAX; - sizeRegMask = RBM_RCX; + dstAddrRegMask = SRBM_RDI; + srcRegMask = SRBM_RAX; + sizeRegMask = SRBM_RCX; break; case GenTreeBlk::BlkOpKindLoop: @@ -1513,13 +1513,13 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) { case GenTreeBlk::BlkOpKindCpObjRepInstr: // We need the size of the contiguous Non-GC-region to be in RCX to call rep movsq. - sizeRegMask = RBM_RCX; + sizeRegMask = SRBM_RCX; FALLTHROUGH; case GenTreeBlk::BlkOpKindCpObjUnroll: // The srcAddr must be in a register. If it was under a GT_IND, we need to subsume all of its sources. - dstAddrRegMask = RBM_RDI; - srcRegMask = RBM_RSI; + dstAddrRegMask = SRBM_RDI; + srcRegMask = SRBM_RSI; break; case GenTreeBlk::BlkOpKindUnroll: @@ -1612,9 +1612,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) break; case GenTreeBlk::BlkOpKindRepInstr: - dstAddrRegMask = RBM_RDI; - srcRegMask = RBM_RSI; - sizeRegMask = RBM_RCX; + dstAddrRegMask = SRBM_RDI; + srcRegMask = SRBM_RSI; + sizeRegMask = SRBM_RCX; break; default: @@ -1931,13 +1931,13 @@ int LinearScan::BuildModDiv(GenTree* tree) { // We are interested in just the remainder. // RAX is used as a trashable register during computation of remainder. - dstCandidates = RBM_RDX; + dstCandidates = SRBM_RDX; } else { // We are interested in just the quotient. // RDX gets used as trashable register during computation of quotient - dstCandidates = RBM_RAX; + dstCandidates = SRBM_RAX; } #ifdef TARGET_X86 @@ -1964,12 +1964,12 @@ int LinearScan::BuildModDiv(GenTree* tree) #endif { // If possible would like to have op1 in RAX to avoid a register move. - RefPosition* op1Use = BuildUse(op1, RBM_EAX); + RefPosition* op1Use = BuildUse(op1, SRBM_EAX); tgtPrefUse = op1Use; srcCount = 1; } - srcCount += BuildDelayFreeUses(op2, op1, availableIntRegs & ~(RBM_RAX | RBM_RDX)); + srcCount += BuildDelayFreeUses(op2, op1, availableIntRegs & ~(SRBM_RAX | SRBM_RDX)); buildInternalRegisterUses(); @@ -2312,7 +2312,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // MaskMove hardcodes the destination (op3) in DI/EDI/RDI srcCount += BuildOperandUses(op1, BuildEvexIncompatibleMask(op1)); srcCount += BuildOperandUses(op2, BuildEvexIncompatibleMask(op2)); - srcCount += BuildOperandUses(op3, RBM_EDI); + srcCount += BuildOperandUses(op3, SRBM_EDI); buildUses = false; break; @@ -2332,7 +2332,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou srcCount += 1; srcCount += op2->isContained() ? BuildOperandUses(op2, BuildEvexIncompatibleMask(op2)) : BuildDelayFreeUses(op2, op1, BuildEvexIncompatibleMask(op2)); - srcCount += BuildDelayFreeUses(op3, op1, RBM_XMM0); + srcCount += BuildDelayFreeUses(op3, op1, SRBM_XMM0); buildUses = false; } @@ -2381,8 +2381,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou assert(isRMW); // DIV implicitly put op1(lower) to EAX and op2(upper) to EDX - srcCount += BuildOperandUses(op1, RBM_EAX); - srcCount += BuildOperandUses(op2, RBM_EDX); + srcCount += BuildOperandUses(op1, SRBM_EAX); + srcCount += BuildOperandUses(op2, SRBM_EDX); if (!op3->isContained()) { @@ -2405,8 +2405,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } // result put in EAX and EDX - BuildDef(intrinsicTree, RBM_EAX, 0); - BuildDef(intrinsicTree, RBM_EDX, 1); + BuildDef(intrinsicTree, SRBM_EAX, 0); + BuildDef(intrinsicTree, SRBM_EDX, 1); buildUses = false; break; @@ -2416,7 +2416,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou case NI_BMI2_X64_MultiplyNoFlags: { assert(numArgs == 2 || numArgs == 3); - srcCount += BuildOperandUses(op1, RBM_EDX); + srcCount += BuildOperandUses(op1, SRBM_EDX); srcCount += BuildOperandUses(op2); if (numArgs == 3) { @@ -3073,13 +3073,13 @@ int LinearScan::BuildMul(GenTree* tree) // Here we set RAX as the only destination candidate // In LSRA we set the kill set for this operation to RBM_RAX|RBM_RDX // - dstCandidates = RBM_RAX; + dstCandidates = SRBM_RAX; } else if (tree->OperGet() == GT_MULHI) { // Have to use the encoding:RDX:RAX = RAX * rm. Since we only care about the // upper 32 bits of the result set the destination candidate to REG_RDX. - dstCandidates = RBM_RDX; + dstCandidates = SRBM_RDX; } #if defined(TARGET_X86) else if (tree->OperGet() == GT_MUL_LONG) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 9574c1f8ba28e5..11f8ec1e937f4f 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -121,8 +121,8 @@ enum _regNumber_enum : unsigned enum _regMask_enum : uint64_t { RBM_NONE = 0, -#define REGDEF(name, rnum, mask, sname) RBM_##name = mask, -#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, +#define REGDEF(name, rnum, mask, sname) SRBM_##name = mask, +#define REGALIAS(alias, realname) SRBM_##alias = SRBM_##realname, #include "register.h" }; @@ -139,26 +139,13 @@ enum _regNumber_enum : unsigned ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs) }; -typedef uint64_t _regMask_enum; -typedef _regNumber_enum regNumber; -typedef unsigned char regNumberSmall; -typedef uint64_t regMaskSmall; - -//enum _regMask_enum : uint64_t{ -// RBM_NONE = 0, -//#define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask, -//#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, -//#include "register.h" -//}; - -//struct regMaskTP; -// -//#define REGDEF(name, index, value, x_name, w_name) \ -// static constexpr regMaskTP RBM_##name = regMaskTP::CreateFromRegNum(static_cast(index), static_cast(value)); -// -// #include "register.h" -static constexpr uint64_t RBM_NONE = 0ULL; +enum _regMask_enum : uint64_t{ + RBM_NONE = 0, +#define REGDEF(name, rnum, mask, xname, wname) SRBM_##name = mask, +#define REGALIAS(alias, realname) SRBM_##alias = SRBM_##realname, +#include "register.h" +}; #elif defined(TARGET_AMD64) @@ -178,8 +165,8 @@ enum _regMask_enum : uint64_t { RBM_NONE = 0, -#define REGDEF(name, rnum, mask, sname) RBM_##name = mask, -#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, +#define REGDEF(name, rnum, mask, sname) SRBM_##name = mask, +#define REGALIAS(alias, realname) SRBM_##alias = SRBM_##realname, #include "register.h" }; @@ -200,8 +187,8 @@ enum _regMask_enum : unsigned { RBM_NONE = 0, -#define REGDEF(name, rnum, mask, sname) RBM_##name = mask, -#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, +#define REGDEF(name, rnum, mask, sname) SRBM_##name = mask, +#define REGALIAS(alias, realname) SRBM_##alias = SRBM_##realname, #include "register.h" }; @@ -209,6 +196,9 @@ enum _regMask_enum : unsigned #error Unsupported target architecture #endif +typedef _regNumber_enum regNumber; +typedef unsigned char regNumberSmall; + #define AVAILABLE_REG_COUNT get_AVAILABLE_REG_COUNT() /*****************************************************************************/ @@ -222,9 +212,6 @@ enum _regMask_enum : unsigned // In any case, we believe that is OK to freely cast between these types; no information will // be lost. -typedef _regNumber_enum regNumber; - - #if REGMASK_BITS == 8 typedef unsigned char regMaskSmall; @@ -442,13 +429,44 @@ struct regMaskTP } }; -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); -#define REGDEF(name, index, value, x_name, w_name) \ - static constexpr regMaskTP RBM_##name = regMaskTP::CreateFromRegNum(static_cast(index), static_cast(value)); +#define REGDEF(name, rnum, mask, sname) \ + static constexpr regMaskTP RBM_##name = \ + regMaskTP::CreateFromRegNum(static_cast(rnum), static_cast(mask)); #define REGALIAS(alias, realname) static constexpr regMaskTP RBM_##alias = RBM_##realname; #include "register.h" + +#elif defined(TARGET_ARM64) + +static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); +#define REGDEF(name, rnum, mask, xname, wname) \ + static constexpr regMaskTP RBM_##name = \ + regMaskTP::CreateFromRegNum(static_cast(rnum), static_cast(mask)); +#define REGALIAS(alias, realname) static constexpr regMaskTP RBM_##alias = RBM_##realname; + #include "register.h" + +#elif defined(TARGET_AMD64) + +static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); +#define REGDEF(name, rnum, mask, sname) \ + static constexpr regMaskTP RBM_##name = \ + regMaskTP::CreateFromRegNum(static_cast(rnum), static_cast(mask)); +#define REGALIAS(alias, realname) static constexpr regMaskTP RBM_##alias = RBM_##realname; + #include "register.h" + +#elif defined(TARGET_X86) + +static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); +#define REGDEF(name, rnum, mask, sname) \ + static constexpr regMaskTP RBM_##name = \ + regMaskTP::CreateFromRegNum(static_cast(rnum), static_cast(mask)); +#define REGALIAS(alias, realname) static constexpr regMaskTP RBM_##alias = RBM_##realname; + #include "register.h" + +#else +#error Unsupported target architecture #endif From e150ec5f5f9c2233077ca2bc7df8d9f2df7f48c8 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 7 Jun 2024 14:08:46 -0700 Subject: [PATCH 03/20] make clrjit_universal_arm_x64 build --- src/coreclr/jit/emitarm.cpp | 6 +++--- src/coreclr/jit/lsra.cpp | 6 +++--- src/coreclr/jit/lsraarm.cpp | 4 ++-- src/coreclr/jit/lsraarmarch.cpp | 6 +++--- src/coreclr/jit/lsrabuild.cpp | 6 +++--- src/coreclr/jit/targetarm.cpp | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/emitarm.cpp b/src/coreclr/jit/emitarm.cpp index f714e49bf4e3c6..fd0a27f61818e1 100644 --- a/src/coreclr/jit/emitarm.cpp +++ b/src/coreclr/jit/emitarm.cpp @@ -6867,9 +6867,9 @@ void emitter::emitDispRegmask(int imm, bool encodedPC_LR) } else { - hasPC = (imm & RBM_PC) != 0; - hasLR = (imm & RBM_LR) != 0; - imm &= ~(RBM_PC | RBM_LR); + hasPC = (imm & SRBM_PC) != 0; + hasLR = (imm & SRBM_LR) != 0; + imm &= ~(SRBM_PC | SRBM_LR); } regNumber reg = REG_R0; diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index d1bf29e88d928d..204d16852e6dce 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -8803,7 +8803,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, { // Exclude any doubles for which the odd half isn't in freeRegs, // and restrict down to just the even part of the even/odd pair. - freeRegs &= (freeRegs & RBM_ALLDOUBLE_HIGH) >> 1; + freeRegs &= (freeRegs & RBM_ALLDOUBLE_HIGH.GetFloatRegSet()) >> 1; } #endif @@ -13583,7 +13583,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* // clause below creates a mask to do this. if (currentInterval->registerType == TYP_DOUBLE) { - candidates &= ~((busyRegs & RBM_ALLDOUBLE_HIGH) >> 1); + candidates &= ~((busyRegs & RBM_ALLDOUBLE_HIGH.GetFloatRegSet()) >> 1); } #endif // TARGET_ARM @@ -13906,7 +13906,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( // clause below creates a mask to do this. if (currentInterval->registerType == TYP_DOUBLE) { - candidates &= ~((busyRegs & RBM_ALLDOUBLE_HIGH) >> 1); + candidates &= ~((busyRegs & RBM_ALLDOUBLE_HIGH.GetFloatRegSet()) >> 1); } #endif // TARGET_ARM diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index fc77279eabb75e..a0ed0524b0e4e5 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -482,7 +482,7 @@ int LinearScan::BuildNode(GenTree* tree) { assert(tree->TypeGet() == TYP_INT); srcCount = 1; - BuildUse(tree->gtGetOp1(), RBM_INTRET); + BuildUse(tree->gtGetOp1(), RBM_INTRET.GetIntRegSet()); } break; @@ -628,7 +628,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CATCH_ARG: srcCount = 0; assert(dstCount == 1); - BuildDef(tree, RBM_EXCEPTION_OBJECT); + BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; case GT_COPY: diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index bd54be8fd961c9..54b57649128715 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -215,7 +215,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // the target. We do not handle these constraints on the same // refposition too well so we help ourselves a bit here by forcing the // null check with LR. - SingleTypeRegSet candidates = call->IsFastTailCall() ? RBM_LR : RBM_NONE; + SingleTypeRegSet candidates = call->IsFastTailCall() ? SRBM_LR : RBM_NONE; buildInternalIntRegisterDefForNode(call, candidates); } #endif // TARGET_ARM @@ -229,7 +229,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { // The ARM CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers. - singleDstCandidates = RBM_PINVOKE_TCB; + singleDstCandidates = RBM_PINVOKE_TCB.GetIntRegSet(); } else #endif // TARGET_ARM @@ -884,7 +884,7 @@ int LinearScan::BuildCast(GenTreeCast* cast) // Floating point to integer casts requires a temporary register. if (varTypeIsFloating(srcType) && !varTypeIsFloating(castType)) { - buildInternalFloatRegisterDefForNode(cast, RBM_ALLFLOAT); + buildInternalFloatRegisterDefForNode(cast, RBM_ALLFLOAT.GetFloatRegSet()); setInternalRegsDelayFree = true; } #endif diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 4669b0f297ae7e..3f651a12ab98ad 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -867,7 +867,7 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) #ifdef TARGET_ARM if (call->IsVirtualStub()) { - killMask.AddGprRegs(compiler->virtualStubParamInfo->GetRegMask()); + killMask.AddGprRegs(compiler->virtualStubParamInfo->GetRegMask().GetIntRegSet()); } #else // !TARGET_ARM // Verify that the special virtual stub call registers are in the kill mask. @@ -4200,8 +4200,8 @@ int LinearScan::BuildReturn(GenTree* tree) assert((op1->OperGet() == GT_LONG) && op1->isContained()); GenTree* loVal = op1->gtGetOp1(); GenTree* hiVal = op1->gtGetOp2(); - BuildUse(loVal, RBM_LNGRET_LO); - BuildUse(hiVal, RBM_LNGRET_HI); + BuildUse(loVal, RBM_LNGRET_LO.GetIntRegSet()); + BuildUse(hiVal, RBM_LNGRET_HI.GetIntRegSet()); return 2; } else diff --git a/src/coreclr/jit/targetarm.cpp b/src/coreclr/jit/targetarm.cpp index 675fd04230d531..0833bfbaf0a106 100644 --- a/src/coreclr/jit/targetarm.cpp +++ b/src/coreclr/jit/targetarm.cpp @@ -24,7 +24,7 @@ const regNumber fltArgRegs [] = {REG_F0, REG_F1, REG_F2, REG_F3, REG_F4, REG_F5, const regMaskTP fltArgMasks[] = {RBM_F0, RBM_F1, RBM_F2, RBM_F3, RBM_F4, RBM_F5, RBM_F6, RBM_F7, RBM_F8, RBM_F9, RBM_F10, RBM_F11, RBM_F12, RBM_F13, RBM_F14, RBM_F15 }; // clang-format on -static_assert_no_msg(RBM_ALLDOUBLE == (RBM_ALLDOUBLE_HIGH >> 1)); +//static_assert_no_msg(RBM_ALLDOUBLE == (RBM_ALLDOUBLE_HIGH >> 1)); //----------------------------------------------------------------------------- // Arm32Classifier: From f7e759fc08a28781b0554c5d680583e263569435 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 7 Jun 2024 16:32:28 -0700 Subject: [PATCH 04/20] make everything else build --- src/coreclr/jit/emit.cpp | 2 +- src/coreclr/jit/lsra.cpp | 4 ++-- src/coreclr/jit/lsrabuild.cpp | 12 ++++++------ src/coreclr/jit/lsraxarch.cpp | 26 +++++++++++++------------- src/coreclr/jit/targetamd64.h | 2 +- src/coreclr/jit/targetx86.h | 2 +- 6 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 9e68a57805e42a..5faaae35ebdd63 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -8787,7 +8787,7 @@ void emitter::emitRecordGCcall(BYTE* codePos, unsigned char callInstrSize) callDsc* call; #ifdef JIT32_GCENCODER - unsigned regs = (unsigned)(emitThisGCrefRegs | emitThisByrefRegs) & ~RBM_INTRET; + unsigned regs = (unsigned)((emitThisGCrefRegs | emitThisByrefRegs) & ~RBM_INTRET).GetIntRegSet(); // The JIT32 GCInfo encoder allows us to (as the comment previously here said): // "Bail if this is a totally boring call", but the GCInfoEncoder/Decoder interface diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 204d16852e6dce..de190417c8b546 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -243,7 +243,7 @@ SingleTypeRegSet LinearScan::allRegs(RegisterType rt) SingleTypeRegSet LinearScan::allByteRegs() { #ifdef TARGET_X86 - return availableIntRegs & RBM_BYTE_REGS; + return availableIntRegs & RBM_BYTE_REGS.GetIntRegSet(); #else return availableIntRegs; #endif @@ -870,7 +870,7 @@ LinearScan::LinearScan(Compiler* theCompiler) // When the EnC option is set we have an exact set of registers that we always save // that are also available in future versions. availableIntRegs &= ~(RBM_INT_CALLEE_SAVED | RBM_ENC_CALLEE_SAVED).GetIntRegSet(); - availableFloatRegs &= ~(RBM_FLT_CALLEE_SAVED).GetFloatRegSet(); + availableFloatRegs &= ~RBM_FLT_CALLEE_SAVED.GetFloatRegSet(); availableDoubleRegs &= ~RBM_FLT_CALLEE_SAVED.GetFloatRegSet(); #if defined(TARGET_XARCH) availableMaskRegs &= ~RBM_MSK_CALLEE_SAVED; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 3f651a12ab98ad..0a5300f1ce2810 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -853,10 +853,10 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call) #if defined(TARGET_XARCH) #ifdef TARGET_AMD64 - killMask.RemoveRegsetForType(RBM_FLT_CALLEE_TRASH.getLow(), FloatRegisterType); - killMask.RemoveRegsetForType(RBM_MSK_CALLEE_TRASH.getLow(), MaskRegisterType); + killMask.RemoveRegsetForType(RBM_FLT_CALLEE_TRASH.GetFloatRegSet(), FloatRegisterType); + killMask.RemoveRegsetForType(RBM_MSK_CALLEE_TRASH.GetPredicateRegSet(), MaskRegisterType); #else - killMask.RemoveRegsetForType(RBM_FLT_CALLEE_TRASH, FloatRegisterType); + killMask.RemoveRegsetForType(RBM_FLT_CALLEE_TRASH.GetFloatRegSet(), FloatRegisterType); killMask &= ~RBM_MSK_CALLEE_TRASH; #endif // TARGET_AMD64 @@ -3121,7 +3121,7 @@ RefPosition* LinearScan::BuildDef(GenTree* tree, SingleTypeRegSet dstCandidates, { dstCandidates = availableIntRegs; } - dstCandidates &= ~RBM_NON_BYTE_REGS; + dstCandidates &= ~RBM_NON_BYTE_REGS.GetIntRegSet(); assert(dstCandidates != RBM_NONE); } #endif // TARGET_X86 @@ -4516,8 +4516,8 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree) // Special write barrier: // op1 (addr) goes into REG_OPTIMIZED_WRITE_BARRIER_DST (rdx) and // op2 (src) goes into any int register. - addrCandidates = RBM_OPTIMIZED_WRITE_BARRIER_DST; - srcCandidates = RBM_OPTIMIZED_WRITE_BARRIER_SRC; + addrCandidates = RBM_OPTIMIZED_WRITE_BARRIER_DST.GetIntRegSet(); + srcCandidates = RBM_OPTIMIZED_WRITE_BARRIER_SRC.GetIntRegSet(); } #endif // defined(TARGET_X86) && NOGC_WRITE_BARRIERS diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 4d2e0fd1b2bb5d..763f60a29629e9 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -195,7 +195,7 @@ int LinearScan::BuildNode(GenTree* tree) #ifdef SWIFT_SUPPORT case GT_SWIFT_ERROR_RET: - BuildUse(tree->gtGetOp1(), RBM_SWIFT_ERROR); + BuildUse(tree->gtGetOp1(), RBM_SWIFT_ERROR.GetIntRegSet()); // Plus one for error register srcCount = BuildReturn(tree) + 1; killMask = getKillSetForReturn(); @@ -577,7 +577,7 @@ int LinearScan::BuildNode(GenTree* tree) if (varTypeIsByte(tree)) { // on X86 we have to use byte-able regs for byte-wide loads - BuildUse(tree->gtGetOp1(), RBM_BYTE_REGS); + BuildUse(tree->gtGetOp1(), RBM_BYTE_REGS.GetIntRegSet()); srcCount = 1; break; } @@ -652,7 +652,7 @@ int LinearScan::BuildNode(GenTree* tree) // and we know REG_SWIFT_ERROR should be busy up to this point, anyway. // By forcing LSRA to use REG_SWIFT_ERROR as both the source and destination register, // we can ensure the redundant move is elided. - BuildDef(tree, RBM_SWIFT_ERROR); + BuildDef(tree, RBM_SWIFT_ERROR.GetIntRegSet()); break; #endif // SWIFT_SUPPORT @@ -1185,7 +1185,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the // correct argument registers. - singleDstCandidates = RBM_PINVOKE_TCB; + singleDstCandidates = RBM_PINVOKE_TCB.GetIntRegSet(); } else #endif // TARGET_X86 @@ -1342,7 +1342,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // Where EAX is also used as an argument to the stub dispatch helper. Make // sure that the call target address is computed into EAX in this case. assert(ctrlExpr->isIndir() && ctrlExpr->isContained()); - ctrlExprCandidates = RBM_VIRTUAL_STUB_TARGET; + ctrlExprCandidates = RBM_VIRTUAL_STUB_TARGET.GetIntRegSet(); } #endif // TARGET_X86 @@ -1676,7 +1676,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (internalIsByte && (useCount >= BYTE_REG_COUNT)) { noway_assert(internalIntDef != nullptr); - internalIntDef->registerAssignment = RBM_RAX; + internalIntDef->registerAssignment = SRBM_RAX; } #endif @@ -1834,9 +1834,9 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk) #ifndef TARGET_X86 case GenTreePutArgStk::Kind::PartialRepInstr: #endif - buildInternalIntRegisterDefForNode(putArgStk, RBM_RDI); - buildInternalIntRegisterDefForNode(putArgStk, RBM_RCX); - buildInternalIntRegisterDefForNode(putArgStk, RBM_RSI); + buildInternalIntRegisterDefForNode(putArgStk, SRBM_RDI); + buildInternalIntRegisterDefForNode(putArgStk, SRBM_RCX); + buildInternalIntRegisterDefForNode(putArgStk, SRBM_RSI); break; #ifdef TARGET_X86 @@ -1956,8 +1956,8 @@ int LinearScan::BuildModDiv(GenTree* tree) // This situation also requires an internal register. buildInternalIntRegisterDefForNode(tree); - BuildUse(loVal, RBM_EAX); - BuildUse(hiVal, RBM_EDX); + BuildUse(loVal, SRBM_EAX); + BuildUse(hiVal, SRBM_EDX); srcCount = 2; } else @@ -2963,7 +2963,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) } if ((nonMemSource != nullptr) && !nonMemSource->isContained() && varTypeIsByte(indirTree)) { - srcCandidates = RBM_BYTE_REGS; + srcCandidates = RBM_BYTE_REGS.GetIntRegSet(); } if (otherIndir != nullptr) { @@ -3085,7 +3085,7 @@ int LinearScan::BuildMul(GenTree* tree) else if (tree->OperGet() == GT_MUL_LONG) { // have to use the encoding:RDX:RAX = RAX * rm - dstCandidates = RBM_RAX | RBM_RDX; + dstCandidates = SRBM_RAX | SRBM_RDX; dstCount = 2; } #endif diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 1575177643eb1e..11c928003a5118 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -130,7 +130,7 @@ #define RBM_INT_CALLEE_SAVED (RBM_EBX|RBM_ETW_FRAMED_EBP|RBM_R12|RBM_R13|RBM_R14|RBM_R15) #define RBM_INT_CALLEE_TRASH (RBM_EAX|RBM_RDI|RBM_RSI|RBM_EDX|RBM_ECX|RBM_R8|RBM_R9|RBM_R10|RBM_R11) - #define RBM_FLT_CALLEE_SAVED (0) + #define RBM_FLT_CALLEE_SAVED RegMaskTP_NONE /* NOTE: Sync with variable name defined in compiler.h */ #define RBM_FLT_CALLEE_TRASH_INIT (RBM_XMM0|RBM_XMM1|RBM_XMM2|RBM_XMM3|RBM_XMM4|RBM_XMM5|RBM_XMM6|RBM_XMM7| \ diff --git a/src/coreclr/jit/targetx86.h b/src/coreclr/jit/targetx86.h index aa142f6cff0de4..a7567dbe02e9c1 100644 --- a/src/coreclr/jit/targetx86.h +++ b/src/coreclr/jit/targetx86.h @@ -289,7 +289,7 @@ #define RBM_LNGRET_HI RBM_EDX #define REG_FLOATRET REG_NA - #define RBM_FLOATRET RBM_NONE + #define RBM_FLOATRET RegMaskTP_NONE #define RBM_DOUBLERET RBM_NONE // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper From f887a6d84297165022d66a3b793828602e5659f1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 7 Jun 2024 17:06:07 -0700 Subject: [PATCH 05/20] fix some static asserts --- src/coreclr/jit/codegenxarch.cpp | 6 +++--- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/target.h | 16 ++++++++-------- src/coreclr/jit/targetarm.cpp | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index aa6e9ab94319c7..c933c7e14d0d21 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2449,8 +2449,8 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG, /* canSkip */ false); } #else // !TARGET_X86 - //static_assert_no_msg((RBM_STACK_PROBE_HELPER_ARG & (RBM_SECRET_STUB_PARAM | RBM_DEFAULT_HELPER_CALL_TARGET)) == - // RegMaskTP_NONE); + static_assert_no_msg((RBM_STACK_PROBE_HELPER_ARG & (RBM_SECRET_STUB_PARAM | RBM_DEFAULT_HELPER_CALL_TARGET)) == + RegMaskTP_NONE); GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, -(int)frameSize); regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG); @@ -2462,7 +2462,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni *pInitRegZeroed = false; } - //static_assert_no_msg((RBM_STACK_PROBE_HELPER_TRASH & RBM_STACK_PROBE_HELPER_ARG) == RegMaskTP_NONE); + static_assert_no_msg((RBM_STACK_PROBE_HELPER_TRASH & RBM_STACK_PROBE_HELPER_ARG) == RegMaskTP_NONE); GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG, /* canSkip */ false); #endif // !TARGET_X86 diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index b79b8042e3b604..94cbf581a8e578 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9851,7 +9851,7 @@ class Compiler // On these platforms we assume the register that the target is // passed in is preserved by the validator and take care to get the // target from the register for the call (even in debug mode). - //static_assert_no_msg((RBM_VALIDATE_INDIRECT_CALL_TRASH & (1 << REG_VALIDATE_INDIRECT_CALL_ADDR)) == 0); + static_assert_no_msg((RBM_VALIDATE_INDIRECT_CALL_TRASH & regMaskTP(1 << REG_VALIDATE_INDIRECT_CALL_ADDR, RBM_NONE)) == RegMaskTP_NONE); if (JitConfig.JitForceControlFlowGuard()) return true; diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 11f8ec1e937f4f..1dd09d2812b9db 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -312,7 +312,7 @@ struct regMaskTP { } - regMaskTP(regMaskSmall regMask) + constexpr regMaskTP(regMaskSmall regMask) : low(regMask) #ifdef HAS_MORE_THAN_64_REGISTERS , high(RBM_NONE) @@ -490,7 +490,7 @@ static constexpr regMaskTP operator&(const regMaskTP& first, const regMaskTP& se return result; } -static regMaskTP operator|(const regMaskTP& first, const regMaskTP& second) +static constexpr regMaskTP operator|(const regMaskTP& first, const regMaskTP& second) { regMaskTP result(first.getLow() | second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() | second.getHigh())); return result; @@ -535,7 +535,7 @@ static regMaskTP& operator<<=(regMaskTP& first, const int b) } #endif -static regMaskTP operator>>(regMaskTP first, const int b) +static constexpr regMaskTP operator>>(regMaskTP first, const int b) { regMaskTP result(first.getLow() >> b); return result; @@ -547,7 +547,7 @@ static regMaskTP& operator>>=(regMaskTP& first, const int b) return first; } -static regMaskTP operator~(const regMaskTP& first) +static constexpr regMaskTP operator~(const regMaskTP& first) { regMaskTP result(~first.getLow() MORE_THAN_64_REGISTERS_ARG(~first.getHigh())); return result; @@ -1177,13 +1177,13 @@ inline bool isFloatRegType(var_types type) /*****************************************************************************/ // Some sanity checks on some of the register masks // Stack pointer is never part of RBM_ALLINT -//C_ASSERT((RBM_ALLINT & RBM_SPBASE) == RegMaskTP_NONE); -//C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_SPBASE) == RegMaskTP_NONE); +C_ASSERT((RBM_ALLINT & RBM_SPBASE) == RegMaskTP_NONE); +C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_SPBASE) == RegMaskTP_NONE); #if ETW_EBP_FRAMED // Frame pointer isn't either if we're supporting ETW frame chaining -//C_ASSERT((RBM_ALLINT & RBM_FPBASE) == RegMaskTP_NONE); -//C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_FPBASE) == RegMaskTP_NONE); +C_ASSERT((RBM_ALLINT & RBM_FPBASE) == RegMaskTP_NONE); +C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_FPBASE) == RegMaskTP_NONE); #endif /*****************************************************************************/ diff --git a/src/coreclr/jit/targetarm.cpp b/src/coreclr/jit/targetarm.cpp index 0833bfbaf0a106..675fd04230d531 100644 --- a/src/coreclr/jit/targetarm.cpp +++ b/src/coreclr/jit/targetarm.cpp @@ -24,7 +24,7 @@ const regNumber fltArgRegs [] = {REG_F0, REG_F1, REG_F2, REG_F3, REG_F4, REG_F5, const regMaskTP fltArgMasks[] = {RBM_F0, RBM_F1, RBM_F2, RBM_F3, RBM_F4, RBM_F5, RBM_F6, RBM_F7, RBM_F8, RBM_F9, RBM_F10, RBM_F11, RBM_F12, RBM_F13, RBM_F14, RBM_F15 }; // clang-format on -//static_assert_no_msg(RBM_ALLDOUBLE == (RBM_ALLDOUBLE_HIGH >> 1)); +static_assert_no_msg(RBM_ALLDOUBLE == (RBM_ALLDOUBLE_HIGH >> 1)); //----------------------------------------------------------------------------- // Arm32Classifier: From c50b0297127d3856b8c783850edc1e7accd55f1a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 7 Jun 2024 17:07:33 -0700 Subject: [PATCH 06/20] jit format --- src/coreclr/jit/compiler.h | 5 ++-- src/coreclr/jit/emitinl.h | 2 +- src/coreclr/jit/gcencode.cpp | 6 +++-- src/coreclr/jit/lsra.cpp | 15 ++++++----- src/coreclr/jit/lsraarm64.cpp | 7 ++--- src/coreclr/jit/lsraarmarch.cpp | 10 +++---- src/coreclr/jit/lsrabuild.cpp | 13 +++++----- src/coreclr/jit/lsraxarch.cpp | 2 +- src/coreclr/jit/target.h | 46 +++++++++++++++------------------ 9 files changed, 52 insertions(+), 54 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 94cbf581a8e578..9fc137ec610bd9 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8323,7 +8323,7 @@ class Compiler } private: - regNumber reg; + regNumber reg; regMaskTP regMask; }; @@ -9851,7 +9851,8 @@ class Compiler // On these platforms we assume the register that the target is // passed in is preserved by the validator and take care to get the // target from the register for the call (even in debug mode). - static_assert_no_msg((RBM_VALIDATE_INDIRECT_CALL_TRASH & regMaskTP(1 << REG_VALIDATE_INDIRECT_CALL_ADDR, RBM_NONE)) == RegMaskTP_NONE); + static_assert_no_msg((RBM_VALIDATE_INDIRECT_CALL_TRASH & + regMaskTP(1 << REG_VALIDATE_INDIRECT_CALL_ADDR, RBM_NONE)) == RegMaskTP_NONE); if (JitConfig.JitForceControlFlowGuard()) return true; diff --git a/src/coreclr/jit/emitinl.h b/src/coreclr/jit/emitinl.h index 2d8c1cde28226a..26c48e604830b5 100644 --- a/src/coreclr/jit/emitinl.h +++ b/src/coreclr/jit/emitinl.h @@ -404,7 +404,7 @@ inline ssize_t emitter::emitGetInsAmdAny(const instrDesc* id) const /*static*/ inline unsigned emitter::emitDecodeCallGCregs(instrDesc* id) { regMaskTP regmask = RBM_NONE; - unsigned encodeMask; + unsigned encodeMask; #ifdef TARGET_X86 assert(REGNUM_BITS >= 3); diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index 68592d175da340..a8194fb26c532e 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -4558,9 +4558,11 @@ void GCInfo::gcMakeRegPtrTable( { // This is a true call site. - regMaskSmall gcrefRegMask = genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallGCrefRegs).GetIntRegSet(); + regMaskSmall gcrefRegMask = + genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallGCrefRegs).GetIntRegSet(); - regMaskSmall byrefRegMask = genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallByrefRegs).GetIntRegSet(); + regMaskSmall byrefRegMask = + genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallByrefRegs).GetIntRegSet(); assert((gcrefRegMask & byrefRegMask) == 0); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index de190417c8b546..2237d8f810ad2f 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -522,12 +522,10 @@ SingleTypeRegSet LinearScan::getConstrainedRegMask(RefPosition* refPosition, #if defined(TARGET_AMD64) #ifdef UNIX_AMD64_ABI // On System V the RDI and RSI are not callee saved. Use R12 ans R13 as callee saved registers. -static const regMaskTP LsraLimitSmallIntSet = - (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13); +static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13); #else // !UNIX_AMD64_ABI // On Windows Amd64 use the RDI and RSI as callee saved registers. -static const regMaskTP LsraLimitSmallIntSet = - (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI); +static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI); #endif // !UNIX_AMD64_ABI static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); static const regMaskTP LsraLimitUpperSimdSet = @@ -592,7 +590,8 @@ SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, RegisterT case LSRA_LIMIT_CALLEE: if (!compiler->opts.compDbgEnC) { - mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_SAVED.GetRegSetForType(regType), minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_SAVED.GetRegSetForType(regType), + minRegCount); } break; @@ -606,7 +605,8 @@ SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, RegisterT case LSRA_LIMIT_SMALL_SET: if ((mask & LsraLimitSmallIntSet) != RBM_NONE) { - mask = getConstrainedRegMask(refPosition, regType, mask, LsraLimitSmallIntSet.GetRegSetForType(regType), minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, + LsraLimitSmallIntSet.GetRegSetForType(regType), minRegCount); } else if ((mask & LsraLimitSmallFPSet) != RBM_NONE) { @@ -841,7 +841,8 @@ LinearScan::LinearScan(Compiler* theCompiler) // Note: one known reason why we exclude LR is because NativeAOT has dependency on not // using LR as a GPR. See: https://github.com/dotnet/runtime/issues/101932 // Once that is addressed, we may consider allowing LR in availableIntRegs. - availableIntRegs = (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd).GetIntRegSet(); + availableIntRegs = + (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd).GetIntRegSet(); #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd).GetIntRegSet(); #else diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index de76729139070c..ea444c5cea320c 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -429,8 +429,8 @@ SingleTypeRegSet LinearScan::getConsecutiveCandidates(SingleTypeRegSet allCandi if (getStressLimitRegs() != LSRA_LIMIT_NONE) { // For stress, make only alternate registers available so we can stress the selection of free/busy registers. - floatFreeCandidates &= SRBM_V0 | SRBM_V2 | SRBM_V4 | SRBM_V6 | SRBM_V8 | SRBM_V10 | SRBM_V12 | SRBM_V14 | SRBM_V16 | - SRBM_V18 | SRBM_V20 | SRBM_V22 | SRBM_V24 | SRBM_V26 | SRBM_V28 | SRBM_V30; + floatFreeCandidates &= SRBM_V0 | SRBM_V2 | SRBM_V4 | SRBM_V6 | SRBM_V8 | SRBM_V10 | SRBM_V12 | SRBM_V14 | + SRBM_V16 | SRBM_V18 | SRBM_V20 | SRBM_V22 | SRBM_V24 | SRBM_V26 | SRBM_V28 | SRBM_V30; } #endif @@ -1639,7 +1639,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (isRMW) { srcCount += BuildDelayFreeUses(intrin.op2, nullptr); - srcCount += BuildDelayFreeUses(intrin.op3, nullptr, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS.GetFloatRegSet()); + srcCount += + BuildDelayFreeUses(intrin.op3, nullptr, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS.GetFloatRegSet()); } else { diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 54b57649128715..ba74842dc8ac6e 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -171,8 +171,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { // Fast tail call - make sure that call target is always computed in volatile registers // that will not be overridden by epilog sequence. - ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH.GetIntRegSet() & - ~RBM_LR.GetIntRegSet(); + ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH.GetIntRegSet() & ~RBM_LR.GetIntRegSet(); if (compiler->getNeedsGSSecurityCookie()) { ctrlExprCandidates &= @@ -699,8 +698,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // a temporary register to perform the sequence of loads and stores. // We can't use the special Write Barrier registers, so exclude them from the mask SingleTypeRegSet internalIntCandidates = - allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF) - .GetRegSetForType(IntRegisterType); + allRegs(TYP_INT) & + ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF).GetRegSetForType(IntRegisterType); buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); if (size >= 2 * REGSIZE_BYTES) @@ -718,8 +717,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) } // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF. - dstAddrRegMask = - RBM_WRITE_BARRIER_DST_BYREF.GetIntRegSet(); + dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF.GetIntRegSet(); // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 0a5300f1ce2810..b3b78762ccac67 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1199,9 +1199,9 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo if (compiler->killGCRefs(tree)) { - RefPosition* pos = - newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree, (availableIntRegs & ~RBM_ARG_REGS.GetIntRegSet())); - insertedKills = true; + RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree, + (availableIntRegs & ~RBM_ARG_REGS.GetIntRegSet())); + insertedKills = true; } return insertedKills; @@ -1517,8 +1517,8 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, if (!varInterval->isPartiallySpilled) { Interval* upperVectorInterval = getUpperVectorInterval(varIndex); - RefPosition* pos = - newRefPosition(upperVectorInterval, currentLoc, RefTypeUpperVectorSave, tree, RBM_FLT_CALLEE_SAVED.GetFloatRegSet()); + RefPosition* pos = newRefPosition(upperVectorInterval, currentLoc, RefTypeUpperVectorSave, tree, + RBM_FLT_CALLEE_SAVED.GetFloatRegSet()); varInterval->isPartiallySpilled = true; pos->skipSaveRestore = blockAlwaysReturn; pos->liveVarUpperSave = VarSetOps::IsMember(compiler, liveLargeVectors, varIndex); @@ -4307,8 +4307,7 @@ int LinearScan::BuildReturn(GenTree* tree) #ifdef TARGET_AMD64 useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE).GetFloatRegSet(); #else - useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE) - .GetFloatRegSet(); + useCandidates = (RBM_DOUBLERET & RBM_ALLDOUBLE).GetFloatRegSet(); #endif // TARGET_AMD64 break; case TYP_LONG: diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 763f60a29629e9..c907a870a4142d 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1197,7 +1197,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // The return value will be on the X87 stack, and we will need to move it. singleDstCandidates = allRegs(registerType); #else // !TARGET_X86 - singleDstCandidates = RBM_FLOATRET.GetFloatRegSet(); + singleDstCandidates = RBM_FLOATRET.GetFloatRegSet(); #endif // !TARGET_X86 } else diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 1dd09d2812b9db..acf5974a595eb1 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -139,15 +139,14 @@ enum _regNumber_enum : unsigned ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs) }; - -enum _regMask_enum : uint64_t{ +enum _regMask_enum : uint64_t +{ RBM_NONE = 0, #define REGDEF(name, rnum, mask, xname, wname) SRBM_##name = mask, #define REGALIAS(alias, realname) SRBM_##alias = SRBM_##realname, #include "register.h" }; - #elif defined(TARGET_AMD64) enum _regNumber_enum : unsigned @@ -212,7 +211,6 @@ typedef unsigned char regNumberSmall; // In any case, we believe that is OK to freely cast between these types; no information will // be lost. - #if REGMASK_BITS == 8 typedef unsigned char regMaskSmall; #define REG_MASK_INT_FMT "%02X" @@ -262,17 +260,17 @@ struct regMaskTP void RemoveRegNumFromMask(regNumber reg, var_types type); bool IsRegNumInMask(regNumber reg, var_types type) const; #endif - void AddGprRegs(SingleTypeRegSet gprRegs); - void AddRegNum(regNumber reg, var_types type); - void AddRegNumInMask(regNumber reg); - void AddRegsetForType(SingleTypeRegSet regsToAdd, var_types type); - SingleTypeRegSet GetRegSetForType(var_types type) const; - bool IsRegNumInMask(regNumber reg) const; - bool IsRegNumPresent(regNumber reg, var_types type) const; - void RemoveRegNum(regNumber reg, var_types type); - void RemoveRegNumFromMask(regNumber reg); - void RemoveRegsetForType(SingleTypeRegSet regsToRemove, var_types type); - static constexpr regMaskTP CreateFromRegNum(regNumber reg, regMaskSmall mask) + void AddGprRegs(SingleTypeRegSet gprRegs); + void AddRegNum(regNumber reg, var_types type); + void AddRegNumInMask(regNumber reg); + void AddRegsetForType(SingleTypeRegSet regsToAdd, var_types type); + SingleTypeRegSet GetRegSetForType(var_types type) const; + bool IsRegNumInMask(regNumber reg) const; + bool IsRegNumPresent(regNumber reg, var_types type) const; + void RemoveRegNum(regNumber reg, var_types type); + void RemoveRegNumFromMask(regNumber reg); + void RemoveRegsetForType(SingleTypeRegSet regsToRemove, var_types type); + static constexpr regMaskTP CreateFromRegNum(regNumber reg, regMaskSmall mask) { #ifdef HAS_MORE_THAN_64_REGISTERS if (reg < 64) @@ -432,44 +430,43 @@ struct regMaskTP #if defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); -#define REGDEF(name, rnum, mask, sname) \ +#define REGDEF(name, rnum, mask, sname) \ static constexpr regMaskTP RBM_##name = \ regMaskTP::CreateFromRegNum(static_cast(rnum), static_cast(mask)); #define REGALIAS(alias, realname) static constexpr regMaskTP RBM_##alias = RBM_##realname; - #include "register.h" +#include "register.h" #elif defined(TARGET_ARM64) static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); -#define REGDEF(name, rnum, mask, xname, wname) \ +#define REGDEF(name, rnum, mask, xname, wname) \ static constexpr regMaskTP RBM_##name = \ regMaskTP::CreateFromRegNum(static_cast(rnum), static_cast(mask)); #define REGALIAS(alias, realname) static constexpr regMaskTP RBM_##alias = RBM_##realname; - #include "register.h" +#include "register.h" #elif defined(TARGET_AMD64) static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); -#define REGDEF(name, rnum, mask, sname) \ +#define REGDEF(name, rnum, mask, sname) \ static constexpr regMaskTP RBM_##name = \ regMaskTP::CreateFromRegNum(static_cast(rnum), static_cast(mask)); #define REGALIAS(alias, realname) static constexpr regMaskTP RBM_##alias = RBM_##realname; - #include "register.h" +#include "register.h" #elif defined(TARGET_X86) static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); -#define REGDEF(name, rnum, mask, sname) \ +#define REGDEF(name, rnum, mask, sname) \ static constexpr regMaskTP RBM_##name = \ regMaskTP::CreateFromRegNum(static_cast(rnum), static_cast(mask)); #define REGALIAS(alias, realname) static constexpr regMaskTP RBM_##alias = RBM_##realname; - #include "register.h" +#include "register.h" #else #error Unsupported target architecture #endif - // enum _regMask_enum : uint64_t{ // RBM_NONE = 0, // #define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask, @@ -477,7 +474,6 @@ static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); // #include "register.h" // }; - static regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) { regMaskTP result(first.getLow() ^ second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() ^ second.getHigh())); From a3bea8856776eae637453db803f20d4777a32c6d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 7 Jun 2024 17:08:53 -0700 Subject: [PATCH 07/20] minor static assert fix --- src/coreclr/jit/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 9fc137ec610bd9..191f281739138f 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9852,7 +9852,7 @@ class Compiler // passed in is preserved by the validator and take care to get the // target from the register for the call (even in debug mode). static_assert_no_msg((RBM_VALIDATE_INDIRECT_CALL_TRASH & - regMaskTP(1 << REG_VALIDATE_INDIRECT_CALL_ADDR, RBM_NONE)) == RegMaskTP_NONE); + regMaskTP(1 << REG_VALIDATE_INDIRECT_CALL_ADDR)) == RegMaskTP_NONE); if (JitConfig.JitForceControlFlowGuard()) return true; From 2e2d48c51bcd910c2463480e980e43a5413d29a4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 7 Jun 2024 17:19:49 -0700 Subject: [PATCH 08/20] Remove RegMaskTP_NONE --- src/coreclr/jit/codegenxarch.cpp | 5 ++--- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/target.h | 12 ++++-------- src/coreclr/jit/targetamd64.h | 2 +- src/coreclr/jit/targetx86.h | 4 ++-- 5 files changed, 10 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index c933c7e14d0d21..9dadf4454f2762 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2449,8 +2449,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG, /* canSkip */ false); } #else // !TARGET_X86 - static_assert_no_msg((RBM_STACK_PROBE_HELPER_ARG & (RBM_SECRET_STUB_PARAM | RBM_DEFAULT_HELPER_CALL_TARGET)) == - RegMaskTP_NONE); + static_assert_no_msg((RBM_STACK_PROBE_HELPER_ARG & (RBM_SECRET_STUB_PARAM | RBM_DEFAULT_HELPER_CALL_TARGET)) == RBM_NONE); GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, -(int)frameSize); regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG); @@ -2462,7 +2461,7 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni *pInitRegZeroed = false; } - static_assert_no_msg((RBM_STACK_PROBE_HELPER_TRASH & RBM_STACK_PROBE_HELPER_ARG) == RegMaskTP_NONE); + static_assert_no_msg((RBM_STACK_PROBE_HELPER_TRASH & RBM_STACK_PROBE_HELPER_ARG) == RBM_NONE); GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG, /* canSkip */ false); #endif // !TARGET_X86 diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 191f281739138f..760f2e802a9a47 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9852,7 +9852,7 @@ class Compiler // passed in is preserved by the validator and take care to get the // target from the register for the call (even in debug mode). static_assert_no_msg((RBM_VALIDATE_INDIRECT_CALL_TRASH & - regMaskTP(1 << REG_VALIDATE_INDIRECT_CALL_ADDR)) == RegMaskTP_NONE); + regMaskTP(1 << REG_VALIDATE_INDIRECT_CALL_ADDR)) == RBM_NONE); if (JitConfig.JitForceControlFlowGuard()) return true; diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index acf5974a595eb1..9371a9a2ab54c4 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -429,7 +429,6 @@ struct regMaskTP #if defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) -static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); #define REGDEF(name, rnum, mask, sname) \ static constexpr regMaskTP RBM_##name = \ regMaskTP::CreateFromRegNum(static_cast(rnum), static_cast(mask)); @@ -438,7 +437,6 @@ static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); #elif defined(TARGET_ARM64) -static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); #define REGDEF(name, rnum, mask, xname, wname) \ static constexpr regMaskTP RBM_##name = \ regMaskTP::CreateFromRegNum(static_cast(rnum), static_cast(mask)); @@ -447,7 +445,6 @@ static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); #elif defined(TARGET_AMD64) -static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); #define REGDEF(name, rnum, mask, sname) \ static constexpr regMaskTP RBM_##name = \ regMaskTP::CreateFromRegNum(static_cast(rnum), static_cast(mask)); @@ -456,7 +453,6 @@ static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); #elif defined(TARGET_X86) -static constexpr regMaskTP RegMaskTP_NONE = regMaskTP(RBM_NONE, RBM_NONE); #define REGDEF(name, rnum, mask, sname) \ static constexpr regMaskTP RBM_##name = \ regMaskTP::CreateFromRegNum(static_cast(rnum), static_cast(mask)); @@ -1173,13 +1169,13 @@ inline bool isFloatRegType(var_types type) /*****************************************************************************/ // Some sanity checks on some of the register masks // Stack pointer is never part of RBM_ALLINT -C_ASSERT((RBM_ALLINT & RBM_SPBASE) == RegMaskTP_NONE); -C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_SPBASE) == RegMaskTP_NONE); +C_ASSERT((RBM_ALLINT & RBM_SPBASE) == RBM_NONE); +C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_SPBASE) == RBM_NONE); #if ETW_EBP_FRAMED // Frame pointer isn't either if we're supporting ETW frame chaining -C_ASSERT((RBM_ALLINT & RBM_FPBASE) == RegMaskTP_NONE); -C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_FPBASE) == RegMaskTP_NONE); +C_ASSERT((RBM_ALLINT & RBM_FPBASE) == RBM_NONE); +C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_FPBASE) == RBM_NONE); #endif /*****************************************************************************/ diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 11c928003a5118..0635935fe1e20d 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -130,7 +130,7 @@ #define RBM_INT_CALLEE_SAVED (RBM_EBX|RBM_ETW_FRAMED_EBP|RBM_R12|RBM_R13|RBM_R14|RBM_R15) #define RBM_INT_CALLEE_TRASH (RBM_EAX|RBM_RDI|RBM_RSI|RBM_EDX|RBM_ECX|RBM_R8|RBM_R9|RBM_R10|RBM_R11) - #define RBM_FLT_CALLEE_SAVED RegMaskTP_NONE + #define RBM_FLT_CALLEE_SAVED RBM_NONE /* NOTE: Sync with variable name defined in compiler.h */ #define RBM_FLT_CALLEE_TRASH_INIT (RBM_XMM0|RBM_XMM1|RBM_XMM2|RBM_XMM3|RBM_XMM4|RBM_XMM5|RBM_XMM6|RBM_XMM7| \ diff --git a/src/coreclr/jit/targetx86.h b/src/coreclr/jit/targetx86.h index a7567dbe02e9c1..79c3b60866d8ba 100644 --- a/src/coreclr/jit/targetx86.h +++ b/src/coreclr/jit/targetx86.h @@ -289,7 +289,7 @@ #define RBM_LNGRET_HI RBM_EDX #define REG_FLOATRET REG_NA - #define RBM_FLOATRET RegMaskTP_NONE + #define RBM_FLOATRET RBM_NONE #define RBM_DOUBLERET RBM_NONE // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper @@ -360,4 +360,4 @@ #define RBM_STACK_PROBE_HELPER_ARG RBM_EAX #define RBM_STACK_PROBE_HELPER_TRASH RBM_NONE -// clang-format on + // clang-format on From bbf7da1e59b0a0f84c636b9ca7e65f2a18276add Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 7 Jun 2024 17:28:26 -0700 Subject: [PATCH 09/20] fix some build errors --- src/coreclr/jit/lsra.cpp | 5 +++++ src/coreclr/jit/lsrabuild.cpp | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 2237d8f810ad2f..6284c5bf83b85d 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -871,8 +871,13 @@ LinearScan::LinearScan(Compiler* theCompiler) // When the EnC option is set we have an exact set of registers that we always save // that are also available in future versions. availableIntRegs &= ~(RBM_INT_CALLEE_SAVED | RBM_ENC_CALLEE_SAVED).GetIntRegSet(); +#if defined(UNIX_AMD64_ABI) + availableFloatRegs &= ~RBM_FLT_CALLEE_SAVED; + availableDoubleRegs &= ~RBM_FLT_CALLEE_SAVED; +#else availableFloatRegs &= ~RBM_FLT_CALLEE_SAVED.GetFloatRegSet(); availableDoubleRegs &= ~RBM_FLT_CALLEE_SAVED.GetFloatRegSet(); +#endif // UNIX_AMD64_ABI #if defined(TARGET_XARCH) availableMaskRegs &= ~RBM_MSK_CALLEE_SAVED; #endif // TARGET_XARCH diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index b3b78762ccac67..bef605bc26ccbe 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -4300,7 +4300,11 @@ int LinearScan::BuildReturn(GenTree* tree) useCandidates = RBM_NONE; break; case TYP_FLOAT: +#ifdef TARGET_X86 + useCandidates = RBM_FLOATRET; +#else useCandidates = RBM_FLOATRET.GetFloatRegSet(); +#endif break; case TYP_DOUBLE: // We ONLY want the valid double register in the RBM_DOUBLERET mask. From 80b658cef73f729b48b6679f2b81af10b99c8a47 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 7 Jun 2024 17:29:07 -0700 Subject: [PATCH 10/20] jit format --- src/coreclr/jit/codegenxarch.cpp | 3 ++- src/coreclr/jit/compiler.h | 4 ++-- src/coreclr/jit/lsrabuild.cpp | 2 +- src/coreclr/jit/targetx86.h | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 9dadf4454f2762..5e05109dbded53 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2449,7 +2449,8 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG, /* canSkip */ false); } #else // !TARGET_X86 - static_assert_no_msg((RBM_STACK_PROBE_HELPER_ARG & (RBM_SECRET_STUB_PARAM | RBM_DEFAULT_HELPER_CALL_TARGET)) == RBM_NONE); + static_assert_no_msg((RBM_STACK_PROBE_HELPER_ARG & (RBM_SECRET_STUB_PARAM | RBM_DEFAULT_HELPER_CALL_TARGET)) == + RBM_NONE); GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, -(int)frameSize); regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 760f2e802a9a47..3d2979e7389355 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9851,8 +9851,8 @@ class Compiler // On these platforms we assume the register that the target is // passed in is preserved by the validator and take care to get the // target from the register for the call (even in debug mode). - static_assert_no_msg((RBM_VALIDATE_INDIRECT_CALL_TRASH & - regMaskTP(1 << REG_VALIDATE_INDIRECT_CALL_ADDR)) == RBM_NONE); + static_assert_no_msg((RBM_VALIDATE_INDIRECT_CALL_TRASH & regMaskTP(1 << REG_VALIDATE_INDIRECT_CALL_ADDR)) == + RBM_NONE); if (JitConfig.JitForceControlFlowGuard()) return true; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index bef605bc26ccbe..40c921db85915b 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -4303,7 +4303,7 @@ int LinearScan::BuildReturn(GenTree* tree) #ifdef TARGET_X86 useCandidates = RBM_FLOATRET; #else - useCandidates = RBM_FLOATRET.GetFloatRegSet(); + useCandidates = RBM_FLOATRET.GetFloatRegSet(); #endif break; case TYP_DOUBLE: diff --git a/src/coreclr/jit/targetx86.h b/src/coreclr/jit/targetx86.h index 79c3b60866d8ba..aa142f6cff0de4 100644 --- a/src/coreclr/jit/targetx86.h +++ b/src/coreclr/jit/targetx86.h @@ -360,4 +360,4 @@ #define RBM_STACK_PROBE_HELPER_ARG RBM_EAX #define RBM_STACK_PROBE_HELPER_TRASH RBM_NONE - // clang-format on +// clang-format on From 8ee0adcac3cda3b238e4be826a68026de5abb663 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 7 Jun 2024 17:42:48 -0700 Subject: [PATCH 11/20] some more fixes --- src/coreclr/jit/lsraarmarch.cpp | 2 +- src/coreclr/jit/lsraxarch.cpp | 14 +++++++------- src/coreclr/jit/target.h | 29 +++-------------------------- src/coreclr/jit/targetamd64.h | 2 +- 4 files changed, 12 insertions(+), 35 deletions(-) diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index ba74842dc8ac6e..a99ef06cc3e578 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -171,7 +171,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { // Fast tail call - make sure that call target is always computed in volatile registers // that will not be overridden by epilog sequence. - ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH.GetIntRegSet() & ~RBM_LR.GetIntRegSet(); + ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH.GetIntRegSet() & ~SRBM_LR; if (compiler->getNeedsGSSecurityCookie()) { ctrlExprCandidates &= diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index c907a870a4142d..006128f8263c8a 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -442,11 +442,11 @@ int LinearScan::BuildNode(GenTree* tree) // Comparand is preferenced to RAX. // The remaining two operands can be in any reg other than RAX. - SingleTypeRegSet nonRaxCandidates = availableIntRegs & ~RBM_RAX.GetIntRegSet(); + SingleTypeRegSet nonRaxCandidates = availableIntRegs & ~SRBM_RAX; BuildUse(addr, nonRaxCandidates); BuildUse(data, varTypeIsByte(tree) ? (nonRaxCandidates & RBM_BYTE_REGS.GetIntRegSet()) : nonRaxCandidates); - BuildUse(comparand, RBM_RAX.GetIntRegSet()); - BuildDef(tree, RBM_RAX.GetIntRegSet()); + BuildUse(comparand, SRBM_RAX); + BuildDef(tree, SRBM_RAX); } break; @@ -461,10 +461,10 @@ int LinearScan::BuildNode(GenTree* tree) assert(!varTypeIsByte(data)); // if tree's value is used, we'll emit a cmpxchg-loop idiom (requires RAX) - buildInternalIntRegisterDefForNode(tree, availableIntRegs & ~RBM_RAX.GetIntRegSet()); - BuildUse(addr, availableIntRegs & ~RBM_RAX.GetIntRegSet()); - BuildUse(data, availableIntRegs & ~RBM_RAX.GetIntRegSet()); - BuildDef(tree, RBM_RAX.GetIntRegSet()); + buildInternalIntRegisterDefForNode(tree, availableIntRegs & ~SRBM_RAX); + BuildUse(addr, availableIntRegs & ~SRBM_RAX); + BuildUse(data, availableIntRegs & ~SRBM_RAX); + BuildDef(tree, SRBM_RAX); buildInternalRegisterUses(); srcCount = 2; assert(dstCount == 1); diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 9371a9a2ab54c4..590a96593c68af 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -195,9 +195,6 @@ enum _regMask_enum : unsigned #error Unsupported target architecture #endif -typedef _regNumber_enum regNumber; -typedef unsigned char regNumberSmall; - #define AVAILABLE_REG_COUNT get_AVAILABLE_REG_COUNT() /*****************************************************************************/ @@ -211,6 +208,9 @@ typedef unsigned char regNumberSmall; // In any case, we believe that is OK to freely cast between these types; no information will // be lost. +typedef _regNumber_enum regNumber; +typedef unsigned char regNumberSmall; + #if REGMASK_BITS == 8 typedef unsigned char regMaskSmall; #define REG_MASK_INT_FMT "%02X" @@ -286,22 +286,6 @@ struct regMaskTP #endif } - static regMaskTP CreateFromRegSet(SingleTypeRegSet regSet, var_types type) - { -#ifdef HAS_MORE_THAN_64_REGISTERS - if (!varTypeIsMask(type)) - { - return regMaskTP(regSet, RBM_NONE); - } - else - { - return regMaskTP(RBM_NONE, regSet); - } -#else - return regMaskTP(regSet, RBM_NONE); -#endif - } - constexpr regMaskTP(regMaskSmall lowMask, regMaskSmall highMask) : low(lowMask) #ifdef HAS_MORE_THAN_64_REGISTERS @@ -463,13 +447,6 @@ struct regMaskTP #error Unsupported target architecture #endif -// enum _regMask_enum : uint64_t{ -// RBM_NONE = 0, -// #define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask, -// #define REGALIAS(alias, realname) RBM_##alias = RBM_##realname, -// #include "register.h" -// }; - static regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) { regMaskTP result(first.getLow() ^ second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() ^ second.getHigh())); diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 0635935fe1e20d..1575177643eb1e 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -130,7 +130,7 @@ #define RBM_INT_CALLEE_SAVED (RBM_EBX|RBM_ETW_FRAMED_EBP|RBM_R12|RBM_R13|RBM_R14|RBM_R15) #define RBM_INT_CALLEE_TRASH (RBM_EAX|RBM_RDI|RBM_RSI|RBM_EDX|RBM_ECX|RBM_R8|RBM_R9|RBM_R10|RBM_R11) - #define RBM_FLT_CALLEE_SAVED RBM_NONE + #define RBM_FLT_CALLEE_SAVED (0) /* NOTE: Sync with variable name defined in compiler.h */ #define RBM_FLT_CALLEE_TRASH_INIT (RBM_XMM0|RBM_XMM1|RBM_XMM2|RBM_XMM3|RBM_XMM4|RBM_XMM5|RBM_XMM6|RBM_XMM7| \ From 1534499cb8f3697912d21d27034ed13659698ed9 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 7 Jun 2024 19:22:28 -0700 Subject: [PATCH 12/20] Use regMaskTP for kill RefPositions --- src/coreclr/jit/lsra.cpp | 10 +++++++--- src/coreclr/jit/lsra.h | 32 +++++++++++++++++++++++++------- src/coreclr/jit/lsrabuild.cpp | 4 ++++ 3 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 6284c5bf83b85d..904a1edbe2afec 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -278,7 +278,11 @@ void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPo RefPosition* kill = nextKill; while ((kill != nullptr) && (kill->nodeLocation < nextLocation)) { +#ifdef HAS_MORE_THAN_64_REGISTERS + if (kill->killRegisterAssignment.IsRegNumInMask(regRecord->regNum)) +#else if ((kill->registerAssignment & genSingleTypeRegMask(regRecord->regNum)) != RBM_NONE) +#endif { nextLocation = kill->nodeLocation; break; @@ -4037,7 +4041,7 @@ void LinearScan::processKills(RefPosition* killRefPosition) { RefPosition* nextKill = killRefPosition->nextRefPosition; - regMaskTP killedRegs = killRefPosition->registerAssignment; + regMaskTP killedRegs = killRefPosition->getKillRegisterAssignment(); while (killedRegs.IsNonEmpty()) { regNumber killedReg = genFirstRegNumFromMaskAndToggle(killedRegs); @@ -4057,9 +4061,9 @@ void LinearScan::processKills(RefPosition* killRefPosition) updateNextFixedRef(regRecord, regNextRefPos, nextKill); } - regsBusyUntilKill &= ~killRefPosition->registerAssignment; + regsBusyUntilKill &= ~killRefPosition->getKillRegisterAssignment(); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, nullptr, NONE, - killRefPosition->registerAssignment)); + killRefPosition->getKillRegisterAssignment())); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 249d60e051dcf2..c1843bdda94c9c 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2464,16 +2464,25 @@ class RefPosition RefPosition* nextRefPosition; // The remaining fields are common to both options - GenTree* treeNode; + union + { + struct + { + GenTree* treeNode; + + // Prior to the allocation pass, registerAssignment captures the valid registers + // for this RefPosition. + // After the allocation pass, this contains the actual assignment + SingleTypeRegSet registerAssignment; + }; +#ifdef HAS_MORE_THAN_64_REGISTERS + regMaskTP killRegisterAssignment; +#endif + }; unsigned int bbNum; LsraLocation nodeLocation; - // Prior to the allocation pass, registerAssignment captures the valid registers - // for this RefPosition. - // After the allocation pass, this contains the actual assignment - SingleTypeRegSet registerAssignment; - RefType refType; // NOTE: C++ only packs bitfields if the base type is the same. So make all the base @@ -2584,9 +2593,9 @@ class RefPosition : referent(nullptr) , nextRefPosition(nullptr) , treeNode(treeNode) + , registerAssignment(RBM_NONE) , bbNum(bbNum) , nodeLocation(nodeLocation) - , registerAssignment(RBM_NONE) , refType(refType) , multiRegIdx(0) #ifdef TARGET_ARM64 @@ -2651,6 +2660,15 @@ class RefPosition return referent->registerType; } + regMaskTP getKillRegisterAssignment() + { +#ifdef HAS_MORE_THAN_64_REGISTERS + return killRegisterAssignment; +#else + return registerAssignment; +#endif + } + // Returns true if it is a reference on a GenTree node. bool IsActualRef() { diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 40c921db85915b..d711922718d81a 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -710,6 +710,10 @@ void LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask.getLow()); +#ifdef HAS_MORE_THAN_64_REGISTERS + pos->killRegisterAssignment = mask; +#endif + *killTail = pos; killTail = &pos->nextRefPosition; } From f5ceca189553f6f84bbe30698e3cbf0a7c80478b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sat, 8 Jun 2024 19:53:25 -0700 Subject: [PATCH 13/20] fix a typo --- src/coreclr/jit/lsra.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 904a1edbe2afec..b47ffb394b80bb 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -874,7 +874,7 @@ LinearScan::LinearScan(Compiler* theCompiler) { // When the EnC option is set we have an exact set of registers that we always save // that are also available in future versions. - availableIntRegs &= ~(RBM_INT_CALLEE_SAVED | RBM_ENC_CALLEE_SAVED).GetIntRegSet(); + availableIntRegs &= (~RBM_INT_CALLEE_SAVED | RBM_ENC_CALLEE_SAVED).GetIntRegSet(); #if defined(UNIX_AMD64_ABI) availableFloatRegs &= ~RBM_FLT_CALLEE_SAVED; availableDoubleRegs &= ~RBM_FLT_CALLEE_SAVED; From 21e3c2f4da67aa9502ef2d74059dc00b13597add Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 10 Jun 2024 06:45:58 -0700 Subject: [PATCH 14/20] Fix build errors for riscv64/loongarch64 --- src/coreclr/jit/lsraloongarch64.cpp | 20 ++++++++++---------- src/coreclr/jit/lsrariscv64.cpp | 22 +++++++++++----------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index dd2805be4bcbe1..f084ae9771220e 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -183,7 +183,7 @@ int LinearScan::BuildNode(GenTree* tree) { assert(tree->TypeGet() == TYP_INT); srcCount = 1; - BuildUse(tree->gtGetOp1(), RBM_INTRET); + BuildUse(tree->gtGetOp1(), RBM_INTRET.GetIntRegSet()); } break; @@ -560,7 +560,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CATCH_ARG: srcCount = 0; assert(dstCount == 1); - BuildDef(tree, RBM_EXCEPTION_OBJECT); + BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; case GT_INDEX_ADDR: @@ -734,7 +734,7 @@ int LinearScan::BuildCall(GenTreeCall* call) SingleTypeRegSet candidates = RBM_NONE; if (call->IsFastTailCall()) { - candidates = (allRegs(TYP_INT) & (RBM_INT_CALLEE_TRASH & ~RBM_GSCOOKIE_TMP)); + candidates = (allRegs(TYP_INT) & (RBM_INT_CALLEE_TRASH & ~RBM_GSCOOKIE_TMP).GetIntRegSet()); assert(candidates != RBM_NONE); } @@ -749,15 +749,15 @@ int LinearScan::BuildCall(GenTreeCall* call) { if (varTypeUsesFloatArgReg(registerType)) { - singleDstCandidates = RBM_FLOATRET; + singleDstCandidates = RBM_FLOATRET.GetFloatRegSet(); } else if (registerType == TYP_LONG) { - singleDstCandidates = RBM_LNGRET; + singleDstCandidates = RBM_LNGRET.GetIntRegSet(); } else { - singleDstCandidates = RBM_INTRET; + singleDstCandidates = RBM_INTRET.GetIntRegSet(); } } @@ -857,7 +857,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // Don't assign the call target to any of the argument registers because // we will use them to also pass floating point arguments as required // by LOONGARCH64 ABI. - ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS); + ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS.GetIntRegSet()); } if (ctrlExpr != nullptr) @@ -1141,7 +1141,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // a temporary register to perform the sequence of loads and stores. // We can't use the special Write Barrier registers, so exclude them from the mask SingleTypeRegSet internalIntCandidates = - allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); + allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF).GetRegSetForType(IntRegisterType); buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); if (size >= 2 * REGSIZE_BYTES) @@ -1152,7 +1152,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) } // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF. - dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF; + dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF.GetIntRegSet(); // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, @@ -1160,7 +1160,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (srcAddrOrFill != nullptr) { assert(!srcAddrOrFill->isContained()); - srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF; + srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF.GetIntRegSet(); } } break; diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index 2983c035fe8d1a..379c98744c4ba9 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -184,7 +184,7 @@ int LinearScan::BuildNode(GenTree* tree) { assert(tree->TypeGet() == TYP_INT); srcCount = 1; - BuildUse(tree->gtGetOp1(), RBM_INTRET); + BuildUse(tree->gtGetOp1(), RBM_INTRET.GetIntRegSet()); } break; @@ -702,7 +702,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_CATCH_ARG: srcCount = 0; assert(dstCount == 1); - BuildDef(tree, RBM_EXCEPTION_OBJECT); + BuildDef(tree, RBM_EXCEPTION_OBJECT.GetIntRegSet()); break; case GT_INDEX_ADDR: @@ -882,7 +882,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { // Fast tail call - make sure that call target is always computed in volatile registers // that will not be overridden by epilog sequence. - ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + ctrlExprCandidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH.GetIntRegSet(); if (compiler->getNeedsGSSecurityCookie()) { ctrlExprCandidates &= @@ -898,7 +898,7 @@ int LinearScan::BuildCall(GenTreeCall* call) SingleTypeRegSet candidates = RBM_NONE; if (call->IsFastTailCall()) { - candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH.GetIntRegSet(); assert(candidates != RBM_NONE); } @@ -913,15 +913,15 @@ int LinearScan::BuildCall(GenTreeCall* call) { if (varTypeUsesFloatArgReg(registerType)) { - singleDstCandidates = RBM_FLOATRET; + singleDstCandidates = RBM_FLOATRET.GetFloatRegSet(); } else if (registerType == TYP_LONG) { - singleDstCandidates = RBM_LNGRET; + singleDstCandidates = RBM_LNGRET.GetIntRegSet(); } else { - singleDstCandidates = RBM_INTRET; + singleDstCandidates = RBM_INTRET.GetIntRegSet(); } } @@ -1021,7 +1021,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // Don't assign the call target to any of the argument registers because // we will use them to also pass floating point arguments as required // by RISCV64 ABI. - ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS); + ctrlExprCandidates = allRegs(TYP_INT) & ~(RBM_ARG_REGS.GetIntRegSet()); } if (ctrlExpr != nullptr) @@ -1298,7 +1298,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // a temporary register to perform the sequence of loads and stores. // We can't use the special Write Barrier registers, so exclude them from the mask SingleTypeRegSet internalIntCandidates = - allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); + allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF).GetRegSetForType(IntRegisterType); buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); if (size >= 2 * REGSIZE_BYTES) @@ -1309,7 +1309,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) } // If we have a dest address we want it in RBM_WRITE_BARRIER_DST_BYREF. - dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF; + dstAddrRegMask = RBM_WRITE_BARRIER_DST_BYREF.GetIntRegSet(); // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF. // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF, @@ -1317,7 +1317,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) if (srcAddrOrFill != nullptr) { assert(!srcAddrOrFill->isContained()); - srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF; + srcRegMask = RBM_WRITE_BARRIER_SRC_BYREF.GetIntRegSet(); } } break; From b68f2d317d4f1c6cfb4dd02445335025c1780f31 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 10 Jun 2024 07:47:28 -0700 Subject: [PATCH 15/20] try to fix gcc errors --- src/coreclr/jit/target.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 25a0647187e5c6..dc4f11e8c10a50 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -233,12 +233,6 @@ typedef uint64_t regMaskSmall; // #define HAS_MORE_THAN_64_REGISTERS 1 #endif // TARGET_ARM64 -#ifdef HAS_MORE_THAN_64_REGISTERS -#define MORE_THAN_64_REGISTERS_ARG(x) , x -#else -#define MORE_THAN_64_REGISTERS_ARG(x) -#endif - // TODO: Rename regMaskSmall as RegSet64 (at least for 64-bit) typedef regMaskSmall SingleTypeRegSet; inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg); @@ -449,19 +443,19 @@ struct regMaskTP static regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) { - regMaskTP result(first.getLow() ^ second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() ^ second.getHigh())); + regMaskTP result(first.getLow() ^ second.getLow(), first.getHigh() ^ second.getHigh()); return result; } static constexpr regMaskTP operator&(const regMaskTP& first, const regMaskTP& second) { - regMaskTP result(first.getLow() & second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() & second.getHigh())); + regMaskTP result(first.getLow() & second.getLow(), first.getHigh() & second.getHigh()); return result; } static constexpr regMaskTP operator|(const regMaskTP& first, const regMaskTP& second) { - regMaskTP result(first.getLow() | second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() | second.getHigh())); + regMaskTP result(first.getLow() | second.getLow(), first.getHigh() | second.getHigh()); return result; } @@ -518,7 +512,7 @@ static regMaskTP& operator>>=(regMaskTP& first, const int b) static constexpr regMaskTP operator~(const regMaskTP& first) { - regMaskTP result(~first.getLow() MORE_THAN_64_REGISTERS_ARG(~first.getHigh())); + regMaskTP result(~first.getLow(), ~first.getHigh()); return result; } From 7f5a54240e5104f93d1cfb512aed5c03eee1c567 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 10 Jun 2024 07:53:51 -0700 Subject: [PATCH 16/20] jit format --- src/coreclr/jit/lsraloongarch64.cpp | 3 ++- src/coreclr/jit/lsrariscv64.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index f084ae9771220e..0d5c629b7843c6 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -1141,7 +1141,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // a temporary register to perform the sequence of loads and stores. // We can't use the special Write Barrier registers, so exclude them from the mask SingleTypeRegSet internalIntCandidates = - allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF).GetRegSetForType(IntRegisterType); + allRegs(TYP_INT) & + ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF).GetRegSetForType(IntRegisterType); buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); if (size >= 2 * REGSIZE_BYTES) diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index 379c98744c4ba9..2ea90408f41342 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -1298,7 +1298,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // a temporary register to perform the sequence of loads and stores. // We can't use the special Write Barrier registers, so exclude them from the mask SingleTypeRegSet internalIntCandidates = - allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF).GetRegSetForType(IntRegisterType); + allRegs(TYP_INT) & + ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF).GetRegSetForType(IntRegisterType); buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); if (size >= 2 * REGSIZE_BYTES) From 3d02d69502e7081ced132cc3d15344c94d74aa08 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 10 Jun 2024 07:54:47 -0700 Subject: [PATCH 17/20] Revert "try to fix gcc errors" This reverts commit b68f2d317d4f1c6cfb4dd02445335025c1780f31. --- src/coreclr/jit/target.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index dc4f11e8c10a50..25a0647187e5c6 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -233,6 +233,12 @@ typedef uint64_t regMaskSmall; // #define HAS_MORE_THAN_64_REGISTERS 1 #endif // TARGET_ARM64 +#ifdef HAS_MORE_THAN_64_REGISTERS +#define MORE_THAN_64_REGISTERS_ARG(x) , x +#else +#define MORE_THAN_64_REGISTERS_ARG(x) +#endif + // TODO: Rename regMaskSmall as RegSet64 (at least for 64-bit) typedef regMaskSmall SingleTypeRegSet; inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg); @@ -443,19 +449,19 @@ struct regMaskTP static regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) { - regMaskTP result(first.getLow() ^ second.getLow(), first.getHigh() ^ second.getHigh()); + regMaskTP result(first.getLow() ^ second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() ^ second.getHigh())); return result; } static constexpr regMaskTP operator&(const regMaskTP& first, const regMaskTP& second) { - regMaskTP result(first.getLow() & second.getLow(), first.getHigh() & second.getHigh()); + regMaskTP result(first.getLow() & second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() & second.getHigh())); return result; } static constexpr regMaskTP operator|(const regMaskTP& first, const regMaskTP& second) { - regMaskTP result(first.getLow() | second.getLow(), first.getHigh() | second.getHigh()); + regMaskTP result(first.getLow() | second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() | second.getHigh())); return result; } @@ -512,7 +518,7 @@ static regMaskTP& operator>>=(regMaskTP& first, const int b) static constexpr regMaskTP operator~(const regMaskTP& first) { - regMaskTP result(~first.getLow(), ~first.getHigh()); + regMaskTP result(~first.getLow() MORE_THAN_64_REGISTERS_ARG(~first.getHigh())); return result; } From f1b00a65b690e76a786a409618303687256d9541 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 10 Jun 2024 09:24:19 -0700 Subject: [PATCH 18/20] fix gcc build failure --- src/coreclr/jit/target.h | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 25a0647187e5c6..0d3c94ace7b666 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -233,12 +233,6 @@ typedef uint64_t regMaskSmall; // #define HAS_MORE_THAN_64_REGISTERS 1 #endif // TARGET_ARM64 -#ifdef HAS_MORE_THAN_64_REGISTERS -#define MORE_THAN_64_REGISTERS_ARG(x) , x -#else -#define MORE_THAN_64_REGISTERS_ARG(x) -#endif - // TODO: Rename regMaskSmall as RegSet64 (at least for 64-bit) typedef regMaskSmall SingleTypeRegSet; inline SingleTypeRegSet genSingleTypeRegMask(regNumber reg); @@ -449,20 +443,29 @@ struct regMaskTP static regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) { - regMaskTP result(first.getLow() ^ second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() ^ second.getHigh())); - return result; +#ifdef HAS_MORE_THAN_64_REGISTERS + return regMaskTP(first.getLow() ^ second.getLow(), first.getHigh() ^ second.getHigh()); +#else + return regMaskTP(first.getLow() ^ second.getLow()); +#endif } static constexpr regMaskTP operator&(const regMaskTP& first, const regMaskTP& second) { - regMaskTP result(first.getLow() & second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() & second.getHigh())); - return result; +#ifdef HAS_MORE_THAN_64_REGISTERS + return regMaskTP(first.getLow() & second.getLow(), first.getHigh() & second.getHigh()); +#else + return regMaskTP(first.getLow() & second.getLow()); +#endif } static constexpr regMaskTP operator|(const regMaskTP& first, const regMaskTP& second) { - regMaskTP result(first.getLow() | second.getLow() MORE_THAN_64_REGISTERS_ARG(first.getHigh() | second.getHigh())); - return result; +#ifdef HAS_MORE_THAN_64_REGISTERS + return regMaskTP(first.getLow() | second.getLow(), first.getHigh() | second.getHigh()); +#else + return regMaskTP(first.getLow() | second.getLow()); +#endif } static constexpr bool operator==(const regMaskTP& first, const regMaskTP& second) @@ -506,8 +509,7 @@ static regMaskTP& operator<<=(regMaskTP& first, const int b) static constexpr regMaskTP operator>>(regMaskTP first, const int b) { - regMaskTP result(first.getLow() >> b); - return result; + return regMaskTP(first.getLow() >> b); } static regMaskTP& operator>>=(regMaskTP& first, const int b) @@ -516,10 +518,13 @@ static regMaskTP& operator>>=(regMaskTP& first, const int b) return first; } -static constexpr regMaskTP operator~(const regMaskTP& first) +static constexpr regMaskTP operator~(const regMaskTP first) { - regMaskTP result(~first.getLow() MORE_THAN_64_REGISTERS_ARG(~first.getHigh())); - return result; +#ifdef HAS_MORE_THAN_64_REGISTERS + return regMaskTP(~first.getLow(), ~first.getHigh()); +#else + return regMaskTP(~first.getLow()); +#endif } static uint32_t PopCount(SingleTypeRegSet value) From 491da62a99d45ab3ffbf88862b54ccbb859c01fc Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 10 Jun 2024 09:29:34 -0700 Subject: [PATCH 19/20] minor code cleanup --- src/coreclr/jit/target.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 0d3c94ace7b666..d5073aa9ce7f4e 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -534,11 +534,11 @@ static uint32_t PopCount(SingleTypeRegSet value) static uint32_t PopCount(const regMaskTP& value) { - return BitOperations::PopCount(value.getLow()) + uint32_t result = BitOperations::PopCount(value.getLow()); #ifdef HAS_MORE_THAN_64_REGISTERS - + BitOperations::PopCount(value.getHigh()) + result += BitOperations::PopCount(value.getHigh()); #endif - ; + return result; } static uint32_t BitScanForward(SingleTypeRegSet value) From b64e83576e4afb923ab74d257d7ff83884a18eb4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 10 Jun 2024 13:38:10 -0700 Subject: [PATCH 20/20] review comments --- src/coreclr/jit/lsra.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 14c6f8a9d9f0ec..d0a34f6ddcb53b 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2664,6 +2664,7 @@ class RefPosition regMaskTP getKillRegisterAssignment() { + assert(refType == RefTypeKill); #ifdef HAS_MORE_THAN_64_REGISTERS return killRegisterAssignment; #else