diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 47c9007ddee4fc..25ee67aa32a303 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -157,6 +157,25 @@ inline unsigned uhi32(unsigned __int64 value) return static_cast(value >> 32); } +#if HAS_PRIMITIVE_128 + +inline unsigned genLog2(unsigned __int128 value) +{ + // assert(genExactlyOneBit(value)); + return BitOperations::BitScanForward(value); +} + +/***************************************************************************** + * + * A rather simple routine that counts the number of bits in a given number. + */ + +inline unsigned genCountBits(unsigned __int128 bits) +{ + return BitOperations::PopCount(bits); +} +#endif + /***************************************************************************** * * A rather simple routine that counts the number of bits in a given number. diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index d0f7aeb8369695..4eb8bc101fb690 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -29,6 +29,12 @@ const unsigned int MaxLocation = UINT_MAX; const unsigned int MaxInternalRegisters = 8; const unsigned int RegisterTypeCount = 2; +#if HAS_PRIMITIVE_128 +#define UINT128(hi, lo) (((__uint128_t)(hi)) << 64 | (lo)) +#else +#define UINT128(hi, lo) lo +#endif // HAS_PRIMITIVE_128 + /***************************************************************************** * Register types *****************************************************************************/ diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index aed2de96d5e306..695f98c5389057 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -76,7 +76,7 @@ void LinearScan::assignConsecutiveRegisters(RefPosition* firstRefPosition, regNu assert(firstRefPosition->refType != RefTypeUpperVectorRestore); INDEBUG(int refPosCount = 1); - consecutiveRegsInUseThisLocation = (((1ULL << firstRefPosition->regCount) - 1) << firstRegAssigned); + consecutiveRegsInUseThisLocation = (UINT128(0, ((1ULL << firstRefPosition->regCount) - 1)) << firstRegAssigned); while (consecutiveRefPosition != nullptr) { @@ -192,8 +192,8 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, // available, create the mask only for Rm, Rm+1, ..., Rm+(k-n) to convey that it // is safe to assign any of those registers, but not beyond that. #define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ - regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ - regMaskTP selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ + regMaskTP selectionStartMask = UINT128(0, (1ULL << regAvailableStartIndex) - 1); \ + regMaskTP selectionEndMask = UINT128(0, (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1); \ consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ overallResult |= availableRegistersMask; @@ -202,11 +202,10 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, do { // From LSB, find the first available register (bit `1`) - regAvailableStartIndex = BitOperations::BitScanForward(static_cast(currAvailableRegs)); - regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; - + regAvailableStartIndex = BitOperations::BitScanForward(currAvailableRegs); + regMaskTP startMask = UINT128(0, (1ULL << regAvailableStartIndex) - 1); // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. - regMaskTP maskProcessed = ~(currAvailableRegs | startMask); + regMaskTP maskProcessed = UINT128(0, 0xFFFFFFFF) & ~(currAvailableRegs | startMask); // From regAvailableStart, find the first unavailable register (bit `0`). if (maskProcessed == RBM_NONE) @@ -220,9 +219,9 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, } else { - regAvailableEndIndex = BitOperations::BitScanForward(static_cast(maskProcessed)); + regAvailableEndIndex = BitOperations::BitScanForward(maskProcessed); } - regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1; + regMaskTP endMask = UINT128(0, (1ULL << regAvailableEndIndex) - 1); // Anything between regAvailableStart and regAvailableEnd is the range of consecutive registers available. // If they are equal to or greater than our register requirements, then add all of them to the result. @@ -328,11 +327,11 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC regMaskTP unprocessedRegs = consecutiveCandidates; unsigned regAvailableStartIndex = 0, regAvailableEndIndex = 0; int maxSpillRegs = registersNeeded; - regMaskTP registersNeededMask = (1ULL << registersNeeded) - 1; + regMaskTP registersNeededMask = UINT128(0, (1ULL << registersNeeded) - 1); do { // From LSB, find the first available register (bit `1`) - regAvailableStartIndex = BitOperations::BitScanForward(static_cast(unprocessedRegs)); + regAvailableStartIndex = BitOperations::BitScanForward(unprocessedRegs); // For the current range, find how many registers are free vs. busy regMaskTP maskForCurRange = RBM_NONE; @@ -356,7 +355,7 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC if (shouldCheckForRounding) { unsigned int roundedRegistersNeeded = registersNeeded - (63 - regAvailableStartIndex + 1); - maskForCurRange = (1ULL << roundedRegistersNeeded) - 1; + maskForCurRange = UINT128(0, (1ULL << roundedRegistersNeeded) - 1); } maskForCurRange |= (registersNeededMask << regAvailableStartIndex); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 7d9803c645799d..3016434192d502 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2799,7 +2799,17 @@ void LinearScan::buildIntervals() availableRegCount = REG_INT_COUNT; } - if (availableRegCount < (sizeof(regMaskTP) * 8)) +#if HAS_PRIMITIVE_128 + if ((sizeof(regMaskTP) * 8) > 64) + { + // Mask out the bits that are between 64 ~ availableRegCount + // unsigned __int128 a = ((UINT128(1, 0) << 64) - 1); + unsigned __int64 b = ~0; + actualRegistersMask = b; + } + else +#endif // HAS_PRIMITIVE_128 + if (availableRegCount < (sizeof(regMaskTP) * 8)) { // Mask out the bits that are between 64 ~ availableRegCount actualRegistersMask = (1ULL << availableRegCount) - 1; diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 638e55d56c7431..cd120189675841 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -64,6 +64,12 @@ inline bool compUnixX86Abi() #error Unsupported or unset target architecture #endif +#if defined(TARGET_ARM64) && defined(HOST_UNIX) +#define HAS_PRIMITIVE_128 1 +#else +#define HAS_PRIMITIVE_128 0 +#endif + /*****************************************************************************/ // The following are intended to capture only those #defines that cannot be replaced // with static const members of Target @@ -80,7 +86,11 @@ inline bool compUnixX86Abi() #define CSE_CONST_SHARED_LOW_BITS 12 #elif defined(TARGET_ARM64) +#if HAS_PRIMITIVE_128 +#define REGMASK_BITS 128 +#else #define REGMASK_BITS 64 +#endif // HAS_PRIMITIVE_128 #define CSE_CONST_SHARED_LOW_BITS 12 #elif defined(TARGET_LOONGARCH64) @@ -139,7 +149,11 @@ enum _regNumber_enum : unsigned ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs) }; +#if HAS_PRIMITIVE_128 +enum _regMask_enum : unsigned __int128 +#else enum _regMask_enum : unsigned __int64 +#endif // HAS_PRIMITIVE_128 { RBM_NONE = 0, #define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask, @@ -209,10 +223,16 @@ enum _regMask_enum : unsigned // In any case, we believe that is OK to freely cast between these types; no information will // be lost. -#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) +#if defined(TARGET_AMD64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) typedef unsigned __int64 regMaskTP; +#elif defined(TARGET_ARM64) +#if HAS_PRIMITIVE_128 +typedef unsigned __int128 regMaskTP; +#else +typedef unsigned __int64 regMaskTP; +#endif // HAS_PRIMITIVE_128 #else -typedef unsigned regMaskTP; +typedef unsigned regMaskTP; #endif #if REGMASK_BITS == 8 @@ -220,7 +240,7 @@ typedef unsigned char regMaskSmall; #define REG_MASK_INT_FMT "%02X" #define REG_MASK_ALL_FMT "%02X" #elif REGMASK_BITS == 16 -typedef unsigned short regMaskSmall; +typedef unsigned short regMaskSmall; #define REG_MASK_INT_FMT "%04X" #define REG_MASK_ALL_FMT "%04X" #elif REGMASK_BITS == 32 @@ -228,7 +248,11 @@ typedef unsigned regMaskSmall; #define REG_MASK_INT_FMT "%08X" #define REG_MASK_ALL_FMT "%08X" #else +#if HAS_PRIMITIVE_128 +typedef unsigned __int128 regMaskSmall; +#else typedef unsigned __int64 regMaskSmall; +#endif #define REG_MASK_INT_FMT "%04llX" #define REG_MASK_ALL_FMT "%016llX" #endif diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 099155e85f0b98..685896690004e2 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -3332,6 +3332,22 @@ uint32_t BitOperations::PopCount(uint64_t value) #endif } +#if HAS_PRIMITIVE_128 +//------------------------------------------------------------------------ +// BitOperations::PopCount: Returns the population count (number of bits set) of a mask. +// +// Arguments: +// value - the value +// +// Return Value: +// The population count (number of bits set) of value +// +uint32_t BitOperations::PopCount(unsigned __int128 value) +{ + return BitOperations::PopCount(static_cast(value)); +} +#endif // HAS_PRIMITIVE_128 + //------------------------------------------------------------------------ // BitOperations::ReverseBits: Reverses the bits in an integer value // diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index 747daf9d719d46..05ae66d36b2836 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -827,6 +827,31 @@ class BitOperations #endif } +#if defined(TARGET_ARM64) && defined(HOST_UNIX) + static void print128x(unsigned __int128 n) + { + printf("%lx : ", static_cast(n)); + uint64_t lo = n; + uint64_t hi = (n >> 64); + if (hi) + { + printf("%lx", hi); + printf("%lx", lo); + } + else + { + printf("%lx", lo); + } + printf("\n"); + } + + FORCEINLINE static uint32_t BitScanForward(unsigned __int128 value) + { + uint32_t result = BitScanForward(static_cast(value)); + return result; + } +#endif // TARGET_ARM64 && HOST_UNIX + //------------------------------------------------------------------------ // BitOperations::BitScanForward: Search the mask data from least significant bit (LSB) to the most significant bit // (MSB) for a set bit (1) @@ -881,6 +906,10 @@ class BitOperations static uint32_t PopCount(uint64_t value); +#if defined(TARGET_ARM64) && defined(HOST_UNIX) + static uint32_t PopCount(unsigned __int128 value); +#endif // TARGET_ARM64 && HOST_UNIX + static uint32_t ReverseBits(uint32_t value); static uint64_t ReverseBits(uint64_t value);