Skip to content
Closed
19 changes: 19 additions & 0 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,25 @@ inline unsigned uhi32(unsigned __int64 value)
return static_cast<unsigned>(value >> 32);
}

#if HAS_PRIMITIVE_128

// 128-bit overload of genLog2: forwards the value to BitOperations::BitScanForward
// and returns the bit index it reports.
inline unsigned genLog2(unsigned __int128 value)
{
    // The single-bit precondition check is currently disabled:
    // assert(genExactlyOneBit(value));
    const unsigned bitIndex = BitOperations::BitScanForward(value);
    return bitIndex;
}

/*****************************************************************************
 *
 *  128-bit overload: returns the bit count that BitOperations::PopCount
 *  reports for the given value.
 */

inline unsigned genCountBits(unsigned __int128 bits)
{
    const unsigned setBitCount = BitOperations::PopCount(bits);
    return setBitCount;
}
#endif

/*****************************************************************************
*
* A rather simple routine that counts the number of bits in a given number.
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/lsra.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ const unsigned int MaxLocation = UINT_MAX;
const unsigned int MaxInternalRegisters = 8;
const unsigned int RegisterTypeCount = 2;

// UINT128(hi, lo): builds a mask constant from two 64-bit halves.
//  - With HAS_PRIMITIVE_128, `hi` is widened to 128 bits, shifted into the upper
//    half and OR-ed with `lo`.
//  - Otherwise only `lo` is used and `hi` is discarded.
// Both expansions are fully parenthesized so that an argument such as `a | b`
// cannot re-associate with operators surrounding the macro invocation.
#if HAS_PRIMITIVE_128
#define UINT128(hi, lo) ((((__uint128_t)(hi)) << 64) | (lo))
#else
#define UINT128(hi, lo) (lo)
#endif // HAS_PRIMITIVE_128

/*****************************************************************************
* Register types
*****************************************************************************/
Expand Down
23 changes: 11 additions & 12 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ void LinearScan::assignConsecutiveRegisters(RefPosition* firstRefPosition, regNu
assert(firstRefPosition->refType != RefTypeUpperVectorRestore);

INDEBUG(int refPosCount = 1);
consecutiveRegsInUseThisLocation = (((1ULL << firstRefPosition->regCount) - 1) << firstRegAssigned);
consecutiveRegsInUseThisLocation = (UINT128(0, ((1ULL << firstRefPosition->regCount) - 1)) << firstRegAssigned);

while (consecutiveRefPosition != nullptr)
{
Expand Down Expand Up @@ -192,8 +192,8 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates,
// available, create the mask only for Rm, Rm+1, ..., Rm+(k-n) to convey that it
// is safe to assign any of those registers, but not beyond that.
#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \
regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \
regMaskTP selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \
regMaskTP selectionStartMask = UINT128(0, (1ULL << regAvailableStartIndex) - 1); \
regMaskTP selectionEndMask = UINT128(0, (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1); \
consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \
overallResult |= availableRegistersMask;

Expand All @@ -202,11 +202,10 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates,
do
{
// From LSB, find the first available register (bit `1`)
regAvailableStartIndex = BitOperations::BitScanForward(static_cast<DWORD64>(currAvailableRegs));
regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1;

regAvailableStartIndex = BitOperations::BitScanForward(currAvailableRegs);
regMaskTP startMask = UINT128(0, (1ULL << regAvailableStartIndex) - 1);
// Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`.
regMaskTP maskProcessed = ~(currAvailableRegs | startMask);
regMaskTP maskProcessed = UINT128(0, 0xFFFFFFFF) & ~(currAvailableRegs | startMask);

// From regAvailableStart, find the first unavailable register (bit `0`).
if (maskProcessed == RBM_NONE)
Expand All @@ -220,9 +219,9 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates,
}
else
{
regAvailableEndIndex = BitOperations::BitScanForward(static_cast<DWORD64>(maskProcessed));
regAvailableEndIndex = BitOperations::BitScanForward(maskProcessed);
}
regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1;
regMaskTP endMask = UINT128(0, (1ULL << regAvailableEndIndex) - 1);

// Anything between regAvailableStart and regAvailableEnd is the range of consecutive registers available.
// If they are equal to or greater than our register requirements, then add all of them to the result.
Expand Down Expand Up @@ -328,11 +327,11 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC
regMaskTP unprocessedRegs = consecutiveCandidates;
unsigned regAvailableStartIndex = 0, regAvailableEndIndex = 0;
int maxSpillRegs = registersNeeded;
regMaskTP registersNeededMask = (1ULL << registersNeeded) - 1;
regMaskTP registersNeededMask = UINT128(0, (1ULL << registersNeeded) - 1);
do
{
// From LSB, find the first available register (bit `1`)
regAvailableStartIndex = BitOperations::BitScanForward(static_cast<DWORD64>(unprocessedRegs));
regAvailableStartIndex = BitOperations::BitScanForward(unprocessedRegs);

// For the current range, find how many registers are free vs. busy
regMaskTP maskForCurRange = RBM_NONE;
Expand All @@ -356,7 +355,7 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC
if (shouldCheckForRounding)
{
unsigned int roundedRegistersNeeded = registersNeeded - (63 - regAvailableStartIndex + 1);
maskForCurRange = (1ULL << roundedRegistersNeeded) - 1;
maskForCurRange = UINT128(0, (1ULL << roundedRegistersNeeded) - 1);
}

maskForCurRange |= (registersNeededMask << regAvailableStartIndex);
Expand Down
12 changes: 11 additions & 1 deletion src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2799,7 +2799,17 @@ void LinearScan::buildIntervals()
availableRegCount = REG_INT_COUNT;
}

if (availableRegCount < (sizeof(regMaskTP) * 8))
#if HAS_PRIMITIVE_128
if ((sizeof(regMaskTP) * 8) > 64)
{
// Mask out the bits that are between 64 ~ availableRegCount
// unsigned __int128 a = ((UINT128(1, 0) << 64) - 1);
unsigned __int64 b = ~0;
actualRegistersMask = b;
}
else
#endif // HAS_PRIMITIVE_128
if (availableRegCount < (sizeof(regMaskTP) * 8))
{
// Mask out the bits that are between 64 ~ availableRegCount
actualRegistersMask = (1ULL << availableRegCount) - 1;
Expand Down
30 changes: 27 additions & 3 deletions src/coreclr/jit/target.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ inline bool compUnixX86Abi()
#error Unsupported or unset target architecture
#endif

// HAS_PRIMITIVE_128 is 1 only when both TARGET_ARM64 and HOST_UNIX are defined,
// and 0 in every other configuration.
// NOTE(review): presumably this means "the host compiler provides a native
// 128-bit integer type" - confirm that this holds for every Unix toolchain in use.
#if defined(TARGET_ARM64) && defined(HOST_UNIX)
#define HAS_PRIMITIVE_128 1
#else
#define HAS_PRIMITIVE_128 0
#endif

/*****************************************************************************/
// The following are intended to capture only those #defines that cannot be replaced
// with static const members of Target
Expand All @@ -80,7 +86,11 @@ inline bool compUnixX86Abi()
#define CSE_CONST_SHARED_LOW_BITS 12

#elif defined(TARGET_ARM64)
#if HAS_PRIMITIVE_128
#define REGMASK_BITS 128
#else
#define REGMASK_BITS 64
#endif // HAS_PRIMITIVE_128
#define CSE_CONST_SHARED_LOW_BITS 12

#elif defined(TARGET_LOONGARCH64)
Expand Down Expand Up @@ -139,7 +149,11 @@ enum _regNumber_enum : unsigned
ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs)
};

#if HAS_PRIMITIVE_128
enum _regMask_enum : unsigned __int128
#else
enum _regMask_enum : unsigned __int64
#endif // HAS_PRIMITIVE_128
{
RBM_NONE = 0,
#define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask,
Expand Down Expand Up @@ -209,26 +223,36 @@ enum _regMask_enum : unsigned
// In any case, we believe that is OK to freely cast between these types; no information will
// be lost.

#if defined(TARGET_AMD64) || defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
#if defined(TARGET_AMD64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
typedef unsigned __int64 regMaskTP;
#elif defined(TARGET_ARM64)
#if HAS_PRIMITIVE_128
typedef unsigned __int128 regMaskTP;
#else
typedef unsigned __int64 regMaskTP;
#endif // HAS_PRIMITIVE_128
#else
typedef unsigned regMaskTP;
typedef unsigned regMaskTP;
#endif

#if REGMASK_BITS == 8
typedef unsigned char regMaskSmall;
#define REG_MASK_INT_FMT "%02X"
#define REG_MASK_ALL_FMT "%02X"
#elif REGMASK_BITS == 16
typedef unsigned short regMaskSmall;
typedef unsigned short regMaskSmall;
#define REG_MASK_INT_FMT "%04X"
#define REG_MASK_ALL_FMT "%04X"
#elif REGMASK_BITS == 32
typedef unsigned regMaskSmall;
#define REG_MASK_INT_FMT "%08X"
#define REG_MASK_ALL_FMT "%08X"
#else
#if HAS_PRIMITIVE_128
typedef unsigned __int128 regMaskSmall;
#else
typedef unsigned __int64 regMaskSmall;
#endif
#define REG_MASK_INT_FMT "%04llX"
#define REG_MASK_ALL_FMT "%016llX"
#endif
Expand Down
16 changes: 16 additions & 0 deletions src/coreclr/jit/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3332,6 +3332,22 @@ uint32_t BitOperations::PopCount(uint64_t value)
#endif
}

#if HAS_PRIMITIVE_128
//------------------------------------------------------------------------
// BitOperations::PopCount: Returns the population count (number of bits set) of a 128-bit mask.
//
// Arguments:
//    value - the value
//
// Return Value:
//    The population count (number of bits set) of value, taken over all 128 bits
//
uint32_t BitOperations::PopCount(unsigned __int128 value)
{
    // Count each 64-bit half with the existing 64-bit overload and add the results.
    // Counting only the low half would silently ignore any bit in positions 64..127.
    const uint64_t lowHalf  = static_cast<uint64_t>(value);
    const uint64_t highHalf = static_cast<uint64_t>(value >> 64);

    return BitOperations::PopCount(highHalf) + BitOperations::PopCount(lowHalf);
}
#endif // HAS_PRIMITIVE_128

//------------------------------------------------------------------------
// BitOperations::ReverseBits: Reverses the bits in an integer value
//
Expand Down
29 changes: 29 additions & 0 deletions src/coreclr/jit/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,31 @@ class BitOperations
#endif
}

#if defined(TARGET_ARM64) && defined(HOST_UNIX)
// Debug helper: writes a 128-bit value to stdout in hexadecimal, formatted as
// "<low 64 bits> : <full value>" and terminated by a newline.
//
// Arguments:
//    n - the 128-bit value to print
//
static void print128x(unsigned __int128 n)
{
    // Use unsigned long long with %llx so the format specifiers match the argument
    // type on every host, independent of how uint64_t is defined.
    const unsigned long long lo = static_cast<unsigned long long>(static_cast<uint64_t>(n));
    const unsigned long long hi = static_cast<unsigned long long>(static_cast<uint64_t>(n >> 64));

    printf("%llx : ", lo);
    if (hi != 0)
    {
        // Once a high half is printed in front, the low half must be zero-padded to
        // its full 16 hex digits; otherwise hi=1, lo=1 would print as "11".
        printf("%llx", hi);
        printf("%016llx", lo);
    }
    else
    {
        printf("%llx", lo);
    }
    printf("\n");
}

// 128-bit overload of BitScanForward: searches from the least significant bit
// towards the most significant bit for a set bit, covering both 64-bit halves.
//
// Arguments:
//    value - the value to scan
//
// Return Value:
//    The result of the 64-bit overload for the low half when that half has a set
//    bit (or when the whole value is zero, preserving the previous behavior);
//    otherwise 64 plus the result of the 64-bit overload for the high half.
//
FORCEINLINE static uint32_t BitScanForward(unsigned __int128 value)
{
    const uint64_t lowHalf  = static_cast<uint64_t>(value);
    const uint64_t highHalf = static_cast<uint64_t>(value >> 64);

    if ((lowHalf == 0) && (highHalf != 0))
    {
        // Every set bit lives in positions 64..127; offset the index accordingly.
        return 64 + BitScanForward(highHalf);
    }

    return BitScanForward(lowHalf);
}
#endif // TARGET_ARM64 && HOST_UNIX

//------------------------------------------------------------------------
// BitOperations::BitScanForward: Search the mask data from least significant bit (LSB) to the most significant bit
// (MSB) for a set bit (1)
Expand Down Expand Up @@ -881,6 +906,10 @@ class BitOperations

static uint32_t PopCount(uint64_t value);

#if defined(TARGET_ARM64) && defined(HOST_UNIX)
// 128-bit overload of PopCount; declared only when both TARGET_ARM64 and HOST_UNIX are defined.
static uint32_t PopCount(unsigned __int128 value);
#endif // TARGET_ARM64 && HOST_UNIX

static uint32_t ReverseBits(uint32_t value);

static uint64_t ReverseBits(uint64_t value);
Expand Down