diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 7caba07734be6d..17e3f21ddf040e 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -10464,28 +10464,18 @@ GenTree* Compiler::fgOptimizeAddition(GenTreeOp* add) if (op2->IsIntegralConst()) { // ADD(NEG(x), CONST) => XOR(x, CONST) - - auto isSubToXorValid = [=](uint64_t cns, IntegralRange range) { - // cns - x, where x in [lo, hi] - uint64_t lo = IntegralRange::SymbolicToRealValue(range.GetLowerBound()); - uint64_t hi = IntegralRange::SymbolicToRealValue(range.GetUpperBound()); - - // OR of all numbers in [lo, hi] - uint64_t knownBits = (lo == hi) ? 0 : (UINT64_MAX >> BitOperations::LeadingZeroCount(lo ^ hi)); - knownBits = lo | knownBits; - - // Zero out bits outside of TYPE. This handles cases that rely on overflow - uint32_t sizeInBits = genTypeSize(add->TypeGet()) * BITS_PER_BYTE; - knownBits &= (1ULL << (sizeInBits - 1)) - 1; - - // At every bit pos with a 1 in knownBits, cns also needs 1. - // Otherwise borrowing occurs and XOR is not equivalent to SUB - return (cns & knownBits) == knownBits; - }; - - IntegralRange range = IntegralRange::ForNode(op1->gtGetOp1(), this); - uint64_t cns = (uint64_t)op2->AsIntConCommon()->IntegralValue(); - if (isSubToXorValid(cns, range)) + uint64_t cns = (uint64_t)op2->AsIntConCommon()->IntegralValue(); + IntegralRange range = IntegralRange::ForNode(op1->gtGetOp1(), this); + uint64_t lo = IntegralRange::SymbolicToRealValue(range.GetLowerBound()); + uint64_t hi = IntegralRange::SymbolicToRealValue(range.GetUpperBound()); + uint64_t knownBits = BitOperations::BitsetFromRange(lo, hi); + + // Zero out bits outside of TYPE. 
This handles cases that rely on overflow (int.MaxValue - x) + uint32_t sizeInBits = genTypeSize(add->TypeGet()) * BITS_PER_BYTE; + knownBits &= (1ULL << (sizeInBits - 1)) - 1; + + bool noCarry = (cns & knownBits) == knownBits; + if (noCarry) { add->SetOper(GT_XOR, GenTree::PRESERVE_VN); add->gtOp1 = op1->gtGetOp1(); diff --git a/src/coreclr/jit/rangecheck.cpp b/src/coreclr/jit/rangecheck.cpp index 85953433c066aa..d2c1b3989391b1 100644 --- a/src/coreclr/jit/rangecheck.cpp +++ b/src/coreclr/jit/rangecheck.cpp @@ -736,9 +736,23 @@ Range RangeCheck::GetRangeFromAssertionsWorker( break; case VNF_NEG: + case VNF_NOT: { Range r1 = GetRangeFromAssertionsWorker(comp, funcApp.m_args[0], assertions, --budget, visited); - Range unaryOpResult = RangeOps::Negate(r1); + Range unaryOpResult = Range(Limit(Limit::keUnknown)); + switch (funcApp.m_func) + { + case VNF_NEG: + unaryOpResult = RangeOps::Negate(r1); + break; + + case VNF_NOT: + unaryOpResult = RangeOps::Not(r1); + break; + + default: + unreached(); + } // We can use the result only if it never overflows. result = unaryOpResult.IsConstantRange() ? unaryOpResult : result; @@ -751,6 +765,7 @@ Range RangeCheck::GetRangeFromAssertionsWorker( case VNF_SUB: case VNF_AND: case VNF_OR: + case VNF_XOR: case VNF_RSH: case VNF_RSZ: case VNF_UMOD: @@ -776,6 +791,9 @@ Range RangeCheck::GetRangeFromAssertionsWorker( case VNF_OR: binOpResult = RangeOps::Or(r1, r2); break; + case VNF_XOR: + binOpResult = RangeOps::Xor(r1, r2); + break; case VNF_LSH: binOpResult = RangeOps::ShiftLeft(r1, r2); break; @@ -1553,7 +1571,9 @@ Range RangeCheck::ComputeRangeForBinOp(BasicBlock* block, GenTreeOp* binop, bool { assert(binop->OperIs(GT_ADD, GT_OR, GT_XOR, GT_AND, GT_RSH, GT_RSZ, GT_LSH, GT_UMOD, GT_MUL)); - // For XOR we only care about Log2 pattern for now + // To handle the Log2 pattern of "63 ^ LZCNT(x | 1)" we are missing precise + // range info for "LZCNT(x | 1)" (should be [0, 63]) and 64bit support. 
Special case it: + // https://github.com/dotnet/runtime/pull/113790 if (binop->OperIs(GT_XOR)) { int upperBound; @@ -1562,7 +1582,6 @@ Range RangeCheck::ComputeRangeForBinOp(BasicBlock* block, GenTreeOp* binop, bool assert(upperBound > 0); return Range(Limit(Limit::keConstant, 0), Limit(Limit::keConstant, upperBound)); } - return Range(Limit(Limit::keUnknown)); } GenTree* op1 = binop->gtGetOp1(); @@ -1632,6 +1651,9 @@ Range RangeCheck::ComputeRangeForBinOp(BasicBlock* block, GenTreeOp* binop, bool case GT_OR: r = RangeOps::Or(op1Range, op2Range); break; + case GT_XOR: + r = RangeOps::Xor(op1Range, op2Range); + break; case GT_UMOD: r = RangeOps::UnsignedMod(op1Range, op2Range); break; @@ -1964,15 +1986,10 @@ bool RangeCheck::ComputeDoesOverflow(BasicBlock* block, GenTree* expr, const Ran overflows = DoesBinOpOverflow(block, expr->AsOp(), range); } // These operators don't overflow. - else if (expr->OperIs(GT_AND, GT_RSH, GT_RSZ, GT_UMOD, GT_NEG)) + else if (expr->OperIs(GT_AND, GT_RSH, GT_RSZ, GT_UMOD, GT_NEG, GT_XOR, GT_NOT)) { overflows = false; } - else if (expr->OperIs(GT_XOR) && vnStore->IsVNLog2(m_compiler->vnStore->VNConservativeNormalValue(expr->gtVNPair))) - { - // For XOR we only care about Log2 pattern for now, which never overflows. - overflows = false; - } // Walk through phi arguments to check if phi arguments involve arithmetic that overflows. else if (expr->OperIs(GT_PHI)) { @@ -2070,6 +2087,12 @@ Range RangeCheck::ComputeRange(BasicBlock* block, GenTree* expr, bool monIncreas Range op1Range = GetRangeWorker(block, expr->gtGetOp1(), monIncreasing DEBUGARG(indent + 1)); range = RangeOps::Negate(op1Range); } + else if (expr->OperIs(GT_NOT)) + { + // Compute range for not, e.g: [2, 8] -> [-9..-3] + Range op1Range = GetRangeWorker(block, expr->gtGetOp1(), monIncreasing DEBUGARG(indent + 1)); + range = RangeOps::Not(op1Range); + } // If phi, then compute the range for arguments, calling the result "dependent" when looping begins. 
else if (expr->OperIs(GT_PHI)) { diff --git a/src/coreclr/jit/rangecheck.h b/src/coreclr/jit/rangecheck.h index cab98b29c85b60..6a561291021e41 100644 --- a/src/coreclr/jit/rangecheck.h +++ b/src/coreclr/jit/rangecheck.h @@ -479,6 +479,46 @@ struct RangeOps return Range(Limit(Limit::keUnknown)); } + static Range Xor(const Range& r1, const Range& r2) + { + int r1ConstVal; + int r2ConstVal; + bool r1IsConstVal = r1.IsSingleValueConstant(&r1ConstVal); + bool r2IsConstVal = r2.IsSingleValueConstant(&r2ConstVal); + + // Both ranges are single constant values. + // Example: [5..5] ^ [3..3] = [6..6] + if (r1IsConstVal && r2IsConstVal) + { + return Range(Limit(Limit::keConstant, r1ConstVal ^ r2ConstVal)); + } + + auto isSubToXorValid = [=](uint64_t cns, Range range) { + uint64_t lo = (uint64_t)range.LowerLimit().GetConstant(); + uint64_t hi = (uint64_t)range.UpperLimit().GetConstant(); + uint64_t knownBits = BitOperations::BitsetFromRange(lo, hi); + + // Zero out bits outside of TYPE. This handles cases that rely on overflow (int.MaxValue - x) + uint32_t sizeInBits = genTypeSize(TYP_INT) * BITS_PER_BYTE; + knownBits &= (1ULL << (sizeInBits - 1)) - 1; + + // Can sub be performed without carry? + return (cns & knownBits) == knownBits; + }; + + // Example: [3..5] ^ [-1..-1] = [-6..-4] + if (r1IsConstVal && r2.IsConstantRange() && isSubToXorValid(r1ConstVal, r2)) + { + return Subtract(r1, r2); + } + if (r2IsConstVal && r1.IsConstantRange() && isSubToXorValid(r2ConstVal, r1)) + { + return Subtract(r2, r1); + } + + return Range(Limit(Limit::keUnknown)); + } + static Range UnsignedMod(const Range& r1, const Range& r2) { // For X UMOD Y we only handle the case when Y is a fixed positive constant.
@@ -661,6 +701,12 @@ struct RangeOps return result; } + static Range Not(const Range& range) + { + Range cns = Limit(Limit::keConstant, -1); + return Subtract(cns, range); + } + //------------------------------------------------------------------------ // EvalRelop: Evaluate the relation between two ranges for the given relop // Example: "x >= y" is AlwaysTrue when "x.LowerLimit() >= y.UpperLimit()" diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index f1380f35a176c0..02eeac5d64a48d 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -3601,6 +3601,29 @@ double BitOperations::UInt64BitsToDouble(uint64_t value) return result; } +//------------------------------------------------------------------------ +// BitOperations::BitsetFromRange: Gets a bitset from OR'ing all numbers in [lo, hi] +// +// Arguments: +// lo - The range minimum. +// hi - The range maximum. +// +// Return Value: +// The bitwise OR of every value in [lo, hi] +// +uint64_t BitOperations::BitsetFromRange(uint64_t lo, uint64_t hi) +{ + if (lo == hi) + { + return lo; + } + + uint64_t mask = UINT64_MAX >> BitOperations::LeadingZeroCount(lo ^ hi); + mask = lo | mask; + + return mask; +} + namespace MagicDivide { template diff --git a/src/coreclr/jit/utils.h b/src/coreclr/jit/utils.h index 38c819e0672896..e33c77336d07b5 100644 --- a/src/coreclr/jit/utils.h +++ b/src/coreclr/jit/utils.h @@ -1023,6 +1023,8 @@ class BitOperations static float UInt32BitsToSingle(uint32_t value); static double UInt64BitsToDouble(uint64_t value); + + static uint64_t BitsetFromRange(uint64_t lo, uint64_t hi); }; // The CLR requires that critical section locks be initialized via its ClrCreateCriticalSection API...but