From e515ef985325ba91324be6ba660c7285c2e67cb8 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 13 Sep 2022 15:21:28 -0700 Subject: [PATCH 01/13] start --- src/passes/OptimizeInstructions.cpp | 60 ++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 286bfd2b6ef..817cdc07b05 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -3612,10 +3612,22 @@ struct OptimizeInstructions bool canOverflow(Binary* binary) { using namespace Abstract; - // If we know nothing about a limit on the amount of bits on either side, - // give up. auto typeMaxBits = getBitsForType(binary->type); auto leftMaxBits = Bits::getMaxBits(binary->left, this); + + if (binary->op == getBinary(binary->type, Shl)) { + // The case of a constant shift is easy to reason about. + if (auto* c = binary->right->dynCast()) { + auto shifts = Bits::getEffectiveShifts(c); + return leftMaxBits + shifts > typeMaxBits; + } + + // TODO: check based on rightMaxBits, though it may only help rarely + return true; + } + + // If we know nothing about a limit on the amount of bits on either side, + // give up, as the following patterns depend on that. auto rightMaxBits = Bits::getMaxBits(binary->right, this); if (std::max(leftMaxBits, rightMaxBits) == typeMaxBits) { return true; @@ -3947,10 +3959,8 @@ struct OptimizeInstructions Binary* add; Const* c1; Const* c2; - if ((matches(curr, - binary(binary(&add, Add, any(), ival(&c1)), ival(&c2))) || - matches(curr, - binary(binary(&add, Add, any(), ival(&c1)), ival(&c2)))) && + if (matches(curr, + binary(binary(&add, Add, any(), ival(&c1)), ival(&c2))) && !canOverflow(add)) { if (c2->value.geU(c1->value).getInteger()) { // This is the first line above, we turn into x > (C2-C1) @@ -3971,6 +3981,44 @@ struct OptimizeInstructions } } + // (x >>> C1) ? C2 -> x ? (C2 << C1) + A if no overflowing + // (see details about the adjustment A below; it depends on the op) + { + Binary* shift; + Const* c1; + Const* c2; + if (matches(curr, + binary(binary(&shift, ShrU, any(), ival(&c1)), ival(&c2)))) { + // Construct a reversed shift and see if it would overflow. + Binary reversedShift; + reversedShift.op = Abstract::getBinary(c1->type, Shl); + reversedShift.left = c2; + reversedShift.right = c1; + if (!canOverflow(&reversedShift)) { + // Great, the reversed shift would not overflow, so we can in + // principle try to reverse the operation here. However, an + // adjustment is needed since the original ShrU is not a linear + // operation - it clears the lower bits. In particular, consider + // + // (x >>> C1) < C2 + // + // If x < (C2 << C1) then the original equation is true, but also, + // x can be even higher and it will still be true, since we can fill + // in those lower bits that would have been clears, ending up with + // + // x < (C2 << C1) + (1 << C1) - 1 + // + auto lowBits = Literal::makeFromInt64(1, c1->type); + lowBits = lowBits.shl(c1->value); + lowBits = loBits.sub(Literal::makeFromInt64(1, c1->type)); + c2->value = c2->value.shl(c1->value); + c2->value = c2->value.add(c1->value); + curr->left = shift->left; + return curr; + } + } + } + // Comparisons can sometimes be simplified depending on the number of // bits, e.g. (unsigned)x > y must be true if x has strictly more bits. // A common case is a constant on the right, e.g. (x & 255) < 256 must be From 6a4c8aa7d0541cb82bd43e0da6ff2db4852c4569 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 20 Sep 2022 12:49:33 -0700 Subject: [PATCH 02/13] work --- src/ir/bits.h | 8 ++ src/passes/OptimizeInstructions.cpp | 113 ++++++++++++++++++++++------ 2 files changed, 96 insertions(+), 25 deletions(-) diff --git a/src/ir/bits.h b/src/ir/bits.h index 25d80fba74b..c4b8a76de18 100644 --- a/src/ir/bits.h +++ b/src/ir/bits.h @@ -34,6 +34,14 @@ inline int32_t lowBitMask(int32_t bits) { return ret >> (32 - bits); } +inline int64_t lowBitMask64(int64_t bits) { + uint64_t ret = -1; + if (bits >= 64) { + return ret; + } + return ret >> (64 - bits); +} + // checks if the input is a mask of lower bits, i.e., all 1s up to some high // bit, and all zeros from there. returns the number of masked bits, or 0 if // this is not such a mask diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 8f9b952c16d..4eac882eeb9 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -3967,40 +3967,103 @@ struct OptimizeInstructions } } - // (x >>> C1) ? C2 -> x ? (C2 << C1) + A if no overflowing - // (see details about the adjustment A below; it depends on the op) + // (x >> C1) ? C2 -> x ? (C2 << C1) if no overflowing { Binary* shift; Const* c1; Const* c2; if (matches(curr, - binary(binary(&shift, ShrU, any(), ival(&c1)), ival(&c2)))) { - // Construct a reversed shift and see if it would overflow. - Binary reversedShift; - reversedShift.op = Abstract::getBinary(c1->type, Shl); - reversedShift.left = c2; - reversedShift.right = c1; - if (!canOverflow(&reversedShift)) { - // Great, the reversed shift would not overflow, so we can in - // principle try to reverse the operation here. However, an - // adjustment is needed since the original ShrU is not a linear - // operation - it clears the lower bits. In particular, consider + binary(binary(&shift, ShrS, any(), ival(&c1)), ival(&c2)))) { + // Construct a reversed shift and see if it can overflow or change the + // sign. + auto shifts = Bits::getEffectiveShifts(c1); + auto c2MaxBits = Bits::getMaxBits(c2, this); + auto typeMaxBits = getBitsForType(type); + if (c2MaxBits + shifts < typeMaxBits) { + // Great, the reversed shift is in a range that does not cause any + // problems, so we can in principle try to reverse the operation + // by shifting both sides to the left, which will "undo" the + // existing shift on x. However, an adjustment is needed since the + // original Shr is not a linear operation - it clears the lower + // That is, after the new shift, we have // - // (x >>> C1) < C2 + // ((x >> C1) << C1) ? (C2 << C1) + // (x & mask) ? (C2 << C1) // - // If x < (C2 << C1) then the original equation is true, but also, - // x can be even higher and it will still be true, since we can fill - // in those lower bits that would have been clears, ending up with + // Note that we don't want to optimize to this pattern, as it is + // strictly larger than the original (the mask is a larger constant + // than the shift, and the new constant on the right is larger). So + // we only want to optimize here if we can reduce this further, + // which we can in some cases. // - // x < (C2 << C1) + (1 << C1) - 1 + // To implement the necessary adjustment, consider that the mask + // makes the lower bits not matter. In other words, we are rounding + // x down, and comparing it to a number that is similarly rounded + // (since it is the result of a left shift). As a result, an + // adjustment may be needed as a larger or smaller x may be + // possible, e.g.: // - auto lowBits = Literal::makeFromInt64(1, c1->type); - lowBits = lowBits.shl(c1->value); - lowBits = loBits.sub(Literal::makeFromInt64(1, c1->type)); - c2->value = c2->value.shl(c1->value); - c2->value = c2->value.add(c1->value); - curr->left = shift->left; - return curr; + // signed(x & -4) < (100 << 2) = 400 + // + // Consider x = 400. It has no lower bits to mask off, and 400 < 400 + // which is false. For anything less than 400 the mask can only + // decrease x, so + // + // (x & -4) < x < 400 + // + // Thus we can optimize this to x < 400, and no special adjustment + // is necessary. However, consider <= instead of <: + // + // signed(x & -4) <= 400 + // + // x = 400 results in 400 <= 400 which is true. But x can also be + // larger since the bits would get masked off. That is, for x in + // [401, 403], we get (x & -4) == 400. Only 404 would be too large. + // And so we can optimize to x <= 403, basically adding the bits to + // the constant on the right. + // + // Note that we cannot optimize == or != here, as e.g. + // + // (x & -4) == 400 + // + // is true for all of [400, 403]. Only when we have a range can we + // extend the range with an adjustment, basically. + auto moveShift = [&]() { + // Helper function to remove the shift on the left and add a shift + // onto the constant on the right, + // (x >> C1) ? C2 => x ? (C2 << C1) + binary->left = shift->left; + c2->value = c2->value.shl(c1->value); + }; + auto orLowerBits [&]() { + c2->value = c->value.or_(Literal::fromInt64(Bits::lowBitMask64(shifts), type)); + }; + if (binary->op == Abstract::getBinary(type, LtS)) { + // Explained above. + moveShift(); + return curr; + } else if (binary->op == Abstract::getBinary(type, LeS)) { + // Explained above. + moveShift(); + orLowerBits(); + return curr; + } if (binary->op == Abstract::getBinary(type, GtS)) { + // E.g. + // signed(x & -4) > (100 << 2) = 400 + // signed(x & -4) >= 401 + // x & -4 is rounded down to a multiple of 4, so this is only true + // when x > 403. + moveShift(); + orLowerBits(); + return curr; + } else if (binary->op == Abstract::getBinary(type, LeS)) { + // E.g. + // signed(x & -4) >= (100 << 2) = 400 + // x & -4 is rounded down to a multiple of 4, so this is only true + // when x >= 400, and no adjustment is needed. + moveShift(); + return curr; + } } } } From 462e1b284c402cf12c789c9b779b6a94d18e7d69 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 20 Sep 2022 13:10:27 -0700 Subject: [PATCH 03/13] builds --- src/passes/OptimizeInstructions.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 4eac882eeb9..45d887c253b 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -4031,23 +4031,23 @@ struct OptimizeInstructions auto moveShift = [&]() { // Helper function to remove the shift on the left and add a shift // onto the constant on the right, - // (x >> C1) ? C2 => x ? (C2 << C1) - binary->left = shift->left; + // (x >> C1) ? C2 => x ? (C2 << C1) + curr->left = shift->left; c2->value = c2->value.shl(c1->value); }; - auto orLowerBits [&]() { - c2->value = c->value.or_(Literal::fromInt64(Bits::lowBitMask64(shifts), type)); + auto orLowerBits = [&]() { + c2->value = c2->value.or_(Literal::makeFromInt64(Bits::lowBitMask64(shifts), type)); }; - if (binary->op == Abstract::getBinary(type, LtS)) { + if (curr->op == Abstract::getBinary(type, LtS)) { // Explained above. moveShift(); return curr; - } else if (binary->op == Abstract::getBinary(type, LeS)) { + } else if (curr->op == Abstract::getBinary(type, LeS)) { // Explained above. moveShift(); orLowerBits(); return curr; - } if (binary->op == Abstract::getBinary(type, GtS)) { + } if (curr->op == Abstract::getBinary(type, GtS)) { // E.g. // signed(x & -4) > (100 << 2) = 400 // signed(x & -4) >= 401 @@ -4056,7 +4056,7 @@ struct OptimizeInstructions moveShift(); orLowerBits(); return curr; - } else if (binary->op == Abstract::getBinary(type, LeS)) { + } else if (curr->op == Abstract::getBinary(type, LeS)) { // E.g. // signed(x & -4) >= (100 << 2) = 400 // x & -4 is rounded down to a multiple of 4, so this is only true From 3bc744387647049dc4ece55e0519ba6c6b1e9cb4 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 20 Sep 2022 13:10:47 -0700 Subject: [PATCH 04/13] foamt --- src/passes/OptimizeInstructions.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 45d887c253b..b3b65bb9f29 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -3946,7 +3946,7 @@ struct OptimizeInstructions Const* c1; Const* c2; if (matches(curr, - binary(binary(&add, Add, any(), ival(&c1)), ival(&c2))) && + binary(binary(&add, Add, any(), ival(&c1)), ival(&c2))) && !canOverflow(add)) { if (c2->value.geU(c1->value).getInteger()) { // This is the first line above, we turn into x > (C2-C1) @@ -3972,8 +3972,9 @@ struct OptimizeInstructions Binary* shift; Const* c1; Const* c2; - if (matches(curr, - binary(binary(&shift, ShrS, any(), ival(&c1)), ival(&c2)))) { + if (matches( + curr, + binary(binary(&shift, ShrS, any(), ival(&c1)), ival(&c2)))) { // Construct a reversed shift and see if it can overflow or change the // sign. auto shifts = Bits::getEffectiveShifts(c1); @@ -4036,7 +4037,8 @@ struct OptimizeInstructions c2->value = c2->value.shl(c1->value); }; auto orLowerBits = [&]() { - c2->value = c2->value.or_(Literal::makeFromInt64(Bits::lowBitMask64(shifts), type)); + c2->value = c2->value.or_( + Literal::makeFromInt64(Bits::lowBitMask64(shifts), type)); }; if (curr->op == Abstract::getBinary(type, LtS)) { // Explained above. @@ -4047,7 +4049,8 @@ struct OptimizeInstructions moveShift(); orLowerBits(); return curr; - } if (curr->op == Abstract::getBinary(type, GtS)) { + } + if (curr->op == Abstract::getBinary(type, GtS)) { // E.g. // signed(x & -4) > (100 << 2) = 400 // signed(x & -4) >= 401 From 9a5aad72fe41c369e19465fd0d578ee20a47dacc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 20 Sep 2022 13:15:03 -0700 Subject: [PATCH 05/13] test --- test/lit/passes/optimize-instructions.wast | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/test/lit/passes/optimize-instructions.wast b/test/lit/passes/optimize-instructions.wast index c1835442d34..7907e985b58 100644 --- a/test/lit/passes/optimize-instructions.wast +++ b/test/lit/passes/optimize-instructions.wast @@ -2638,11 +2638,8 @@ ) ;; CHECK: (func $lt_s-sext-zero (param $0 i32) (result i32) ;; CHECK-NEXT: (i32.lt_s - ;; CHECK-NEXT: (i32.shr_s - ;; CHECK-NEXT: (i32.shl - ;; CHECK-NEXT: (local.get $0) - ;; CHECK-NEXT: (i32.const 24) - ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.shl + ;; CHECK-NEXT: (local.get $0) ;; CHECK-NEXT: (i32.const 24) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (i32.const 0) From 92e334eba3b63c8763280ff544f15600a0745af6 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 20 Sep 2022 13:40:01 -0700 Subject: [PATCH 06/13] comments --- src/passes/OptimizeInstructions.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index b3b65bb9f29..d77ab32a988 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -3967,7 +3967,8 @@ struct OptimizeInstructions } } - // (x >> C1) ? C2 -> x ? (C2 << C1) if no overflowing + // (x >> C1) ? C2 -> x ? (C2 << C1) if no overflow in << + // This may require an adjustment to the constant on the right, see below. { Binary* shift; Const* c1; @@ -3975,8 +3976,11 @@ struct OptimizeInstructions if (matches( curr, binary(binary(&shift, ShrS, any(), ival(&c1)), ival(&c2)))) { - // Construct a reversed shift and see if it can overflow or change the - // sign. + // Consider C2 << C1 and see if it can overflow or even change the + // sign. If it can't, then we know that removing the shift from the + // left side does not change the sign there (it's a signed-shift- + // right) and since neither does the left change sign, we can move the + // shift to the right side without altering the result. auto shifts = Bits::getEffectiveShifts(c1); auto c2MaxBits = Bits::getMaxBits(c2, this); auto typeMaxBits = getBitsForType(type); @@ -3984,11 +3988,15 @@ struct OptimizeInstructions // Great, the reversed shift is in a range that does not cause any // problems, so we can in principle try to reverse the operation // by shifting both sides to the left, which will "undo" the - // existing shift on x. However, an adjustment is needed since the - // original Shr is not a linear operation - it clears the lower - // That is, after the new shift, we have + // existing shift on x, causing us to only have a shift on the + // right: // // ((x >> C1) << C1) ? (C2 << C1) + // + // However, an adjustment may be needed since the original Shr is + // not a linear operation - it clears the lower bits. That is, after + // the new shift, we have: + // // (x & mask) ? (C2 << C1) // // Note that we don't want to optimize to this pattern, as it is From 55064528a293b63e35daa6b277f9d6dc33e01a2f Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 20 Sep 2022 13:49:00 -0700 Subject: [PATCH 07/13] test --- src/passes/OptimizeInstructions.cpp | 6 +- .../passes/optimize-instructions-shifts.wast | 72 +++++++++++++++++++ 2 files changed, 75 insertions(+), 3 deletions(-) create mode 100644 test/lit/passes/optimize-instructions-shifts.wast diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index d77ab32a988..0dca0cba599 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -4010,13 +4010,13 @@ struct OptimizeInstructions // x down, and comparing it to a number that is similarly rounded // (since it is the result of a left shift). As a result, an // adjustment may be needed as a larger or smaller x may be - // possible, e.g.: + // possible, e.g., consider for < : // // signed(x & -4) < (100 << 2) = 400 // // Consider x = 400. It has no lower bits to mask off, and 400 < 400 - // which is false. For anything less than 400 the mask can only - // decrease x, so + // which is false. For anything less than 400 the mask will round + // x down to something less than 400, so this will be true: // // (x & -4) < x < 400 // diff --git a/test/lit/passes/optimize-instructions-shifts.wast b/test/lit/passes/optimize-instructions-shifts.wast new file mode 100644 index 00000000000..fa61d64f366 --- /dev/null +++ b/test/lit/passes/optimize-instructions-shifts.wast @@ -0,0 +1,72 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited. +;; RUN: wasm-opt %s --optimize-instructions -S -o - | filecheck %s + +(module + (func $less-than-shifted (param $x i32) (param $y i64) + ;; (x >> 2) < 100 => x < 400 + (drop + (i32.lt_s + (i32.shr_s + (local.get $x) + (i32.const 2) + ) + (i32.const 100) + ) + ) + ;; As above, but with i64. + (drop + (i64.lt_s + (i64.shr_s + (local.get $y) + (i64.const 2) + ) + (i64.const 100) + ) + ) + ;; Borderline values: we don't want the constant on the right, when shifted + ;; by the number of shifts, to become signed, as that might alter the + ;; result. This case can be optimized, and the one after it not. + (drop + (i32.lt_s + (i32.shr_s + (local.get $x) + (i32.const 22) + ) + (i32.const 255) + ) + ) + (drop + (i32.lt_s + (i32.shr_s + (local.get $x) + (i32.const 23) + ) + (i32.const 255) + ) + ) + ) + + (func $less-than-shifted-todo (param $x i32) + ;; We don't optimize these yet. + ;; This comparison is unsigned. + (drop + (i32.lt_u + (i32.shr_s + (local.get $x) + (i32.const 2) + ) + (i32.const 100) + ) + ) + ;; This shift is unsigned. + (drop + (i32.lt_s + (i32.shr_u + (local.get $x) + (i32.const 2) + ) + (i32.const 100) + ) + ) + ) +) From e29eef6ba78e8f67f16f3f05182a69aebc90481b Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 20 Sep 2022 13:49:53 -0700 Subject: [PATCH 08/13] bad --- .../passes/optimize-instructions-shifts.wast | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/test/lit/passes/optimize-instructions-shifts.wast b/test/lit/passes/optimize-instructions-shifts.wast index fa61d64f366..53c51792ba0 100644 --- a/test/lit/passes/optimize-instructions-shifts.wast +++ b/test/lit/passes/optimize-instructions-shifts.wast @@ -2,6 +2,32 @@ ;; RUN: wasm-opt %s --optimize-instructions -S -o - | filecheck %s (module + ;; CHECK: (func $less-than-shifted (param $x i32) (param $y i64) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.lt_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 400) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i64.lt_s + ;; CHECK-NEXT: (local.get $y) + ;; CHECK-NEXT: (i64.const 400) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.lt_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 1069547520) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.lt_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 2139095040) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) (func $less-than-shifted (param $x i32) (param $y i64) ;; (x >> 2) < 100 => x < 400 (drop @@ -46,6 +72,26 @@ ) ) + ;; CHECK: (func $less-than-shifted-todo (param $x i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.lt_u + ;; CHECK-NEXT: (i32.shr_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 100) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.lt_u + ;; CHECK-NEXT: (i32.shr_u + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 100) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) (func $less-than-shifted-todo (param $x i32) ;; We don't optimize these yet. ;; This comparison is unsigned. From bd619c5dd60d533ccdfb7791605d64ef853afe78 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 20 Sep 2022 14:01:09 -0700 Subject: [PATCH 09/13] almost --- src/passes/OptimizeInstructions.cpp | 2 +- .../passes/optimize-instructions-shifts.wast | 115 +++++++++++++++--- 2 files changed, 101 insertions(+), 16 deletions(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 0dca0cba599..70f2376bfc5 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -4067,7 +4067,7 @@ struct OptimizeInstructions moveShift(); orLowerBits(); return curr; - } else if (curr->op == Abstract::getBinary(type, LeS)) { + } else if (curr->op == Abstract::getBinary(type, GeS)) { // E.g. // signed(x & -4) >= (100 << 2) = 400 // x & -4 is rounded down to a multiple of 4, so this is only true diff --git a/test/lit/passes/optimize-instructions-shifts.wast b/test/lit/passes/optimize-instructions-shifts.wast index 53c51792ba0..1dc69133f1a 100644 --- a/test/lit/passes/optimize-instructions-shifts.wast +++ b/test/lit/passes/optimize-instructions-shifts.wast @@ -15,18 +15,6 @@ ;; CHECK-NEXT: (i64.const 400) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.lt_s - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: (i32.const 1069547520) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (i32.lt_s - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: (i32.const 2139095040) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $less-than-shifted (param $x i32) (param $y i64) ;; (x >> 2) < 100 => x < 400 @@ -49,14 +37,43 @@ (i64.const 100) ) ) + ) + + ;; CHECK: (func $less-than-shifted-overflow (param $x i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.lt_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 2139095040) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.lt_s + ;; CHECK-NEXT: (i32.shr_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 24) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 255) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.lt_s + ;; CHECK-NEXT: (i32.shr_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 25) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 255) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $less-than-shifted-overflow (param $x i32) ;; Borderline values: we don't want the constant on the right, when shifted ;; by the number of shifts, to become signed, as that might alter the - ;; result. This case can be optimized, and the one after it not. + ;; result. This case can be optimized, and the ones after it not. (drop (i32.lt_s (i32.shr_s (local.get $x) - (i32.const 22) + (i32.const 23) ) (i32.const 255) ) @@ -65,7 +82,16 @@ (i32.lt_s (i32.shr_s (local.get $x) - (i32.const 23) + (i32.const 24) + ) + (i32.const 255) + ) + ) + (drop + (i32.lt_s + (i32.shr_s + (local.get $x) + (i32.const 25) ) (i32.const 255) ) @@ -115,4 +141,63 @@ ) ) ) + + ;; CHECK: (func $other-comparisons (param $x i32) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.le_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 403) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.gt_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 403) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (i32.ge_s + ;; CHECK-NEXT: (i32.shr_s + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 2) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (i32.const 100) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $other-comparisons (param $x i32) + ;; <= : + ;; (x >> 2) <= 100 => x <= 403 + (drop + (i32.le_s + (i32.shr_s + (local.get $x) + (i32.const 2) + ) + (i32.const 100) + ) + ) + ;; > : + ;; (x >> 2) > 100 => x > 403 + (drop + (i32.gt_s + (i32.shr_s + (local.get $x) + (i32.const 2) + ) + (i32.const 100) + ) + ) + ;; >= : + ;; (x >> 2) >= 100 => x >= 400 + (drop + (i32.ge_s + (i32.shr_s + (local.get $x) + (i32.const 2) + ) + (i32.const 100) + ) + ) + ) ) From 91dccde4a9a9c393c55eb822dd8b6ed092d3aed9 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 20 Sep 2022 14:03:28 -0700 Subject: [PATCH 10/13] fix --- test/lit/passes/optimize-instructions-shifts.wast | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/test/lit/passes/optimize-instructions-shifts.wast b/test/lit/passes/optimize-instructions-shifts.wast index 1dc69133f1a..1ed271ae148 100644 --- a/test/lit/passes/optimize-instructions-shifts.wast +++ b/test/lit/passes/optimize-instructions-shifts.wast @@ -157,11 +157,8 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (i32.ge_s - ;; CHECK-NEXT: (i32.shr_s - ;; CHECK-NEXT: (local.get $x) - ;; CHECK-NEXT: (i32.const 2) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (i32.const 100) + ;; CHECK-NEXT: (local.get $x) + ;; CHECK-NEXT: (i32.const 400) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) From 582bf6c2446389f6f80aeb8ffe65afd2d5aa41b1 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 21 Sep 2022 13:38:20 -0700 Subject: [PATCH 11/13] fix --- src/passes/OptimizeInstructions.cpp | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 70f2376bfc5..ad7a5966fb3 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -3598,22 +3598,10 @@ struct OptimizeInstructions bool canOverflow(Binary* binary) { using namespace Abstract; + // If we know nothing about a limit on the amount of bits on either side, + // give up. auto typeMaxBits = getBitsForType(binary->type); auto leftMaxBits = Bits::getMaxBits(binary->left, this); - - if (binary->op == getBinary(binary->type, Shl)) { - // The case of a constant shift is easy to reason about. - if (auto* c = binary->right->dynCast()) { - auto shifts = Bits::getEffectiveShifts(c); - return leftMaxBits + shifts > typeMaxBits; - } - - // TODO: check based on rightMaxBits, though it may only help rarely - return true; - } - - // If we know nothing about a limit on the amount of bits on either side, - // give up, as the following patterns depend on that. auto rightMaxBits = Bits::getMaxBits(binary->right, this); if (std::max(leftMaxBits, rightMaxBits) == typeMaxBits) { return true; From 14d09dbd54139455cda3c7e0d52875beafed4728 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 21 Sep 2022 13:45:31 -0700 Subject: [PATCH 12/13] comments --- src/passes/OptimizeInstructions.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index ad7a5966fb3..3e3a2cfbfac 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -3957,6 +3957,7 @@ struct OptimizeInstructions // (x >> C1) ? C2 -> x ? (C2 << C1) if no overflow in << // This may require an adjustment to the constant on the right, see below. + // TODO: unsigned shift { Binary* shift; Const* c1; @@ -4009,7 +4010,7 @@ struct OptimizeInstructions // (x & -4) < x < 400 // // Thus we can optimize this to x < 400, and no special adjustment - // is necessary. However, consider <= instead of <: + // is necessary. However, consider <= instead of < : // // signed(x & -4) <= 400 // From 3e7493e8945e6d6a200d1304cd635379363dc683 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 21 Sep 2022 13:46:01 -0700 Subject: [PATCH 13/13] fix --- src/passes/OptimizeInstructions.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp index 3e3a2cfbfac..f4c20b287b3 100644 --- a/src/passes/OptimizeInstructions.cpp +++ b/src/passes/OptimizeInstructions.cpp @@ -4046,8 +4046,7 @@ struct OptimizeInstructions moveShift(); orLowerBits(); return curr; - } - if (curr->op == Abstract::getBinary(type, GtS)) { + } else if (curr->op == Abstract::getBinary(type, GtS)) { // E.g. // signed(x & -4) > (100 << 2) = 400 // signed(x & -4) >= 401