From 6e7babdd375f5802552a1001d31506e25f88aaea Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 15 Mar 2026 15:14:42 +0000
Subject: [PATCH 1/5] Initial plan


From 7b7fb8ced5ccb3a46eaeb6e181661361cf296768 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 15 Mar 2026 15:20:20 +0000
Subject: [PATCH 2/5] Broaden wasm cast fence for lossless_cast regression

Co-authored-by: alexreinking <169273+alexreinking@users.noreply.github.com>
---
 src/CodeGen_WebAssembly.cpp        | 26 ++++++++++----------------
 test/correctness/lossless_cast.cpp | 14 ++++++++++++++
 2 files changed, 24 insertions(+), 16 deletions(-)
diff --git a/src/CodeGen_WebAssembly.cpp b/src/CodeGen_WebAssembly.cpp
index 66c043ad32ef..b7c6e03a9784 100644
--- a/src/CodeGen_WebAssembly.cpp
+++ b/src/CodeGen_WebAssembly.cpp
@@ -182,27 +182,21 @@ void CodeGen_WebAssembly::visit(const Cast *op) {
         // WebAssemblyTargetLowering::isVectorLoadExtDesirable assumes the
         // operand of a vector extend is always a load, but LLVM's optimizer may
         // insert a freeze node between the load and the extend, causing a
-        // cast<LoadSDNode> assertion failure. Use an optimization fence to
-        // prevent the DAG combiner from seeing through to the load. See
+        // cast<LoadSDNode> assertion failure. Some expressions that originate
+        // from loads become less obviously load-like after lossless_cast()
+        // rewrites and vectorization, so fence all widening vector integer
+        // casts here to keep the DAG combiner from seeing through to the load.
+        // See
         // https://github.com/halide/Halide/issues/8928 and
         // https://github.com/llvm/llvm-project/issues/184676
         if (op->type.is_int_or_uint() &&
             op->value.type().is_int_or_uint() &&
             op->type.bits() > op->value.type().bits()) {
-            // Check if the value is a Load. Loads are sometimes hiding behind
-            // let bindings.
-            bool is_load = op->value.as<Load>();
-            if (const Variable *var = op->value.as<Variable>()) {
-                llvm::Value *v = sym_get(var->name, false);
-                is_load = v && llvm::isa<llvm::LoadInst>(v);
-            }
-            if (is_load) {
-                llvm::Value *v = codegen(op->value);
-                v = optimization_fence(v);
-                value = builder->CreateIntCast(v, llvm_type_of(op->type),
-                                               op->value.type().is_int());
-                return;
-            }
+            llvm::Value *v = codegen(op->value);
+            v = optimization_fence(v);
+            value = builder->CreateIntCast(v, llvm_type_of(op->type),
+                                           op->value.type().is_int());
+            return;
         }
     }
 
diff --git a/test/correctness/lossless_cast.cpp b/test/correctness/lossless_cast.cpp
index 7633954fb003..9870b1122d02 100644
--- a/test/correctness/lossless_cast.cpp
+++ b/test/correctness/lossless_cast.cpp
@@ -453,6 +453,16 @@ int test_one(uint32_t seed) {
     return 0;
 }
 
+int regression_test() {
+    for (uint32_t seed : {1926104395u, 3082698823u}) {
+        if (test_one(seed)) {
+            std::cout << "lossless_cast regression failed for seed " << seed << "\n";
+            return 1;
+        }
+    }
+    return 0;
+}
+
 int fuzz_test(uint32_t root_seed) {
     std::mt19937 seed_generator(root_seed);
 
@@ -475,6 +485,10 @@ int main(int argc, char **argv) {
         std::cout << "lossless_cast test failed!\n";
         return 1;
     }
+    if (regression_test()) {
+        std::cout << "lossless_cast regression test failed!\n";
+        return 1;
+    }
     if (fuzz_test(time(NULL))) {
         std::cout << "lossless_cast fuzz test failed!\n";
         return 1;

From d9fac995f636517202812551341edfc48bada85c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 15 Mar 2026 16:48:48 +0000
Subject: [PATCH 3/5] Extract lossless_cast regression expressions

Co-authored-by: alexreinking <169273+alexreinking@users.noreply.github.com>
---
 test/correctness/lossless_cast.cpp | 156 ++++++++++++++++++++++-------
 1 file changed, 120 insertions(+), 36 deletions(-)

diff --git a/test/correctness/lossless_cast.cpp b/test/correctness/lossless_cast.cpp
index 9870b1122d02..dec958077e32 100644
--- a/test/correctness/lossless_cast.cpp
+++ b/test/correctness/lossless_cast.cpp
@@ -364,13 +364,7 @@ bool might_have_ub(Expr e) {
 
 bool found_error = false;
 
-int test_one(uint32_t seed) {
-    std::mt19937 rng{seed};
-
-    buf_u8.fill(rng);
-    buf_i8.fill(rng);
-
-    Expr e1 = random_expr(rng);
+int test_case(const char *name, Expr e1, Type target, uint32_t seed = 0) {
     Expr simplified = simplify(e1);
 
     if (might_have_ub(e1) ||
@@ -382,9 +376,6 @@ int test_one(uint32_t seed) {
     // We're also going to test constant_integer_bounds here.
     ConstantInterval bounds = constant_integer_bounds(e1);
 
-    Type target;
-    std::vector<Type> target_types = {UInt(32), Int(32), UInt(16), Int(16)};
-    target = target_types[rng() % target_types.size()];
     Expr e2 = lossless_cast(target, e1);
 
     if (!e2.defined()) {
@@ -393,8 +384,11 @@ int test_one(uint32_t seed) {
 
     if (definitely_has_ub(e2)) {
         std::cout << "lossless_cast introduced ub:\n"
-                  << "seed = " << seed << "\n"
-                  << "e1 = " << e1 << "\n"
+                  << "case = " << name << "\n";
+        if (seed != 0) {
+            std::cout << "seed = " << seed << "\n";
+        }
+        std::cout << "e1 = " << e1 << "\n"
                   << "e2 = " << e2 << "\n"
                   << "simplify(e1) = " << simplify(e1) << "\n"
                   << "simplify(e2) = " << simplify(e2) << "\n";
@@ -417,14 +411,17 @@ int test_one(uint32_t seed) {
         if (out1(x) != out2(x)) {
             std::cout
                 << "lossless_cast failure\n"
-                << "seed = " << seed << "\n"
-                << "x = " << x << "\n"
-                << "buf_u8 = " << (int)buf_u8(x) << "\n"
-                << "buf_i8 = " << (int)buf_i8(x) << "\n"
-                << "out1 = " << out1(x) << "\n"
-                << "out2 = " << out2(x) << "\n"
-                << "Original: " << e1 << "\n"
-                << "Lossless cast: " << e2 << "\n";
+                << "case = " << name << "\n";
+            if (seed != 0) {
+                std::cout << "seed = " << seed << "\n";
+            }
+            std::cout << "x = " << x << "\n"
+                      << "buf_u8 = " << (int)buf_u8(x) << "\n"
+                      << "buf_i8 = " << (int)buf_i8(x) << "\n"
+                      << "out1 = " << out1(x) << "\n"
+                      << "out2 = " << out2(x) << "\n"
+                      << "Original: " << e1 << "\n"
+                      << "Lossless cast: " << e2 << "\n";
             return 1;
         }
     }
@@ -435,17 +432,20 @@ int test_one(uint32_t seed) {
             Expr simplified = simplify(e1);
             std::cout
                 << "constant_integer_bounds failure\n"
-                << "seed = " << seed << "\n"
-                << "x = " << x << "\n"
-                << "buf_u8 = " << (int)buf_u8(x) << "\n"
-                << "buf_i8 = " << (int)buf_i8(x) << "\n"
-                << "out1 = " << out1(x) << "\n"
-                << "Expression: " << e1 << "\n"
-                << "Bounds: " << bounds << "\n"
-                << "Simplified: " << simplified << "\n"
-                // If it's still out-of-bounds when the expression is
-                // simplified, that'll be easier to debug.
-                << "Bounds: " << constant_integer_bounds(simplified) << "\n";
+                << "case = " << name << "\n";
+            if (seed != 0) {
+                std::cout << "seed = " << seed << "\n";
+            }
+            std::cout << "x = " << x << "\n"
+                      << "buf_u8 = " << (int)buf_u8(x) << "\n"
+                      << "buf_i8 = " << (int)buf_i8(x) << "\n"
+                      << "out1 = " << out1(x) << "\n"
+                      << "Expression: " << e1 << "\n"
+                      << "Bounds: " << bounds << "\n"
+                      << "Simplified: " << simplified << "\n"
+                      // If it's still out-of-bounds when the expression is
+                      // simplified, that'll be easier to debug.
+                      << "Bounds: " << constant_integer_bounds(simplified) << "\n";
             return 1;
         }
     }
@@ -453,12 +453,96 @@ int test_one(uint32_t seed) {
     return 0;
 }
 
+int test_one(uint32_t seed) {
+    // All randomness below comes from this one generator, consumed in this
+    // order: buffer contents, then the expression, then the target type.
+    std::mt19937 rng{seed};
+    buf_u8.fill(rng);
+    buf_i8.fill(rng);
+    Expr e1 = random_expr(rng);
+
+    // Choose which type lossless_cast() will be asked to produce.
+    const std::vector<Type> target_types = {UInt(32), Int(32), UInt(16), Int(16)};
+    const Type target = target_types[rng() % target_types.size()];
+
+    return test_case("fuzz", e1, target, seed);
+}
+
+Expr regression_expr_1926104395() {
+    Expr u8_88 = cast<uint8_t>(88);
+    Expr u8_173 = cast<uint8_t>(173);
+    Expr i8_122 = cast<int8_t>(122);
+    Expr i8_112 = cast<int8_t>(112);
+    // Pieces of the expression that regression_test() labels "seed 1926104395".
+    Expr div_173_88 = u8_173 / cast(UInt(8), u8_88);
+    Expr twice_div_173_88 = div_173_88 + cast(UInt(8), div_173_88);
+    Expr halving = halving_sub(buf_u8(x), cast(UInt(8), buf_i8(x)));
+    Expr mul = cast(Int(16), u8_88 * cast(UInt(8), u8_173)) +
+               cast(Int(16), halving * cast(UInt(8), twice_div_173_88));
+    Expr shifted = mul_shift_right(mul, cast(Int(16), i8_122), cast(UInt(16), i8_122));
+    Expr rounding = rounding_shift_right(u8_88 * cast(UInt(8), u8_173),
+                                         cast(UInt(8), u8_88 + cast(UInt(8), i8_112)));
+    // Root of the expression: a mul_shift_right() over the pieces built above.
+    return mul_shift_right(shifted - cast(Int(32), halving),
+                           cast(Int(32), twice_div_173_88 / cast(UInt(8), shifted)),
+                           cast(UInt(32), rounding /
+                                             cast(UInt(8), buf_i8(x) /
+                                                               cast(Int(8), div_173_88))));
+}
+
+Expr regression_expr_3082698823() {
+    // Expression that regression_test() labels "seed 3082698823".
+    const Expr lit_i8 = cast<int8_t>(106);
+    const Expr lit_u8 = cast<uint8_t>(191);
+    // The quotient feeds both the shift amount and the subtraction below.
+    const Expr quotient = lit_i8 / cast(Int(8), buf_i8(x));
+    const Expr shift_amount = cast(UInt(16), lit_u8 + cast(UInt(8), quotient));
+    const Expr product = mul_shift_right(cast(Int(16), lit_i8), cast(Int(16), buf_i8(x)), shift_amount);
+    const Expr diff = cast(Int(16), quotient - cast(Int(8), lit_u8));
+    const Expr adjusted = diff - cast(Int(16), count_leading_zeros(diff));
+    // Both factors are widened to 64 bits before the final multiply.
+    return cast(Int(64), product) * cast(Int(64), cast(Int(32), adjusted));
+}
+
+int regression_case(const char *name, Expr e1, Type target) {
+    Expr simplified = simplify(e1);
+    if (might_have_ub(e1) ||
+        might_have_ub(simplified) ||
+        might_have_ub(lower_intrinsics(simplified))) {
+        std::cout << "regression case unexpectedly has ub: " << name << "\n"
+                  << "e1 = " << e1 << "\n"
+                  << "simplify(e1) = " << simplified << "\n";
+        return 1;
+    }
+
+    Expr e2 = lossless_cast(target, e1);
+    if (!e2.defined()) {
+        std::cout << "regression case unexpectedly cannot be narrowed: " << name << "\n"
+                  << "target = " << target << "\n"
+                  << "e1 = " << e1 << "\n";
+        return 1;
+    }
+    // Build a vectorized Func computing both the original and narrowed forms.
+    Func f;
+    f(x) = {cast<int64_t>(e1), cast<int64_t>(e2)};
+    f.vectorize(x, 4, TailStrategy::RoundUp);
+
+    // JIT-compile (not just lower): the failure this guards against is in the
+    // backend code generator, which lowering to a Module never invokes.
+    Target t = get_jit_target_from_environment();
+    f.compile_jit(t);
+
+    return 0;
+}
+
 int regression_test() {
-    for (uint32_t seed : {1926104395u, 3082698823u}) {
-        if (test_one(seed)) {
-            std::cout << "lossless_cast regression failed for seed " << seed << "\n";
-            return 1;
-        }
+    if (regression_case("seed 1926104395", regression_expr_1926104395(), Int(32))) {
+        std::cout << "lossless_cast regression failed for case seed 1926104395\n";
+        return 1;
+    }
+    if (regression_case("seed 3082698823", regression_expr_3082698823(), Int(16))) {
+        std::cout << "lossless_cast regression failed for case seed 3082698823\n";
+        return 1;
     }
     return 0;
 }

From 5ec3ea9a0e87fb0b2b4cf4e9b5db4bd36397e676 Mon Sep 17 00:00:00 2001
From: "halide-ci[bot]" <266445882+halide-ci[bot]@users.noreply.github.com>
Date: Sun, 15 Mar 2026 16:52:11 +0000
Subject: [PATCH 4/5] Apply pre-commit auto-fixes

---
 test/correctness/lossless_cast.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/correctness/lossless_cast.cpp b/test/correctness/lossless_cast.cpp
index dec958077e32..3193a823fb77 100644
--- a/test/correctness/lossless_cast.cpp
+++ b/test/correctness/lossless_cast.cpp
@@ -486,8 +486,8 @@ Expr regression_expr_1926104395() {
     return mul_shift_right(shifted - cast(Int(32), halving),
                            cast(Int(32), twice_div_173_88 / cast(UInt(8), shifted)),
                            cast(UInt(32), rounding /
-                                             cast(UInt(8), buf_i8(x) /
-                                                               cast(Int(8), div_173_88))));
+                                              cast(UInt(8), buf_i8(x) /
+                                                                cast(Int(8), div_173_88))));
 }
 
 Expr regression_expr_3082698823() {

From 611b60cc8681d73722034126426221133e18dd52 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 15 Mar 2026 18:00:02 +0000
Subject: [PATCH 5/5] Fix extracted lossless_cast regression harness

Co-authored-by: alexreinking <169273+alexreinking@users.noreply.github.com>
---
 test/correctness/lossless_cast.cpp | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/test/correctness/lossless_cast.cpp b/test/correctness/lossless_cast.cpp
index 3193a823fb77..99b354d8a715 100644
--- a/test/correctness/lossless_cast.cpp
+++ b/test/correctness/lossless_cast.cpp
@@ -505,16 +505,6 @@ Expr regression_expr_3082698823() {
 }
 
 int regression_case(const char *name, Expr e1, Type target) {
-    Expr simplified = simplify(e1);
-    if (might_have_ub(e1) ||
-        might_have_ub(simplified) ||
-        might_have_ub(lower_intrinsics(simplified))) {
-        std::cout << "regression case unexpectedly has ub: " << name << "\n"
-                  << "e1 = " << e1 << "\n"
-                  << "simplify(e1) = " << simplified << "\n";
-        return 1;
-    }
-
     Expr e2 = lossless_cast(target, e1);
     if (!e2.defined()) {
         std::cout << "regression case unexpectedly cannot be narrowed: " << name << "\n"