From 7a70ff164af4ef7a9b38867f3960a2a587d28970 Mon Sep 17 00:00:00 2001
From: Alex Reinking <areinking@adobe.com>
Date: Mon, 16 Mar 2026 22:24:15 -0400
Subject: [PATCH 01/12] FuzzingContext: add operator() and initializer_list
 support

---
 test/fuzz/fuzz_helpers.h | 56 +++++++++++++++++++++-------------------
 1 file changed, 29 insertions(+), 27 deletions(-)
diff --git a/test/fuzz/fuzz_helpers.h b/test/fuzz/fuzz_helpers.h
index 5b9740070be8..21017ec2ffa4 100644
--- a/test/fuzz/fuzz_helpers.h
+++ b/test/fuzz/fuzz_helpers.h
@@ -1,47 +1,35 @@
 #ifndef HALIDE_FUZZ_HELPERS_H_
 #define HALIDE_FUZZ_HELPERS_H_
 
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
 #define HALIDE_FUZZER_BACKEND_STDLIB 0
 #define HALIDE_FUZZER_BACKEND_LIBFUZZER 1
 
 #ifndef HALIDE_FUZZER_BACKEND
-#error "HALIDE_FUZZER_BACKEND not defined, defaulting to libFuzzer"
+#error "HALIDE_FUZZER_BACKEND not defined. Set to either HALIDE_FUZZER_BACKEND_STDLIB or HALIDE_FUZZER_BACKEND_LIBFUZZER."
 #endif
 
-///////////////////////////////////////////////////////////////////////////////
-
-#include <cstddef>
-#include <cstdint>
-#include <limits>
-#include <type_traits>
-#include <vector>
-
 #if HALIDE_FUZZER_BACKEND == HALIDE_FUZZER_BACKEND_LIBFUZZER
 #include "fuzzer/FuzzedDataProvider.h"  // IWYU pragma: export
 #elif HALIDE_FUZZER_BACKEND == HALIDE_FUZZER_BACKEND_STDLIB
-#include "halide_fuzz_main.h"
+#include "halide_fuzz_main.h"  // IWYU pragma: export
 #include <random>
+#else
+#error "HALIDE_FUZZER_BACKEND must be set to either HALIDE_FUZZER_BACKEND_STDLIB or HALIDE_FUZZER_BACKEND_LIBFUZZER."
 #endif
 
 namespace Halide {
 
-#if HALIDE_FUZZER_BACKEND == HALIDE_FUZZER_BACKEND_LIBFUZZER
-class FuzzingContext : public FuzzedDataProvider {
-public:
-    using FuzzedDataProvider::FuzzedDataProvider;
-    template<typename T>
-    T PickValueInVector(std::vector<T> &vec) {
-        return vec[ConsumeIntegralInRange<std::size_t>(0, vec.size() - 1)];
-    }
-};
-#elif HALIDE_FUZZER_BACKEND == HALIDE_FUZZER_BACKEND_STDLIB
-// IMPORTANT: we don't use std::*_distribution because they are not portable across standard libraries
-class FuzzingContext {
+#if HALIDE_FUZZER_BACKEND == HALIDE_FUZZER_BACKEND_STDLIB
+class FuzzedDataProvider {
 public:
     using RandomEngine = std::mt19937_64;
     using SeedType = RandomEngine::result_type;
 
-    explicit FuzzingContext(SeedType seed) : rng(seed) {
+    explicit FuzzedDataProvider(SeedType seed) : rng(seed) {
     }
 
     template<typename T>
@@ -64,13 +52,13 @@ class FuzzingContext {
     }
 
     template<typename T>
-    T PickValueInVector(std::vector<T> &vec) {
-        return vec[ConsumeIntegralInRange(static_cast<std::size_t>(0), vec.size() - 1)];
+    auto PickValueInArray(T &array) -> decltype(auto) {
+        return array[ConsumeIntegralInRange(static_cast<std::size_t>(0), std::size(array) - 1)];
     }
 
     template<typename T>
-    auto PickValueInArray(T &array) -> decltype(auto) {
-        return array[ConsumeIntegralInRange(static_cast<std::size_t>(0), std::size(array) - 1)];
+    auto PickValueInArray(const std::initializer_list<T> &list) -> decltype(auto) {
+        return *(list.begin() + ConsumeIntegralInRange(static_cast<std::size_t>(0), std::size(list) - 1));
     }
 
 private:
@@ -78,6 +66,20 @@ class FuzzingContext {
 };
 #endif
 
+class FuzzingContext : public FuzzedDataProvider {
+public:
+    using FuzzedDataProvider::FuzzedDataProvider;
+
+    template<typename T>
+    T PickValueInVector(std::vector<T> &vec) {
+        return vec[ConsumeIntegralInRange<std::size_t>(0, vec.size() - 1)];
+    }
+
+    auto operator()() -> decltype(auto) {
+        return ConsumeIntegral<uint64_t>();
+    }
+};
+
 }  // namespace Halide
 
 #if HALIDE_FUZZER_BACKEND == HALIDE_FUZZER_BACKEND_LIBFUZZER

From 6ed3adcaadc83dc5bf11c66118dbaccd81e4e2e6 Mon Sep 17 00:00:00 2001
From: Alex Reinking <areinking@adobe.com>
Date: Mon, 16 Mar 2026 22:24:41 -0400
Subject: [PATCH 02/12] Move lossless_cast fuzzing to test/fuzz

---
 test/correctness/lossless_cast.cpp | 420 ++---------------------------
 test/fuzz/CMakeLists.txt           |   1 +
 test/fuzz/lossless_cast.cpp        | 347 ++++++++++++++++++++++++
 3 files changed, 372 insertions(+), 396 deletions(-)
 create mode 100644 test/fuzz/lossless_cast.cpp

diff --git a/test/correctness/lossless_cast.cpp b/test/correctness/lossless_cast.cpp
index 7633954fb003..b3e055ce3fe3 100644
--- a/test/correctness/lossless_cast.cpp
+++ b/test/correctness/lossless_cast.cpp
@@ -3,7 +3,7 @@
 using namespace Halide;
 using namespace Halide::Internal;
 
-int check_lossless_cast(const Type &t, const Expr &in, const Expr &correct) {
+bool check_lossless_cast(const Type &t, const Expr &in, const Expr &correct) {
     Expr result = lossless_cast(t, in);
     if (!equal(result, correct)) {
         std::cout << "Incorrect lossless_cast result:\n"
@@ -11,12 +11,12 @@ int check_lossless_cast(const Type &t, const Expr &in, const Expr &correct) {
                   << " " << result
                   << " but expected was:\n"
                   << " " << correct << "\n";
-        return 1;
+        return true;
     }
-    return 0;
+    return false;
 }
 
-int lossless_cast_test() {
+int main(int argc, char **argv) {
     Expr x = Variable::make(Int(32), "x");
     Type u8 = UInt(8);
     Type u16 = UInt(16);
@@ -33,53 +33,53 @@ int lossless_cast_test() {
     Expr var_u16 = Variable::make(u16, "x");
     Expr var_u8x = Variable::make(u8x, "x");
 
-    int res = 0;
+    bool found_error = false;
 
     Expr e = cast(u8, x);
-    res |= check_lossless_cast(i32, e, cast(i32, e));
+    found_error |= check_lossless_cast(i32, e, cast(i32, e));
 
     e = cast(u8, x);
-    res |= check_lossless_cast(i32, e, cast(i32, e));
+    found_error |= check_lossless_cast(i32, e, cast(i32, e));
 
     e = cast(i8, var_u16);
-    res |= check_lossless_cast(u16, e, Expr());
+    found_error |= check_lossless_cast(u16, e, Expr());
 
     e = cast(i16, var_u16);
-    res |= check_lossless_cast(u16, e, Expr());
+    found_error |= check_lossless_cast(u16, e, Expr());
 
     e = cast(u32, var_u8);
-    res |= check_lossless_cast(u16, e, cast(u16, var_u8));
+    found_error |= check_lossless_cast(u16, e, cast(u16, var_u8));
 
     e = VectorReduce::make(VectorReduce::Add, cast(u16x, var_u8x), 1);
-    res |= check_lossless_cast(u16, e, cast(u16, e));
+    found_error |= check_lossless_cast(u16, e, cast(u16, e));
 
     e = VectorReduce::make(VectorReduce::Add, cast(u32x, var_u8x), 1);
-    res |= check_lossless_cast(u16, e, VectorReduce::make(VectorReduce::Add, cast(u16x, var_u8x), 1));
+    found_error |= check_lossless_cast(u16, e, VectorReduce::make(VectorReduce::Add, cast(u16x, var_u8x), 1));
 
     e = cast(u32, var_u8) - 16;
-    res |= check_lossless_cast(u16, e, Expr());
+    found_error |= check_lossless_cast(u16, e, Expr());
 
     e = cast(u32, var_u8) + 16;
-    res |= check_lossless_cast(u16, e, cast(u16, var_u8) + 16);
+    found_error |= check_lossless_cast(u16, e, cast(u16, var_u8) + 16);
 
     e = 16 - cast(u32, var_u8);
-    res |= check_lossless_cast(u16, e, Expr());
+    found_error |= check_lossless_cast(u16, e, Expr());
 
     e = 16 + cast(u32, var_u8);
-    res |= check_lossless_cast(u16, e, 16 + cast(u16, var_u8));
+    found_error |= check_lossless_cast(u16, e, 16 + cast(u16, var_u8));
 
     // Check one where the target type is unsigned but there's a signed addition
     // (that can't overflow)
     e = cast(i64, cast(u16, var_u8) + cast(i32, 17));
-    res |= check_lossless_cast(u32, e, cast(u32, cast(u16, var_u8)) + cast(u32, 17));
+    found_error |= check_lossless_cast(u32, e, cast(u32, cast(u16, var_u8)) + cast(u32, 17));
 
     // Check one where the target type is unsigned but there's a signed subtract
     // (that can overflow). It's not safe to enter the i16 sub
     e = cast(i64, cast(i16, 10) - cast(i16, 17));
-    res |= check_lossless_cast(u32, e, Expr());
+    found_error |= check_lossless_cast(u32, e, Expr());
 
     e = cast(i64, 1024) * cast(i64, 1024) * cast(i64, 1024);
-    res |= check_lossless_cast(i32, e, (cast(i32, 1024) * 1024) * 1024);
+    found_error |= check_lossless_cast(i32, e, (cast(i32, 1024) * 1024) * 1024);
 
     // Check narrowing a vector reduction of something narrowable to bool ...
     auto make_reduce = [&](Type t, VectorReduce::Operator op) {
@@ -89,396 +89,24 @@ int lossless_cast_test() {
 
     // It's OK to narrow it to 8-bit.
     e = make_reduce(UInt(16), VectorReduce::Add);
-    res |= check_lossless_cast(UInt(8), e, make_reduce(UInt(8), VectorReduce::Add));
+    found_error |= check_lossless_cast(UInt(8), e, make_reduce(UInt(8), VectorReduce::Add));
 
     // ... but we can't reduce it all the way to bool if the operator isn't
     // legal for bools (issue #9011)
     e = make_reduce(UInt(8), VectorReduce::Add);
-    res |= check_lossless_cast(Bool(), e, Expr());
+    found_error |= check_lossless_cast(Bool(), e, Expr());
 
     // Min or Max, however, can just become And and Or
     e = make_reduce(UInt(8), VectorReduce::Min);
-    res |= check_lossless_cast(Bool(), e, make_reduce(Bool(), VectorReduce::And));
+    found_error |= check_lossless_cast(Bool(), e, make_reduce(Bool(), VectorReduce::And));
 
     e = make_reduce(UInt(8), VectorReduce::Max);
-    res |= check_lossless_cast(Bool(), e, make_reduce(Bool(), VectorReduce::Or));
-
-    return res;
-}
-
-constexpr int size = 1024;
-Buffer<uint8_t> buf_u8(size, "buf_u8");
-Buffer<int8_t> buf_i8(size, "buf_i8");
-Var x{"x"};
-
-Expr random_expr(std::mt19937 &rng) {
-    std::vector<Expr> exprs;
-    // Add some atoms
-    exprs.push_back(cast<uint8_t>((uint8_t)rng()));
-    exprs.push_back(cast<int8_t>((int8_t)rng()));
-    exprs.push_back(cast<uint8_t>((uint8_t)rng()));
-    exprs.push_back(cast<int8_t>((int8_t)rng()));
-    exprs.push_back(buf_u8(x));
-    exprs.push_back(buf_i8(x));
-
-    // Make random combinations of them
-    while (true) {
-        Expr e;
-        int i1 = rng() % exprs.size();
-        int i2 = rng() % exprs.size();
-        int i3 = rng() % exprs.size();
-        int op = rng() % 8;
-        Expr e1 = exprs[i1];
-        Expr e2 = cast(e1.type(), exprs[i2]);
-        Expr e3 = cast(e1.type().with_code(halide_type_uint), exprs[i3]);
-        bool may_widen = e1.type().bits() < 64;
-        Expr e2_narrow = exprs[i2];
-        bool may_widen_right = e2_narrow.type() == e1.type().narrow();
-        switch (op) {
-        case 0:
-            if (may_widen) {
-                e = cast(e1.type().widen(), e1);
-            }
-            break;
-        case 1:
-            if (may_widen) {
-                e = cast(Int(e1.type().bits() * 2), e1);
-            }
-            break;
-        case 2:
-            e = e1 + e2;
-            break;
-        case 3:
-            e = e1 - e2;
-            break;
-        case 4:
-            e = e1 * e2;
-            break;
-        case 5:
-            e = e1 / e2;
-            break;
-        case 6:
-            // Introduce some lets
-            e = common_subexpression_elimination(e1);
-            break;
-        case 7:
-            switch (rng() % 20) {
-            case 0:
-                if (may_widen) {
-                    e = widening_add(e1, e2);
-                }
-                break;
-            case 1:
-                if (may_widen) {
-                    e = widening_sub(e1, e2);
-                }
-                break;
-            case 2:
-                if (may_widen) {
-                    e = widening_mul(e1, e2);
-                }
-                break;
-            case 3:
-                e = halving_add(e1, e2);
-                break;
-            case 4:
-                e = rounding_halving_add(e1, e2);
-                break;
-            case 5:
-                e = halving_sub(e1, e2);
-                break;
-            case 6:
-                e = saturating_add(e1, e2);
-                break;
-            case 7:
-                e = saturating_sub(e1, e2);
-                break;
-            case 8:
-                e = count_leading_zeros(e1);
-                break;
-            case 9:
-                e = count_trailing_zeros(e1);
-                break;
-            case 10:
-                if (may_widen) {
-                    e = rounding_mul_shift_right(e1, e2, e3);
-                }
-                break;
-            case 11:
-                if (may_widen) {
-                    e = mul_shift_right(e1, e2, e3);
-                }
-                break;
-            case 12:
-                if (may_widen_right) {
-                    e = widen_right_add(e1, e2_narrow);
-                }
-                break;
-            case 13:
-                if (may_widen_right) {
-                    e = widen_right_sub(e1, e2_narrow);
-                }
-                break;
-            case 14:
-                if (may_widen_right) {
-                    e = widen_right_mul(e1, e2_narrow);
-                }
-                break;
-            case 15:
-                e = e1 << e2;
-                break;
-            case 16:
-                e = e1 >> e2;
-                break;
-            case 17:
-                e = rounding_shift_right(e1, e2);
-                break;
-            case 18:
-                e = rounding_shift_left(e1, e2);
-                break;
-            case 19:
-                e = ~e1;
-                break;
-            }
-        }
-
-        if (!e.defined()) {
-            continue;
-        }
-
-        // Stop when we get to 64 bits, but probably don't stop on a cast,
-        // because that'll just get trivially stripped.
-        if (e.type().bits() == 64 && (e.as<Cast>() == nullptr || ((rng() & 7) == 0))) {
-            return e;
-        }
-
-        exprs.push_back(e);
-    }
-}
-
-bool definitely_has_ub(Expr e) {
-    e = simplify(e);
-
-    class HasOverflow : public IRVisitor {
-        void visit(const Call *op) override {
-            if (op->is_intrinsic({Call::signed_integer_overflow})) {
-                found = true;
-            }
-            IRVisitor::visit(op);
-        }
-
-    public:
-        bool found = false;
-    } has_overflow;
-    e.accept(&has_overflow);
-
-    return has_overflow.found;
-}
-
-bool might_have_ub(Expr e) {
-    class MightOverflow : public IRVisitor {
-        std::map<Expr, ConstantInterval, ExprCompare> cache;
-
-        using IRVisitor::visit;
-
-        bool no_overflow_int(const Type &t) {
-            return t.is_int() && t.bits() >= 32;
-        }
-
-        ConstantInterval bounds(const Expr &e) {
-            return constant_integer_bounds(e, Scope<ConstantInterval>::empty_scope(), &cache);
-        }
-
-        void visit(const Add *op) override {
-            if (no_overflow_int(op->type) &&
-                !op->type.can_represent(bounds(op->a) + bounds(op->b))) {
-                found = true;
-            } else {
-                IRVisitor::visit(op);
-            }
-        }
-
-        void visit(const Sub *op) override {
-            if (no_overflow_int(op->type) &&
-                !op->type.can_represent(bounds(op->a) - bounds(op->b))) {
-                found = true;
-            } else {
-                IRVisitor::visit(op);
-            }
-        }
-
-        void visit(const Mul *op) override {
-            if (no_overflow_int(op->type) &&
-                !op->type.can_represent(bounds(op->a) * bounds(op->b))) {
-                found = true;
-            } else {
-                IRVisitor::visit(op);
-            }
-        }
-
-        void visit(const Div *op) override {
-            if (no_overflow_int(op->type) &&
-                (bounds(op->a) / bounds(op->b)).contains(-1)) {
-                found = true;
-            } else {
-                IRVisitor::visit(op);
-            }
-        }
-
-        void visit(const Cast *op) override {
-            if (no_overflow_int(op->type) &&
-                !op->type.can_represent(bounds(op->value))) {
-                found = true;
-            } else {
-                IRVisitor::visit(op);
-            }
-        }
-
-        void visit(const Call *op) override {
-            if (op->is_intrinsic({Call::shift_left,
-                                  Call::shift_right,
-                                  Call::rounding_shift_left,
-                                  Call::rounding_shift_right,
-                                  Call::widening_shift_left,
-                                  Call::widening_shift_right,
-                                  Call::mul_shift_right,
-                                  Call::rounding_mul_shift_right})) {
-                auto shift_bounds = bounds(op->args.back());
-                if (!(shift_bounds > -op->type.bits() && shift_bounds < op->type.bits())) {
-                    found = true;
-                }
-            } else if (op->is_intrinsic({Call::signed_integer_overflow})) {
-                found = true;
-            }
-            IRVisitor::visit(op);
-        }
-
-    public:
-        bool found = false;
-    } checker;
-
-    e.accept(&checker);
-
-    return checker.found;
-}
+    found_error |= check_lossless_cast(Bool(), e, make_reduce(Bool(), VectorReduce::Or));
 
-bool found_error = false;
-
-int test_one(uint32_t seed) {
-    std::mt19937 rng{seed};
-
-    buf_u8.fill(rng);
-    buf_i8.fill(rng);
-
-    Expr e1 = random_expr(rng);
-    Expr simplified = simplify(e1);
-
-    if (might_have_ub(e1) ||
-        might_have_ub(simplified) ||
-        might_have_ub(lower_intrinsics(simplified))) {
-        return 0;
-    }
-
-    // We're also going to test constant_integer_bounds here.
-    ConstantInterval bounds = constant_integer_bounds(e1);
-
-    Type target;
-    std::vector<Type> target_types = {UInt(32), Int(32), UInt(16), Int(16)};
-    target = target_types[rng() % target_types.size()];
-    Expr e2 = lossless_cast(target, e1);
-
-    if (!e2.defined()) {
-        return 0;
-    }
-
-    if (definitely_has_ub(e2)) {
-        std::cout << "lossless_cast introduced ub:\n"
-                  << "seed = " << seed << "\n"
-                  << "e1 = " << e1 << "\n"
-                  << "e2 = " << e2 << "\n"
-                  << "simplify(e1) = " << simplify(e1) << "\n"
-                  << "simplify(e2) = " << simplify(e2) << "\n";
+    if (found_error) {
         return 1;
     }
 
-    Func f;
-    f(x) = {cast<int64_t>(e1), cast<int64_t>(e2)};
-    f.vectorize(x, 4, TailStrategy::RoundUp);
-
-    Buffer<int64_t> out1(size), out2(size);
-    Pipeline p(f);
-
-    // Check for signed integer overflow
-    // Module m = p.compile_to_module({}, "test");
-
-    p.realize({out1, out2});
-
-    for (int x = 0; x < size; x++) {
-        if (out1(x) != out2(x)) {
-            std::cout
-                << "lossless_cast failure\n"
-                << "seed = " << seed << "\n"
-                << "x = " << x << "\n"
-                << "buf_u8 = " << (int)buf_u8(x) << "\n"
-                << "buf_i8 = " << (int)buf_i8(x) << "\n"
-                << "out1 = " << out1(x) << "\n"
-                << "out2 = " << out2(x) << "\n"
-                << "Original: " << e1 << "\n"
-                << "Lossless cast: " << e2 << "\n";
-            return 1;
-        }
-    }
-
-    for (int x = 0; x < size; x++) {
-        if ((e1.type().is_int() && !bounds.contains(out1(x))) ||
-            (e1.type().is_uint() && !bounds.contains((uint64_t)out1(x)))) {
-            Expr simplified = simplify(e1);
-            std::cout
-                << "constant_integer_bounds failure\n"
-                << "seed = " << seed << "\n"
-                << "x = " << x << "\n"
-                << "buf_u8 = " << (int)buf_u8(x) << "\n"
-                << "buf_i8 = " << (int)buf_i8(x) << "\n"
-                << "out1 = " << out1(x) << "\n"
-                << "Expression: " << e1 << "\n"
-                << "Bounds: " << bounds << "\n"
-                << "Simplified: " << simplified << "\n"
-                // If it's still out-of-bounds when the expression is
-                // simplified, that'll be easier to debug.
-                << "Bounds: " << constant_integer_bounds(simplified) << "\n";
-            return 1;
-        }
-    }
-
-    return 0;
-}
-
-int fuzz_test(uint32_t root_seed) {
-    std::mt19937 seed_generator(root_seed);
-
-    std::cout << "Fuzz testing with root seed " << root_seed << "\n";
-    for (int i = 0; i < 1000; i++) {
-        auto s = seed_generator();
-        std::cout << s << std::endl;
-        if (test_one(s)) {
-            return 1;
-        }
-    }
-    return 0;
-}
-
-int main(int argc, char **argv) {
-    if (argc == 2) {
-        return test_one(atoi(argv[1]));
-    }
-    if (lossless_cast_test()) {
-        std::cout << "lossless_cast test failed!\n";
-        return 1;
-    }
-    if (fuzz_test(time(NULL))) {
-        std::cout << "lossless_cast fuzz test failed!\n";
-        return 1;
-    }
     std::cout << "Success!\n";
     return 0;
 }
diff --git a/test/fuzz/CMakeLists.txt b/test/fuzz/CMakeLists.txt
index 65376c7ebeb7..0c3afdef359e 100644
--- a/test/fuzz/CMakeLists.txt
+++ b/test/fuzz/CMakeLists.txt
@@ -9,6 +9,7 @@ tests(GROUPS fuzz
       SOURCES
       bounds.cpp
       cse.cpp
+      lossless_cast.cpp
       # By default, the libfuzzer harness runs with a timeout of 1200 seconds.
       # Let's dial that back:
       # - Do 1000 fuzz runs for each test.
diff --git a/test/fuzz/lossless_cast.cpp b/test/fuzz/lossless_cast.cpp
new file mode 100644
index 000000000000..01ef1c38e8d7
--- /dev/null
+++ b/test/fuzz/lossless_cast.cpp
@@ -0,0 +1,347 @@
+#include "fuzz_helpers.h"
+#include <Halide.h>
+
+using namespace Halide;
+using namespace Halide::Internal;
+
+namespace {
+
+constexpr int size = 1024;
+Buffer<uint8_t> buf_u8(size, "buf_u8");
+Buffer<int8_t> buf_i8(size, "buf_i8");
+Var x{"x"};
+
+Expr random_expr(FuzzingContext &fuzz) {
+    std::vector<Expr> exprs;
+    // Add some atoms
+    exprs.push_back(cast<uint8_t>(fuzz.ConsumeIntegral<uint8_t>()));
+    exprs.push_back(cast<int8_t>(fuzz.ConsumeIntegral<int8_t>()));
+    exprs.push_back(cast<uint8_t>(fuzz.ConsumeIntegral<uint8_t>()));
+    exprs.push_back(cast<int8_t>(fuzz.ConsumeIntegral<int8_t>()));
+    exprs.push_back(buf_u8(x));
+    exprs.push_back(buf_i8(x));
+
+    // Make random combinations of them
+    while (true) {
+        Expr e;
+        Expr e1 = fuzz.PickValueInVector(exprs);
+
+        Expr e2 = fuzz.PickValueInVector(exprs);
+        Expr e2_narrow = e2;
+        e2 = cast(e1.type(), e2);
+
+        Expr e3 = cast(e1.type().with_code(halide_type_uint), fuzz.PickValueInVector(exprs));
+        bool may_widen = e1.type().bits() < 64;
+        bool may_widen_right = e2_narrow.type() == e1.type().narrow();
+        switch (fuzz.ConsumeIntegralInRange(0, 7)) {
+        case 0:
+            if (may_widen) {
+                e = cast(e1.type().widen(), e1);
+            }
+            break;
+        case 1:
+            if (may_widen) {
+                e = cast(Int(e1.type().bits() * 2), e1);
+            }
+            break;
+        case 2:
+            e = e1 + e2;
+            break;
+        case 3:
+            e = e1 - e2;
+            break;
+        case 4:
+            e = e1 * e2;
+            break;
+        case 5:
+            e = e1 / e2;
+            break;
+        case 6:
+            // Introduce some lets
+            e = common_subexpression_elimination(e1);
+            break;
+        case 7:
+            switch (fuzz.ConsumeIntegralInRange(0, 19)) {
+            case 0:
+                if (may_widen) {
+                    e = widening_add(e1, e2);
+                }
+                break;
+            case 1:
+                if (may_widen) {
+                    e = widening_sub(e1, e2);
+                }
+                break;
+            case 2:
+                if (may_widen) {
+                    e = widening_mul(e1, e2);
+                }
+                break;
+            case 3:
+                e = halving_add(e1, e2);
+                break;
+            case 4:
+                e = rounding_halving_add(e1, e2);
+                break;
+            case 5:
+                e = halving_sub(e1, e2);
+                break;
+            case 6:
+                e = saturating_add(e1, e2);
+                break;
+            case 7:
+                e = saturating_sub(e1, e2);
+                break;
+            case 8:
+                e = count_leading_zeros(e1);
+                break;
+            case 9:
+                e = count_trailing_zeros(e1);
+                break;
+            case 10:
+                if (may_widen) {
+                    e = rounding_mul_shift_right(e1, e2, e3);
+                }
+                break;
+            case 11:
+                if (may_widen) {
+                    e = mul_shift_right(e1, e2, e3);
+                }
+                break;
+            case 12:
+                if (may_widen_right) {
+                    e = widen_right_add(e1, e2_narrow);
+                }
+                break;
+            case 13:
+                if (may_widen_right) {
+                    e = widen_right_sub(e1, e2_narrow);
+                }
+                break;
+            case 14:
+                if (may_widen_right) {
+                    e = widen_right_mul(e1, e2_narrow);
+                }
+                break;
+            case 15:
+                e = e1 << e2;
+                break;
+            case 16:
+                e = e1 >> e2;
+                break;
+            case 17:
+                e = rounding_shift_right(e1, e2);
+                break;
+            case 18:
+                e = rounding_shift_left(e1, e2);
+                break;
+            case 19:
+                e = ~e1;
+                break;
+            }
+        }
+
+        if (!e.defined()) {
+            continue;
+        }
+
+        // Stop when we get to 64 bits, but probably don't stop on a cast,
+        // because that'll just get trivially stripped.
+        if (e.type().bits() == 64 && (e.as<Cast>() == nullptr || fuzz.ConsumeIntegralInRange(0, 7) == 0)) {
+            return e;
+        }
+
+        exprs.push_back(e);
+    }
+}
+
+bool definitely_has_ub(Expr e) {
+    e = simplify(e);
+
+    class HasOverflow : public IRVisitor {
+        void visit(const Call *op) override {
+            if (op->is_intrinsic({Call::signed_integer_overflow})) {
+                found = true;
+            }
+            IRVisitor::visit(op);
+        }
+
+    public:
+        bool found = false;
+    } has_overflow;
+    e.accept(&has_overflow);
+
+    return has_overflow.found;
+}
+
+bool might_have_ub(Expr e) {
+    class MightOverflow : public IRVisitor {
+        std::map<Expr, ConstantInterval, ExprCompare> cache;
+
+        using IRVisitor::visit;
+
+        bool no_overflow_int(const Type &t) {
+            return t.is_int() && t.bits() >= 32;
+        }
+
+        ConstantInterval bounds(const Expr &e) {
+            return constant_integer_bounds(e, Scope<ConstantInterval>::empty_scope(), &cache);
+        }
+
+        void visit(const Add *op) override {
+            if (no_overflow_int(op->type) &&
+                !op->type.can_represent(bounds(op->a) + bounds(op->b))) {
+                found = true;
+            } else {
+                IRVisitor::visit(op);
+            }
+        }
+
+        void visit(const Sub *op) override {
+            if (no_overflow_int(op->type) &&
+                !op->type.can_represent(bounds(op->a) - bounds(op->b))) {
+                found = true;
+            } else {
+                IRVisitor::visit(op);
+            }
+        }
+
+        void visit(const Mul *op) override {
+            if (no_overflow_int(op->type) &&
+                !op->type.can_represent(bounds(op->a) * bounds(op->b))) {
+                found = true;
+            } else {
+                IRVisitor::visit(op);
+            }
+        }
+
+        void visit(const Div *op) override {
+            if (no_overflow_int(op->type) &&
+                (bounds(op->a) / bounds(op->b)).contains(-1)) {
+                found = true;
+            } else {
+                IRVisitor::visit(op);
+            }
+        }
+
+        void visit(const Cast *op) override {
+            if (no_overflow_int(op->type) &&
+                !op->type.can_represent(bounds(op->value))) {
+                found = true;
+            } else {
+                IRVisitor::visit(op);
+            }
+        }
+
+        void visit(const Call *op) override {
+            if (op->is_intrinsic({Call::shift_left,
+                                  Call::shift_right,
+                                  Call::rounding_shift_left,
+                                  Call::rounding_shift_right,
+                                  Call::widening_shift_left,
+                                  Call::widening_shift_right,
+                                  Call::mul_shift_right,
+                                  Call::rounding_mul_shift_right})) {
+                auto shift_bounds = bounds(op->args.back());
+                if (!(shift_bounds > -op->type.bits() && shift_bounds < op->type.bits())) {
+                    found = true;
+                }
+            } else if (op->is_intrinsic({Call::signed_integer_overflow})) {
+                found = true;
+            }
+            IRVisitor::visit(op);
+        }
+
+    public:
+        bool found = false;
+    } checker;
+
+    e.accept(&checker);
+
+    return checker.found;
+}
+
+}  // namespace
+
+FUZZ_TEST(lossless_cast, FuzzingContext &fuzz) {
+    buf_u8.fill(fuzz);
+    buf_i8.fill(fuzz);
+
+    Expr e1 = random_expr(fuzz);
+    Expr simplified = simplify(e1);
+
+    if (might_have_ub(e1) ||
+        might_have_ub(simplified) ||
+        might_have_ub(lower_intrinsics(simplified))) {
+        return 0;
+    }
+
+    // We're also going to test constant_integer_bounds here.
+    ConstantInterval bounds = constant_integer_bounds(e1);
+
+    std::vector<Type> target_types = {UInt(32), Int(32), UInt(16), Int(16)};
+    Type target = fuzz.PickValueInVector(target_types);
+    Expr e2 = lossless_cast(target, e1);
+
+    if (!e2.defined()) {
+        return 0;
+    }
+
+    if (definitely_has_ub(e2)) {
+        std::cerr << "lossless_cast introduced ub:\n"
+                  << "e1 = " << e1 << "\n"
+                  << "e2 = " << e2 << "\n"
+                  << "simplify(e1) = " << simplify(e1) << "\n"
+                  << "simplify(e2) = " << simplify(e2) << "\n";
+        return 1;
+    }
+
+    Func f;
+    f(x) = {cast<int64_t>(e1), cast<int64_t>(e2)};
+    f.vectorize(x, 4, TailStrategy::RoundUp);
+
+    Buffer<int64_t> out1(size), out2(size);
+    Pipeline p(f);
+
+    // Check for signed integer overflow
+    // Module m = p.compile_to_module({}, "test");
+
+    p.realize({out1, out2});
+
+    for (int x = 0; x < size; x++) {
+        if (out1(x) != out2(x)) {
+            std::cerr
+                << "lossless_cast failure\n"
+                << "x = " << x << "\n"
+                << "buf_u8 = " << (int)buf_u8(x) << "\n"
+                << "buf_i8 = " << (int)buf_i8(x) << "\n"
+                << "out1 = " << out1(x) << "\n"
+                << "out2 = " << out2(x) << "\n"
+                << "Original: " << e1 << "\n"
+                << "Lossless cast: " << e2 << "\n";
+            return 1;
+        }
+    }
+
+    for (int x = 0; x < size; x++) {
+        if ((e1.type().is_int() && !bounds.contains(out1(x))) ||
+            (e1.type().is_uint() && !bounds.contains((uint64_t)out1(x)))) {
+            Expr simplified = simplify(e1);
+            std::cerr
+                << "constant_integer_bounds failure\n"
+                << "x = " << x << "\n"
+                << "buf_u8 = " << (int)buf_u8(x) << "\n"
+                << "buf_i8 = " << (int)buf_i8(x) << "\n"
+                << "out1 = " << out1(x) << "\n"
+                << "Expression: " << e1 << "\n"
+                << "Bounds: " << bounds << "\n"
+                << "Simplified: " << simplified << "\n"
+                // If it's still out-of-bounds when the expression is
+                // simplified, that'll be easier to debug.
+                << "Bounds: " << constant_integer_bounds(simplified) << "\n";
+            return 1;
+        }
+    }
+
+    return 0;
+}

From 209f15479ee12b3fac7ff5a2b632fb1c397bbe34 Mon Sep 17 00:00:00 2001
From: Alex Reinking <areinking@adobe.com>
Date: Mon, 16 Mar 2026 22:38:36 -0400
Subject: [PATCH 03/12] Move widening_lerp to test/fuzz

---
 test/correctness/CMakeLists.txt    |  1 -
 test/correctness/widening_lerp.cpp | 64 ------------------------------
 test/fuzz/CMakeLists.txt           |  1 +
 test/fuzz/widening_lerp.cpp        | 51 ++++++++++++++++++++++++
 4 files changed, 52 insertions(+), 65 deletions(-)
 delete mode 100644 test/correctness/widening_lerp.cpp
 create mode 100644 test/fuzz/widening_lerp.cpp

diff --git a/test/correctness/CMakeLists.txt b/test/correctness/CMakeLists.txt
index 6d41a9e71219..f0d69092c08c 100644
--- a/test/correctness/CMakeLists.txt
+++ b/test/correctness/CMakeLists.txt
@@ -356,7 +356,6 @@ tests(GROUPS correctness
       vectorized_initialization.cpp
       vectorized_load_from_vectorized_allocation.cpp
       vectorized_reduction_bug.cpp
-      widening_lerp.cpp
       widening_reduction.cpp
       # keep-sorted end
 )
diff --git a/test/correctness/widening_lerp.cpp b/test/correctness/widening_lerp.cpp
deleted file mode 100644
index e2b4af081db7..000000000000
--- a/test/correctness/widening_lerp.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-#include "Halide.h"
-
-using namespace Halide;
-
-std::mt19937 rng(0);
-
-int main(int argc, char **argv) {
-
-    int fuzz_seed = argc > 1 ? atoi(argv[1]) : time(nullptr);
-    rng.seed(fuzz_seed);
-    printf("Lerp test seed: %d\n", fuzz_seed);
-
-    // Lerp lowering incorporates a cast. This test checks that a widening lerp
-    // is equal to the widened version of the lerp.
-    for (Type t1 : {UInt(8), UInt(16), UInt(32), Int(8), Int(16), Int(32), Float(32)}) {
-        if (rng() & 1) continue;
-        for (Type t2 : {UInt(8), UInt(16), UInt(32), Float(32)}) {
-            if (rng() & 1) continue;
-            for (Type t3 : {UInt(8), UInt(16), UInt(32), Int(8), Int(16), Int(32), Float(32)}) {
-                if (rng() & 1) continue;
-                Func f;
-                Var x;
-                f(x) = cast(t1, random_uint((int)rng()));
-
-                Expr weight = cast(t2, f(x + 16));
-                if (t2.is_float()) {
-                    weight /= 256.f;
-                    weight = clamp(weight, 0.f, 1.f);
-                }
-
-                Expr zero_val = f(x);
-                Expr one_val = f(x + 8);
-                Expr lerped = lerp(zero_val, one_val, weight);
-
-                Func cast_and_lerp, lerp_alone, cast_of_lerp;
-                cast_and_lerp(x) = cast(t3, lerped);
-                lerp_alone(x) = lerped;
-                cast_of_lerp(x) = cast(t3, lerp_alone(x));
-
-                RDom r(0, 32 * 1024);
-                Func check;
-                check() = maximum(abs(cast<double>(cast_and_lerp(r)) -
-                                      cast<double>(cast_of_lerp(r))));
-
-                f.compute_root().vectorize(x, 8, TailStrategy::RoundUp);
-                lerp_alone.compute_root().vectorize(x, 8, TailStrategy::RoundUp);
-                cast_and_lerp.compute_root().vectorize(x, 8, TailStrategy::RoundUp);
-                cast_of_lerp.compute_root().vectorize(x, 8, TailStrategy::RoundUp);
-
-                double err = evaluate<double>(check());
-
-                if (err > 1e-5) {
-                    printf("Difference of lerp + cast and lerp alone is %f,"
-                           " which exceeds threshold for seed %d\n",
-                           err, fuzz_seed);
-                    return 1;
-                }
-            }
-        }
-    }
-
-    printf("Success!\n");
-    return 0;
-}
diff --git a/test/fuzz/CMakeLists.txt b/test/fuzz/CMakeLists.txt
index 0c3afdef359e..4cfa9bf41f35 100644
--- a/test/fuzz/CMakeLists.txt
+++ b/test/fuzz/CMakeLists.txt
@@ -10,6 +10,7 @@ tests(GROUPS fuzz
       bounds.cpp
       cse.cpp
       lossless_cast.cpp
+      widening_lerp.cpp
       # By default, the libfuzzer harness runs with a timeout of 1200 seconds.
       # Let's dial that back:
       # - Do 1000 fuzz runs for each test.
diff --git a/test/fuzz/widening_lerp.cpp b/test/fuzz/widening_lerp.cpp
new file mode 100644
index 000000000000..1df0aa00f753
--- /dev/null
+++ b/test/fuzz/widening_lerp.cpp
@@ -0,0 +1,51 @@
+#include "fuzz_helpers.h"
+
+#include "Halide.h"
+using namespace Halide;
+
+FUZZ_TEST(widening_lerp, FuzzingContext &fuzz) {
+    // Lerp lowering incorporates a cast. Check that cast(t3, lerp(...)) computed
+    // in one Func matches a cast applied to a lerp stored by a separate Func.
+
+    Type t1 = fuzz.PickValueInArray({UInt(8), UInt(16), UInt(32), Int(8), Int(16), Int(32), Float(32)});  // type of the lerp endpoints
+    Type t2 = fuzz.PickValueInArray({UInt(8), UInt(16), UInt(32), Float(32)});  // type of the lerp weight
+    Type t3 = fuzz.PickValueInArray({UInt(8), UInt(16), UInt(32), Int(8), Int(16), Int(32), Float(32)});  // type the lerp result is cast to
+
+    Func f;
+    Var x;
+    f(x) = cast(t1, random_uint(fuzz.ConsumeIntegral<int>()));  // input data; the random_uint seed comes from the fuzz input
+
+    Expr weight = cast(t2, f(x + 16));
+    if (t2.is_float()) {  // float weights are scaled by 1/256 and clamped into [0, 1]
+        weight /= 256.f;
+        weight = clamp(weight, 0.f, 1.f);
+    }
+
+    Expr zero_val = f(x);
+    Expr one_val = f(x + 8);
+    Expr lerped = lerp(zero_val, one_val, weight);
+
+    Func cast_and_lerp, lerp_alone, cast_of_lerp;
+    cast_and_lerp(x) = cast(t3, lerped);  // lerp and cast in the same Func
+    lerp_alone(x) = lerped;
+    cast_of_lerp(x) = cast(t3, lerp_alone(x));  // cast applied to the separately computed lerp
+
+    RDom r(0, 32 * 1024);
+    Func check;
+    check() = maximum(abs(cast<double>(cast_and_lerp(r)) -  // largest absolute disagreement over r
+                          cast<double>(cast_of_lerp(r))));
+
+    f.compute_root().vectorize(x, 8, TailStrategy::RoundUp);  // every stage: compute_root, vectorized by 8
+    lerp_alone.compute_root().vectorize(x, 8, TailStrategy::RoundUp);
+    cast_and_lerp.compute_root().vectorize(x, 8, TailStrategy::RoundUp);
+    cast_of_lerp.compute_root().vectorize(x, 8, TailStrategy::RoundUp);
+
+    double err = evaluate<double>(check());
+
+    if (err > 1e-5) {  // the two formulations disagree by more than the tolerance: report and fail
+        std::cerr << "Difference of lerp + cast and lerp alone is " << err << "\n";
+        return 1;
+    }
+
+    return 0;
+}

From a99ea0d8f92de6ab93e15cf6a2769c057f6cbaa1 Mon Sep 17 00:00:00 2001
From: Alex Reinking <areinking@adobe.com>
Date: Tue, 17 Mar 2026 07:32:17 -0400
Subject: [PATCH 04/12] Move random_expr_generator.h and simplify fuzzing to
 test/fuzz

---
 test/correctness/CMakeLists.txt                           | 1 -
 test/{correctness => fuzz}/random_expr_generator.h        | 0
 test/{correctness/fuzz_simplify.cpp => fuzz/simplify.cpp} | 0
 3 files changed, 1 deletion(-)
 rename test/{correctness => fuzz}/random_expr_generator.h (100%)
 rename test/{correctness/fuzz_simplify.cpp => fuzz/simplify.cpp} (100%)

diff --git a/test/correctness/CMakeLists.txt b/test/correctness/CMakeLists.txt
index f0d69092c08c..e1c44561be00 100644
--- a/test/correctness/CMakeLists.txt
+++ b/test/correctness/CMakeLists.txt
@@ -126,7 +126,6 @@ tests(GROUPS correctness
       fused_where_inner_extent_is_zero.cpp
       fuzz_float_stores.cpp
       fuzz_schedule.cpp
-      fuzz_simplify.cpp
       gameoflife.cpp
       gather.cpp
       gpu_alloc_group_profiling.cpp
diff --git a/test/correctness/random_expr_generator.h b/test/fuzz/random_expr_generator.h
similarity index 100%
rename from test/correctness/random_expr_generator.h
rename to test/fuzz/random_expr_generator.h
diff --git a/test/correctness/fuzz_simplify.cpp b/test/fuzz/simplify.cpp
similarity index 100%
rename from test/correctness/fuzz_simplify.cpp
rename to test/fuzz/simplify.cpp

From 7a4eaadf98dfefffca83d17a7a3192145a2247cf Mon Sep 17 00:00:00 2001
From: Alex Reinking <areinking@adobe.com>
Date: Tue, 17 Mar 2026 08:02:13 -0400
Subject: [PATCH 05/12] Integrate simplify and random_expr_generator into
 fuzzing framework

---
 test/fuzz/CMakeLists.txt          |   1 +
 test/fuzz/random_expr_generator.h | 179 +++++++++++++++---------------
 test/fuzz/simplify.cpp            | 147 +++++++++---------------
 3 files changed, 142 insertions(+), 185 deletions(-)

diff --git a/test/fuzz/CMakeLists.txt b/test/fuzz/CMakeLists.txt
index 4cfa9bf41f35..97b8372cf2ab 100644
--- a/test/fuzz/CMakeLists.txt
+++ b/test/fuzz/CMakeLists.txt
@@ -10,6 +10,7 @@ tests(GROUPS fuzz
       bounds.cpp
       cse.cpp
       lossless_cast.cpp
+      simplify.cpp
       widening_lerp.cpp
       # By default, the libfuzzer harness runs with a timeout of 1200 seconds.
       # Let's dial that back:
diff --git a/test/fuzz/random_expr_generator.h b/test/fuzz/random_expr_generator.h
index afde4ab4bbcc..abb10351260f 100644
--- a/test/fuzz/random_expr_generator.h
+++ b/test/fuzz/random_expr_generator.h
@@ -2,9 +2,10 @@
 
 #include <array>
 #include <functional>
-#include <random>
 #include <vector>
 
+#include "fuzz_helpers.h"
+
 namespace Halide {
 namespace Internal {
 
@@ -14,7 +15,6 @@ using namespace Halide::Internal;
 
 class RandomExpressionGenerator {
 public:
-    using RandomEngine = std::mt19937_64;
     using make_bin_op_fn = Expr (*)(Expr, Expr);
 
     bool gen_cast = true;
@@ -28,20 +28,23 @@ class RandomExpressionGenerator {
     bool gen_shuffles = true;
     bool gen_vector_reduce = true;
 
+    FuzzingContext &fuzz;
+
     std::vector<Type> fuzz_types = {UInt(1), UInt(8), UInt(16), UInt(32), UInt(64), Int(8), Int(16), Int(32), Int(64)};
-    std::vector<Param<int>> fuzz_vars{5};
+    std::vector<Param<int>> fuzz_vars;
 
-    template<typename T>
-    decltype(auto) random_choice(RandomEngine &rng, T &&choices) {
-        std::uniform_int_distribution<size_t> dist(0, std::size(choices) - 1);
-        return choices[dist(rng)];
+    explicit RandomExpressionGenerator(FuzzingContext &fuzz)  // Stores a reference to the fuzz input source used by every random_* method.
+        : fuzz(fuzz) {
+        for (int i = 0; i < 5; i++) {
+            fuzz_vars.emplace_back("a" + std::to_string(i));  // five Param<int> variables built from "a0" .. "a4"
+        }
     }
 
-    Type random_scalar_type(RandomEngine &rng) {
-        return random_choice(rng, fuzz_types);
+    Type random_scalar_type() {
+        return fuzz.PickValueInVector(fuzz_types);  // one entry of fuzz_types, selected by the fuzz input
     }
 
-    int get_random_divisor(RandomEngine &rng, int x) {
+    int get_random_divisor(int x) {
         vector<int> divisors;
         divisors.reserve(x);
         for (int i = 2; i <= x; i++) {
@@ -49,29 +52,23 @@ class RandomExpressionGenerator {
                 divisors.push_back(i);
             }
         }
-        return random_choice(rng, divisors);
-    }
-
-    std::string fuzz_var(int i) {
-        return std::string(1, 'a' + i);
+        return fuzz.PickValueInVector(divisors);
     }
 
-    Expr random_var(RandomEngine &rng, Type t) {
-        std::uniform_int_distribution<int> dist(0, fuzz_vars.size() - 1);
-        int fuzz_count = dist(rng);
-        return cast(t, Variable::make(Int(32), fuzz_var(fuzz_count)));
+    Expr random_var(Type t) {
+        return cast(t, fuzz.PickValueInVector(fuzz_vars));  // one of the fuzz_vars params, cast to t
     }
 
-    Type random_type(RandomEngine &rng, int width) {
-        Type t = random_choice(rng, fuzz_types);
+    Type random_type(int width) {
+        Type t = random_scalar_type();  // scalar type from fuzz_types; given `width` lanes below when width > 1
         if (width > 1) {
             t = t.with_lanes(width);
         }
         return t;
     }
 
-    Expr random_const(RandomEngine &rng, Type t) {
-        int val = (int)((int8_t)(rng() & 0x0f));
+    Expr random_const(Type t) const {
+        int val = fuzz.ConsumeIntegralInRange(0, 0x0f);
         if (t.is_vector()) {
             return Broadcast::make(cast(t.element_of(), val), t.lanes());
         } else {
@@ -113,40 +110,40 @@ class RandomExpressionGenerator {
         return a >> (b % a.type().bits());
     }
 
-    Expr random_leaf(RandomEngine &rng, Type t, bool overflow_undef = false, bool imm_only = false) {
+    Expr random_leaf(Type t, bool overflow_undef = false, bool imm_only = false) {  // Leaf of type t: a variable or immediate (scalar), or a Ramp/Broadcast of narrower leaves (vector).
         if (t.is_int() && t.bits() == 32) {
             overflow_undef = true;
         }
         if (t.is_scalar()) {
-            if (!imm_only && (rng() & 1)) {
-                return random_var(rng, t);
+            if (!imm_only && fuzz.ConsumeBool()) {
+                return random_var(t);
             } else {
                 if (overflow_undef) {
                     // For Int(32), we don't care about correctness during
                     // overflow, so just use numbers that are unlikely to
                     // overflow.
-                    return cast(t, (int32_t)((int8_t)(rng() & 255)));
+                    return cast(t, fuzz.ConsumeIntegralInRange<int>(-128, 127));  // signed 8-bit range, as the old (int8_t) cast produced; keeps negative immediates
                 } else {
-                    return cast(t, (int32_t)(rng()));
+                    return cast(t, fuzz.ConsumeIntegral<int>());
                 }
             }
         } else {
-            int lanes = get_random_divisor(rng, t.lanes());
+            int lanes = get_random_divisor(t.lanes());
 
-            if (rng() & 1) {
-                auto e1 = random_leaf(rng, t.with_lanes(t.lanes() / lanes), overflow_undef);
-                auto e2 = random_leaf(rng, t.with_lanes(t.lanes() / lanes), overflow_undef);
+            if (fuzz.ConsumeBool()) {
+                auto e1 = random_leaf(t.with_lanes(t.lanes() / lanes), overflow_undef);
+                auto e2 = random_leaf(t.with_lanes(t.lanes() / lanes), overflow_undef);
                 return Ramp::make(e1, e2, lanes);
             } else {
-                auto e1 = random_leaf(rng, t.with_lanes(t.lanes() / lanes), overflow_undef);
+                auto e1 = random_leaf(t.with_lanes(t.lanes() / lanes), overflow_undef);
                 return Broadcast::make(e1, lanes);
             }
         }
     }
 
-    // Expr random_expr(RandomEngine &rng, Type t, int depth, bool overflow_undef = false);
+    // Expr random_expr(Type t, int depth, bool overflow_undef = false);
 
-    Expr random_condition(RandomEngine &rng, Type t, int depth, bool maybe_scalar) {
+    Expr random_condition(Type t, int depth, bool maybe_scalar) {
         static make_bin_op_fn make_bin_op[] = {
             EQ::make,
             NE::make,
@@ -156,22 +153,22 @@ class RandomExpressionGenerator {
             GE::make,
         };
 
-        if (maybe_scalar && (rng() & 1)) {
+        if (maybe_scalar && fuzz.ConsumeBool()) {
             t = t.element_of();
         }
 
-        Expr a = random_expr(rng, t, depth);
-        Expr b = random_expr(rng, t, depth);
-        return random_choice(rng, make_bin_op)(a, b);
+        Expr a = random_expr(t, depth);
+        Expr b = random_expr(t, depth);
+        return fuzz.PickValueInArray(make_bin_op)(a, b);
     }
 
-    Expr random_expr(RandomEngine &rng, Type t, int depth, bool overflow_undef = false) {
+    Expr random_expr(Type t, int depth, bool overflow_undef = false) {
         if (t.is_int() && t.bits() == 32) {
             overflow_undef = true;
         }
 
         if (depth-- <= 0) {
-            return random_leaf(rng, t, overflow_undef);
+            return random_leaf(t, overflow_undef);
         }
 
         // Weight the choices to cover all Deinterleaver visit methods:
@@ -181,12 +178,12 @@ class RandomExpressionGenerator {
 
         // Leaf
         ops.push_back([&]() -> Expr {
-            return random_leaf(rng, t);
+            return random_leaf(t);
         });
 
         if (gen_arithmetic) {
             // Arithmetic
-            ops.push_back([&]() {
+            ops.push_back([&] {
                 static make_bin_op_fn make_bin_op[] = {
                     // Arithmetic operations.
                     Add::make,
@@ -198,14 +195,14 @@ class RandomExpressionGenerator {
                     Mod::make,
                     make_absd,
                     make_abs};
-                Expr a = random_expr(rng, t, depth, overflow_undef);
-                Expr b = random_expr(rng, t, depth, overflow_undef);
-                return random_choice(rng, make_bin_op)(a, b);
+                Expr a = random_expr(t, depth, overflow_undef);
+                Expr b = random_expr(t, depth, overflow_undef);
+                return fuzz.PickValueInArray(make_bin_op)(a, b);
             });
         }
         if (gen_bitwise) {
             // Bitwise
-            ops.push_back([&]() {
+            ops.push_back([&] {
                 static make_bin_op_fn make_bin_op[] = {
                     make_bitwise_or,
                     make_bitwise_and,
@@ -214,14 +211,14 @@ class RandomExpressionGenerator {
                     make_shift_right,  // No shift left or we just keep testing integer overflow
                 };
 
-                Expr a = random_expr(rng, t, depth, overflow_undef);
-                Expr b = random_expr(rng, t, depth, overflow_undef);
-                return random_choice(rng, make_bin_op)(a, b);
+                Expr a = random_expr(t, depth, overflow_undef);
+                Expr b = random_expr(t, depth, overflow_undef);
+                return fuzz.PickValueInArray(make_bin_op)(a, b);
             });
         }
         if (gen_bool_ops) {
             // Boolean ops
-            ops.push_back([&]() {
+            ops.push_back([&] {
                 static make_bin_op_fn make_bin_op[] = {
                     And::make,
                     Or::make,
@@ -229,27 +226,27 @@ class RandomExpressionGenerator {
 
                 // Boolean operations -- both sides must be cast to booleans,
                 // and then we must cast the result back to 't'.
-                Expr a = random_expr(rng, t, depth, overflow_undef);
-                Expr b = random_expr(rng, t, depth, overflow_undef);
+                Expr a = random_expr(t, depth, overflow_undef);
+                Expr b = random_expr(t, depth, overflow_undef);
                 Type bool_with_lanes = Bool(t.lanes());
                 a = cast(bool_with_lanes, a);
                 b = cast(bool_with_lanes, b);
-                return cast(t, random_choice(rng, make_bin_op)(a, b));
+                return cast(t, fuzz.PickValueInArray(make_bin_op)(a, b));
             });
         }
         if (gen_select) {
             // Select
             ops.push_back(
                 [&]() -> Expr {
-                    auto c = random_condition(rng, t, depth, true);
-                    auto e1 = random_expr(rng, t, depth, overflow_undef);
-                    auto e2 = random_expr(rng, t, depth, overflow_undef);
+                    auto c = random_condition(t, depth, true);
+                    auto e1 = random_expr(t, depth, overflow_undef);
+                    auto e2 = random_expr(t, depth, overflow_undef);
                     return select(c, e1, e2);
                 });
         }
         // Cast
         if (gen_cast) {
-            ops.push_back([&]() {
+            ops.emplace_back([&] {
                 // Get a random type that isn't `t` or int32 (int32 can overflow, and we don't care about that).
                 std::vector<Type> subtypes;
                 for (const Type &subtype : fuzz_types) {
@@ -257,8 +254,8 @@ class RandomExpressionGenerator {
                         subtypes.push_back(subtype);
                     }
                 }
-                Type subtype = random_choice(rng, subtypes).with_lanes(t.lanes());
-                return Cast::make(t, random_expr(rng, subtype, depth, overflow_undef));
+                Type subtype = fuzz.PickValueInVector(subtypes).with_lanes(t.lanes());
+                return Cast::make(t, random_expr(subtype, depth, overflow_undef));
             });
         }
         if (gen_reinterpret) {
@@ -275,10 +272,10 @@ class RandomExpressionGenerator {
                 }
                 // Should at least be able to preserve the existing bit width and change signedness.
                 internal_assert(!valid_widths.empty());
-                int other_bits = random_choice(rng, valid_widths);
+                int other_bits = fuzz.PickValueInVector(valid_widths);
                 int other_lanes = total_bits / other_bits;
-                Type other = ((rng() & 1) ? Int(other_bits) : UInt(other_bits)).with_lanes(other_lanes);
-                Expr e = random_expr(rng, other, depth);
+                Type other = (fuzz.ConsumeBool() ? Int(other_bits) : UInt(other_bits)).with_lanes(other_lanes);
+                Expr e = random_expr(other, depth);
                 return Reinterpret::make(t, e);
             });
         }
@@ -287,40 +284,40 @@ class RandomExpressionGenerator {
             // Broadcast of vector
             ops.push_back([&]() -> Expr {
                 if (t.lanes() != 1) {
-                    int lanes = get_random_divisor(rng, t.lanes());
-                    auto e1 = random_expr(rng, t.with_lanes(t.lanes() / lanes), depth, overflow_undef);
+                    int lanes = get_random_divisor(t.lanes());
+                    auto e1 = random_expr(t.with_lanes(t.lanes() / lanes), depth, overflow_undef);
                     return Broadcast::make(e1, lanes);
                 }
-                return random_expr(rng, t, depth, overflow_undef);
+                return random_expr(t, depth, overflow_undef);
             });
         }
 
         if (gen_ramp_of_vector) {
             // Ramp
-            ops.push_back([&]() {
+            ops.push_back([&] {
                 if (t.lanes() != 1) {
-                    int lanes = get_random_divisor(rng, t.lanes());
-                    auto e1 = random_expr(rng, t.with_lanes(t.lanes() / lanes), depth, overflow_undef);
-                    auto e2 = random_expr(rng, t.with_lanes(t.lanes() / lanes), depth, overflow_undef);
+                    int lanes = get_random_divisor(t.lanes());
+                    auto e1 = random_expr(t.with_lanes(t.lanes() / lanes), depth, overflow_undef);
+                    auto e2 = random_expr(t.with_lanes(t.lanes() / lanes), depth, overflow_undef);
                     return Ramp::make(e1, e2, lanes);
                 }
-                return random_expr(rng, t, depth, overflow_undef);
+                return random_expr(t, depth, overflow_undef);
             });
         }
         if (gen_bool_ops) {
-            ops.push_back([&]() {
+            ops.push_back([&] {
                 if (t.is_bool()) {
-                    auto e1 = random_expr(rng, t, depth);
+                    auto e1 = random_expr(t, depth);
                     return Not::make(e1);
                 }
-                return random_expr(rng, t, depth, overflow_undef);
+                return random_expr(t, depth, overflow_undef);
             });
-            ops.push_back([&]() {
+            ops.push_back([&] {
                 // When generating boolean expressions, maybe throw in a condition on non-bool types.
                 if (t.is_bool()) {
-                    return random_condition(rng, random_type(rng, t.lanes()), depth, false);
+                    return random_condition(random_type(t.lanes()), depth, false);
                 }
-                return random_expr(rng, t, depth, overflow_undef);
+                return random_expr(t, depth, overflow_undef);
             });
         }
         if (gen_shuffles) {
@@ -328,34 +325,34 @@ class RandomExpressionGenerator {
             ops.push_back([&]() -> Expr {
                 if (t.lanes() >= 4 && t.lanes() % 2 == 0) {
                     int half = t.lanes() / 2;
-                    Expr a = random_expr(rng, t.with_lanes(half), depth);
-                    Expr b = random_expr(rng, t.with_lanes(half), depth);
+                    Expr a = random_expr(t.with_lanes(half), depth);
+                    Expr b = random_expr(t.with_lanes(half), depth);
                     return Shuffle::make_interleave({a, b});
                 }
                 // Fall back to a simple expression
-                return random_expr(rng, t, depth);
+                return random_expr(t, depth);
             });
             // Shuffle (concat)
             ops.push_back([&]() -> Expr {
                 if (t.lanes() >= 4 && t.lanes() % 2 == 0) {
                     int half = t.lanes() / 2;
-                    Expr a = random_expr(rng, t.with_lanes(half), depth);
-                    Expr b = random_expr(rng, t.with_lanes(half), depth);
+                    Expr a = random_expr(t.with_lanes(half), depth);
+                    Expr b = random_expr(t.with_lanes(half), depth);
                     return Shuffle::make_concat({a, b});
                 }
-                return random_expr(rng, t, depth);
+                return random_expr(t, depth);
             });
             // Shuffle (slice)
             ops.push_back([&]() -> Expr {
                 // Make a wider vector and slice it
                 if (t.lanes() <= 8) {
                     int wider = t.lanes() * 2;
-                    Expr e = random_expr(rng, t.with_lanes(wider), depth);
+                    Expr e = random_expr(t.with_lanes(wider), depth);
                     // Slice: take every other element starting at 0 or 1
-                    int start = rng() & 1;
+                    int start = fuzz.ConsumeIntegralInRange(0, 1);
                     return Shuffle::make_slice(e, start, 2, t.lanes());
                 }
-                return random_expr(rng, t, depth);
+                return random_expr(t, depth);
             });
         }
         if (gen_vector_reduce) {
@@ -363,7 +360,7 @@ class RandomExpressionGenerator {
             ops.push_back([&]() -> Expr {
                 // Input has more lanes, output has t.lanes() lanes
                 // factor must divide input lanes, and input lanes = t.lanes() * factor
-                int factor = (rng() % 3) + 2;
+                int factor = fuzz.ConsumeIntegralInRange(2, 4);
                 int input_lanes = t.lanes() * factor;
                 if (input_lanes <= 32) {
                     VectorReduce::Operator ops[] = {
@@ -371,16 +368,16 @@ class RandomExpressionGenerator {
                         VectorReduce::Min,
                         VectorReduce::Max,
                     };
-                    auto op = random_choice(rng, ops);
-                    Expr val = random_expr(rng, t.with_lanes(input_lanes), depth);
+                    auto op = fuzz.PickValueInArray(ops);
+                    Expr val = random_expr(t.with_lanes(input_lanes), depth);
                     internal_assert(val.type().lanes() == input_lanes) << val;
                     return VectorReduce::make(op, val, t.lanes());
                 }
-                return random_expr(rng, t, depth);
+                return random_expr(t, depth);
             });
         }
 
-        Expr e = random_choice(rng, ops)();
+        Expr e = fuzz.PickValueInVector(ops)();
         internal_assert(e.type() == t) << e.type() << " " << t << " " << e;
         return e;
     }
diff --git a/test/fuzz/simplify.cpp b/test/fuzz/simplify.cpp
index 52f9354d0f35..93513e2d1868 100644
--- a/test/fuzz/simplify.cpp
+++ b/test/fuzz/simplify.cpp
@@ -1,8 +1,7 @@
 #include "Halide.h"
-#include <array>
 #include <functional>
-#include <random>
 
+#include "fuzz_helpers.h"
 #include "random_expr_generator.h"
 
 // Test the simplifier in Halide by testing for equivalence of randomly generated expressions.
@@ -13,9 +12,7 @@ using std::string;
 using namespace Halide;
 using namespace Halide::Internal;
 
-using RandomEngine = RandomExpressionGenerator::RandomEngine;
-
-bool test_simplification(Expr a, Expr b, Type t, const map<string, Expr> &vars) {
+bool test_simplification(Expr a, Expr b, const map<string, Expr> &vars) {
     if (equal(a, b) && !a.same_as(b)) {
         std::cerr << "Simplifier created new IR node but made no changes:\n"
                   << a << "\n";
@@ -60,12 +57,12 @@ bool test_simplification(Expr a, Expr b, Type t, const map<string, Expr> &vars)
     return true;
 }
 
-bool test_expression(RandomExpressionGenerator &reg, RandomEngine &rng, Expr test, int samples) {
+bool test_expression(RandomExpressionGenerator &reg, Expr test, int samples) {
     Expr simplified = simplify(test);
 
     map<string, Expr> vars;
-    for (int i = 0; i < (int)reg.fuzz_vars.size(); i++) {
-        vars[reg.fuzz_var(i)] = Expr();
+    for (const auto &fuzz_var : reg.fuzz_vars) {
+        vars[fuzz_var.name()] = Expr();
     }
 
     for (int i = 0; i < samples; i++) {
@@ -74,38 +71,29 @@ bool test_expression(RandomExpressionGenerator &reg, RandomEngine &rng, Expr tes
             // Don't let the random leaf depend on v itself.
             size_t iterations = 0;
             do {
-                val = reg.random_leaf(rng, Int(32), true);
+                val = reg.random_leaf(Int(32), true);
                 iterations++;
             } while (expr_uses_var(val, var) && iterations < kMaxLeafIterations);
         }
 
-        if (!test_simplification(test, simplified, test.type(), vars)) {
+        if (!test_simplification(test, simplified, vars)) {
             return false;
         }
     }
     return true;
 }
 
-template<typename T>
-T initialize_rng() {
-    constexpr size_t kStateWords = T::state_size * sizeof(typename T::result_type) / sizeof(uint32_t);
-    std::vector<uint32_t> random(kStateWords);
-    std::generate(random.begin(), random.end(), std::random_device{});
-    std::seed_seq seed_seq(random.begin(), random.end());
-    return T{seed_seq};
-}
-
 }  // namespace
 
-int main(int argc, char **argv) {
+FUZZ_TEST(simplify, FuzzingContext &fuzz) {
     // Depth of the randomly generated expression trees.
     constexpr int depth = 6;
     // Number of samples to test the generated expressions for.
     constexpr int samples = 3;
+    // Number of samples to test the generated expressions for during minimization.
+    constexpr int samples_during_minimization = 100;
 
-    auto seed_generator = initialize_rng<RandomEngine>();
-
-    RandomExpressionGenerator reg;
+    RandomExpressionGenerator reg{fuzz};
     reg.fuzz_types = {UInt(1), UInt(8), UInt(16), UInt(32), Int(8), Int(16), Int(32)};
     // FIXME: UInt64 fails!
     // FIXME: These need to be disabled (otherwise crashes and/or failures):
@@ -115,83 +103,54 @@ int main(int argc, char **argv) {
     reg.gen_reinterpret = false;
     reg.gen_shuffles = false;
 
-    for (int i = 0; i < ((argc == 1) ? 10000 : 1); i++) {
-        auto seed = seed_generator();
-        if (argc > 1) {
-            std::istringstream{argv[1]} >> seed;
-        }
-        // Print the seed on every iteration so that if the simplifier crashes
-        // (rather than the check failing), we can reproduce.
-        std::cout << "Seed: " << seed << "\n";
-        RandomEngine rng{seed};
-        std::array<int, 6> vector_widths = {1, 2, 3, 4, 6, 8};
-        int width = reg.random_choice(rng, vector_widths);
-        Type VT = reg.random_type(rng, width);
-        // Generate a random expr...
-        Expr test = reg.random_expr(rng, VT, depth);
-        std::cout << test << "\n";
-        if (!test_expression(reg, rng, test, samples)) {
-
-            class LimitDepth : public IRMutator {
-                int limit;
-
-            public:
-                using IRMutator::mutate;
-
-                Expr mutate(const Expr &e) override {
-                    if (limit == 0) {
-                        return simplify(e);
-                    } else {
-                        limit--;
-                        Expr new_e = IRMutator::mutate(e);
-                        limit++;
-                        return new_e;
-                    }
-                }
+    int width = fuzz.PickValueInArray({1, 2, 3, 4, 6, 8});
+    Expr test = reg.random_expr(reg.random_type(width), depth);
+
+    if (!test_expression(reg, test, samples)) {
+        class LimitDepth : public IRMutator {
+            int limit;
+
+        public:
+            using IRMutator::mutate;
 
-                LimitDepth(int l)
-                    : limit(l) {
+            Expr mutate(const Expr &e) override {
+                if (limit == 0) {
+                    return simplify(e);
+                } else {
+                    limit--;
+                    Expr new_e = IRMutator::mutate(e);
+                    limit++;
+                    return new_e;
                 }
-            };
-
-            // Failure. Find the minimal subexpression that failed.
-            std::cout << "Testing subexpressions...\n";
-            class TestSubexpressions : public IRMutator {
-                RandomExpressionGenerator reg;
-                RandomEngine &rng;
-                bool found_failure = false;
-
-            public:
-                using IRMutator::mutate;
-                Expr mutate(const Expr &e) override {
-                    // We know there's a failure here somewhere, so test
-                    // subexpressions more aggressively.
-                    constexpr int samples = 100;
-                    IRMutator::mutate(e);
-                    if (e.type().bits() && !found_failure) {
-                        Expr limited;
-                        for (int i = 1; i < 4 && !found_failure; i++) {
-                            limited = LimitDepth(i).mutate(e);
-                            found_failure = !test_expression(reg, rng, limited, samples);
-                        }
-                        if (!found_failure) {
-                            found_failure = !test_expression(reg, rng, e, samples);
-                        }
+            }
+
+            LimitDepth(int l)
+                : limit(l) {
+            }
+        };
+
+        // Failure. Find the minimal subexpression that failed.
+        std::cerr << "Testing subexpressions...\n";
+        bool found_failure = false;
+        test = mutate_with(test, [&](auto *self, const Expr &e) {
+            self->mutate(e);
+            if (e.type().bits() && !found_failure) {
+                for (int i = 1; i < 4 && !found_failure; i++) {
+                    Expr limited = LimitDepth(i).mutate(e);
+                    found_failure = !test_expression(reg, limited, samples_during_minimization);
+                    if (found_failure) {
+                        return limited;
                     }
-                    return e;
                 }
-
-                TestSubexpressions(RandomExpressionGenerator &reg, RandomEngine &rng)
-                    : reg(reg), rng(rng) {
+                if (!found_failure) {
+                    found_failure = !test_expression(reg, e, samples_during_minimization);
                 }
-            } tester(reg, rng);
-            tester.mutate(test);
-
-            std::cout << "Failed with seed " << seed << "\n";
-            return 1;
-        }
+            }
+            return e;
+        });
+        std::cerr << "Final test case: " << test << "\n";
+        return 1;
     }
 
-    std::cout << "Success!\n";
     return 0;
 }

From 52d1997642a75144644e5fcee7d6c4fb0d9debe0 Mon Sep 17 00:00:00 2001
From: Alex Reinking <areinking@adobe.com>
Date: Tue, 17 Mar 2026 08:05:52 -0400
Subject: [PATCH 06/12] Replace LimitDepth visitor with simplify_at_depth

---
 test/fuzz/simplify.cpp | 36 +++++++++++++-----------------------
 1 file changed, 13 insertions(+), 23 deletions(-)

diff --git a/test/fuzz/simplify.cpp b/test/fuzz/simplify.cpp
index 93513e2d1868..2f2ccd28b659 100644
--- a/test/fuzz/simplify.cpp
+++ b/test/fuzz/simplify.cpp
@@ -83,6 +83,18 @@ bool test_expression(RandomExpressionGenerator &reg, Expr test, int samples) {
     return true;
 }
 
+Expr simplify_at_depth(int limit, const Expr &in) {
+    return mutate_with(in, [&](auto *self, const Expr &e) {
+        if (limit == 0) {
+            return simplify(e);
+        }
+        limit--;
+        Expr new_e = self->mutate(e);
+        limit++;
+        return new_e;
+    });
+}
+
 }  // namespace
 
 FUZZ_TEST(simplify, FuzzingContext &fuzz) {
@@ -107,28 +119,6 @@ FUZZ_TEST(simplify, FuzzingContext &fuzz) {
     Expr test = reg.random_expr(reg.random_type(width), depth);
 
     if (!test_expression(reg, test, samples)) {
-        class LimitDepth : public IRMutator {
-            int limit;
-
-        public:
-            using IRMutator::mutate;
-
-            Expr mutate(const Expr &e) override {
-                if (limit == 0) {
-                    return simplify(e);
-                } else {
-                    limit--;
-                    Expr new_e = IRMutator::mutate(e);
-                    limit++;
-                    return new_e;
-                }
-            }
-
-            LimitDepth(int l)
-                : limit(l) {
-            }
-        };
-
         // Failure. Find the minimal subexpression that failed.
         std::cerr << "Testing subexpressions...\n";
         bool found_failure = false;
@@ -136,7 +126,7 @@ FUZZ_TEST(simplify, FuzzingContext &fuzz) {
             self->mutate(e);
             if (e.type().bits() && !found_failure) {
                 for (int i = 1; i < 4 && !found_failure; i++) {
-                    Expr limited = LimitDepth(i).mutate(e);
+                    Expr limited = simplify_at_depth(i, e);
                     found_failure = !test_expression(reg, limited, samples_during_minimization);
                     if (found_failure) {
                         return limited;

From c58a91c7434b46df830b1fccbfffe9d6b5156a1b Mon Sep 17 00:00:00 2001
From: Alex Reinking <areinking@adobe.com>
Date: Tue, 17 Mar 2026 11:24:26 -0400
Subject: [PATCH 07/12] Port lossless_cast to the RandomExpressionGenerator

---
 test/fuzz/lossless_cast.cpp       | 178 +++++-------------------------
 test/fuzz/random_expr_generator.h | 136 +++++++++++++++++------
 test/fuzz/simplify.cpp            |  18 ++-
 3 files changed, 142 insertions(+), 190 deletions(-)

diff --git a/test/fuzz/lossless_cast.cpp b/test/fuzz/lossless_cast.cpp
index 01ef1c38e8d7..ed4330b76f38 100644
--- a/test/fuzz/lossless_cast.cpp
+++ b/test/fuzz/lossless_cast.cpp
@@ -1,4 +1,5 @@
 #include "fuzz_helpers.h"
+#include "random_expr_generator.h"
 #include <Halide.h>
 
 using namespace Halide;
@@ -6,155 +7,6 @@ using namespace Halide::Internal;
 
 namespace {
 
-constexpr int size = 1024;
-Buffer<uint8_t> buf_u8(size, "buf_u8");
-Buffer<int8_t> buf_i8(size, "buf_i8");
-Var x{"x"};
-
-Expr random_expr(FuzzingContext &fuzz) {
-    std::vector<Expr> exprs;
-    // Add some atoms
-    exprs.push_back(cast<uint8_t>(fuzz.ConsumeIntegral<uint8_t>()));
-    exprs.push_back(cast<int8_t>(fuzz.ConsumeIntegral<int8_t>()));
-    exprs.push_back(cast<uint8_t>(fuzz.ConsumeIntegral<uint8_t>()));
-    exprs.push_back(cast<int8_t>(fuzz.ConsumeIntegral<int8_t>()));
-    exprs.push_back(buf_u8(x));
-    exprs.push_back(buf_i8(x));
-
-    // Make random combinations of them
-    while (true) {
-        Expr e;
-        Expr e1 = fuzz.PickValueInVector(exprs);
-
-        Expr e2 = fuzz.PickValueInVector(exprs);
-        Expr e2_narrow = e2;
-        e2 = cast(e1.type(), e2);
-
-        Expr e3 = cast(e1.type().with_code(halide_type_uint), fuzz.PickValueInVector(exprs));
-        bool may_widen = e1.type().bits() < 64;
-        bool may_widen_right = e2_narrow.type() == e1.type().narrow();
-        switch (fuzz.ConsumeIntegralInRange(0, 7)) {
-        case 0:
-            if (may_widen) {
-                e = cast(e1.type().widen(), e1);
-            }
-            break;
-        case 1:
-            if (may_widen) {
-                e = cast(Int(e1.type().bits() * 2), e1);
-            }
-            break;
-        case 2:
-            e = e1 + e2;
-            break;
-        case 3:
-            e = e1 - e2;
-            break;
-        case 4:
-            e = e1 * e2;
-            break;
-        case 5:
-            e = e1 / e2;
-            break;
-        case 6:
-            // Introduce some lets
-            e = common_subexpression_elimination(e1);
-            break;
-        case 7:
-            switch (fuzz.ConsumeIntegralInRange(0, 19)) {
-            case 0:
-                if (may_widen) {
-                    e = widening_add(e1, e2);
-                }
-                break;
-            case 1:
-                if (may_widen) {
-                    e = widening_sub(e1, e2);
-                }
-                break;
-            case 2:
-                if (may_widen) {
-                    e = widening_mul(e1, e2);
-                }
-                break;
-            case 3:
-                e = halving_add(e1, e2);
-                break;
-            case 4:
-                e = rounding_halving_add(e1, e2);
-                break;
-            case 5:
-                e = halving_sub(e1, e2);
-                break;
-            case 6:
-                e = saturating_add(e1, e2);
-                break;
-            case 7:
-                e = saturating_sub(e1, e2);
-                break;
-            case 8:
-                e = count_leading_zeros(e1);
-                break;
-            case 9:
-                e = count_trailing_zeros(e1);
-                break;
-            case 10:
-                if (may_widen) {
-                    e = rounding_mul_shift_right(e1, e2, e3);
-                }
-                break;
-            case 11:
-                if (may_widen) {
-                    e = mul_shift_right(e1, e2, e3);
-                }
-                break;
-            case 12:
-                if (may_widen_right) {
-                    e = widen_right_add(e1, e2_narrow);
-                }
-                break;
-            case 13:
-                if (may_widen_right) {
-                    e = widen_right_sub(e1, e2_narrow);
-                }
-                break;
-            case 14:
-                if (may_widen_right) {
-                    e = widen_right_mul(e1, e2_narrow);
-                }
-                break;
-            case 15:
-                e = e1 << e2;
-                break;
-            case 16:
-                e = e1 >> e2;
-                break;
-            case 17:
-                e = rounding_shift_right(e1, e2);
-                break;
-            case 18:
-                e = rounding_shift_left(e1, e2);
-                break;
-            case 19:
-                e = ~e1;
-                break;
-            }
-        }
-
-        if (!e.defined()) {
-            continue;
-        }
-
-        // Stop when we get to 64 bits, but probably don't stop on a cast,
-        // because that'll just get trivially stripped.
-        if (e.type().bits() == 64 && (e.as<Cast>() == nullptr || fuzz.ConsumeIntegralInRange(0, 7) == 0)) {
-            return e;
-        }
-
-        exprs.push_back(e);
-    }
-}
-
 bool definitely_has_ub(Expr e) {
     e = simplify(e);
 
@@ -264,10 +116,36 @@ bool might_have_ub(Expr e) {
 }  // namespace
 
 FUZZ_TEST(lossless_cast, FuzzingContext &fuzz) {
+    constexpr int size = 1024;
+    Buffer<uint8_t> buf_u8(size, "buf_u8");
+    Buffer<int8_t> buf_i8(size, "buf_i8");
+    Var x{"x"};
+
     buf_u8.fill(fuzz);
     buf_i8.fill(fuzz);
 
-    Expr e1 = random_expr(fuzz);
+    RandomExpressionGenerator reg{
+        fuzz,
+        {
+            buf_u8(x),
+            buf_i8(x),
+            cast<uint8_t>(fuzz.ConsumeIntegral<uint8_t>()),
+            cast<int8_t>(fuzz.ConsumeIntegral<int8_t>()),
+            cast<uint8_t>(fuzz.ConsumeIntegral<uint8_t>()),
+            cast<int8_t>(fuzz.ConsumeIntegral<int8_t>()),
+        }};
+    // Scalar integer types only, no bool. TODO: Int64 fails
+    reg.fuzz_types = {UInt(8), UInt(16), UInt(32), Int(8), Int(16), Int(32)};
+    // Scalar only, disable vector-specific operations
+    reg.gen_broadcast_of_vector = false;
+    reg.gen_ramp_of_vector = false;
+    reg.gen_shuffles = false;
+    reg.gen_vector_reduce = false;
+    reg.gen_reinterpret = false;
+
+    constexpr int depth = 5;
+    Expr e1 = reg.random_expr(reg.random_type(), depth);
+
     Expr simplified = simplify(e1);
 
     if (might_have_ub(e1) ||
diff --git a/test/fuzz/random_expr_generator.h b/test/fuzz/random_expr_generator.h
index abb10351260f..4926736e0339 100644
--- a/test/fuzz/random_expr_generator.h
+++ b/test/fuzz/random_expr_generator.h
@@ -17,31 +17,28 @@ class RandomExpressionGenerator {
 public:
     using make_bin_op_fn = Expr (*)(Expr, Expr);
 
-    bool gen_cast = true;
-    bool gen_select = true;
+    // keep-sorted start
     bool gen_arithmetic = true;
     bool gen_bitwise = true;
     bool gen_bool_ops = true;
-    bool gen_reinterpret = true;
     bool gen_broadcast_of_vector = true;
+    bool gen_cast = true;
+    bool gen_cse = true;
+    bool gen_intrinsics = true;
     bool gen_ramp_of_vector = true;
+    bool gen_reinterpret = true;
+    bool gen_select = true;
     bool gen_shuffles = true;
     bool gen_vector_reduce = true;
+    // keep-sorted end
 
     FuzzingContext &fuzz;
 
     std::vector<Type> fuzz_types = {UInt(1), UInt(8), UInt(16), UInt(32), UInt(64), Int(8), Int(16), Int(32), Int(64)};
-    std::vector<Param<int>> fuzz_vars;
+    std::vector<Expr> atoms;
 
-    explicit RandomExpressionGenerator(FuzzingContext &fuzz)
-        : fuzz(fuzz) {
-        for (int i = 0; i < 5; i++) {
-            fuzz_vars.emplace_back("a" + std::to_string(i));
-        }
-    }
-
-    Type random_scalar_type() {
-        return fuzz.PickValueInVector(fuzz_types);
+    explicit RandomExpressionGenerator(FuzzingContext &fuzz, std::vector<Expr> atoms)
+        : fuzz(fuzz), atoms(std::move(atoms)) {
     }
 
     int get_random_divisor(int x) {
@@ -56,11 +53,11 @@ class RandomExpressionGenerator {
     }
 
     Expr random_var(Type t) {
-        return cast(t, fuzz.PickValueInVector(fuzz_vars));
+        return cast(t, fuzz.PickValueInVector(atoms));
     }
 
-    Type random_type(int width) {
-        Type t = random_scalar_type();
+    Type random_type(int width = 1) {
+        Type t = fuzz.PickValueInVector(fuzz_types);
         if (width > 1) {
             t = t.with_lanes(width);
         }
@@ -177,13 +174,13 @@ class RandomExpressionGenerator {
         std::vector<std::function<Expr()>> ops;
 
         // Leaf
-        ops.push_back([&]() -> Expr {
+        ops.emplace_back([&] {
             return random_leaf(t);
         });
 
         if (gen_arithmetic) {
             // Arithmetic
-            ops.push_back([&] {
+            ops.emplace_back([&] {
                 static make_bin_op_fn make_bin_op[] = {
                     // Arithmetic operations.
                     Add::make,
@@ -202,7 +199,7 @@ class RandomExpressionGenerator {
         }
         if (gen_bitwise) {
             // Bitwise
-            ops.push_back([&] {
+            ops.emplace_back([&] {
                 static make_bin_op_fn make_bin_op[] = {
                     make_bitwise_or,
                     make_bitwise_and,
@@ -218,7 +215,7 @@ class RandomExpressionGenerator {
         }
         if (gen_bool_ops) {
             // Boolean ops
-            ops.push_back([&] {
+            ops.emplace_back([&] {
                 static make_bin_op_fn make_bin_op[] = {
                     And::make,
                     Or::make,
@@ -236,13 +233,12 @@ class RandomExpressionGenerator {
         }
         if (gen_select) {
             // Select
-            ops.push_back(
-                [&]() -> Expr {
-                    auto c = random_condition(t, depth, true);
-                    auto e1 = random_expr(t, depth, overflow_undef);
-                    auto e2 = random_expr(t, depth, overflow_undef);
-                    return select(c, e1, e2);
-                });
+            ops.emplace_back([&] {
+                auto c = random_condition(t, depth, true);
+                auto e1 = random_expr(t, depth, overflow_undef);
+                auto e2 = random_expr(t, depth, overflow_undef);
+                return select(c, e1, e2);
+            });
         }
         // Cast
         if (gen_cast) {
@@ -260,7 +256,7 @@ class RandomExpressionGenerator {
         }
         if (gen_reinterpret) {
             // Reinterpret (different bit width, changes lane count)
-            ops.push_back([&]() -> Expr {
+            ops.emplace_back([&] {
                 int total_bits = t.bits() * t.lanes();
                 // Pick a different bit width that divides the total bits evenly
                 int bit_widths[] = {8, 16, 32, 64};
@@ -282,7 +278,7 @@ class RandomExpressionGenerator {
 
         if (gen_broadcast_of_vector) {
             // Broadcast of vector
-            ops.push_back([&]() -> Expr {
+            ops.emplace_back([&] {
                 if (t.lanes() != 1) {
                     int lanes = get_random_divisor(t.lanes());
                     auto e1 = random_expr(t.with_lanes(t.lanes() / lanes), depth, overflow_undef);
@@ -294,7 +290,7 @@ class RandomExpressionGenerator {
 
         if (gen_ramp_of_vector) {
             // Ramp
-            ops.push_back([&] {
+            ops.emplace_back([&] {
                 if (t.lanes() != 1) {
                     int lanes = get_random_divisor(t.lanes());
                     auto e1 = random_expr(t.with_lanes(t.lanes() / lanes), depth, overflow_undef);
@@ -305,14 +301,14 @@ class RandomExpressionGenerator {
             });
         }
         if (gen_bool_ops) {
-            ops.push_back([&] {
+            ops.emplace_back([&] {
                 if (t.is_bool()) {
                     auto e1 = random_expr(t, depth);
                     return Not::make(e1);
                 }
                 return random_expr(t, depth, overflow_undef);
             });
-            ops.push_back([&] {
+            ops.emplace_back([&] {
                 // When generating boolean expressions, maybe throw in a condition on non-bool types.
                 if (t.is_bool()) {
                     return random_condition(random_type(t.lanes()), depth, false);
@@ -322,7 +318,7 @@ class RandomExpressionGenerator {
         }
         if (gen_shuffles) {
             // Shuffle (interleave)
-            ops.push_back([&]() -> Expr {
+            ops.emplace_back([&] {
                 if (t.lanes() >= 4 && t.lanes() % 2 == 0) {
                     int half = t.lanes() / 2;
                     Expr a = random_expr(t.with_lanes(half), depth);
@@ -333,7 +329,7 @@ class RandomExpressionGenerator {
                 return random_expr(t, depth);
             });
             // Shuffle (concat)
-            ops.push_back([&]() -> Expr {
+            ops.emplace_back([&] {
                 if (t.lanes() >= 4 && t.lanes() % 2 == 0) {
                     int half = t.lanes() / 2;
                     Expr a = random_expr(t.with_lanes(half), depth);
@@ -343,7 +339,7 @@ class RandomExpressionGenerator {
                 return random_expr(t, depth);
             });
             // Shuffle (slice)
-            ops.push_back([&]() -> Expr {
+            ops.emplace_back([&] {
                 // Make a wider vector and slice it
                 if (t.lanes() <= 8) {
                     int wider = t.lanes() * 2;
@@ -357,7 +353,7 @@ class RandomExpressionGenerator {
         }
         if (gen_vector_reduce) {
             // VectorReduce (only when we can make it work with lane counts)
-            ops.push_back([&]() -> Expr {
+            ops.emplace_back([&] {
                 // Input has more lanes, output has t.lanes() lanes
                 // factor must divide input lanes, and input lanes = t.lanes() * factor
                 int factor = fuzz.ConsumeIntegralInRange(2, 4);
@@ -376,6 +372,74 @@ class RandomExpressionGenerator {
                 return random_expr(t, depth);
             });
         }
+        if (gen_intrinsics && t.bits() >= 8) {
+            // Fixed-point and intrinsic operations (from lossless_cast fuzzer)
+            ops.emplace_back([&] {
+                bool may_widen = t.bits() < 32;  // TODO: uint64 is broken
+                bool has_narrow = t.bits() >= 16;
+                Type nt = has_narrow ? t.narrow() : t;
+
+                std::vector<std::function<Expr()>> choices;
+
+                // Halving ops
+                choices.emplace_back([&] { return halving_add(random_expr(t, depth, overflow_undef), random_expr(t, depth, overflow_undef)); });
+                choices.emplace_back([&] { return rounding_halving_add(random_expr(t, depth, overflow_undef), random_expr(t, depth, overflow_undef)); });
+                choices.emplace_back([&] { return halving_sub(random_expr(t, depth, overflow_undef), random_expr(t, depth, overflow_undef)); });
+
+                // Saturating ops
+                choices.emplace_back([&] { return saturating_add(random_expr(t, depth, overflow_undef), random_expr(t, depth, overflow_undef)); });
+                choices.emplace_back([&] { return saturating_sub(random_expr(t, depth, overflow_undef), random_expr(t, depth, overflow_undef)); });
+
+                // Count ops
+                choices.emplace_back([&] { return count_leading_zeros(random_expr(t, depth, overflow_undef)); });
+                choices.emplace_back([&] { return count_trailing_zeros(random_expr(t, depth, overflow_undef)); });
+
+                // Rounding shift ops
+                choices.emplace_back([&] { return rounding_shift_right(random_expr(t, depth, overflow_undef), random_expr(t, depth, overflow_undef)); });
+                choices.emplace_back([&] { return rounding_shift_left(random_expr(t, depth, overflow_undef), random_expr(t, depth, overflow_undef)); });
+
+                // Widening ops: inputs are t.narrow(), output is t
+                if (has_narrow) {
+                    choices.emplace_back([&] { return widening_add(random_expr(nt, depth, overflow_undef), random_expr(nt, depth, overflow_undef)); });
+                    choices.emplace_back([&] { return widening_mul(random_expr(nt, depth, overflow_undef), random_expr(nt, depth, overflow_undef)); });
+                }
+
+                // Widening sub always returns signed
+                if (has_narrow && t.is_int()) {
+                    choices.emplace_back([&] { return widening_sub(random_expr(nt, depth, overflow_undef), random_expr(nt, depth, overflow_undef)); });
+                }
+
+                // Widen-right ops: a is type t, b is type t.narrow(), output is type t
+                if (has_narrow) {
+                    choices.emplace_back([&] { return widen_right_add(random_expr(t, depth, overflow_undef), random_expr(nt, depth, overflow_undef)); });
+                    choices.emplace_back([&] { return widen_right_sub(random_expr(t, depth, overflow_undef), random_expr(nt, depth, overflow_undef)); });
+                    choices.emplace_back([&] { return widen_right_mul(random_expr(t, depth, overflow_undef), random_expr(nt, depth, overflow_undef)); });
+                }
+
+                // mul_shift_right / rounding_mul_shift_right
+                if (may_widen) {
+                    choices.emplace_back([&] {
+                        Expr a = random_expr(t, depth, overflow_undef);
+                        Expr b = random_expr(t, depth, overflow_undef);
+                        Expr c = cast(t.with_code(halide_type_uint), random_expr(t, depth, overflow_undef));
+                        return mul_shift_right(a, b, c);
+                    });
+                    choices.emplace_back([&] {
+                        Expr a = random_expr(t, depth, overflow_undef);
+                        Expr b = random_expr(t, depth, overflow_undef);
+                        Expr c = cast(t.with_code(halide_type_uint), random_expr(t, depth, overflow_undef));
+                        return rounding_mul_shift_right(a, b, c);
+                    });
+                }
+
+                return fuzz.PickValueInVector(choices)();
+            });
+        }
+        if (gen_cse) {
+            ops.emplace_back([&] {
+                return common_subexpression_elimination(random_expr(t, depth, overflow_undef));
+            });
+        }
 
         Expr e = fuzz.PickValueInVector(ops)();
         internal_assert(e.type() == t) << e.type() << " " << t << " " << e;
diff --git a/test/fuzz/simplify.cpp b/test/fuzz/simplify.cpp
index 2f2ccd28b659..96fa84498f70 100644
--- a/test/fuzz/simplify.cpp
+++ b/test/fuzz/simplify.cpp
@@ -61,8 +61,10 @@ bool test_expression(RandomExpressionGenerator &reg, Expr test, int samples) {
     Expr simplified = simplify(test);
 
     map<string, Expr> vars;
-    for (const auto &fuzz_var : reg.fuzz_vars) {
-        vars[fuzz_var.name()] = Expr();
+    for (const auto &atom : reg.atoms) {
+        if (const Variable *v = atom.as<Variable>()) {
+            vars[v->name] = atom;
+        }
     }
 
     for (int i = 0; i < samples; i++) {
@@ -105,9 +107,17 @@ FUZZ_TEST(simplify, FuzzingContext &fuzz) {
     // Number of samples to test the generated expressions for during minimization.
     constexpr int samples_during_minimization = 100;
 
-    RandomExpressionGenerator reg{fuzz};
-    reg.fuzz_types = {UInt(1), UInt(8), UInt(16), UInt(32), Int(8), Int(16), Int(32)};
+    RandomExpressionGenerator reg{
+        fuzz,
+        {
+            Param<int>("a0"),
+            Param<int>("a1"),
+            Param<int>("a2"),
+            Param<int>("a3"),
+            Param<int>("a4"),
+        }};
     // FIXME: UInt64 fails!
+    reg.fuzz_types = {UInt(1), UInt(8), UInt(16), UInt(32), Int(8), Int(16), Int(32)};
     // FIXME: These need to be disabled (otherwise crashes and/or failures):
     // reg.gen_ramp_of_vector = false;
     // reg.gen_broadcast_of_vector = false;

From 9e86e1007efcfa5c6836a29a4065e29673cf37ab Mon Sep 17 00:00:00 2001
From: Alex Reinking <areinking@adobe.com>
Date: Tue, 17 Mar 2026 12:16:07 -0400
Subject: [PATCH 08/12] Propagate info in Simplify_Cast case.

Co-authored-by: Andrew Adams <anadams@adobe.com>
---
 src/Simplify_Cast.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Simplify_Cast.cpp b/src/Simplify_Cast.cpp
index 8738c26aeb97..6dfe3453b84f 100644
--- a/src/Simplify_Cast.cpp
+++ b/src/Simplify_Cast.cpp
@@ -25,7 +25,7 @@ Expr Simplify::visit(const Cast *op, ExprInfo *info) {
         // It's possible we just reduced to a constant. E.g. if we cast an
         // even number to uint1 we get zero.
         if (value_info.bounds.is_single_point()) {
-            return make_const(op->type, value_info.bounds.min, nullptr);
+            return make_const(op->type, value_info.bounds.min, info);
         }
     }
 

From 5611da7a01675d00ad61c5cd3e61bf710813b509 Mon Sep 17 00:00:00 2001
From: Andrew Adams <andrew.b.adams@gmail.com>
Date: Tue, 17 Mar 2026 09:45:12 -0700
Subject: [PATCH 09/12] Fix accidental wraparound in cast constant folding

---
 src/Simplify_Cast.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/Simplify_Cast.cpp b/src/Simplify_Cast.cpp
index 6dfe3453b84f..6392e099cd66 100644
--- a/src/Simplify_Cast.cpp
+++ b/src/Simplify_Cast.cpp
@@ -25,7 +25,13 @@ Expr Simplify::visit(const Cast *op, ExprInfo *info) {
         // It's possible we just reduced to a constant. E.g. if we cast an
         // even number to uint1 we get zero.
         if (value_info.bounds.is_single_point()) {
-            return make_const(op->type, value_info.bounds.min, info);
+            if (op->type.is_uint()) {
+                // The single point may be negative before the cast, so make
+                // sure we call the uint64 overload.
+                return make_const(op->type, (uint64_t)value_info.bounds.min, info);
+            } else {
+                return make_const(op->type, value_info.bounds.min, info);
+            }
         }
     }
 

From 91e9d921e35654eb34e1266ede833c865a2eebb7 Mon Sep 17 00:00:00 2001
From: Andrew Adams <andrew.b.adams@gmail.com>
Date: Tue, 17 Mar 2026 13:59:11 -0700
Subject: [PATCH 10/12] Fix idempotence issues for uint64 simplifications

---
 src/Simplify_Add.cpp    | 12 ++++++++++--
 src/Simplify_Cast.cpp   |  8 +-------
 src/Simplify_Exprs.cpp  |  4 +++-
 src/Simplify_Internal.h | 34 ++++++++++++++++++++++++++++++++--
 src/Simplify_Max.cpp    | 17 ++++++++++-------
 src/Simplify_Min.cpp    | 12 +++++++-----
 src/Simplify_Mul.cpp    | 16 ++++++++++++----
 src/Simplify_Select.cpp |  3 ++-
 src/Simplify_Sub.cpp    | 10 +++++++++-
 test/fuzz/simplify.cpp  | 40 ++++++++++++++++++++++++++--------------
 10 files changed, 112 insertions(+), 44 deletions(-)

diff --git a/src/Simplify_Add.cpp b/src/Simplify_Add.cpp
index 6158cc9cd48c..440a154ec521 100644
--- a/src/Simplify_Add.cpp
+++ b/src/Simplify_Add.cpp
@@ -25,8 +25,16 @@ Expr Simplify::visit(const Add *op, ExprInfo *info) {
 
     if (rewrite(IRMatcher::Overflow() + x, a) ||
         rewrite(x + IRMatcher::Overflow(), b) ||
-        rewrite(x + 0, x) ||
-        rewrite(0 + x, x)) {
+        rewrite(x + 0, a) ||
+        rewrite(0 + x, b)) {
+        if (info) {
+            if (rewrite.result.same_as(a)) {
+                info->intersect(a_info);
+            } else {
+                internal_assert(rewrite.result.same_as(b));
+                info->intersect(b_info);
+            }
+        }
         return rewrite.result;
     }
 
diff --git a/src/Simplify_Cast.cpp b/src/Simplify_Cast.cpp
index 6392e099cd66..6dfe3453b84f 100644
--- a/src/Simplify_Cast.cpp
+++ b/src/Simplify_Cast.cpp
@@ -25,13 +25,7 @@ Expr Simplify::visit(const Cast *op, ExprInfo *info) {
         // It's possible we just reduced to a constant. E.g. if we cast an
         // even number to uint1 we get zero.
         if (value_info.bounds.is_single_point()) {
-            if (op->type.is_uint()) {
-                // The single point may be negative before the cast, so make
-                // sure we call the uint64 overload.
-                return make_const(op->type, (uint64_t)value_info.bounds.min, info);
-            } else {
-                return make_const(op->type, value_info.bounds.min, info);
-            }
+            return make_const(op->type, value_info.bounds.min, info);
         }
     }
 
diff --git a/src/Simplify_Exprs.cpp b/src/Simplify_Exprs.cpp
index bbd67a5bace0..8a29a1de6ad4 100644
--- a/src/Simplify_Exprs.cpp
+++ b/src/Simplify_Exprs.cpp
@@ -19,10 +19,12 @@ Expr Simplify::visit(const IntImm *op, ExprInfo *info) {
 }
 
 Expr Simplify::visit(const UIntImm *op, ExprInfo *info) {
-    if (info && Int(64).can_represent(op->value)) {
+    if (info) {
+        // Pretend it's an int constant that has been cast to uint.
         int64_t v = (int64_t)(op->value);
         info->bounds = ConstantInterval::single_point(v);
         info->alignment = ModulusRemainder(0, v);
+        // If it's not representable as an int64, this will wrap it appropriately:
         info->cast_to(op->type);
     } else {
         clear_expr_info(info);
diff --git a/src/Simplify_Internal.h b/src/Simplify_Internal.h
index 7c42f1ece309..d85d0f4d45be 100644
--- a/src/Simplify_Internal.h
+++ b/src/Simplify_Internal.h
@@ -124,8 +124,34 @@ class Simplify : public VariadicVisitor<Simplify, Expr, Stmt> {
                 }
             }
 
-            // Truncate the bounds to the new type.
-            bounds.cast_to(t);
+            // We have to take special care with uint64, because their bounds
+            // and alignment may not be representable with ModulusRemainder and
+            // ConstantInterval.
+            if (t.bits() == 64 && t.is_uint()) {
+                // For UInt64 constants, the remainder might not be representable as an int64
+                if (alignment.modulus == 0 && alignment.remainder < 0) {
+                    // Forget the leading two bits to get a representable modulus
+                    // and remainder.
+                    alignment.modulus = (int64_t)1 << 62;
+                    alignment.remainder = alignment.remainder & (alignment.modulus - 1);
+                }
+
+                int64_t old_min = bounds.min;
+                bounds.cast_to(t);
+                if (bounds.min_defined && old_min > 0) {
+                    // We don't want to lose a known positive min value for
+                    // uint64s. In general a ConstantInterval represents
+                    // infinite-precision integer intervals, and a cast from an infinite
+                    // precision integer to a uint64 could overflow. However, in the
+                    // simplifier, ConstantIntervals are used to represent bounds on the
+                    // values a Halide::Expr could take on, and for all Halide Expr
+                    // types, casting to a uint64_t can't overflow at the top end
+                    // (e.g. double casts to uint64_t saturate).
+                    bounds.min = old_min;
+                }
+            } else {
+                bounds.cast_to(t);
+            }
         }
 
         // Mix in existing knowledge about this Expr
@@ -241,6 +267,10 @@ class Simplify : public VariadicVisitor<Simplify, Expr, Stmt> {
     // We never want to return make_const anything in the simplifier without
     // also setting the ExprInfo, so shadow the global make_const.
     Expr make_const(const Type &t, int64_t c, ExprInfo *info) {
+        if (t.is_uint() && c < 0) {
+            // Wrap it around
+            return make_const(t, (uint64_t)c, info);
+        }
         c = normalize_constant(t, c);
         set_expr_info_to_constant(info, c);
         return Halide::Internal::make_const(t, c);
diff --git a/src/Simplify_Max.cpp b/src/Simplify_Max.cpp
index 1926bc9a069e..db5d676af427 100644
--- a/src/Simplify_Max.cpp
+++ b/src/Simplify_Max.cpp
@@ -21,8 +21,10 @@ Expr Simplify::visit(const Max *op, ExprInfo *info) {
     if (max_info.bounds.is_single_point()) {
         // This is possible when, for example, the largest number in the type
         // that satisfies the alignment of the left-hand-side is smaller than
-        // the min value of the right-hand-side.
-        return make_const(op->type, max_info.bounds.min, nullptr);
+        // the min value of the right-hand-side. Reinferring the info can
+        // potentially give us something tighter than what was computed above if
+        // it's a large uint64.
+        return make_const(op->type, max_info.bounds.min, info);
     }
 
     auto strip_likely = [](const Expr &e) {
@@ -65,10 +67,10 @@ Expr Simplify::visit(const Max *op, ExprInfo *info) {
         return rewrite.result;
     }
 
+    // Cases where one side dominates. All of these must reduce to a or b in the
+    // RHS for ExprInfo to update correctly.
     if (EVAL_IN_LAMBDA  //
         (rewrite(max(x, x), a) ||
-         rewrite(max(c0, c1), fold(max(c0, c1))) ||
-         // Cases where one side dominates:
          rewrite(max(x, c0), b, is_max_value(c0)) ||
          rewrite(max(x, c0), a, is_min_value(c0)) ||
          rewrite(max((x / c0) * c0, x), b, c0 > 0) ||
@@ -148,16 +150,17 @@ Expr Simplify::visit(const Max *op, ExprInfo *info) {
             // than just applying max to two constant intervals.
             if (rewrite.result.same_as(a)) {
                 info->intersect(a_info);
-            } else if (rewrite.result.same_as(b)) {
+            } else {
+                internal_assert(rewrite.result.same_as(b));
                 info->intersect(b_info);
             }
         }
-
         return rewrite.result;
     }
 
     if (EVAL_IN_LAMBDA  //
-        (rewrite(max(max(x, c0), c1), max(x, fold(max(c0, c1)))) ||
+        (rewrite(max(c0, c1), fold(max(c0, c1))) ||
+         rewrite(max(max(x, c0), c1), max(x, fold(max(c0, c1)))) ||
          rewrite(max(max(x, c0), y), max(max(x, y), c0)) ||
          rewrite(max(max(x, y), max(x, z)), max(max(y, z), x)) ||
          rewrite(max(max(y, x), max(x, z)), max(max(y, z), x)) ||
diff --git a/src/Simplify_Min.cpp b/src/Simplify_Min.cpp
index 3f6084c6c4f1..9c44a950818d 100644
--- a/src/Simplify_Min.cpp
+++ b/src/Simplify_Min.cpp
@@ -22,7 +22,7 @@ Expr Simplify::visit(const Min *op, ExprInfo *info) {
         // This is possible when, for example, the smallest number in the type
         // that satisfies the alignment of the left-hand-side is greater than
         // the max value of the right-hand-side.
-        return make_const(op->type, min_info.bounds.min, nullptr);
+        return make_const(op->type, min_info.bounds.min, info);
     }
 
     // Early out when the bounds tells us one side or the other is smaller
@@ -66,10 +66,10 @@ Expr Simplify::visit(const Min *op, ExprInfo *info) {
         return rewrite.result;
     }
 
+    // Cases where one side dominates. All of these must reduce to a or b in the
+    // RHS for ExprInfo to update correctly.
     if (EVAL_IN_LAMBDA  //
         (rewrite(min(x, x), a) ||
-         rewrite(min(c0, c1), fold(min(c0, c1))) ||
-         // Cases where one side dominates:
          rewrite(min(x, c0), b, is_min_value(c0)) ||
          rewrite(min(x, c0), a, is_max_value(c0)) ||
          rewrite(min((x / c0) * c0, x), a, c0 > 0) ||
@@ -148,7 +148,8 @@ Expr Simplify::visit(const Min *op, ExprInfo *info) {
         if (info) {
             if (rewrite.result.same_as(a)) {
                 info->intersect(a_info);
-            } else if (rewrite.result.same_as(b)) {
+            } else {
+                internal_assert(rewrite.result.same_as(b));
                 info->intersect(b_info);
             }
         }
@@ -156,7 +157,8 @@ Expr Simplify::visit(const Min *op, ExprInfo *info) {
     }
 
     if (EVAL_IN_LAMBDA  //
-        (rewrite(min(min(x, c0), c1), min(x, fold(min(c0, c1)))) ||
+        (rewrite(min(c0, c1), fold(min(c0, c1))) ||
+         rewrite(min(min(x, c0), c1), min(x, fold(min(c0, c1)))) ||
          rewrite(min(min(x, c0), y), min(min(x, y), c0)) ||
          rewrite(min(min(x, y), min(x, z)), min(min(y, z), x)) ||
          rewrite(min(min(y, x), min(x, z)), min(min(y, z), x)) ||
diff --git a/src/Simplify_Mul.cpp b/src/Simplify_Mul.cpp
index dfa38d39111c..58032ab79d62 100644
--- a/src/Simplify_Mul.cpp
+++ b/src/Simplify_Mul.cpp
@@ -41,10 +41,18 @@ Expr Simplify::visit(const Mul *op, ExprInfo *info) {
         return rewrite.result;
     }
 
-    if (rewrite(0 * x, 0) ||
-        rewrite(1 * x, x) ||
-        rewrite(x * 0, 0) ||
-        rewrite(x * 1, x)) {
+    if (rewrite(0 * x, a) ||
+        rewrite(1 * x, b) ||
+        rewrite(x * 0, b) ||
+        rewrite(x * 1, a)) {
+        if (info) {
+            if (rewrite.result.same_as(a)) {
+                info->intersect(a_info);
+            } else {
+                internal_assert(rewrite.result.same_as(b));
+                info->intersect(b_info);
+            }
+        }
         return rewrite.result;
     }
 
diff --git a/src/Simplify_Select.cpp b/src/Simplify_Select.cpp
index 3bc4507fc74b..e78d02014eda 100644
--- a/src/Simplify_Select.cpp
+++ b/src/Simplify_Select.cpp
@@ -34,7 +34,8 @@ Expr Simplify::visit(const Select *op, ExprInfo *info) {
         if (info) {
             if (rewrite.result.same_as(true_value)) {
                 *info = t_info;
-            } else if (rewrite.result.same_as(false_value)) {
+            } else {
+                internal_assert(rewrite.result.same_as(false_value));
                 *info = f_info;
             }
         }
diff --git a/src/Simplify_Sub.cpp b/src/Simplify_Sub.cpp
index 29bd02c78ed6..a6fcc9e675ce 100644
--- a/src/Simplify_Sub.cpp
+++ b/src/Simplify_Sub.cpp
@@ -22,7 +22,15 @@ Expr Simplify::visit(const Sub *op, ExprInfo *info) {
 
     if (rewrite(IRMatcher::Overflow() - x, a) ||
         rewrite(x - IRMatcher::Overflow(), b) ||
-        rewrite(x - 0, x)) {
+        rewrite(x - 0, a)) {
+        if (info) {
+            if (rewrite.result.same_as(a)) {
+                info->intersect(a_info);
+            } else {
+                internal_assert(rewrite.result.same_as(b));
+                info->intersect(b_info);
+            }
+        }
         return rewrite.result;
     }
 
diff --git a/test/fuzz/simplify.cpp b/test/fuzz/simplify.cpp
index 96fa84498f70..089ff124c8bd 100644
--- a/test/fuzz/simplify.cpp
+++ b/test/fuzz/simplify.cpp
@@ -19,17 +19,31 @@ bool test_simplification(Expr a, Expr b, const map<string, Expr> &vars) {
         return false;
     }
     if (Expr sb = simplify(b); !equal(b, sb)) {
-        std::cerr << "Idempotency failure!\n    " << a << "\n -> " << b << "\n -> " << sb << "\n";
-        // These are broken out below to make it easier to parse any logging
-        // added to the simplifier to debug the failure.
-        std::cerr << "---------------------------------\n"
-                  << "Begin simplification of original:\n"
-                  << simplify(a) << "\n";
-        std::cerr << "---------------------------------\n"
-                  << "Begin resimplification of result:\n"
-                  << simplify(b) << "\n"
-                  << "---------------------------------\n";
-
+        // Test all sub-expressions in post-order (children first) to find a minimal failing case
+        bool found_failure = false;
+        mutate_with(a, [&](auto *self, const Expr &e) {
+            self->mutate_base(e);
+            Expr s = simplify(e);
+            Expr ss = simplify(s);
+            if (!found_failure && !equal(s, ss)) {
+                std::cerr << "Idempotency failure\n    "
+                          << e << "\n -> "
+                          << s << "\n -> "
+                          << ss << "\n";
+                // These are broken out below to make it easier to parse any logging
+                // added to the simplifier to debug the failure.
+                std::cerr << "---------------------------------\n"
+                          << "Begin simplification of original:\n"
+                          << simplify(e) << "\n";
+                std::cerr << "---------------------------------\n"
+                          << "Begin resimplification of result:\n"
+                          << simplify(s) << "\n"
+                          << "---------------------------------\n";
+
+                found_failure = true;
+            }
+            return e;
+        });
         return false;
     }
 
@@ -116,8 +130,6 @@ FUZZ_TEST(simplify, FuzzingContext &fuzz) {
             Param<int>("a3"),
             Param<int>("a4"),
         }};
-    // FIXME: UInt64 fails!
-    reg.fuzz_types = {UInt(1), UInt(8), UInt(16), UInt(32), Int(8), Int(16), Int(32)};
     // FIXME: These need to be disabled (otherwise crashes and/or failures):
     // reg.gen_ramp_of_vector = false;
     // reg.gen_broadcast_of_vector = false;
@@ -133,7 +145,7 @@ FUZZ_TEST(simplify, FuzzingContext &fuzz) {
         std::cerr << "Testing subexpressions...\n";
         bool found_failure = false;
         test = mutate_with(test, [&](auto *self, const Expr &e) {
-            self->mutate(e);
+            self->mutate_base(e);
             if (e.type().bits() && !found_failure) {
                 for (int i = 1; i < 4 && !found_failure; i++) {
                     Expr limited = simplify_at_depth(i, e);

From 6135d9ee74e18d80a8ff6d1a1a49ee7a2726246b Mon Sep 17 00:00:00 2001
From: Andrew Adams <andrew.b.adams@gmail.com>
Date: Tue, 17 Mar 2026 14:21:30 -0700
Subject: [PATCH 11/12] Infer more aggressive bounds in UIntImm visitor

---
 src/Simplify_Exprs.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/Simplify_Exprs.cpp b/src/Simplify_Exprs.cpp
index 8a29a1de6ad4..8fb7554ec84b 100644
--- a/src/Simplify_Exprs.cpp
+++ b/src/Simplify_Exprs.cpp
@@ -24,8 +24,12 @@ Expr Simplify::visit(const UIntImm *op, ExprInfo *info) {
         int64_t v = (int64_t)(op->value);
         info->bounds = ConstantInterval::single_point(v);
         info->alignment = ModulusRemainder(0, v);
-        // If it's not representable as an int64, this will wrap it appropriately:
+        // If it's not representable as an int64, this will wrap the alignment appropriately:
         info->cast_to(op->type);
+        // Be as informative as we can with bounds for out-of-range uint64s
+        if ((int64_t)op->value < 0) {
+            info->bounds = ConstantInterval::bounded_below(INT64_MAX);
+        }
     } else {
         clear_expr_info(info);
     }

From e9cba46017ca7f02de57f937e8c4ecc6feafe957 Mon Sep 17 00:00:00 2001
From: Alex Reinking <areinking@adobe.com>
Date: Tue, 17 Mar 2026 17:42:48 -0400
Subject: [PATCH 12/12] Another mutate ~> mutate_base fix

---
 test/fuzz/simplify.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/fuzz/simplify.cpp b/test/fuzz/simplify.cpp
index 089ff124c8bd..061f2641fa2d 100644
--- a/test/fuzz/simplify.cpp
+++ b/test/fuzz/simplify.cpp
@@ -105,7 +105,7 @@ Expr simplify_at_depth(int limit, const Expr &in) {
             return simplify(e);
         }
         limit--;
-        Expr new_e = self->mutate(e);
+        Expr new_e = self->mutate_base(e);
         limit++;
         return new_e;
     });