From 2705785442a1e9ecf93f91902da548136fba8336 Mon Sep 17 00:00:00 2001
From: Jacob Hinkle
Date: Mon, 19 May 2025 14:03:56 -0400
Subject: [PATCH 1/6] Use proper extent expression for dynamic reshape extents

Fixes #4476
---
 csrc/ops/alias.cpp           | 40 +++++++++++++++++++++++-------------
 tests/cpp/test_evaluator.cpp | 24 ++++++++++++++++++++++
 2 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/csrc/ops/alias.cpp b/csrc/ops/alias.cpp
index cdc80329206..46dbf271615 100644
--- a/csrc/ops/alias.cpp
+++ b/csrc/ops/alias.cpp
@@ -150,6 +150,25 @@ TensorView* reshape(TensorView* inp_tv, const std::vector<Val*>& new_sizes) {
   // domain.
   std::vector<IterDomain*> logical_domain(new_sizes.size(), nullptr);
   bool found_neg_one = false;
+
+  Val* numel = FusionGuard::getCurFusion()->oneVal();
+  for (const auto j : arange(inp_dom.size())) {
+    numel = SimplifyingIrBuilder::mulExpr(numel, inp_dom.at(j)->extent());
+  }
+
+  const auto neg_one_size = [&numel, &new_sizes](size_t pos) {
+    Val* other_new_numel = FusionGuard::getCurFusion()->oneVal();
+    for (const auto j : arange(new_sizes.size())) {
+      if (pos == j) {
+        continue;
+      }
+      other_new_numel =
+          SimplifyingIrBuilder::mulExpr(other_new_numel, new_sizes.at(j));
+    }
+    Val* new_size = SimplifyingIrBuilder::divExpr(numel, other_new_numel);
+    return simplifyExpr(new_size);
+  };
+
   for (const auto i : arange(new_sizes.size())) {
     auto new_size = new_sizes.at(i);
     if (new_size->isConstScalar() && new_size->evaluate().as<int64_t>() == -1) {
@@ -162,20 +181,13 @@ TensorView* reshape(TensorView* inp_tv, const std::vector<Val*>& new_sizes) {
           "A maximum of one value of -1 can be provided to reshape.");
       found_neg_one = true;
 
-      Val* numel = FusionGuard::getCurFusion()->oneVal();
-      Val* other_new_numel = FusionGuard::getCurFusion()->oneVal();
-      for (const auto j : arange(inp_dom.size())) {
-        numel = SimplifyingIrBuilder::mulExpr(numel, inp_dom.at(j)->extent());
-      }
-      for (const auto j : arange(new_sizes.size())) {
-        if (i == j) {
-          continue;
-        }
-        other_new_numel =
-            SimplifyingIrBuilder::mulExpr(other_new_numel, new_sizes.at(j));
-      }
-      new_size = SimplifyingIrBuilder::divExpr(numel, other_new_numel);
-      new_size = simplifyExpr(new_size);
+      new_size = neg_one_size(i);
+    } else if (!new_size->isConstScalar()) {
+      // Dynamic size could be -1. Here we build a correct shape expression
+      new_size = where(
+          eq(new_size, IrBuilder::create<Val>(-1L, DataType::Index)),
+          neg_one_size(i),
+          new_size);
     }
     new_size = SimplifyingIrBuilder::maybeCastExpr(DataType::Index, new_size);
     auto rf_id =
diff --git a/tests/cpp/test_evaluator.cpp b/tests/cpp/test_evaluator.cpp
index a29d22a3e19..75c4e1b005e 100644
--- a/tests/cpp/test_evaluator.cpp
+++ b/tests/cpp/test_evaluator.cpp
@@ -825,4 +825,28 @@ TEST_F(ExprEvalTest, NamedScalar) {
   EXPECT_EQ(cache_id_pvalue.as<int64_t>(), kCacheIdValue);
 }
 
+TEST_F(ExprEvalTest, View_FlattenToMinusOneAsInput) {
+  Fusion fusion;
+  FusionGuard fg(&fusion);
+
+  TensorView* in = makeSymbolicTensor(2);
+  auto* out_dim_size = IrBuilder::create<Val>(DataType::Int);
+  TensorView* out = reshape(in, {out_dim_size});
+  fusion.addInput(in);
+  fusion.addInput(out_dim_size);
+  fusion.addOutput(out);
+
+  fusion.printMath();
+
+  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA);
+  at::Tensor in_tensor = at::randn({2, 3}, options);
+
+  ExpressionEvaluator evaluator;
+  evaluator.bind(in, in_tensor);
+  evaluator.bind(out_dim_size, -1);
+  auto out_tensor = evaluator.evaluate(out).as<at::Tensor>();
+
+  EXPECT_TRUE(at::allclose(out_tensor, in_tensor.view({-1})));
+}
+
 } // namespace nvfuser

From 163d1cb0a712730b80fdaf2c13e470f39d08d7c8 Mon Sep 17 00:00:00 2001
From: Jacob Hinkle
Date: Thu, 22 May 2025 12:46:06 -0400
Subject: [PATCH 2/6] Compute numel only when necessary

---
 csrc/ops/alias.cpp | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/csrc/ops/alias.cpp b/csrc/ops/alias.cpp
index 46dbf271615..1dba5ca1719 100644
--- a/csrc/ops/alias.cpp
+++ b/csrc/ops/alias.cpp
@@ -151,12 +151,17 @@ TensorView* reshape(TensorView* inp_tv, const std::vector<Val*>& new_sizes) {
   std::vector<IterDomain*> logical_domain(new_sizes.size(), nullptr);
   bool found_neg_one = false;
 
-  Val* numel = FusionGuard::getCurFusion()->oneVal();
-  for (const auto j : arange(inp_dom.size())) {
-    numel = SimplifyingIrBuilder::mulExpr(numel, inp_dom.at(j)->extent());
-  }
+  // We don't compute numel unless we need to, and then we compute it only once
+  // to encourage hoisting
+  Val* numel = nullptr;
+  const auto neg_one_size = [&numel, &inp_dom, &new_sizes](size_t pos) {
+    if (numel == nullptr) {
+      numel = FusionGuard::getCurFusion()->oneVal();
+      for (const auto j : arange(inp_dom.size())) {
+        numel = SimplifyingIrBuilder::mulExpr(numel, inp_dom.at(j)->extent());
+      }
+    }
 
-  const auto neg_one_size = [&numel, &new_sizes](size_t pos) {
     Val* other_new_numel = FusionGuard::getCurFusion()->oneVal();
     for (const auto j : arange(new_sizes.size())) {
       if (pos == j) {

From 3ad7cb9dfe13200d5637dbf4c9d4ab47d13ff45b Mon Sep 17 00:00:00 2001
From: Jacob Hinkle
Date: Thu, 22 May 2025 12:47:17 -0400
Subject: [PATCH 3/6] Fix a couple tests

---
 tests/cpp/test_dynamic_transform.cpp | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/tests/cpp/test_dynamic_transform.cpp b/tests/cpp/test_dynamic_transform.cpp
index 090ac43af23..109dc2aab83 100644
--- a/tests/cpp/test_dynamic_transform.cpp
+++ b/tests/cpp/test_dynamic_transform.cpp
@@ -88,17 +88,19 @@ TEST_F(DynamicTransformTest, DynamicTransform1) {
     expr_eval.bind(tv0->axis(1)->extent(), 3L);
     expr_eval.bind(reshape_shape0, 3L);
     expr_eval.bind(reshape_shape1, -1L);
+    // We cannot infer the shape of tv1 from the above bound values, since
+    // either axis of tv2 might be broadcast against one from tv1.
+    expr_eval.bind(tv1->axis(0)->extent(), 3L);
+    expr_eval.bind(tv1->axis(1)->extent(), 4L);
 
     // This should throw an exception since any reshape size of -1 must be
     // specified as a definition-time constant, as opposed to an input scalar.
-    EXPECT_THAT(
-        [&]() {
-          auto initial_info = DynamicTransform::getInitialInfo(&fusion);
-          auto info =
-              DynamicTransformConcretizationInfo(&initial_info, &expr_eval);
-        },
-        ::testing::ThrowsMessage<nvfuser::nvfError>(::testing::HasSubstr(
-            "Values of -1 passed to reshape must be constant at definition")));
+    auto initial_info = DynamicTransform::getInitialInfo(&fusion);
+    auto info = DynamicTransformConcretizationInfo(&initial_info, &expr_eval);
+    NVF_CHECK(
+        info.getReshapeTransforms().size() == 1,
+        "Expected to have one reshape transform: ",
+        info.toString());
   }
 
   {
@@ -1160,10 +1162,8 @@ TEST_F(DynamicTransformTest, Issue249InputNegative1) {
   auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
   at::Tensor at_x = at::randn({2, 3, 4, 5}, options);
 
-  // Dynamic reshape sizes that are not constant at definition must be explicit:
-  // no -1 allowed
-  EXPECT_THROW(
-      executor_cache.runFusionWithInputs({at_x, 2, 4, -1}), std::exception);
+  // Test that running with dynamic -1 works as expected
+  executor_cache.runFusionWithInputs({at_x, 2, 4, -1});
 
   // Passing explicit sizes works fine
   auto outputs = executor_cache.runFusionWithInputs({at_x, 2, 4, 15});

From b2c773aa0d634d52e2cbc3aa56c956a81191e871 Mon Sep 17 00:00:00 2001
From: Jacob Hinkle
Date: Thu, 22 May 2025 13:08:05 -0400
Subject: [PATCH 4/6] Fix division by zero

---
 csrc/ops/alias.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/csrc/ops/alias.cpp b/csrc/ops/alias.cpp
index 1dba5ca1719..b9fe2b69d1f 100644
--- a/csrc/ops/alias.cpp
+++ b/csrc/ops/alias.cpp
@@ -170,6 +170,14 @@ TensorView* reshape(TensorView* inp_tv, const std::vector<Val*>& new_sizes) {
       other_new_numel =
           SimplifyingIrBuilder::mulExpr(other_new_numel, new_sizes.at(j));
     }
+    // In case numel is 0, other_new_numel might also be 0 and we would hit a
+    // division by zero. In such cases, using 1 as the denominator will cause
+    // us to properly compute 0 for this extent.
+    other_new_numel = SimplifyingIrBuilder::whereExpr(
+        eq(other_new_numel, FusionGuard::getCurFusion()->zeroVal()),
+        FusionGuard::getCurFusion()->oneVal(),
+        other_new_numel);
+
     Val* new_size = SimplifyingIrBuilder::divExpr(numel, other_new_numel);
     return simplifyExpr(new_size);
   };

From ee8335c1466f679249b64c21b98b6c16908a86f0 Mon Sep 17 00:00:00 2001
From: Jacob Hinkle
Date: Thu, 22 May 2025 13:08:17 -0400
Subject: [PATCH 5/6] Fix FusionDynamicReshapeReductionShmoo

---
 tests/cpp/test_dynamic_transform.cpp | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/tests/cpp/test_dynamic_transform.cpp b/tests/cpp/test_dynamic_transform.cpp
index 109dc2aab83..81d37b8e57c 100644
--- a/tests/cpp/test_dynamic_transform.cpp
+++ b/tests/cpp/test_dynamic_transform.cpp
@@ -860,17 +860,14 @@ TEST_F(DynamicTransformTest, FusionDynamicReshapeReductionShmoo) {
       {{8, 3 * 5, 7, 9}, {8, 3, 5 * 7, 9}, false}, // merge(1) osplit(1, 3)
       // test passing -1 dynamically for dimension size
-      // This is unsupported. See https://github.com/NVIDIA/Fuser/issues/249
-      // Values of -1 must be passed as constants instead of input-dependent
-      // scalars.
-      //{{8, 3 * 5, 7, 9}, {8, 3, -1, 9}, false} // merge(1) osplit(1, 3)
+      {{8, 3 * 5, 7, 9}, {8, 3, -1, 9}, false}, // merge(1) osplit(1, 3)
       // Empty reshapes should translate to FullOp
       {{8, 0, 7, 9}, {7, 8, 0, 9}, true}, // symbolic_sizes = [ -1, -1, 0, -1 ]
-      // In the case below there's now a separate Val introduced for the output
-      // extent, which is zero. This is represented in
-      // DynamicTransformConcretizationInfo causing cache miss
-      {{8, 0, 7, 9}, {7, 8, -1, 9}, true}, // symbolic_sizes = [ -1, -1, 0, -1 ]
+      // This hits the same conc info as {8, 3 * 5, 7, 9}, {8, 3, 5 * 7, 9}
+      {{8, 0, 7, 9},
+       {7, 8, -1, 9},
+       false}, // symbolic_sizes = [ -1, -1, 0, -1 ]
       {{8, 0, 7, 9}, {7, 8, 0, 0}, true}, // symbolic_sizes = [ -1, -1, 0, 0 ]
       {{8, 0, 7, 9}, {47, 0, 13, 0}, true}, // symbolic_sizes = [ -1, 0, -1, 0 ]
   };

From 88166f876675ea9d1d2b22f90babc6f75752a609 Mon Sep 17 00:00:00 2001
From: Jacob Hinkle
Date: Fri, 23 May 2025 09:19:38 -0400
Subject: [PATCH 6/6] Cast new sizes to Index explicitly

---
 csrc/ops/alias.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/csrc/ops/alias.cpp b/csrc/ops/alias.cpp
index b9fe2b69d1f..6b1ad79a3dd 100644
--- a/csrc/ops/alias.cpp
+++ b/csrc/ops/alias.cpp
@@ -167,8 +167,10 @@ TensorView* reshape(TensorView* inp_tv, const std::vector<Val*>& new_sizes) {
       if (pos == j) {
        continue;
       }
+      Val* new_size =
+          SimplifyingIrBuilder::maybeCastExpr(DataType::Index, new_sizes.at(j));
       other_new_numel =
-          SimplifyingIrBuilder::mulExpr(other_new_numel, new_sizes.at(j));
+          SimplifyingIrBuilder::mulExpr(other_new_numel, new_size);
     }
     // In case numel is 0, other_new_numel might also be 0 and we would hit a
     // division by zero. In such cases, using 1 as the denominator will cause
@@ -181,3 +183,4 @@ TensorView* reshape(TensorView* inp_tv, const std::vector<Val*>& new_sizes) {
     Val* new_size = SimplifyingIrBuilder::divExpr(numel, other_new_numel);
+    NVF_ERROR(new_size->dtype() == DataType::Index);
     return simplifyExpr(new_size);
   };