diff --git a/csrc/dynamic_transform.cpp b/csrc/dynamic_transform.cpp index 27f91a66c68..6889e35ebf5 100644 --- a/csrc/dynamic_transform.cpp +++ b/csrc/dynamic_transform.cpp @@ -265,7 +265,6 @@ void DynamicTransformConcretizationInfo::analyzeReshapes( // Determine output shape using expr evaluator. Note there may be // one domain of extent -1 std::vector<int64_t> out_shape(out_dom.size(), 0); - bool extent_m1_found = false; for (const auto i : c10::irange(out_dom.size())) { auto out_id = out_dom.at(i); auto extent_val = expr_eval->evaluate(out_id->extent()); @@ -277,16 +276,12 @@ void DynamicTransformConcretizationInfo::analyzeReshapes( extent_val.is<int64_t>(), "Invalid evaluated value of domain extent: ", out_id->toString()); - const auto extent_int = extent_val.as<int64_t>(); + auto extent_int = extent_val.as<int64_t>(); if (extent_int == -1) { - TORCH_INTERNAL_ASSERT( - !extent_m1_found, - "Multiple output domains of size -1 not allowed", - out_tv->toString()); - extent_m1_found = true; - } else { - TORCH_INTERNAL_ASSERT( - extent_int > 0, "Invalid output domain extent: ", extent_int); + // For non-constant Scalar sizes, check that we have not passed -1. + TORCH_CHECK( + out_id->extent()->isConst(), + "Values of -1 passed to reshape must be constant at definition."); } out_shape.at(i) = extent_int; } diff --git a/csrc/ops/alias.cpp b/csrc/ops/alias.cpp index 91aef003b88..a7028be5f75 100644 --- a/csrc/ops/alias.cpp +++ b/csrc/ops/alias.cpp @@ -5,6 +5,7 @@ * SPDX-License-Identifier: BSD-3-Clause */ // clang-format on +#include <expr_simplifier.h> #include <ir_builder.h> #include <ir_utils.h> #include <ops/alias.h> @@ -121,12 +122,38 @@ TensorView* reshape(TensorView* inp_tv, const std::vector<Val*>& new_sizes) { // Create placeholder rfactor domain. Note it's not connected with the root // domain. 
std::vector<IterDomain*> rfactor_domain(new_sizes.size(), nullptr); + bool found_neg_one = false; for (const auto i : c10::irange(new_sizes.size())) { - auto rf_id = IterDomainBuilder( - FusionGuard::getCurFusion()->zeroVal(), new_sizes.at(i)) - .iter_type(IterType::Symbolic) - .is_rfactor_domain(true) - .build(); + auto new_size = new_sizes.at(i); + if (new_size->isConstScalar() && new_size->evaluateInt() == -1) { + // It is usually safe to use the provided scalars as the output shapes. + // However, if -1 is provided for some position, it will not correspond to + // the actual extent in that position. + + TORCH_CHECK( + !found_neg_one, + "A maximum of one value of -1 can be provided to reshape."); + found_neg_one = true; + + Val* numel = FusionGuard::getCurFusion()->oneVal(); + Val* other_new_numel = FusionGuard::getCurFusion()->oneVal(); + for (const auto j : c10::irange(inp_dom.size())) { + numel = mul(numel, inp_dom.at(j)->extent()); + } + for (const auto j : c10::irange(new_sizes.size())) { + if (i == j) { + continue; + } + other_new_numel = mul(other_new_numel, new_sizes.at(j)); + } + new_size = div(numel, other_new_numel); + new_size = simplifyExpr(new_size); + } + auto rf_id = + IterDomainBuilder(FusionGuard::getCurFusion()->zeroVal(), new_size) + .iter_type(IterType::Symbolic) + .is_rfactor_domain(true) + .build(); rfactor_domain.at(i) = rf_id; } diff --git a/test/test_dynamic_transform.cpp b/test/test_dynamic_transform.cpp index 7530fba589e..6bade708196 100644 --- a/test/test_dynamic_transform.cpp +++ b/test/test_dynamic_transform.cpp @@ -1070,4 +1070,85 @@ TEST_F(NVFuserTest, FusionDynamicEmptyCat2_CUDA) { EXPECT_EQ(output_def->input(0), seg_fusion->inputs()[0]); } +TEST_F(NVFuserTest, Issue249_CUDA) { + std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>(); + Fusion& fusion = *fusion_ptr.get(); + FusionGuard fg(&fusion); + + TensorView* tv0 = makeSymbolicTensor(4); + fusion.addInput(tv0); + + auto tv1 = add(tv0, tv0); + auto tv2 = reshape( + tv1, 
{tv1->axis(0)->extent(), + tv1->axis(2)->extent(), + IrBuilder::create<Int>(-1)}); + auto tv3 = add(tv2, tv2); + fusion.addOutput(tv3); + + FusionExecutorCache fusion_executor_cache(std::move(fusion_ptr)); + + auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); + at::Tensor at_x = at::randn({2, 3, 4, 5}, options); + auto at_y = (at_x + at_x).reshape({2, 4, -1}); + auto at_z = at_y + at_y; + + auto outputs = fusion_executor_cache.runFusionWithInputs({at_x}); + + testValidate( + fusion_executor_cache.fusion(), + outputs, + {at_x}, + {at_z}, + __LINE__, + __FILE__); +} + +// This is just like the test above, but uses an input scalar with value -1 +TEST_F(NVFuserTest, Issue249InputNegative1_CUDA) { + std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>(); + Fusion& fusion = *fusion_ptr.get(); + FusionGuard fg(&fusion); + + TensorView* tv0 = makeSymbolicTensor(4); + fusion.addInput(tv0); + + auto s0 = IrBuilder::create<Int>(DataType::Int); + auto s1 = IrBuilder::create<Int>(DataType::Int); + auto s2 = IrBuilder::create<Int>(DataType::Int); + fusion.addInput(s0); + fusion.addInput(s1); + fusion.addInput(s2); + + auto tv1 = add(tv0, tv0); + auto tv2 = reshape(tv1, {s0, s1, s2}); + auto tv3 = add(tv2, tv2); + fusion.addOutput(tv3); + + FusionExecutorCache fusion_executor_cache(std::move(fusion_ptr)); + + auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); + at::Tensor at_x = at::randn({2, 3, 4, 5}, options); + auto at_y = (at_x + at_x).reshape({2, 4, -1}); + auto at_z = at_y + at_y; + + // Dynamic reshape sizes that are not constant at definition must be explicit: + // no -1 allowed + EXPECT_THROW( + fusion_executor_cache.runFusionWithInputs({at_x, 2, 4, -1}), + std::exception); + + // Passing explicit sizes works fine + auto outputs = fusion_executor_cache.runFusionWithInputs({at_x, 2, 4, 15}); + + testValidate( + fusion_executor_cache.fusion(), + outputs, + {at_x, 2, 4, 15}, + {at_z}, + __LINE__, + __FILE__); +} + } // namespace nvfuser