-
Notifications
You must be signed in to change notification settings - Fork 79
Handle constant -1 values in dynamic reshape at definition #590
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b423b7b
e461ffd
f429bc7
32efb85
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,7 @@ | |
| * SPDX-License-Identifier: BSD-3-Clause | ||
| */ | ||
| // clang-format on | ||
| #include <expr_simplifier.h> | ||
| #include <ir/builder.h> | ||
| #include <ir/utils.h> | ||
| #include <ops/alias.h> | ||
|
|
@@ -121,12 +122,38 @@ TensorView* reshape(TensorView* inp_tv, const std::vector<Val*>& new_sizes) { | |
| // Create placeholder rfactor domain. Note it's not connected with the root | ||
| // domain. | ||
| std::vector<IterDomain*> rfactor_domain(new_sizes.size(), nullptr); | ||
| bool found_neg_one = false; | ||
| for (const auto i : c10::irange(new_sizes.size())) { | ||
| auto rf_id = IterDomainBuilder( | ||
| FusionGuard::getCurFusion()->zeroVal(), new_sizes.at(i)) | ||
| .iter_type(IterType::Symbolic) | ||
| .is_rfactor_domain(true) | ||
| .build(); | ||
| auto new_size = new_sizes.at(i); | ||
| if (new_size->isConstScalar() && new_size->evaluateInt() == -1) { | ||
| // It is usually safe to use the provided scalars as the output shapes. | ||
| // However, if -1 is provided for some position, it will not correspond to | ||
| // the actual extent in that position. | ||
|
|
||
| TORCH_CHECK( | ||
| !found_neg_one, | ||
| "A maximum of one value of -1 can be provided to reshape."); | ||
| found_neg_one = true; | ||
|
|
||
| Val* numel = FusionGuard::getCurFusion()->oneVal(); | ||
| Val* other_new_numel = FusionGuard::getCurFusion()->oneVal(); | ||
| for (const auto j : c10::irange(inp_dom.size())) { | ||
| numel = mul(numel, inp_dom.at(j)->extent()); | ||
| } | ||
|
Comment on lines
+140
to
+142
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we put this in a new method?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe in lower_utils? I'd prefer minimalistic designs for the IR nodes themselves. |
||
| for (const auto j : c10::irange(new_sizes.size())) { | ||
| if (i == j) { | ||
| continue; | ||
| } | ||
| other_new_numel = mul(other_new_numel, new_sizes.at(j)); | ||
| } | ||
| new_size = div(numel, other_new_numel); | ||
| new_size = simplifyExpr(new_size); | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note that |
||
| } | ||
| auto rf_id = | ||
| IterDomainBuilder(FusionGuard::getCurFusion()->zeroVal(), new_size) | ||
| .iter_type(IterType::Symbolic) | ||
| .is_rfactor_domain(true) | ||
| .build(); | ||
| rfactor_domain.at(i) = rf_id; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1070,4 +1070,85 @@ TEST_F(NVFuserTest, FusionDynamicEmptyCat2_CUDA) { | |
| EXPECT_EQ(output_def->input(0), seg_fusion->inputs()[0]); | ||
| } | ||
|
|
||
| TEST_F(NVFuserTest, Issue249_CUDA) { | ||
| std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>(); | ||
| Fusion& fusion = *fusion_ptr.get(); | ||
| FusionGuard fg(&fusion); | ||
|
|
||
| TensorView* tv0 = makeSymbolicTensor(4); | ||
| fusion.addInput(tv0); | ||
|
|
||
| auto tv1 = add(tv0, tv0); | ||
| auto tv2 = reshape( | ||
| tv1, | ||
| {tv1->axis(0)->extent(), | ||
| tv1->axis(2)->extent(), | ||
| IrBuilder::create<Scalar>(-1)}); | ||
| auto tv3 = add(tv2, tv2); | ||
| fusion.addOutput(tv3); | ||
|
|
||
| FusionExecutorCache fusion_executor_cache(std::move(fusion_ptr)); | ||
|
|
||
| auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); | ||
| at::Tensor at_x = at::randn({2, 3, 4, 5}, options); | ||
| auto at_y = (at_x + at_x).reshape({2, 4, -1}); | ||
| auto at_z = at_y + at_y; | ||
|
|
||
| auto outputs = fusion_executor_cache.runFusionWithInputs({at_x}); | ||
|
|
||
| testValidate( | ||
| fusion_executor_cache.fusion(), | ||
| outputs, | ||
| {at_x}, | ||
| {at_z}, | ||
| __LINE__, | ||
| __FILE__); | ||
| } | ||
|
|
||
| // This is just like the test above, but uses an input scalar with value -1 | ||
| TEST_F(NVFuserTest, Issue249InputNegative1_CUDA) { | ||
| std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>(); | ||
| Fusion& fusion = *fusion_ptr.get(); | ||
| FusionGuard fg(&fusion); | ||
|
|
||
| TensorView* tv0 = makeSymbolicTensor(4); | ||
| fusion.addInput(tv0); | ||
|
|
||
| auto s0 = IrBuilder::create<Scalar>(DataType::Int); | ||
| auto s1 = IrBuilder::create<Scalar>(DataType::Int); | ||
| auto s2 = IrBuilder::create<Scalar>(DataType::Int); | ||
| fusion.addInput(s0); | ||
| fusion.addInput(s1); | ||
| fusion.addInput(s2); | ||
|
|
||
| auto tv1 = add(tv0, tv0); | ||
| auto tv2 = reshape(tv1, {s0, s1, s2}); | ||
| auto tv3 = add(tv2, tv2); | ||
| fusion.addOutput(tv3); | ||
|
|
||
| FusionExecutorCache fusion_executor_cache(std::move(fusion_ptr)); | ||
|
|
||
| auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); | ||
| at::Tensor at_x = at::randn({2, 3, 4, 5}, options); | ||
| auto at_y = (at_x + at_x).reshape({2, 4, -1}); | ||
| auto at_z = at_y + at_y; | ||
|
|
||
| // Dynamic reshape sizes that are not constant at definition must be explicit: | ||
| // no -1 allowed | ||
| EXPECT_THROW( | ||
| fusion_executor_cache.runFusionWithInputs({at_x, 2, 4, -1}), | ||
| std::exception); | ||
|
Comment on lines
+1136
to
+1140
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Input scalars of -1 are not supported for dynamic reshape currently. Supporting this is not impossible, but would increase the complexity quite a bit. At first glance, it seems like we could just replace any |
||
|
|
||
| // Passing explicit sizes works fine | ||
| auto outputs = fusion_executor_cache.runFusionWithInputs({at_x, 2, 4, 15}); | ||
|
|
||
| testValidate( | ||
| fusion_executor_cache.fusion(), | ||
| outputs, | ||
| {at_x, 2, 4, 15}, | ||
| {at_z}, | ||
| __LINE__, | ||
| __FILE__); | ||
| } | ||
|
|
||
| } // namespace nvfuser | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These checks are handled by
`reshape()` and `analyzeView()` and are not needed here.