From 3d73387d1f567c4f756561dce43fb03afe47ee31 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Thu, 20 Jul 2023 10:20:06 -0400 Subject: [PATCH 1/7] Modify Issue 418 test to include validation --- test/test_dynamic_transform.cpp | 48 ++++++++++++++------------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/test/test_dynamic_transform.cpp b/test/test_dynamic_transform.cpp index 7a8a8b4d9f0..074a0b8d1ef 100644 --- a/test/test_dynamic_transform.cpp +++ b/test/test_dynamic_transform.cpp @@ -1068,7 +1068,7 @@ TEST_F(NVFuserTest, FusionDynamicEmptyCat2_CUDA) { } // Repro of https://github.com/NVIDIA/Fuser/issues/418 -TEST_F(NVFuserTest, DynamicTransformIssue418Concretization_CUDA) { +TEST_F(NVFuserTest, DynamicTransformIssue418_CUDA) { auto fusion = std::make_unique(); FusionGuard fg(fusion.get()); @@ -1077,39 +1077,31 @@ TEST_F(NVFuserTest, DynamicTransformIssue418Concretization_CUDA) { auto s0 = IrBuilder::create(DataType::Int); fusion->addInput(s0); - auto v00 = tv0->axis(0)->extent(); - auto v01 = tv0->axis(1)->extent(); - auto v02 = tv0->axis(2)->extent(); - auto v03 = tv0->axis(3)->extent(); - - auto tv1 = reshape(tv0, {v00, div(v01, s0), s0, v02, v03}); + auto sh = tensor_sizes(tv0); + auto tv1 = reshape(tv0, {sh[0], div(sh[1], s0), s0, sh[2], sh[3]}); + // Reducing along axis 2 in tv1 is equivalent to a partial reduction across + // axis 1 of tv0. auto vm = variance_mean(tv1, {2, 3, 4}, 0, true); fusion->addOutput(vm.mean); fusion->addOutput(vm.var); - { - ExpressionEvaluator expr_eval; - - expr_eval.bind(tv0->axis(0)->extent(), 256L); - expr_eval.bind(tv0->axis(1)->extent(), 128L); - expr_eval.bind(tv0->axis(2)->extent(), 28L); - expr_eval.bind(tv0->axis(3)->extent(), 28L); - expr_eval.bind(s0, 4L); - - auto initial_info = DynamicTransform::getInitialInfo(fusion.get()); - auto info = DynamicTransformConcretizationInfo(&initial_info, &expr_eval); - - TORCH_CHECK( - info.getReshapeTransforms().size() == 1, - "Expected to have one reshape transform: ", - info.toString()); + FusionExecutorCache fusion_executor_cache(std::move(fusion)); - DynamicTransform::concretizeFusion(fusion.get(), &info); + auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); + at::Tensor at0 = at::randn({256, 128, 28, 28}, options); + std::vector aten_inputs = {at0, 32}; + auto outputs = fusion_executor_cache.runFusionWithInputs(aten_inputs); + auto at1 = at0.reshape({256, 4, 32, 28, 28}); + auto atmean = at1.mean({2, 3, 4}, /*keepdim*/ true); + auto atvar = at1.var({2, 3, 4}, /*unbiased*/ true, /*keepdim*/ true); - TORCH_CHECK( - !fusion->hasDynamicTransform(), - "Expected to have no dynamic transform"); - } + testValidate( + fusion_executor_cache.fusion(), + outputs, + aten_inputs, + {atmean, atvar}, + __LINE__, + __FILE__); } TEST_F(NVFuserTest, Issue249_CUDA) { From a207baecb1f8cb3e973f4b9a3275982b114f7edd Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Thu, 20 Jul 2023 10:20:35 -0400 Subject: [PATCH 2/7] Propagate reshape extents at concretization --- csrc/dynamic_transform.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/csrc/dynamic_transform.cpp b/csrc/dynamic_transform.cpp index f9963b059f4..30c0835ce7e 100644 --- a/csrc/dynamic_transform.cpp +++ b/csrc/dynamic_transform.cpp @@ -497,6 +497,22 @@ void DynamicTransformConcretizer::concretizeReshape() { // replacement is valid checkConcretizedUses(incomplete_out_tv, concrete_reshape_out_tv); + // Extent expressions often change when concretizing a reshape. Here we + // replace these in all downstream expressions so that the Fusion looks just + // like it would have if we had used a static reshape instead. + auto old_rfactor = incomplete_out_tv->getMaybeRFactorDomain(); + auto new_rfactor = concrete_reshape_out_tv->getMaybeRFactorDomain(); + TORCH_INTERNAL_ASSERT( + old_rfactor.size() == new_rfactor.size(), + "Concretized reshape rfactor size does not match symbolic rfactor"); + for (auto idx : c10::irange(new_rfactor.size())) { + auto old_extent = old_rfactor.at(idx)->extent(); + auto new_extent = new_rfactor.at(idx)->extent(); + if (!new_extent->sameAs(old_extent)) { + registerConcretization(old_extent, new_extent); + } + } + // Replace the old tensor with the new concretized tensor auto uses = incomplete_out_tv->uses(); for (auto use_of_old_tv : uses) { From a70be816abbe79fd7a61e45d6bfdb21b24950bda Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Thu, 20 Jul 2023 10:21:15 -0400 Subject: [PATCH 3/7] Move TensorView::convertRfactorToRootDomain to fusion_segmenter.cpp This allows us to just do one traversal for the whole segment to replace vals instead of one traversal per input TV. --- csrc/fusion_segmenter.cpp | 62 ++++++++++++++++++++++++++++++++++++-- csrc/ir/interface_nodes.h | 7 ----- csrc/tensor_view.cpp | 63 --------------------------------------- 3 files changed, 59 insertions(+), 73 deletions(-) diff --git a/csrc/fusion_segmenter.cpp b/csrc/fusion_segmenter.cpp index ccb3e95c083..010a20da972 100644 --- a/csrc/fusion_segmenter.cpp +++ b/csrc/fusion_segmenter.cpp @@ -1437,6 +1437,62 @@ std::string toString(const SegmentedFusion* segmented_fusion) { return ss.str(); } +//! Sets the rfactor as root and erases rfactor of all inputs in fusion. Any +//! non-constant expressions in those extents are replaced by new scalars with +//! no definition. These mutations are performed throughout the Fusion so that +//! downstream expressions dependent on the original inputs' rfactor extents can +//! be computed properly. +void convertInputRfactorsToRoots(Fusion* fusion) { + FusionGuard fg(fusion); + + // Holds all Val replacements across all inputs + std::unordered_map replacement_map; + + for (auto tv : ir_utils::filterByType(fusion->inputs())) { + // Create a new root domain and replacement TensorDomain. + // Given an rfactor domain, create a new IterDomain. + // Otherwise, clone the previous IterDomain + std::vector new_root_domain; + auto rfactor = tv->getMaybeRFactorDomain(); + new_root_domain.reserve(rfactor.size()); + + // Does the domain (root / rfactor) contain all concrete sized extents? + bool tv_is_concrete = true; + for (auto id : rfactor) { + if (!id->extent()->isConstScalar()) { + tv_is_concrete = false; + break; + } + } + + for (const auto& id : rfactor) { + if (id->isRFactorProduct()) { + // Create new symbolic extents for rfactor iterDomains + auto domain_extent = (!tv_is_concrete) + ? IrBuilder::create(DataType::Int) + : id->extent(); + replacement_map.emplace(id->extent(), domain_extent); + new_root_domain.push_back(IterDomainBuilder(id) + .extent(domain_extent) + .resetSchedulingParams() + .build()); + } else { + new_root_domain.push_back(id->cloneWithoutRFactor()); + } + } + + TORCH_INTERNAL_ASSERT( + new_root_domain.size() == tv->domain()->contiguity().size()); + auto new_td = IrBuilder::create( + new_root_domain, tv->domain()->contiguity()); + replacement_map.emplace(tv->domain(), new_td); + } + + // This will replace the values in the mapping replacement_map throughout the + // Fusion + ir_utils::replaceValue(fusion, replacement_map); +} + std::unique_ptr SegmentedFusion::makeFusion(SegmentedGroup* sg) { std::unique_ptr fusion_segment = std::make_unique(); @@ -1469,9 +1525,9 @@ std::unique_ptr SegmentedFusion::makeFusion(SegmentedGroup* sg) { fusion_segment->addOutput(complete_to_segment_map.clone(out)); } - for (auto tv : view_tvs) { - tv->convertRfactorToRootDomain(); - } + // Replace all vals that are rfactor extents in fusion_segment->inputs() with + // new Vals so that they can be bound to the segment inputs. + convertInputRfactorsToRoots(fusion_segment.get()); return fusion_segment; } diff --git a/csrc/ir/interface_nodes.h b/csrc/ir/interface_nodes.h index 01a2b45837c..7da5eb10b24 100644 --- a/csrc/ir/interface_nodes.h +++ b/csrc/ir/interface_nodes.h @@ -123,13 +123,6 @@ class TORCH_CUDA_CU_API TensorView : public Val { return domain_; } - //! This is for a TensorView with an rFactor domain that is an input to a - //! fusion segment. We convert the rfactor domain into a new root domain. - //! Any dynamic-sized rfactor iterDomains are given a new symbolic extent. - //! Concrete integer extents are kept. Output TensorViews of any subsequent - //! expressions that use this TensorView are also updated. - void convertRfactorToRootDomain(); - void setContiguity(const std::vector>& contig) { domain()->setContiguity(contig); } diff --git a/csrc/tensor_view.cpp b/csrc/tensor_view.cpp index 741f9a4a8a0..5ac3800e1b7 100644 --- a/csrc/tensor_view.cpp +++ b/csrc/tensor_view.cpp @@ -240,69 +240,6 @@ std::string TensorView::toInlineString(int indent_size) const { return toString(indent_size); } -void TensorView::convertRfactorToRootDomain() { - // For a given TensorView, does its domain (root / rfactor) contain any - // concrete sized extents? - auto is_concrete_tensor = [](TensorView* tv) { - for (auto id : tv->getMaybeRFactorDomain()) { - if (!id->extent()->isConstScalar()) { - return false; - } - } - return true; - }; - - // Create a new root domain and replacement TensorDomain. - // Given an rfactor domain, create a new IterDomain. - // Otherwise, clone the previous IterDomain - auto createReplacementDomain = - [this](const std::vector& replacement_extents) { - TORCH_INTERNAL_ASSERT( - !replacement_extents.empty() && - getMaybeRFactorDomain().size() == replacement_extents.size()); - size_t idx = 0; - std::vector new_root_domain( - getMaybeRFactorDomain().size()); - for (const auto& id : getMaybeRFactorDomain()) { - if (replacement_extents[idx] != nullptr) { - new_root_domain[idx] = IterDomainBuilder(id) - .extent(replacement_extents[idx]) - .resetSchedulingParams() - .build(); - ++idx; - } else { - TORCH_INTERNAL_ASSERT(!id->isRFactorProduct()); - new_root_domain[idx++] = id->cloneWithoutRFactor(); - } - } - - TORCH_INTERNAL_ASSERT( - new_root_domain.size() == domain()->contiguity().size()); - setDomain(IrBuilder::create( - container(), new_root_domain, domain()->contiguity())); - }; - - std::vector rfactor_extents; - std::unordered_map replacement_map; - const auto kThisIsConcreteTensor = is_concrete_tensor(this); - for (const auto& id : getMaybeRFactorDomain()) { - if (id->isRFactorProduct()) { - // Create new symbolic extents for rfactor iterDomains - auto domain_extent = (!kThisIsConcreteTensor) - ? IrBuilder::create(container(), DataType::Int) - : id->extent(); - rfactor_extents.push_back(domain_extent); - replacement_map.emplace(id->extent(), domain_extent); - } else { - rfactor_extents.push_back(nullptr); - } - } - createReplacementDomain(rfactor_extents); - - // Propagate new extent throughout fusion using ValReplacementMutator - ir_utils::replaceValue(fusion(), replacement_map); -} - TensorView::TensorView(const TensorView* src, IrCloner* ir_cloner) : Val(src, ir_cloner), domain_(ir_cloner->clone(src->domain_)), From 98066d270b999f59b4fa98d0651f9d733930fbc7 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Thu, 20 Jul 2023 12:29:59 -0400 Subject: [PATCH 4/7] Only propagate derived extents. Traverse members+attrs --- csrc/dynamic_transform.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/csrc/dynamic_transform.cpp b/csrc/dynamic_transform.cpp index 30c0835ce7e..c82520e8a11 100644 --- a/csrc/dynamic_transform.cpp +++ b/csrc/dynamic_transform.cpp @@ -454,8 +454,8 @@ void DynamicTransformConcretizer::concretize() { // Finally, propagate concretized domains auto all_stmts = StmtSort::getStmts( info_->fusion(), - /*traverse_members*/ false, - /*traverse_attributes*/ false, + /*traverse_members*/ true, + /*traverse_attributes*/ true, /*traverse_siblings*/ true); for (auto tv : ir_utils::filterByType(all_stmts)) { mutate(tv); @@ -508,7 +508,10 @@ void DynamicTransformConcretizer::concretizeReshape() { for (auto idx : c10::irange(new_rfactor.size())) { auto old_extent = old_rfactor.at(idx)->extent(); auto new_extent = new_rfactor.at(idx)->extent(); - if (!new_extent->sameAs(old_extent)) { + // If the old extent did not have a definition, we don't need to replace + // it, since it will get bound whenever this tensor is a segmentation + // edge. + if (old_extent->definition() && !new_extent->sameAs(old_extent)) { registerConcretization(old_extent, new_extent); } } From deeabf64f9a153f41213043435a71ade3e33aa8f Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Thu, 20 Jul 2023 12:30:30 -0400 Subject: [PATCH 5/7] Update comment about dynamic -1 in ReshapeReductionShmoo --- test/test_dynamic_transform.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/test_dynamic_transform.cpp b/test/test_dynamic_transform.cpp index 074a0b8d1ef..fcd3b59f9b9 100644 --- a/test/test_dynamic_transform.cpp +++ b/test/test_dynamic_transform.cpp @@ -849,7 +849,9 @@ TEST_F(NVFuserTest, FusionDynamicReshapeReductionShmoo_CUDA) { {{8, 3 * 5, 7, 9}, {8, 3, 5 * 7, 9}, false}, // merge(1) osplit(1, 3) // test passing -1 dynamically for dimension size - // This currently fails. see https://github.com/NVIDIA/Fuser/issues/249 + // This is unsupported. See https://github.com/NVIDIA/Fuser/issues/249 + // Values of -1 must be passed as constants instead of input-dependent + // scalars. //{{8, 3 * 5, 7, 9}, {8, 3, -1, 9}, false} // merge(1) osplit(1, 3) }; reductionDynamicViewAddFusion( From 5996e7a3033124ad5acd737dedc728d7498a6da1 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Fri, 21 Jul 2023 13:23:17 -0400 Subject: [PATCH 6/7] Clean up and explain condition for replacing rfactor IDs --- csrc/fusion_segmenter.cpp | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/csrc/fusion_segmenter.cpp b/csrc/fusion_segmenter.cpp index 010a20da972..a7ba0bfb750 100644 --- a/csrc/fusion_segmenter.cpp +++ b/csrc/fusion_segmenter.cpp @@ -1456,21 +1456,18 @@ void convertInputRfactorsToRoots(Fusion* fusion) { auto rfactor = tv->getMaybeRFactorDomain(); new_root_domain.reserve(rfactor.size()); - // Does the domain (root / rfactor) contain all concrete sized extents? - bool tv_is_concrete = true; - for (auto id : rfactor) { - if (!id->extent()->isConstScalar()) { - tv_is_concrete = false; - break; - } - } - for (const auto& id : rfactor) { - if (id->isRFactorProduct()) { + // We replace any IterDomain that is an rfactor product since we must cut + // any connections between root and rfactor. Additionally, we replace any + // IterDomain whose extent is symbolic and is derived from other scalars; + // this ensures that we can bind values to any non-constant extents in + // this tensor. + auto domain_extent = !id->definition() || id->extent()->isConstScalar() + ? id->extent() + : IrBuilder::create(DataType::Int); + + if (id->isRFactorProduct() || domain_extent != id->extent()) { // Create new symbolic extents for rfactor iterDomains - auto domain_extent = (!tv_is_concrete) - ? IrBuilder::create(DataType::Int) - : id->extent(); replacement_map.emplace(id->extent(), domain_extent); new_root_domain.push_back(IterDomainBuilder(id) .extent(domain_extent) From d05d1e7306e464c237e2562b82de844405cdf738 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Fri, 21 Jul 2023 13:26:22 -0400 Subject: [PATCH 7/7] Revert "Clean up and explain condition for replacing rfactor IDs" This reverts commit 5996e7a3033124ad5acd737dedc728d7498a6da1. --- csrc/fusion_segmenter.cpp | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/csrc/fusion_segmenter.cpp b/csrc/fusion_segmenter.cpp index a7ba0bfb750..010a20da972 100644 --- a/csrc/fusion_segmenter.cpp +++ b/csrc/fusion_segmenter.cpp @@ -1456,18 +1456,21 @@ void convertInputRfactorsToRoots(Fusion* fusion) { auto rfactor = tv->getMaybeRFactorDomain(); new_root_domain.reserve(rfactor.size()); + // Does the domain (root / rfactor) contain all concrete sized extents? + bool tv_is_concrete = true; + for (auto id : rfactor) { + if (!id->extent()->isConstScalar()) { + tv_is_concrete = false; + break; + } + } + for (const auto& id : rfactor) { - // We replace any IterDomain that is an rfactor product since we must cut - // any connections between root and rfactor. Additionally, we replace any - // IterDomain whose extent is symbolic and is derived from other scalars; - // this ensures that we can bind values to any non-constant extents in - // this tensor. - auto domain_extent = !id->definition() || id->extent()->isConstScalar() - ? id->extent() - : IrBuilder::create(DataType::Int); - - if (id->isRFactorProduct() || domain_extent != id->extent()) { + if (id->isRFactorProduct()) { // Create new symbolic extents for rfactor iterDomains + auto domain_extent = (!tv_is_concrete) + ? IrBuilder::create(DataType::Int) + : id->extent(); replacement_map.emplace(id->extent(), domain_extent); new_root_domain.push_back(IterDomainBuilder(id) .extent(domain_extent)