From b5dafa4d60b12d31aeaded1c8c0ff709585c1873 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Wed, 21 Aug 2024 12:21:57 -0400 Subject: [PATCH 01/17] Add input/expand values from Resize to conc info --- csrc/dynamic_transform.cpp | 71 +++++++++++++++------------- csrc/dynamic_transform.h | 12 +++-- tests/cpp/test_dynamic_transform.cpp | 4 ++ 3 files changed, 51 insertions(+), 36 deletions(-) diff --git a/csrc/dynamic_transform.cpp b/csrc/dynamic_transform.cpp index 726555769a0..765972c841d 100644 --- a/csrc/dynamic_transform.cpp +++ b/csrc/dynamic_transform.cpp @@ -396,27 +396,27 @@ void DynamicTransformConcretizationInfo::analyzeResizes( "Found non-dynamic Resize in initial concretization info: ", op->toString()); - auto extent_val = expr_eval->evaluate(out_id->getMaybeExpandedExtent()); + PolymorphicValue input_extent = + expr_eval->evaluate(op->in()->getMaybeExpandedExtent()); NVF_ERROR( - extent_val.hasValue(), - "Cannot evaluate the extent of a resized domain: ", - out_id->toString()); - NVF_ERROR( - extent_val.is(), - "Invalid evaluated value of resized domain extent: ", - out_id->toString()); - auto extent_int = extent_val.as(); + input_extent.hasValue(), + "Could not compute input extent to dynamic resize ", + op->toString()); + + PolymorphicValue left_expand = expr_eval->evaluate(op->leftExpand()); NVF_ERROR( - extent_int >= 0, - "Invalid resized domain extent ", - extent_int, - " for domain ", - out_id->toString()); + left_expand.hasValue(), + "Could not compute left expand of dynamic resize ", + op->toString()); - auto iter_type = - extent_int == 1 ? IterType::Broadcast : IterType::Iteration; + PolymorphicValue right_expand = expr_eval->evaluate(op->rightExpand()); + NVF_ERROR( + right_expand.hasValue(), + "Could not compute right expand of dynamic resize ", + op->toString()); - resize_itertypes_.emplace_back(id_index, iter_type); + resize_extents_.emplace_back( + id_index, ConcreteResize{input_extent, left_expand, right_expand}); } } @@ -505,7 +505,7 @@ bool DynamicTransformConcretizationInfo::operator==( } if (reshape_transforms_.size() != other.reshape_transforms_.size() || - resize_itertypes_.size() != other.resize_itertypes_.size() || + resize_extents_.size() != other.resize_extents_.size() || empty_extents_.size() != other.empty_extents_.size() || factory_output_itertypes_.size() != other.factory_output_itertypes_.size()) { @@ -520,10 +520,8 @@ bool DynamicTransformConcretizationInfo::operator==( } } - for (const auto i : c10::irange((int64_t)resize_itertypes_.size())) { - const auto& itertype = resize_itertypes_.at(i); - const auto& other_itertype = other.resize_itertypes_.at(i); - if (itertype != other_itertype) { + for (const auto i : c10::irange((int64_t)resize_extents_.size())) { + if (resize_extents_[i] != other.resize_extents_[i]) { return false; } } @@ -577,12 +575,15 @@ std::string DynamicTransformConcretizationInfo::toString() const { } indent(ss, 1) << "Resize:\n"; NVF_ERROR( - resize_itertypes_.size() == + resize_extents_.size() == initial_info_->getDynamicResizedIterDomains().size()); - for (const auto& [id_index, iter_type] : resize_itertypes_) { + for (const auto& [id_index, concrete_resize] : resize_extents_) { + const auto& [input_extent, left_expand, right_expand] = concrete_resize; auto id = initial_info_->getDynamicResizedIterDomains().at(id_index); - indent(ss, 2) << id->toString() << " (index=" << id_index << "), " - << iter_type << "\n"; + indent(ss, 2) << id->toString() << " (index=" << id_index << ")," + << " input_extent=" << input_extent + << " left_expand=" << left_expand + << " right_expand=" << right_expand << "\n"; } indent(ss, 1) << "Expand:\n"; NVF_ERROR( @@ -931,7 +932,13 @@ void DynamicTransformConcretizer::concretizeReshape() { void DynamicTransformConcretizer::concretizeResize() { // Concretize each resize op. - for (const auto& [id_index, iter_type] : info_->getResizeIterTypes()) { + for (const auto& [id_index, concrete_resize] : info_->getResizeExtents()) { + const auto& [input_extent, left_expand, right_expand] = concrete_resize; + + IterType iter_type = input_extent + left_expand + right_expand == 1 + ? IterType::Broadcast + : IterType::Iteration; + auto id = info_->initialInfo()->getDynamicResizedIterDomains().at(id_index); NVF_CHECK( id->definition() && id->definition()->isA(), @@ -1490,8 +1497,11 @@ size_t DynamicTransformConcretizationInfo::hash() const { for (const auto& extent_idx : getEmptyExtents()) { hashCombine(hash, (size_t)extent_idx); } - for (const auto& [id, iter_type] : getResizeIterTypes()) { - hashCombine(hash, (size_t)iter_type); + for (const auto& [id, concrete_resize] : getResizeExtents()) { + const auto& [input_extent, left_expand, right_expand] = concrete_resize; + hashCombine(hash, (size_t)input_extent); + hashCombine(hash, (size_t)left_expand); + hashCombine(hash, (size_t)right_expand); } for (const auto& pair_vec : getFactoryOutputIterTypes()) { for (const auto& [pos, iter_type] : pair_vec) { @@ -1499,9 +1509,6 @@ size_t DynamicTransformConcretizationInfo::hash() const { hashCombine(hash, (size_t)iter_type); } } - for (const auto& [id, iter_type] : getResizeIterTypes()) { - hashCombine(hash, (size_t)iter_type); - } for (const auto& [id, expand_axes] : getExpandAxes()) { hashCombine(hash, (size_t)id); for (bool e : expand_axes) { diff --git a/csrc/dynamic_transform.h b/csrc/dynamic_transform.h index 98376007384..4cf06937994 100644 --- a/csrc/dynamic_transform.h +++ b/csrc/dynamic_transform.h @@ -169,11 +169,15 @@ class DynamicTransformConcretizationInfo { return reshape_transforms_; } + //! Holds input extent, left expansion, right expansion + using ConcreteResize = std::tuple; + //! Return a vector of pairs holding the index of each resized IterDomain in //! the vector returned by initialInfo()->getDynamicResizedIterDomains(), //! along with the IterType it should be concretized to. - const std::vector>& getResizeIterTypes() const { - return resize_itertypes_; + const std::vector>& getResizeExtents() + const { + return resize_extents_; } //! Return a vector of pairs holding the index of each expanded TensorView in @@ -259,8 +263,8 @@ class DynamicTransformConcretizationInfo { //! Holds the index of the resized IterDomain (output of the Resize op) in the //! vector returned by initial_info_->getDynamicResizedIterDomains() along - //! with its concretized IterType - std::vector> resize_itertypes_; + //! with its concretized input extent, left and right expansions + std::vector> resize_extents_; //! Holds the index of the expanded TensorView in the vector returned by //! initial_info_->getDynamicExpandedTensorViews(), and a corresponding vector diff --git a/tests/cpp/test_dynamic_transform.cpp b/tests/cpp/test_dynamic_transform.cpp index 8086d4f5035..7315b4b0941 100644 --- a/tests/cpp/test_dynamic_transform.cpp +++ b/tests/cpp/test_dynamic_transform.cpp @@ -931,6 +931,10 @@ void reductionDynamicPadAddFusion( auto pad_widths = std::get<1>(inv); auto expect_miss = std::get<2>(inv); + // Since we concretize the inputs and expands for each Resize, we always + // expect a cache miss unless we have re-used the exact same values. + expect_miss = true; + NVF_ERROR(input_shape.size() == input_dims); NVF_ERROR(pad_widths.size() == num_pad_widths); From 1cf4e11a1568be3fa849fbc072a1a33b6deebf85 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Thu, 22 Aug 2024 15:31:13 -0400 Subject: [PATCH 02/17] Fix up concretizeResize and mutate(TensorView) --- csrc/dynamic_transform.cpp | 109 ++++++++++++++++++++++++++++++++++--- 1 file changed, 101 insertions(+), 8 deletions(-) diff --git a/csrc/dynamic_transform.cpp b/csrc/dynamic_transform.cpp index 765972c841d..2adbf9c93e2 100644 --- a/csrc/dynamic_transform.cpp +++ b/csrc/dynamic_transform.cpp @@ -666,6 +666,9 @@ class DynamicTransformConcretizer : public OptOutMutator { //! Use this instead of calling registerMutation directly, since it will also //! check that the concretized value is a valid input to all of its uses. void registerConcretization(Val* old_val, Val* new_val) { + if (new_val == old_val) { + return; + } symbolic_to_concretized_map_.emplace(old_val, new_val); checkConcretizedUses(old_val, new_val); NVF_ERROR( @@ -931,7 +934,61 @@ void DynamicTransformConcretizer::concretizeReshape() { } void DynamicTransformConcretizer::concretizeResize() { - // Concretize each resize op. + // First mutate IterDomains so that each Resize producer has constant extent + for (const auto& [id_index, concrete_resize] : info_->getResizeExtents()) { + const auto& [input_extent, left_expand, right_expand] = concrete_resize; + + IterDomain* id = + info_->initialInfo()->getDynamicResizedIterDomains().at(id_index); + + NVF_CHECK( + id->definition() && id->definition()->isA(), + "Resized IterDomain must have a Resize definition"); + IterDomain* orig_in_id = id->definition()->as()->in(); + + IterDomain* in_id = maybeMutated(orig_in_id)->as(); + + bool has_const_extent = in_id->getMaybeExpandedExtent()->isConst(); + + if (has_const_extent && !in_id->isSymbolic()) { + continue; + } + + // Create a new concretized IterDomain to replace the input, if it is not + // already replaced (for example one IterDomain could theoretically be + // input to multiple Resize ops). + IterDomainBuilder builder(in_id); + + if (!has_const_extent) { + Val* new_extent = IrBuilder::create( + input_extent, in_id->getMaybeExpandedExtent()->dtype()); + if (in_id->hasExpandedExtent()) { + builder.expanded_extent(new_extent); + } else { + builder.extent(new_extent); + } + } + + if (in_id->isSymbolic()) { + builder.iter_type( + input_extent + left_expand + right_expand == 1 ? IterType::Broadcast + : IterType::Iteration); + } + + IterDomain* new_in_id = builder.build(); + + ir_utils::replaceValInExprInputs(id->definition(), orig_in_id, new_in_id); + + registerConcretization(orig_in_id, new_in_id); + if (!orig_in_id->getMaybeExpandedExtent()->sameAs( + new_in_id->getMaybeExpandedExtent())) { + registerConcretization( + orig_in_id->getMaybeExpandedExtent(), + new_in_id->getMaybeExpandedExtent()); + } + } + + // Concretize each resize op using constant expand values for (const auto& [id_index, concrete_resize] : info_->getResizeExtents()) { const auto& [input_extent, left_expand, right_expand] = concrete_resize; @@ -939,19 +996,46 @@ void DynamicTransformConcretizer::concretizeResize() { ? IterType::Broadcast : IterType::Iteration; - auto id = info_->initialInfo()->getDynamicResizedIterDomains().at(id_index); + auto orig_id = + info_->initialInfo()->getDynamicResizedIterDomains().at(id_index); + + // id might have been updated in the previous loop + IterDomain* id = maybeMutated(orig_id)->as(); + NVF_CHECK( id->definition() && id->definition()->isA(), "Resized IterDomain must have a Resize definition"); auto def = id->definition()->as(); + + Val* left_expand_val = def->leftExpand(); + if (!def->leftExpand()->isConst()) { + left_expand_val = + IrBuilder::create(left_expand, def->leftExpand()->dtype()); + registerConcretization(def->leftExpand(), left_expand_val); + } + + Val* right_expand_val = def->rightExpand(); + if (!def->rightExpand()->isConst()) { + right_expand_val = + IrBuilder::create(right_expand, def->rightExpand()->dtype()); + registerConcretization(def->rightExpand(), right_expand_val); + } + auto new_id = IterDomain::resize( - def->in(), - def->leftExpand(), - def->rightExpand(), + maybeMutated(def->in())->as(), + left_expand_val, + right_expand_val, id->isRFactorProduct(), iter_type); - registerConcretization(id, new_id); + registerConcretization(orig_id, new_id); + + // Concretize the output shape which is constant + if (!orig_id->extent()->sameAs(new_id->extent())) { + registerConcretization(orig_id->extent(), new_id->extent()); + } + + // concretize scalars for left and right expand } } @@ -1120,8 +1204,14 @@ void DynamicTransformConcretizer::mutate(TensorView* tv) { } } // Update the IterType of each output + bool preserve_def = true; for (auto out_id : ir_utils::filterByType(expr->outputs())) { auto mut_id = maybeMutated(out_id)->as(); + // Preserve the definition, unless there is an output with a + // pre-existing concretization. In that case, we assume that + // concretization was performed purposely and we refuse to redefine it. + preserve_def = preserve_def && mut_id == out_id; + if (!mut_id->isSymbolic()) { // We are only concretizing IterType here, so if we have already // concretized the iter_type for this ID, we can skip this. @@ -1158,8 +1248,11 @@ void DynamicTransformConcretizer::mutate(TensorView* tv) { // unimportant, except that mutate(Expr*) does not return the replacement // Expr*, whereas mutateExprOutputsOnly does. - // Set expr as the definition for concretized outputs - expr = mutateExprOutputsOnly(expr); + if (preserve_def) { + // Set expr as the definition for concretized outputs + expr = mutateExprOutputsOnly(expr); + } + // Replace inputs and attributes that were concretized mutate(expr); } From 825bca16369d08828a5fda8ef58e58c6ecb17d16 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Thu, 22 Aug 2024 15:32:53 -0400 Subject: [PATCH 03/17] Avoid creating immediate cycles in OptOutMutator::mutateExpr --- csrc/mutator.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/csrc/mutator.cpp b/csrc/mutator.cpp index 2e7090373a8..17e0017e89c 100644 --- a/csrc/mutator.cpp +++ b/csrc/mutator.cpp @@ -84,7 +84,8 @@ void OptOutMutator::mutate(Val* s) {} void OptOutMutator::mutate(NamedScalar* ns) {} -void OptOutMutator::mutate(IterDomain* id) { +void OptOutMutator::mutate(IterDomain* orig_id) { + IterDomain* id = maybeMutated(orig_id)->as(); Val* start = maybeMutated(id->start()); Val* extent = maybeMutated(id->extent()); Val* expanded_extent = nullptr; @@ -106,6 +107,7 @@ void OptOutMutator::mutate(IterDomain* id) { .build(); // This guarantees we replace id in all downstream expressions + registerMutation(orig_id, new_id); registerMutation(id, new_id); // Preserve definition if it exists in id. This is important since otherwise @@ -184,13 +186,23 @@ Expr* OptOutMutator::mutateExpr( mutated_outputs.reserve(op->outputs().size()); for (auto output : op->outputs()) { mutated_outputs.emplace_back( - replace_outputs ? maybeMutated(output) : output); + replace_outputs && + std::find(op->inputs().begin(), op->inputs().end(), output) == + op->inputs().end() + ? maybeMutated(output) + : output); } std::vector mutated_inputs; mutated_inputs.reserve(op->inputs().size()); for (auto input : op->inputs()) { - mutated_inputs.emplace_back(replace_inputs ? maybeMutated(input) : input); + mutated_inputs.emplace_back( + replace_inputs && + std::find( + mutated_outputs.begin(), mutated_outputs.end(), input) == + mutated_outputs.end() + ? maybeMutated(input) + : input); } std::vector mutated_attrs; From bd80c714083066086a9ed4741ce6d17839f6060d Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Fri, 23 Aug 2024 05:50:21 -0400 Subject: [PATCH 04/17] Add DisableOption::ConcretizeResizeExtents --- csrc/dynamic_transform.cpp | 28 ++++++++++++++++++++++++++++ csrc/options.cpp | 1 + csrc/options.h | 1 + 3 files changed, 30 insertions(+) diff --git a/csrc/dynamic_transform.cpp b/csrc/dynamic_transform.cpp index 2adbf9c93e2..33a8f9c622f 100644 --- a/csrc/dynamic_transform.cpp +++ b/csrc/dynamic_transform.cpp @@ -934,6 +934,34 @@ void DynamicTransformConcretizer::concretizeReshape() { } void DynamicTransformConcretizer::concretizeResize() { + if (isOptionDisabled(DisableOption::ConcretizeResizeExtents)) { + for (const auto& [id_index, concrete_resize] : info_->getResizeExtents()) { + const auto& [input_extent, left_expand, right_expand] = concrete_resize; + + IterDomain* id = + info_->initialInfo()->getDynamicResizedIterDomains().at(id_index); + + IterType iter_type = input_extent + left_expand + right_expand == 1 + ? IterType::Broadcast + : IterType::Iteration; + + NVF_CHECK( + id->definition() && id->definition()->isA(), + "Resized IterDomain must have a Resize definition"); + auto def = id->definition()->as(); + + auto new_id = IterDomain::resize( + def->in(), + def->leftExpand(), + def->rightExpand(), + id->isRFactorProduct(), + iter_type); + + registerConcretization(id, new_id); + } + return; + } + // First mutate IterDomains so that each Resize producer has constant extent for (const auto& [id_index, concrete_resize] : info_->getResizeExtents()) { const auto& [input_extent, left_expand, right_expand] = concrete_resize; diff --git a/csrc/options.cpp b/csrc/options.cpp index c2c9a7d8a25..aa83ca302eb 100644 --- a/csrc/options.cpp +++ b/csrc/options.cpp @@ -170,6 +170,7 @@ std::unordered_map> Options< DisableOption>::getOptionsFromEnv() { const std::unordered_map available_options = { {"compile_to_sass", DisableOption::CompileToSass}, + {"concretize_resize_extents", DisableOption::ConcretizeResizeExtents}, {"contig_indexing", DisableOption::ContigIndexing}, {"expr_simplify", DisableOption::ExprSimplify}, {"fallback", DisableOption::Fallback}, diff --git a/csrc/options.h b/csrc/options.h index 073530c0673..f32a02a41dc 100644 --- a/csrc/options.h +++ b/csrc/options.h @@ -110,6 +110,7 @@ enum class EnableOption { enum class DisableOption { CompileToSass, //! Disable direct compilation to sass so the ptx can be //! examined + ConcretizeResizeExtents, //! Concretize input extents and expand of Resize ContigIndexing, //! Disable contiguous indexing ExprSimplify, //! Disable expression simplifier Fallback, //! Disable fallback From 221ed743fe0710ea18c83cf82462da857120d338 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Fri, 23 Aug 2024 08:09:22 -0400 Subject: [PATCH 05/17] Use mutated root ID in propagateFromProducerToConsumer --- csrc/dynamic_transform.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/csrc/dynamic_transform.cpp b/csrc/dynamic_transform.cpp index 33a8f9c622f..ad44ab54c28 100644 --- a/csrc/dynamic_transform.cpp +++ b/csrc/dynamic_transform.cpp @@ -1475,7 +1475,13 @@ bool DynamicTransformConcretizer::propagateFromProducerToConsumer( bool is_concretized = false; for (const auto i : c10::irange((int64_t)root_domain.size())) { - auto root_id = root_domain.at(i); + IterDomain* orig_root_id = root_domain.at(i); + + // This root ID might have already been marked for concretization. For + // example, if it is used in a Resize op then it will be concretized + // earlier in concretizeResize. + auto root_id = maybeMutated(orig_root_id)->as(); + if (root_id->getIterType() != IterType::Symbolic) { continue; } @@ -1492,7 +1498,7 @@ bool DynamicTransformConcretizer::propagateFromProducerToConsumer( bool found = false; for (const auto& c2p : c2p_maps) { - auto p_it = c2p.find(root_id); + auto p_it = c2p.find(orig_root_id); // In some cases, we can exact map to one producer, but not to another. // This is the case for index_select, for example, whose first input is // the tensor to look up values in and whose second input gives the @@ -1552,20 +1558,16 @@ bool DynamicTransformConcretizer::propagateFromProducerToConsumer( // Propagate expanded IterDomains by swapping the extent into the expanded // extent concretized_id = - IterDomainBuilder(maybeMutated(root_id)->as()) + IterDomainBuilder(root_id) .iter_type(*id_type) .extent(FusionGuard::getCurFusion()->oneVal(DataType::Index)) - .expanded_extent( - maybeMutated(root_id)->as()->extent()) + .expanded_extent(root_id->extent()) .build(); } else { - concretized_id = - IterDomainBuilder(maybeMutated(root_id)->as()) - .iter_type(*id_type) - .build(); + concretized_id = IterDomainBuilder(root_id).iter_type(*id_type).build(); } - registerConcretization(root_id, concretized_id); + registerConcretization(orig_root_id, concretized_id); is_concretized = true; } From 79bd846ae98e79ea0f5804be073c5c39351a4685 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Fri, 23 Aug 2024 14:53:02 -0400 Subject: [PATCH 06/17] Standardize concretized info when disable option is given --- csrc/dynamic_transform.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/csrc/dynamic_transform.cpp b/csrc/dynamic_transform.cpp index ad44ab54c28..be59f68dac2 100644 --- a/csrc/dynamic_transform.cpp +++ b/csrc/dynamic_transform.cpp @@ -415,8 +415,21 @@ void DynamicTransformConcretizationInfo::analyzeResizes( "Could not compute right expand of dynamic resize ", op->toString()); - resize_extents_.emplace_back( - id_index, ConcreteResize{input_extent, left_expand, right_expand}); + if (isOptionDisabled(DisableOption::ConcretizeResizeExtents)) { + // If this option is disabled, it means we will only concretize IterType, + // not the extents of Resize ops. In such case we standardize the + // concretization info so that the resize_extents_ entries are equivalent + // to saving IterType. This is so that we avoid unnecessary cache misses + // in cases where the IterType does not change but the specific extents or + // expand values do change. + resize_extents_.emplace_back( + id_index, + ConcreteResize{ + 2, input_extent + left_expand + right_expand == 1 ? -1 : 1, 0}); + } else { + resize_extents_.emplace_back( + id_index, ConcreteResize{input_extent, left_expand, right_expand}); + } } } From 73d180425331959dcc4e13fe8c7a7598062eb37e Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Fri, 23 Aug 2024 14:53:54 -0400 Subject: [PATCH 07/17] Guard against registering trivial mutation in OptOutMutator::mutate(IterDomain*) --- csrc/mutator.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/csrc/mutator.cpp b/csrc/mutator.cpp index 17e0017e89c..cd638e5243c 100644 --- a/csrc/mutator.cpp +++ b/csrc/mutator.cpp @@ -108,7 +108,9 @@ void OptOutMutator::mutate(IterDomain* orig_id) { // This guarantees we replace id in all downstream expressions registerMutation(orig_id, new_id); - registerMutation(id, new_id); + if (id != orig_id) { + registerMutation(id, new_id); + } // Preserve definition if it exists in id. This is important since otherwise // we might disconnect the root to logical transform path. For example if id From 3f45928cc4a664bf67f75f21275dd8c55101c27d Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Fri, 23 Aug 2024 14:55:51 -0400 Subject: [PATCH 08/17] Clean up code that prevents introducing cycles in mutateExpr --- csrc/mutator.cpp | 54 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/csrc/mutator.cpp b/csrc/mutator.cpp index cd638e5243c..6acbd23a355 100644 --- a/csrc/mutator.cpp +++ b/csrc/mutator.cpp @@ -187,32 +187,56 @@ Expr* OptOutMutator::mutateExpr( std::vector mutated_outputs; mutated_outputs.reserve(op->outputs().size()); for (auto output : op->outputs()) { - mutated_outputs.emplace_back( - replace_outputs && - std::find(op->inputs().begin(), op->inputs().end(), output) == - op->inputs().end() - ? maybeMutated(output) - : output); + if (replace_outputs) { + Val* mut_out = maybeMutated(output); + if (mut_out != output && + std::find_if( + op->inputs().begin(), op->inputs().end(), [&](Val* const inp) { + return (replace_inputs ? maybeMutated(inp) : inp) == mut_out; + }) == op->inputs().end()) { + // skip using mutated output if is one of the inputs. + output = mut_out; + } + } + mutated_outputs.emplace_back(output); } std::vector mutated_inputs; mutated_inputs.reserve(op->inputs().size()); for (auto input : op->inputs()) { - mutated_inputs.emplace_back( - replace_inputs && - std::find( - mutated_outputs.begin(), mutated_outputs.end(), input) == - mutated_outputs.end() - ? maybeMutated(input) - : input); + if (replace_inputs) { + Val* mut_inp = maybeMutated(input); + if (mut_inp != input && + std::find_if( + op->outputs().begin(), op->outputs().end(), [&](Val* const outp) { + return (replace_outputs ? maybeMutated(outp) : outp) == mut_inp; + }) == op->outputs().end()) { + // skip using mutated input if is one of the inputs. + input = mut_inp; + } + } + mutated_inputs.emplace_back(input); } std::vector mutated_attrs; mutated_attrs.reserve(op->attributes().size()); for (auto attr : op->attributes()) { if (auto attr_val = dynamic_cast(attr)) { - mutated_attrs.emplace_back( - replace_inputs ? maybeMutated(attr_val) : attr_val); + if (replace_inputs) { + Val* mut_attr = maybeMutated(attr_val); + if (mut_attr != attr_val && + std::find_if( + op->outputs().begin(), + op->outputs().end(), + [&](Val* const outp) { + return (replace_outputs ? maybeMutated(outp) : outp) == + mut_attr; + }) == op->outputs().end()) { + // skip using mutated input if is one of the inputs. + attr_val = mut_attr; + } + } + mutated_attrs.emplace_back(attr_val); } else { mutated_attrs.emplace_back(attr); } From c551e937a1f729dc280b59bcf1eac17258e0ee17 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Fri, 23 Aug 2024 14:56:34 -0400 Subject: [PATCH 09/17] Don't use resize op to create replacement if disable option given --- csrc/dynamic_transform.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/csrc/dynamic_transform.cpp b/csrc/dynamic_transform.cpp index be59f68dac2..311c6614afb 100644 --- a/csrc/dynamic_transform.cpp +++ b/csrc/dynamic_transform.cpp @@ -958,19 +958,15 @@ void DynamicTransformConcretizer::concretizeResize() { ? IterType::Broadcast : IterType::Iteration; + auto def = id->definition()->as(); NVF_CHECK( - id->definition() && id->definition()->isA(), + def != nullptr && def->isA(), "Resized IterDomain must have a Resize definition"); - auto def = id->definition()->as(); - auto new_id = IterDomain::resize( - def->in(), - def->leftExpand(), - def->rightExpand(), - id->isRFactorProduct(), - iter_type); + auto new_id = IterDomainBuilder(id).iter_type(iter_type).build(); registerConcretization(id, new_id); + mutateExprOutputsOnly(def); } return; } From ac080bfb0eeacc09524d334e21d0398091928c15 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Tue, 27 Aug 2024 15:10:29 -0400 Subject: [PATCH 10/17] Remove preserve_def hack. Fix tests --- csrc/dynamic_transform.cpp | 26 ++++++++++---------------- tests/cpp/test_resize.cpp | 8 +++++++- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/csrc/dynamic_transform.cpp b/csrc/dynamic_transform.cpp index 311c6614afb..e1e2bca3328 100644 --- a/csrc/dynamic_transform.cpp +++ b/csrc/dynamic_transform.cpp @@ -963,10 +963,14 @@ void DynamicTransformConcretizer::concretizeResize() { def != nullptr && def->isA(), "Resized IterDomain must have a Resize definition"); - auto new_id = IterDomainBuilder(id).iter_type(iter_type).build(); + auto new_id = IterDomain::resize( + def->in(), + def->leftExpand(), + def->rightExpand(), + id->isRFactorProduct(), + iter_type); registerConcretization(id, new_id); - mutateExprOutputsOnly(def); } return; } @@ -1241,13 +1245,8 @@ void DynamicTransformConcretizer::mutate(TensorView* tv) { } } // Update the IterType of each output - bool preserve_def = true; for (auto out_id : ir_utils::filterByType(expr->outputs())) { auto mut_id = maybeMutated(out_id)->as(); - // Preserve the definition, unless there is an output with a - // pre-existing concretization. In that case, we assume that - // concretization was performed purposely and we refuse to redefine it. - preserve_def = preserve_def && mut_id == out_id; if (!mut_id->isSymbolic()) { // We are only concretizing IterType here, so if we have already @@ -1285,10 +1284,8 @@ void DynamicTransformConcretizer::mutate(TensorView* tv) { // unimportant, except that mutate(Expr*) does not return the replacement // Expr*, whereas mutateExprOutputsOnly does. - if (preserve_def) { - // Set expr as the definition for concretized outputs - expr = mutateExprOutputsOnly(expr); - } + // Set expr as the definition for concretized outputs + expr = mutateExprOutputsOnly(expr); // Replace inputs and attributes that were concretized mutate(expr); @@ -1323,13 +1320,10 @@ void DynamicTransformConcretizer::mutate(TensorDomain* td) { return updated_ids; }; - std::vector root_dom = - td->hasRoot() ? updateIdVec(td->root()) : std::vector(); + std::vector root_dom = updateIdVec(td->root()); std::vector logical_dom = updateIdVec(td->logical()); std::vector loop_domain = updateIdVec(td->loop()); - std::vector alloc_dom = td->hasAllocation() - ? updateIdVec(td->allocation()) - : std::vector(); + std::vector alloc_dom = updateIdVec(td->allocation()); if (!mutated) { return; diff --git a/tests/cpp/test_resize.cpp b/tests/cpp/test_resize.cpp index b74348f15d9..601dd2cadf5 100644 --- a/tests/cpp/test_resize.cpp +++ b/tests/cpp/test_resize.cpp @@ -1277,6 +1277,12 @@ TEST_F(ResizeTest, SliceInputShmoo) { auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); + DisableOptionsGuard dog; + // Without this option set, we will concretize the resized domains' extents + // to constant, meaning we cannot then send in different size inputs + DisableOptionsGuard::getCurOptions().set( + DisableOption::ConcretizeResizeExtents); + { // Concretize so that we set output IterType as Iteration. We should now // have expressions that work with any input range. @@ -3636,7 +3642,7 @@ TEST_F(ResizeTest, Chunk_NegativeSize) { auto in_tensor = at::randn({13}).cuda(); fec.runFusionWithInputs({in_tensor}); }, - ThrowsMessage(HasSubstr("Invalid resized domain extent"))); + ThrowsMessage(HasSubstr("Unexpected size of axis: -2"))); } TEST_F(ResizeTest, Chunk_SizeZero) { From 93bba71a49dcf2e49d95dfdd1b79befd59d40e64 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Tue, 27 Aug 2024 20:00:41 -0400 Subject: [PATCH 11/17] Use correct dynamic vals --- csrc/dynamic_transform.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/csrc/dynamic_transform.cpp b/csrc/dynamic_transform.cpp index e1e2bca3328..42026750a4f 100644 --- a/csrc/dynamic_transform.cpp +++ b/csrc/dynamic_transform.cpp @@ -201,10 +201,17 @@ class DynamicTransformInitialInfoBuilder : public IterVisitor { if (!id->definition() || id->getIterType() != IterType::Symbolic) { continue; } - if (id->definition()->isA()) { + if (auto rop = dynamic_cast(id->definition())) { info_.dynamic_resized_ids_.push_back(id); - // extent of output determines its IterType - loop_dynamic_vals_.push_back(id->extent()); + if (isOptionDisabled(DisableOption::ConcretizeResizeExtents)) { + // extent of output determines its IterType + loop_dynamic_vals_.push_back(id->extent()); + } else { + // The input extent and both expand vals are all concretized + loop_dynamic_vals_.push_back(rop->in()->extent()); + loop_dynamic_vals_.push_back(rop->leftExpand()); + loop_dynamic_vals_.push_back(rop->rightExpand()); + } } } } @@ -1075,8 +1082,6 @@ void DynamicTransformConcretizer::concretizeResize() { if (!orig_id->extent()->sameAs(new_id->extent())) { registerConcretization(orig_id->extent(), new_id->extent()); } - - // concretize scalars for left and right expand } } From a84669fe7fef9a60a27f5db4f88f1943c883856b Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Tue, 27 Aug 2024 20:21:58 -0400 Subject: [PATCH 12/17] Disable ConcretizeResizeExtents in ForLoops test --- tests/cpp/test_host_irs.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/cpp/test_host_irs.cpp b/tests/cpp/test_host_irs.cpp index 9764d71d571..62009ee198a 100644 --- a/tests/cpp/test_host_irs.cpp +++ b/tests/cpp/test_host_irs.cpp @@ -441,6 +441,18 @@ TEST_P(HostIrTest, ForLoops) { HostIrExecutorParams params; auto [use_fusion_executor_cache] = GetParam(); params.use_fusion_executor_cache = use_fusion_executor_cache; + + // HostIrExecutor concretizes the fusion once, using the first set of inputs. + // This is insufficient in this case since the concretization should depend + // on the for loop index, so we should get a different kernel in each + // iteration. + // TODO: This should not be necessary. HostIrExecutor should detect if the + // loop index is a dynamic value and if so then it should concretize a + // separate FusionKernelRuntime in each iteration. + DisableOptionsGuard dog; + DisableOptionsGuard::getCurOptions().set( + DisableOption::ConcretizeResizeExtents); + HostIrExecutor hie(std::move(hic), /*communicator=*/nullptr, params); auto options = at::TensorOptions().dtype(at::kLong).device(at::kCUDA, 0); @@ -455,6 +467,9 @@ TEST_P(HostIrTest, ForLoops) { } at::Tensor expected_result = torch::tensor({expected_result_data}, options); + std::cout << "buffer_at=" << buffer_at.item() << std::endl; + std::cout << "expected_result=" << expected_result.item() << std::endl; + EXPECT_TRUE(expected_result.equal(buffer_at)); } From 809a7f923b34c71accde0c94787ae290d6e9bd0c Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Tue, 27 Aug 2024 20:35:49 -0400 Subject: [PATCH 13/17] Remove debug print --- tests/cpp/test_host_irs.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/cpp/test_host_irs.cpp b/tests/cpp/test_host_irs.cpp index 62009ee198a..6acfd8c7bf0 100644 --- a/tests/cpp/test_host_irs.cpp +++ b/tests/cpp/test_host_irs.cpp @@ -467,9 +467,6 @@ TEST_P(HostIrTest, ForLoops) { } at::Tensor expected_result = torch::tensor({expected_result_data}, options); - std::cout << "buffer_at=" << buffer_at.item() << std::endl; - std::cout << "expected_result=" << expected_result.item() << std::endl; - EXPECT_TRUE(expected_result.equal(buffer_at)); } From d15d1ef2a438ee095a20e1e97a3e3c46ed522219 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Mon, 9 Sep 2024 12:36:02 -0400 Subject: [PATCH 14/17] Switch from opt-out to opt-in (EnableOption) --- csrc/dynamic_transform.cpp | 18 +++++++++--------- csrc/options.cpp | 2 +- csrc/options.h | 2 +- tests/cpp/test_host_irs.cpp | 11 ----------- tests/cpp/test_resize.cpp | 6 ------ 5 files changed, 11 insertions(+), 28 deletions(-) diff --git a/csrc/dynamic_transform.cpp b/csrc/dynamic_transform.cpp index 606837901ff..82ea32c118b 100644 --- a/csrc/dynamic_transform.cpp +++ b/csrc/dynamic_transform.cpp @@ -204,14 +204,14 @@ class DynamicTransformInitialInfoBuilder : public IterVisitor { } if (auto rop = dynamic_cast(id->definition())) { info_.dynamic_resized_ids_.push_back(id); - if (isOptionDisabled(DisableOption::ConcretizeResizeExtents)) { - // extent of output determines its IterType - loop_dynamic_vals_.push_back(id->extent()); - } else { + if (isOptionEnabled(EnableOption::ConcretizeResizeExtents)) { // The input extent and both expand vals are all concretized loop_dynamic_vals_.push_back(rop->in()->extent()); loop_dynamic_vals_.push_back(rop->leftExpand()); loop_dynamic_vals_.push_back(rop->rightExpand()); + } else { + // extent of output determines its IterType + loop_dynamic_vals_.push_back(id->extent()); } } } @@ -425,7 +425,10 @@ void DynamicTransformConcretizationInfo::analyzeResizes( "Could not compute right expand of dynamic resize ", op->toString()); - if (isOptionDisabled(DisableOption::ConcretizeResizeExtents)) { + if (isOptionEnabled(EnableOption::ConcretizeResizeExtents)) { + resize_extents_.emplace_back( + id_index, ConcreteResize{input_extent, left_expand, right_expand}); + } else { // If this option is disabled, it means we will only concretize IterType, // not the extents of Resize ops. In such case we standardize the // concretization info so that the resize_extents_ entries are equivalent @@ -436,9 +439,6 @@ void DynamicTransformConcretizationInfo::analyzeResizes( id_index, ConcreteResize{ 2, input_extent + left_expand + right_expand == 1 ? -1 : 1, 0}); - } else { - resize_extents_.emplace_back( - id_index, ConcreteResize{input_extent, left_expand, right_expand}); } } } @@ -957,7 +957,7 @@ void DynamicTransformConcretizer::concretizeReshape() { } void DynamicTransformConcretizer::concretizeResize() { - if (isOptionDisabled(DisableOption::ConcretizeResizeExtents)) { + if (!isOptionEnabled(EnableOption::ConcretizeResizeExtents)) { for (const auto& [id_index, concrete_resize] : info_->getResizeExtents()) { const auto& [input_extent, left_expand, right_expand] = concrete_resize; diff --git a/csrc/options.cpp b/csrc/options.cpp index 9013b78301f..0409dff5b45 100644 --- a/csrc/options.cpp +++ b/csrc/options.cpp @@ -152,6 +152,7 @@ template <> std::unordered_map> Options< EnableOption>::getOptionsFromEnv() { const std::unordered_map available_options = { + {"concretize_resize_extents", EnableOption::ConcretizeResizeExtents}, {"fuse_matmul", EnableOption::FuseMatmul}, {"fuse_multiple_matmuls", EnableOption::FuseMultipleMatmuls}, {"id_model", EnableOption::IdModel}, @@ -172,7 +173,6 @@ std::unordered_map> Options< DisableOption>::getOptionsFromEnv() { const std::unordered_map available_options = { {"compile_to_sass", DisableOption::CompileToSass}, - {"concretize_resize_extents", DisableOption::ConcretizeResizeExtents}, {"contig_indexing", DisableOption::ContigIndexing}, {"expr_simplify", DisableOption::ExprSimplify}, {"fallback", DisableOption::Fallback}, diff --git a/csrc/options.h b/csrc/options.h index 17d5bffda43..55081e49f90 100644 --- a/csrc/options.h +++ b/csrc/options.h @@ -91,6 +91,7 @@ enum class DebugDumpOption { //! These can be set through the `NVFUSER_ENABLE` environment variable //! enum class EnableOption { + ConcretizeResizeExtents, //! Concretize input extents and expand of Resize FuseMatmul, //! Enable automatic fusion of matmul and linear ops FuseMultipleMatmuls, //! Allow fusing more than one matmul in a single kernel IdModel, //! Enable IdModel @@ -112,7 +113,6 @@ enum class EnableOption { enum class DisableOption { CompileToSass, //! Disable direct compilation to sass so the ptx can be //! examined - ConcretizeResizeExtents, //! Concretize input extents and expand of Resize ContigIndexing, //! Disable contiguous indexing ExprSimplify, //! Disable expression simplifier Fallback, //! Disable fallback diff --git a/tests/cpp/test_host_irs.cpp b/tests/cpp/test_host_irs.cpp index 6acfd8c7bf0..4ae5d716427 100644 --- a/tests/cpp/test_host_irs.cpp +++ b/tests/cpp/test_host_irs.cpp @@ -442,17 +442,6 @@ TEST_P(HostIrTest, ForLoops) { auto [use_fusion_executor_cache] = GetParam(); params.use_fusion_executor_cache = use_fusion_executor_cache; - // HostIrExecutor concretizes the fusion once, using the first set of inputs. - // This is insufficient in this case since the concretization should depend - // on the for loop index, so we should get a different kernel in each - // iteration. - // TODO: This should not be necessary. HostIrExecutor should detect if the - // loop index is a dynamic value and if so then it should concretize a - // separate FusionKernelRuntime in each iteration. - DisableOptionsGuard dog; - DisableOptionsGuard::getCurOptions().set( - DisableOption::ConcretizeResizeExtents); - HostIrExecutor hie(std::move(hic), /*communicator=*/nullptr, params); auto options = at::TensorOptions().dtype(at::kLong).device(at::kCUDA, 0); diff --git a/tests/cpp/test_resize.cpp b/tests/cpp/test_resize.cpp index a1a3b437730..7bad424717c 100644 --- a/tests/cpp/test_resize.cpp +++ b/tests/cpp/test_resize.cpp @@ -1277,12 +1277,6 @@ TEST_F(ResizeTest, SliceInputShmoo) { auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); - DisableOptionsGuard dog; - // Without this option set, we will concretize the resized domains' extents - // to constant, meaning we cannot then send in different size inputs - DisableOptionsGuard::getCurOptions().set( - DisableOption::ConcretizeResizeExtents); - { // Concretize so that we set output IterType as Iteration. We should now // have expressions that work with any input range. From e68e37b8763690c151e676a6e71163e7fe0e4ad3 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle <1454944+jacobhinkle@users.noreply.github.com> Date: Mon, 9 Sep 2024 12:51:09 -0400 Subject: [PATCH 15/17] Apply suggestions from code review Co-authored-by: jjsjann123 --- csrc/mutator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csrc/mutator.cpp b/csrc/mutator.cpp index 6acbd23a355..716f9eeca9f 100644 --- a/csrc/mutator.cpp +++ b/csrc/mutator.cpp @@ -211,7 +211,7 @@ Expr* OptOutMutator::mutateExpr( op->outputs().begin(), op->outputs().end(), [&](Val* const outp) { return (replace_outputs ? maybeMutated(outp) : outp) == mut_inp; }) == op->outputs().end()) { - // skip using mutated input if is one of the inputs. + // skip using mutated input if is one of the outputs. input = mut_inp; } } @@ -232,7 +232,7 @@ Expr* OptOutMutator::mutateExpr( return (replace_outputs ? maybeMutated(outp) : outp) == mut_attr; }) == op->outputs().end()) { - // skip using mutated input if is one of the inputs. + // skip using mutated attr if is one of the outputs. attr_val = mut_attr; } } From 571afd6ff4101f1a40ffa9e722b039a0bf0f0636 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Mon, 9 Sep 2024 12:53:12 -0400 Subject: [PATCH 16/17] Clean up reversion of tests --- tests/cpp/test_dynamic_transform.cpp | 4 ---- tests/cpp/test_host_irs.cpp | 1 - 2 files changed, 5 deletions(-) diff --git a/tests/cpp/test_dynamic_transform.cpp b/tests/cpp/test_dynamic_transform.cpp index c889bc2585c..4f337e79a07 100644 --- a/tests/cpp/test_dynamic_transform.cpp +++ b/tests/cpp/test_dynamic_transform.cpp @@ -931,10 +931,6 @@ void reductionDynamicPadAddFusion( auto pad_widths = std::get<1>(inv); auto expect_miss = std::get<2>(inv); - // Since we concretize the inputs and expands for each Resize, we always - // expect a cache miss unless we have re-used the exact same values. - expect_miss = true; - NVF_ERROR(input_shape.size() == input_dims); NVF_ERROR(pad_widths.size() == num_pad_widths); diff --git a/tests/cpp/test_host_irs.cpp b/tests/cpp/test_host_irs.cpp index 4ae5d716427..9764d71d571 100644 --- a/tests/cpp/test_host_irs.cpp +++ b/tests/cpp/test_host_irs.cpp @@ -441,7 +441,6 @@ TEST_P(HostIrTest, ForLoops) { HostIrExecutorParams params; auto [use_fusion_executor_cache] = GetParam(); params.use_fusion_executor_cache = use_fusion_executor_cache; - HostIrExecutor hie(std::move(hic), /*communicator=*/nullptr, params); auto options = at::TensorOptions().dtype(at::kLong).device(at::kCUDA, 0); From 7ba300b7f5b6e6ed1ff6c3c1a044d6d5e4f28c19 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Mon, 9 Sep 2024 12:57:59 -0400 Subject: [PATCH 17/17] Clean up DynamicTransformConcretizationInfo::operator== --- csrc/dynamic_transform.cpp | 48 ++++---------------------------------- 1 file changed, 5 insertions(+), 43 deletions(-) diff --git a/csrc/dynamic_transform.cpp b/csrc/dynamic_transform.cpp index 82ea32c118b..74e46a8d018 100644 --- a/csrc/dynamic_transform.cpp +++ b/csrc/dynamic_transform.cpp @@ -527,49 +527,11 @@ bool DynamicTransformConcretizationInfo::operator==( return true; } - if (reshape_transforms_.size() != other.reshape_transforms_.size() || - resize_extents_.size() != other.resize_extents_.size() || - empty_extents_.size() != other.empty_extents_.size() || - factory_output_itertypes_.size() != - other.factory_output_itertypes_.size()) { - return false; - } - - for (const auto i : c10::irange((int64_t)reshape_transforms_.size())) { - const auto& analysis = reshape_transforms_.at(i); - const auto& other_analysis = other.reshape_transforms_.at(i); - if (analysis != other_analysis) { - return false; - } - } - - for (const auto i : c10::irange((int64_t)resize_extents_.size())) { - if (resize_extents_[i] != other.resize_extents_[i]) { - return false; - } - } - - if (factory_output_itertypes_ != other.factory_output_itertypes_) { - return false; - } - - for (const auto i : c10::irange((int64_t)expand_axes_.size())) { - const auto& expand_axes = expand_axes_.at(i); - const auto& other_expand_axes = other.expand_axes_.at(i); - if (expand_axes != other_expand_axes) { - return false; - } - } - - for (const auto i : c10::irange((int64_t)empty_extents_.size())) { - const auto& ee = empty_extents_.at(i); - const auto& other_ee = other.empty_extents_.at(i); - if (ee != other_ee) { - return false; - } - } - - return true; + return reshape_transforms_ != other.reshape_transforms_ && + resize_extents_ != other.resize_extents_ && + factory_output_itertypes_ != other.factory_output_itertypes_ && + expand_axes_ != other.expand_axes_ && + empty_extents_ != other.empty_extents_; } std::string DynamicTransformConcretizationInfo::toString() const {