diff --git a/csrc/id_model/id_model.cpp b/csrc/id_model/id_model.cpp index f9a759c1f36..f4aa1dd30a0 100644 --- a/csrc/id_model/id_model.cpp +++ b/csrc/id_model/id_model.cpp @@ -627,10 +627,29 @@ std::unordered_map IdModel::buildLoopPromotionMap( idGraph(IdMappingMode::LOOP), inlining_info); + // At this point, most of loop groups should have correct promoted + // IDs. However, non-inlined loop groups may miss promotion that + // should be propagated from parent ID groups, e.g., iS50 of T2 in + // Indexing19. Its parent ID loop group is promoted, but the loop + // group of iS50 is not found yet. + + // Step 4: In order to fully propagate the loop graph promotions, first + // propagate them to the IEL groups, which are then used to + // propagate back to the loop groups in Step 5. Unlike Step 2, the + // initial IEL promotion map is empty and is populated with the loop + // promotion map as we traverse down the IEL graph. + std::unordered_map final_iel_promotion_map; + propagatePromotionsInIELGraph( + iel_graph, + final_iel_promotion_map, + idGraph(IdMappingMode::LOOP), + loop_promotion_map, + true); + // This is not a right map to return but just a placeholder since // the loop promotion map is not yet completely merged. It will be // replaced by a proper map. 
- return loop_promotion_map; + return final_iel_promotion_map; } std::unordered_map IdModel::buildInlineRootResolutionMap( @@ -867,7 +886,9 @@ namespace { Expr* findMatchingExpr( const ExprGroup& iel_expr, const ValGraph& iel_graph, - const std::vector& maybe_promoted_inputs) { + const std::vector& maybe_promoted_inputs, + bool require_loop_mapped_promotion, + const ValGraph& loop_graph) { // If any of domains in maybe_promoted_inputs is not found in // iel_graph, it means the domain is just replayed and by definition // has no mapping with any existing domain, which means there's no @@ -925,17 +946,96 @@ Expr* findMatchingExpr( continue; } + // For the final loop promotion map, we want to find + // promotions within the same loop groups. Note that that's + // guaranteed when a new domain is replayed instead of reusing an + // existing domain. + if (require_loop_mapped_promotion) { + if (!loop_graph.disjointExprSets().permissiveAreMapped( + iel_expr->front(), maybe_promoted_input_use_group->front())) { + continue; + } + // This is just an extra sanity check. Make sure all exprs in + // the use group are mapped + NVF_ERROR( + std::all_of( + maybe_promoted_input_use_group->vector().begin(), + maybe_promoted_input_use_group->vector().end(), + [&](Expr* iel_use) { + return loop_graph.disjointExprSets().permissiveAreMapped( + iel_expr->front(), iel_use); + }), + "Not all mapped: ", + nvfuser::toString(iel_expr), + "\n", + nvfuser::toString(maybe_promoted_input_use_group)); + } + return maybe_promoted_input_use; } return nullptr; } +// When propagating loop promotions from inputs to outputs of an IEL +// expr, we can't blindly apply loop promotion when all of the input +// domains are loop mapped with the outputs. +// +// i.e. 
if we have the inlined domains from: +// Inputs: +// T0[i0] +// T1[i0, i1] +// +// T2[i0, b2] = broadcast(T0) +// T3[i0, i1] = T2 + T1 +// +// {T1, T2, T3}->merge(0, 1) +// inlineMost +// +// The inlined loop group would consist of: +// +// {i0, i1, b2, i0*b2, i0*i1} +// +// Note that all these domains would have promotion to i0*i1 at the +// end of Step 3. When the IEL expression of merge(i0, i1) is visited by +// propagatePromotionsInIELGraph again, the promotion to i0*i1 of both +// inputs would be propagated to its output, resulting in promotion of +// i0*i1 to (i0*i1)*(i0*i1), which is not the correct propagation. +// +// Therefore only promote i0*b2 to i0*i1, or i0*i1 to i0*i1 (i.e. don't +// promote an input to any transformation within the loop group). +// +// So if we have an iel_expr make sure its inputs and outputs are not in +// the same loop group. +bool hasUniqueInputLoopGroups( + const ExprGroup& iel_expr, + const ValGraph& iel_graph, + const ValGraph& loop_graph) { + const std::vector iel_inp_groups = iel_graph.inputGroups(iel_expr); + + const std::vector iel_out_groups = iel_graph.outputGroups(iel_expr); + + ValGroups inp_loop_groups; + for (const ValGroup& iel_inp_group : iel_inp_groups) { + inp_loop_groups.pushBack(loop_graph.toGroup(iel_inp_group->front())); + } + ValGroups out_loop_groups; + for (const ValGroup& iel_out_group : iel_out_groups) { + out_loop_groups.pushBack(loop_graph.toGroup(iel_out_group->front())); + } + + // Check if there are input groups that are not included in the output group set + return !inp_loop_groups.computeSubtract(out_loop_groups).empty(); +} + } // namespace void IdModel::propagatePromotionsInIELGraph( const ValGraph& iel_graph, - std::unordered_map& iel_promotion_map) { + std::unordered_map& iel_promotion_map, + const ValGraph& loop_graph, + const std::unordered_map& loop_graph_promotion_map, + bool require_loop_mapped_promotion) { // In order to make this traversal work, the traversal order must be // topologically 
sorted. ValGraphStmtSort iel_stmt_sort(iel_graph); @@ -951,6 +1051,11 @@ void IdModel::propagatePromotionsInIELGraph( std::vector maybe_promoted_inputs; maybe_promoted_inputs.reserve(iel_inp_groups.size()); + // Propagate loop graph promotion only when the inputs and outputs are + // not in the same loop group. + const bool loop_promote_inputs = !loop_graph_promotion_map.empty() && + hasUniqueInputLoopGroups(iel_expr, iel_graph, loop_graph); + for (const ValGroup& iel_inp_group : iel_inp_groups) { // Assumed all inputs are IterDomains NVF_ERROR(iel_inp_group->front()->isA()); @@ -963,6 +1068,19 @@ void IdModel::propagatePromotionsInIELGraph( continue; } + // Promote loops based on the loop promotion map. If the loop promotion + // map should be used and has an entry we should use that promotion. + if (loop_promote_inputs) { + const ValGroup& loop_copy_group = + loop_graph.toGroup(iel_inp_group->front()); + auto inp_loop_promo_it = loop_graph_promotion_map.find(loop_copy_group); + if (inp_loop_promo_it != loop_graph_promotion_map.end()) { + maybe_promoted_inputs.push_back(inp_loop_promo_it->second); + an_input_was_promoted = true; + continue; + } + } + // No promotion found. Just use the non-promoted domain maybe_promoted_inputs.push_back(iel_inp_group->front()->as()); } @@ -972,8 +1090,12 @@ void IdModel::propagatePromotionsInIELGraph( continue; } - Expr* promoted_expr = - findMatchingExpr(iel_expr, iel_graph, maybe_promoted_inputs); + Expr* promoted_expr = findMatchingExpr( + iel_expr, + iel_graph, + maybe_promoted_inputs, + require_loop_mapped_promotion, + idGraph(IdMappingMode::LOOP)); bool replayed = false; @@ -1011,6 +1133,13 @@ void IdModel::propagatePromotionsInIELGraph( } } +void IdModel::propagatePromotionsInIELGraph( + const ValGraph& iel_graph, + std::unordered_map& iel_promotion_map) { + propagatePromotionsInIELGraph( + iel_graph, iel_promotion_map, idGraph(IdMappingMode::LOOP), {}, false); +} + // Replay Expr but with the inputs provided. 
Expr* IdModel::addReplayAs(std::vector new_inputs, Expr* expr) { // Figure out which graphs are already initialized to make sure we add the new @@ -1332,4 +1461,27 @@ VectorOfUniqueEntries IdModel::computeTerminalLoopIds( return terminal_loop_ids; } +std::unordered_map updateValGroupIdMap( + const std::unordered_map& stale_map, + ValGraph& new_graph) { + std::unordered_map new_map; + + for (const auto& [stale_group, mapped_id] : stale_map) { + const ValGroups& new_groups = new_graph.toGroups(*stale_group); + NVF_ERROR( + new_groups.size() == 1, + "\nUpdate map assumes that new graph is equivalent to old graph plus extra mappings.\n", + "i.e. all mappings in new_graph should exist in the graph stale_map was produced on.\n", + "old:", + nvfuser::toString(stale_group), + "new: ", + nvfuser::toString(new_groups)); + NVF_ERROR( + new_map.emplace(new_groups.front(), mapped_id).second, + "Expected only a single mapping but multiple entries detected for ", + nvfuser::toString(new_groups.front())); + } + return new_map; +} + } // namespace nvfuser diff --git a/csrc/id_model/id_model.h b/csrc/id_model/id_model.h index 792a6501be9..38d533f4734 100644 --- a/csrc/id_model/id_model.h +++ b/csrc/id_model/id_model.h @@ -169,7 +169,7 @@ class IdModel : public PolymorphicBase { // fusion. void buildIterDomainDefinitionsAndUses(); - /// Start loop map by grouping inlined iter domains + // Start loop map by grouping inlined iter domains void initializeLoopGraph(const StatefulInliningInfo& info); // Build a map of loop groups to IterDomains that represent actual @@ -192,7 +192,35 @@ class IdModel : public PolymorphicBase { // input is promoted, the output needs to be promoted too. If // there's already an equivalent expr that uses the promoted inputs, // create a mapping from the outputs of the IEL expr to the outputs - // of the equivalent expr. + // of the equivalent expr. When require_loop_mapped_promotion is + // true, the equivalent expr needs to be already loop mapped. 
If no + // such expr is found, the IEL expr is replayed with the promoted + // inputs. require_loop_mapped_promotion is true when this function + // is used for step 4. + // + // This is used twice when building the promotion map. The first time + // it is used there's no loop graph promotion yet, so only the IEL + // promotions are propagated. In that case, loop_graph_promotion_map + // should be just empty. + // + // Propagation uses iel_promotion_map and + // loop_graph_promotion_map. If both are available for an IEL group, + // the former has the precedence. This is because when this function + // is used for step 4, the given iel_promotion_map starts as an + // empty map and gets populated during this propagation, so any + // mapping in the map is guaranteed to be the correct final mapping, + // whereas the loop graph may have invalid mappings for partially + // inlined domains. + void propagatePromotionsInIELGraph( + const ValGraph& iel_graph, + std::unordered_map& iel_promotion_map, + const ValGraph& loop_graph, + const std::unordered_map& loop_promotion_map, + bool require_loop_mapped_promotion); + + // Same as the other propagatePromotionsInIELGraph but without loop + // graph map. This is used for step 2, where there's no loop + // graph map yet. void propagatePromotionsInIELGraph( const ValGraph& iel_graph, std::unordered_map& iel_promotion_map); @@ -281,4 +309,11 @@ class IdModel : public PolymorphicBase { std::unordered_map loop_promotion_map_; }; +// A utility function to update a map of ValGroups to ID from an old +// ValGraph to a new ValGraph. The new graph must be a superset of the +// old graph. 
+std::unordered_map updateValGroupIdMap( + const std::unordered_map& stale_map, + ValGraph& new_graph); + } // namespace nvfuser diff --git a/tests/cpp/test_id_model.cpp b/tests/cpp/test_id_model.cpp index a59bdf4aafa..9b25fb24c9f 100644 --- a/tests/cpp/test_id_model.cpp +++ b/tests/cpp/test_id_model.cpp @@ -110,6 +110,13 @@ ValType* getValByName(const std::vector& vals, StmtNameType name) { } } +IterDomain* getChildIdByName(IterDomain* id, StmtNameType name) { + auto named_val = getValByName(ir_utils::consumerValsOf(id), name); + NVF_ERROR(named_val != nullptr, "Cannot find a child ID named ", name); + NVF_ERROR(named_val->isA()); + return named_val->as(); +}; + // Helper class to test IdModel class IdModelTester : public IdModel { public: @@ -137,17 +144,34 @@ class IdModelTester : public IdModel { propagatePromotionsInIELGraph(iel_graph, s2_iel_promotion_map); - s3_loop_promotion_map = projectIELPromotionToLoopGraph( + const auto s3_original_loop_promotion_map = projectIELPromotionToLoopGraph( iel_graph, s2_iel_promotion_map, idGraph(IdMappingMode::LOOP), inlining_info); + + // Make a copy for validation as idGraph(IdMappingMode::LOOP) will + // be updated in the later steps + s3_loop_graph = idGraph(IdMappingMode::LOOP); + s3_loop_promotion_map = + updateValGroupIdMap(s3_original_loop_promotion_map, s3_loop_graph); + + // Note that s4_iel_promotion_map is an empty map at this + // point. 
It'll be populated with the Step-3 map + propagatePromotionsInIELGraph( + iel_graph, + s4_iel_promotion_map, + idGraph(IdMappingMode::LOOP), + s3_original_loop_promotion_map, + true); } ValGraph iel_graph; std::unordered_map s1_root_resolution_map; std::unordered_map s2_iel_promotion_map; + ValGraph s3_loop_graph; std::unordered_map s3_loop_promotion_map; + std::unordered_map s4_iel_promotion_map; }; // Test if id is resolved to an ID that is exact mapped with @@ -292,6 +316,34 @@ void checkStep3Results( } } +void checkStep4Results( + const ValGraph& iel_graph, + const std::unordered_map& iel_promotion_map, + const std::vector, IterDomain*>>& + ref_promotion_map) { + EXPECT_EQ(iel_promotion_map.size(), ref_promotion_map.size()) + << "Mismatched Step-4 result map. " + << "Expected to have " << ref_promotion_map.size() + << " mappings but found " << iel_promotion_map.size(); + + for (const auto& ref_promotion_pair : ref_promotion_map) { + const auto& ref_promotion_group = ref_promotion_pair.first; + const auto& ref_promotion_id = ref_promotion_pair.second; + + auto iel_promotion_it = std::find_if( + iel_promotion_map.begin(), + iel_promotion_map.end(), + [&](const auto& iel_promotion) { + return iel_promotion.first->set() == ref_promotion_group; + }); + + auto iel_promotion_id = iel_promotion_it->second; + EXPECT_EQ(ref_promotion_id, iel_promotion_id) + << "Expected promotion: " << ref_promotion_id->toString() + << ". Actual: " << iel_promotion_id->toString(); + } +} + // Create a fusion where we're missing a valid concrete id so the compute at map // processing will fail. We need to be able to create the concrete ID not just // look for one. 
It is not yet possible to lower this fusion as the @@ -660,9 +712,10 @@ TEST_F(IdModelTest, LoopPromotion1) { {std::unordered_set{t2->axis(1), t3->axis(1)}, t3->axis(1)}}; checkStep3Results( - tester.idGraph(IdMappingMode::LOOP), - tester.s3_loop_promotion_map, - s3_reference_map); + tester.s3_loop_graph, tester.s3_loop_promotion_map, s3_reference_map); + + ASSERT_TRUE(tester.s4_iel_promotion_map.empty()) + << "No step-4 IEL promotion expected"; } } @@ -717,9 +770,10 @@ TEST_F(IdModelTest, LoopPromotion2) { {std::unordered_set{t3->axis(0), t4->axis(0)}, t4->axis(0)}}; checkStep3Results( - tester.idGraph(IdMappingMode::LOOP), - tester.s3_loop_promotion_map, - s3_reference_map); + tester.s3_loop_graph, tester.s3_loop_promotion_map, s3_reference_map); + + ASSERT_TRUE(tester.s4_iel_promotion_map.empty()) + << "No step-4 IEL promotion expected"; } // Multiple inlined and non-inlined broadcast domains @@ -795,9 +849,10 @@ TEST_F(IdModelTest, LoopPromotion3) { tv3->axis(0)}}; checkStep3Results( - tester.idGraph(IdMappingMode::LOOP), - tester.s3_loop_promotion_map, - s3_reference_map); + tester.s3_loop_graph, tester.s3_loop_promotion_map, s3_reference_map); + + ASSERT_TRUE(tester.s4_iel_promotion_map.empty()) + << "No step-4 IEL promotion expected"; } // Test root resolution with a fusion with outer split. 
@@ -895,9 +950,24 @@ TEST_F(IdModelTest, LoopPromotion4) { {std::unordered_set{tv2->axis(1)}, tv4->axis(1)}}; checkStep3Results( - tester.idGraph(IdMappingMode::LOOP), - tester.s3_loop_promotion_map, - s3_reference_map); + tester.s3_loop_graph, tester.s3_loop_promotion_map, s3_reference_map); + + auto id10 = getParentId(tv4->axis(0), 1); + ASSERT_EQ(id10->name(), 10); + auto id32 = getChildIdByName(id10, 32); + auto id33 = getChildIdByName(id10, 33); + + std::vector, IterDomain*>> + s4_reference_map = { + // 19 -> 10 + {std::unordered_set{getParentId(tv2->axis(0), 1)}, id10}, + // 20 -> 32 + {std::unordered_set{tv2->axis(0)}, id32}, + // 21 -> 33 + {std::unordered_set{tv2->axis(1)}, id33}}; + + checkStep4Results( + tester.iel_graph, tester.s4_iel_promotion_map, s4_reference_map); } // Test root resolution with the same fusion as Indexing1 @@ -1027,9 +1097,50 @@ TEST_F(IdModelTest, LoopPromotion5) { }; checkStep3Results( - tester.idGraph(IdMappingMode::LOOP), - tester.s3_loop_promotion_map, - s3_reference_map); + tester.s3_loop_graph, tester.s3_loop_promotion_map, s3_reference_map); + + auto id19 = getParentId(tv4->axis(0), 3); + ASSERT_EQ(id19->name(), 19); + auto id20 = getParentId(tv4->axis(0), 2); + ASSERT_EQ(id20->name(), 20); + auto id40 = getChildIdByName(id20, 40); + auto id41 = getChildIdByName(id20, 41); + auto id42 = getChildIdByName(id20, 42); + auto id43 = getChildIdByName(id20, 43); + auto id46 = getChildIdByName(id40, 46); + auto id47 = getChildIdByName(id40, 47); + auto id48 = getChildIdByName(id42, 48); + auto id49 = getChildIdByName(id42, 49); + + std::vector, IterDomain*>> + s4_reference_map = { + // 32 -> 19 + {std::unordered_set{getParentId(tv2->axis(0), 3)}, id19}, + // 33 -> 20 + {std::unordered_set{getParentId(tv2->axis(0), 2)}, id20}, + // 34 -> 40 + {std::unordered_set{getParentId(tv2->axis(0), 1)}, id40}, + // 35 -> 41 + {std::unordered_set{tv2->axis(2)}, id41}, + // 36 -> 46 + {std::unordered_set{tv2->axis(0)}, id46}, + // 37 -> 47 + 
{std::unordered_set{tv2->axis(1)}, id47}, + // 26 -> 19 + {std::unordered_set{getParentId(tv3->axis(0), 3)}, id19}, + // 27 -> 20 + {std::unordered_set{getParentId(tv3->axis(0), 2)}, id20}, + // 28 -> 42 + {std::unordered_set{getParentId(tv3->axis(0), 1)}, id42}, + // 29 -> 43 + {std::unordered_set{tv3->axis(2)}, id43}, + // 30 -> 48 + {std::unordered_set{tv3->axis(0)}, id48}, + // 31 -> 49 + {std::unordered_set{tv3->axis(1)}, id49}}; + + checkStep4Results( + tester.iel_graph, tester.s4_iel_promotion_map, s4_reference_map); } // Test root resolution with the same fusion as Indexing19 @@ -1123,30 +1234,16 @@ TEST_F(IdModelTest, LoopPromotion6) { tester.idGraph(IdMappingMode::EXACT), tester.s2_iel_promotion_map); - auto id79 = - getValByName(ir_utils::consumerValsOf(tv9->getRootDomain().at(2)), 79) - ->as(); - ASSERT_NE(id79, nullptr) << "IterDomain 79 not found"; - auto id80 = - getValByName(ir_utils::consumerValsOf(tv9->getRootDomain().at(2)), 80) - ->as(); - ASSERT_NE(id80, nullptr) << "IterDomain 80 not found"; - auto id81 = getChildId(id79, 1); - ASSERT_EQ(id81->name(), 81); - auto id82 = getChildId(id79, 1, 1); - ASSERT_EQ(id82->name(), 82); - auto id83 = getChildId(id80, 1); - ASSERT_EQ(id83->name(), 83); - auto id84 = getChildId(id80, 1, 1); - ASSERT_EQ(id84->name(), 84); - auto id85 = getChildId(id81, 1); - ASSERT_EQ(id85->name(), 85); - auto id86 = getChildId(id81, 1, 1); - ASSERT_EQ(id86->name(), 86); - auto id87 = getChildId(id83, 1); - ASSERT_EQ(id87->name(), 87); - auto id88 = getChildId(id83, 1, 1); - ASSERT_EQ(id88->name(), 88); + auto id79 = getChildIdByName(tv9->getRootDomain().at(2), 79); + auto id80 = getChildIdByName(tv9->getRootDomain().at(2), 80); + auto id81 = getChildIdByName(id79, 81); + auto id82 = getChildIdByName(id79, 82); + auto id83 = getChildIdByName(id80, 83); + auto id84 = getChildIdByName(id80, 84); + auto id85 = getChildIdByName(id81, 85); + auto id86 = getChildIdByName(id81, 86); + auto id87 = getChildIdByName(id83, 87); + auto 
id88 = getChildIdByName(id83, 88); // Check Step 3 results. See the design doc for the expected results std::vector, IterDomain*>> @@ -1223,9 +1320,111 @@ TEST_F(IdModelTest, LoopPromotion6) { }; checkStep3Results( - tester.idGraph(IdMappingMode::LOOP), - tester.s3_loop_promotion_map, - s3_reference_map); + tester.s3_loop_graph, tester.s3_loop_promotion_map, s3_reference_map); + + // For tv1 + auto id94 = getChildIdByName(id80, 94); + auto id95 = getChildIdByName(id80, 95); + auto id109 = getChildIdByName(id94, 109); + auto id110 = getChildIdByName(id94, 110); + + // For tv2 + auto id98 = getChildIdByName(id80, 98); + auto id99 = getChildIdByName(id80, 99); + auto id113 = getChildIdByName(id98, 113); + auto id114 = getChildIdByName(id98, 114); + + // For tv6 + auto id102 = getChildIdByName(id80, 102); + auto id103 = getChildIdByName(id80, 103); + auto id117 = getChildIdByName(id102, 117); + auto id118 = getChildIdByName(id102, 118); + + // For tv4 + auto id111 = getChildIdByName(id80, 111); + auto id112 = getChildIdByName(id80, 112); + auto id129 = getChildIdByName(id111, 129); + auto id130 = getChildIdByName(id111, 130); + + // For tv5 + auto id127 = getChildIdByName(id80, 127); + auto id128 = getChildIdByName(id80, 128); + auto id135 = getChildIdByName(id127, 135); + auto id136 = getChildIdByName(id127, 136); + + // For tv8 + auto id107 = getChildIdByName(id80, 107); + auto id108 = getChildIdByName(id80, 108); + auto id125 = getChildIdByName(id107, 125); + auto id126 = getChildIdByName(id107, 126); + + // For tv9 + auto id121 = getChildIdByName(id80, 121); + auto id122 = getChildIdByName(id80, 122); + auto id131 = getChildIdByName(id121, 131); + auto id132 = getChildIdByName(id121, 132); + + std::vector, IterDomain*>> + s4_reference_map = { + // tv1: 71 -> 94 + {std::unordered_set{getParentId(tv1->axis(0), 1)}, id94}, + // tv1: 72 -> 95 + {std::unordered_set{tv1->axis(2)}, id95}, + // tv1: 73 -> 109 + {std::unordered_set{tv1->axis(0)}, id109}, + // tv1: 74 -> 110 
+ {std::unordered_set{tv1->axis(1)}, id110}, + // tv2: 47 -> 98 + {std::unordered_set{getParentId(tv2->axis(0), 1)}, id98}, + // tv2: 48 -> 99 + {std::unordered_set{tv2->axis(2)}, id99}, + // tv2: 49 -> 113 + {std::unordered_set{tv2->axis(0)}, id113}, + // tv2: 50 -> 114 + {std::unordered_set{tv2->axis(1)}, id114}, + // tv4: 42 -> 111 + {std::unordered_set{getParentId(tv4->axis(0), 1)}, id111}, + // tv4: 43 -> 112 + {std::unordered_set{tv4->axis(2)}, id112}, + // tv4: 44 -> 129 + {std::unordered_set{tv4->axis(0)}, id129}, + // tv4: 45 -> 130 + {std::unordered_set{tv4->axis(1)}, id130}, + // tv5: 37 -> 127 + {std::unordered_set{getParentId(tv5->axis(0), 1)}, id127}, + // tv5: 38 -> 128 + {std::unordered_set{tv5->axis(2)}, id128}, + // tv5: 39 -> 135 + {std::unordered_set{tv5->axis(0)}, id135}, + // tv5: 40 -> 136 + {std::unordered_set{tv5->axis(1)}, id136}, + // tv6: 62 -> 102 + {std::unordered_set{getParentId(tv6->axis(0), 1)}, id102}, + // tv6: 63 -> 103 + {std::unordered_set{tv6->axis(2)}, id103}, + // tv6: 64 -> 117 + {std::unordered_set{tv6->axis(0)}, id117}, + // tv6: 65 -> 118 + {std::unordered_set{tv6->axis(1)}, id118}, + // tv8: 57 -> 107 + {std::unordered_set{getParentId(tv8->axis(0), 1)}, id107}, + // tv8: 58 -> 108 + {std::unordered_set{tv8->axis(2)}, id108}, + // tv8: 59 -> 125 + {std::unordered_set{tv8->axis(0)}, id125}, + // tv8: 60 -> 126 + {std::unordered_set{tv8->axis(1)}, id126}, + // tv9: 31 -> 121 + {std::unordered_set{getParentId(tv9->axis(0), 1)}, id121}, + // tv9: 32 -> 122 + {std::unordered_set{tv9->axis(2)}, id122}, + // tv9: 33 -> 131 + {std::unordered_set{tv9->axis(0)}, id131}, + // tv9: 34 -> 132 + {std::unordered_set{tv9->axis(1)}, id132}}; + + checkStep4Results( + tester.iel_graph, tester.s4_iel_promotion_map, s4_reference_map); } // Same fusion as NvFuserTest.FusionInlineBroadcastIndexing0 @@ -1290,6 +1489,8 @@ TEST_F(IdModelTest, LoopPromotion7) { tester.idGraph(IdMappingMode::EXACT), tester.s2_iel_promotion_map); + auto id8 = 
getChildIdByName(tv4->getRootDomain().at(0), 8); + // Check Step 3 results. See the design doc for the expected results std::vector, IterDomain*>> s3_reference_map = { @@ -1301,8 +1502,8 @@ TEST_F(IdModelTest, LoopPromotion7) { getChildId(tv3->getRootDomain().at(0), 1), tv4->getRootDomain().at(0), tv4->getRootDomain().at(1), - getChildId(tv4->getRootDomain().at(0), 1)}, - getChildId(tv4->getRootDomain().at(0), 1)}, + id8}, + id8}, // 17, 15, 9 -> 9 {std::unordered_set{tv2->axis(0), tv3->axis(0), tv4->axis(0)}, tv4->axis(0)}, @@ -1310,9 +1511,36 @@ TEST_F(IdModelTest, LoopPromotion7) { {std::unordered_set{tv3->axis(1)}, tv4->axis(1)}}; checkStep3Results( - tester.idGraph(IdMappingMode::LOOP), - tester.s3_loop_promotion_map, - s3_reference_map); + tester.s3_loop_graph, tester.s3_loop_promotion_map, s3_reference_map); + + // For tv2 + auto id26 = getChildIdByName(id8, 26); + auto id27 = getChildIdByName(id8, 27); + auto id34 = getChildIdByName(id27, 34); + auto id35 = getChildIdByName(id27, 35); + + // For tv3 + auto id30 = getChildIdByName(id8, 30); + auto id31 = getChildIdByName(id8, 31); + + std::vector, IterDomain*>> + s4_reference_map = { + // tv2: 17 -> 26 + {std::unordered_set{tv2->axis(0)}, id26}, + // tv2: 18 -> 27 + {std::unordered_set{getParentId(tv2->axis(1), 1)}, id27}, + // tv2: 21 -> 34 + {std::unordered_set{tv2->axis(1)}, id34}, + // tv2: 22 -> 35 + {std::unordered_set{tv2->axis(2)}, id35}, + // tv3: 15 -> 30 + {std::unordered_set{tv3->axis(0)}, id30}, + // tv3: 16 -> 31 + {std::unordered_set{tv3->axis(1)}, id31}, + }; + + checkStep4Results( + tester.iel_graph, tester.s4_iel_promotion_map, s4_reference_map); } // Same fusion as NvFuserTest.FusionIndexing20 @@ -1406,6 +1634,11 @@ TEST_F(IdModelTest, LoopPromotion8) { tester.idGraph(IdMappingMode::EXACT), tester.s2_iel_promotion_map); + auto id29 = getParentId(tv7->axis(0), 1); + ASSERT_EQ(id29->name(), 29) << "Unexpected ID: " << id29->toString(); + auto id42 = getParentId(tv7->axis(1), 1); + 
ASSERT_EQ(id42->name(), 42); + // Check Step 3 results. See the design doc for the expected results std::vector, IterDomain*>> s3_reference_map = { @@ -1442,8 +1675,8 @@ TEST_F(IdModelTest, LoopPromotion8) { getChildId( getChildId(tv7->getRootDomain().at(0), 1), 1, 1), // 31 tv7->getRootDomain().at(2), // 16 - getChildId(tv7->getRootDomain().at(2), 1)}, // 42 - getChildId(tv7->getRootDomain().at(2), 1)}, + id42}, // 42 + id42}, // 22 -> 19 {std::unordered_set{tv2->axis(1)}, tv4->axis(1)}, // 40, 43 -> 43 @@ -1453,9 +1686,33 @@ TEST_F(IdModelTest, LoopPromotion8) { }; checkStep3Results( - tester.idGraph(IdMappingMode::LOOP), - tester.s3_loop_promotion_map, - s3_reference_map); + tester.s3_loop_graph, tester.s3_loop_promotion_map, s3_reference_map); + + auto id49 = getChildIdByName(id29, 49); + auto id50 = getChildIdByName(id29, 50); + auto id51 = getChildIdByName(id29, 51); + auto id52 = getChildIdByName(id29, 52); + auto id63 = getChildIdByName(id42, 63); + auto id64 = getChildIdByName(id42, 64); + + std::vector, IterDomain*>> + s4_reference_map = { + // tv1: 35 -> 49 + {std::unordered_set{tv1->axis(0)}, id49}, + // tv1: 36 -> 50 + {std::unordered_set{tv1->axis(1)}, id50}, + // tv2: 21 -> 51 + {std::unordered_set{tv2->axis(0)}, id51}, + // tv2: 22 -> 52 + {std::unordered_set{tv2->axis(1)}, id52}, + // tv5: 40 -> 63 + {std::unordered_set{tv5->axis(1)}, id63}, + // tv5: 41 -> 64 + {std::unordered_set{tv5->axis(2)}, id64}, + }; + + checkStep4Results( + tester.iel_graph, tester.s4_iel_promotion_map, s4_reference_map); } namespace {