diff --git a/csrc/alias_analysis.cpp b/csrc/alias_analysis.cpp index 8bca891e4c4..8fd5da9da77 100644 --- a/csrc/alias_analysis.cpp +++ b/csrc/alias_analysis.cpp @@ -239,10 +239,6 @@ void AliasFinder::handle(const ViewOp* view) { } void AliasFinder::handle(const LoadStoreOp* set) { - if (isResharding(set)) { - return; - } - TensorView* in = dynamic_cast(set->in()); if (in == nullptr) { return; diff --git a/csrc/preseg_passes/pre_segmenter.cpp b/csrc/preseg_passes/pre_segmenter.cpp index 042f03191f7..52d949ad95a 100644 --- a/csrc/preseg_passes/pre_segmenter.cpp +++ b/csrc/preseg_passes/pre_segmenter.cpp @@ -39,12 +39,6 @@ namespace nvfuser::preseg_passes { debug() << "========================================" << std::endl; } - // For resharding across GPUs. - OptimizationPass::runPass(fusion); - OptimizationPass::runPass(fusion); - OptimizationPass::runPass(fusion); - OptimizationPass::runPass(fusion); - // Replace TensorViews with zero extent. Outputs and inputs may still be empty OptimizationPass::runPass(fusion); // This pass should be placed before ConsecutiveCastPass as more @@ -81,6 +75,16 @@ namespace nvfuser::preseg_passes { OptimizationPass::runPass(fusion); OptimizationPass::runPass(fusion); OptimizationPass::runPass(fusion); + + // All the multidevice passes are moved after allocation-related passes: + // MarkAliasesPreparePass and AllocationDomainPass. Multidevice passes will + // try to set the allocation domain for tvs with device mesh which will + // conflict with these passes. 
+ OptimizationPass::runPass(fusion); + OptimizationPass::runPass(fusion); + OptimizationPass::runPass(fusion); + OptimizationPass::runPass(fusion); + OptimizationPass::runPass(fusion); OptimizationPass::runPass(fusion); OptimizationPass::runPass(fusion); diff --git a/tests/cpp/test_alias_analysis.cpp b/tests/cpp/test_alias_analysis.cpp index 937167a5517..260172fb4c2 100644 --- a/tests/cpp/test_alias_analysis.cpp +++ b/tests/cpp/test_alias_analysis.cpp @@ -270,7 +270,9 @@ TEST_F(AliasAnalysisTest, BroadcastExpandDimensions) { EXPECT_EQ(analysis.getRoot(expanded_tv), in); } -TEST_F(AliasAnalysisTest, NoAliasForReshardingExprs) { +// See PR: https://github.com/NVIDIA/Fuser/pull/4274 +// for alias analysis for resharding exprs +TEST_F(AliasAnalysisTest, AliasForReshardingExprs) { Fusion fusion; FusionGuard fg(&fusion); @@ -288,7 +290,7 @@ TEST_F(AliasAnalysisTest, NoAliasForReshardingExprs) { fusion.addOutput(out); AliasAnalysisResult analysis = findAliases(&fusion); - EXPECT_TRUE(analysis.getRoot(out) == nullptr); + EXPECT_TRUE(analysis.getRoot(out) == in); } } // namespace nvfuser diff --git a/tests/cpp/test_multidevice_matmul.cpp b/tests/cpp/test_multidevice_matmul.cpp index b8aba89aa86..ee24479a0af 100644 --- a/tests/cpp/test_multidevice_matmul.cpp +++ b/tests/cpp/test_multidevice_matmul.cpp @@ -238,7 +238,7 @@ TEST_F(DistributedMatmulTest, Matmul_LayoutTN_Allgather) { executor_cache.getMostRecentKernelRuntime(); EXPECT_THAT( kernel_runtime->fusionSegments()->groups(), - Contains(HeuristicIs(SchedulerType::ExprEval)).Times(2)); + Contains(HeuristicIs(SchedulerType::ExprEval)).Times(3)); } TEST_F(DistributedMatmulTest, Matmul_LayoutNT_AllReduce) {