Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions csrc/alias_analysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,10 +239,6 @@ void AliasFinder::handle(const ViewOp* view) {
}

void AliasFinder::handle(const LoadStoreOp* set) {
if (isResharding(set)) {
return;
}

TensorView* in = dynamic_cast<TensorView*>(set->in());
if (in == nullptr) {
return;
Expand Down
16 changes: 10 additions & 6 deletions csrc/preseg_passes/pre_segmenter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,6 @@ namespace nvfuser::preseg_passes {
debug() << "========================================" << std::endl;
}

// For resharding across GPUs.
OptimizationPass<PropagateShardingsPass>::runPass(fusion);
OptimizationPass<InsertReshardingsPass>::runPass(fusion);
OptimizationPass<ReorderShardedAxisPass>::runPass(fusion);
OptimizationPass<MakeReshardingContiguousPass>::runPass(fusion);

// Replace TensorViews with zero extent. Outputs and inputs may still be empty
OptimizationPass<RemoveEmptyPass>::runPass(fusion);
// This pass should be placed before ConsecutiveCastPass as more
Expand Down Expand Up @@ -81,6 +75,16 @@ namespace nvfuser::preseg_passes {
OptimizationPass<MarkAliasesPreparePass>::runPass(fusion);
OptimizationPass<ExactMappedExtentSubstitutionPass>::runPass(fusion);
OptimizationPass<AllocationDomainPass>::runPass(fusion);

// All the multidevice passes are moved after the allocation-related passes
// (MarkAliasesPreparePass and AllocationDomainPass). Multidevice passes will
// try to set the allocation domain for tvs with a device mesh, which will
// conflict with these passes.
OptimizationPass<PropagateShardingsPass>::runPass(fusion);
OptimizationPass<InsertReshardingsPass>::runPass(fusion);
OptimizationPass<ReorderShardedAxisPass>::runPass(fusion);
OptimizationPass<MakeReshardingContiguousPass>::runPass(fusion);

OptimizationPass<RemoveBcastSqueeze>::runPass(fusion);
OptimizationPass<SegmentInplaceUpdatePass>::runPass(fusion);
OptimizationPass<TranslateNoReductionMatmulToMulSqueeze>::runPass(fusion);
Expand Down
6 changes: 4 additions & 2 deletions tests/cpp/test_alias_analysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,9 @@ TEST_F(AliasAnalysisTest, BroadcastExpandDimensions) {
EXPECT_EQ(analysis.getRoot(expanded_tv), in);
}

TEST_F(AliasAnalysisTest, NoAliasForReshardingExprs) {
// See PR https://github.com/NVIDIA/Fuser/pull/4274 for details on alias
// analysis for resharding exprs.
TEST_F(AliasAnalysisTest, AliasForReshardingExprs) {
Fusion fusion;
FusionGuard fg(&fusion);

Expand All @@ -288,7 +290,7 @@ TEST_F(AliasAnalysisTest, NoAliasForReshardingExprs) {
fusion.addOutput(out);

AliasAnalysisResult analysis = findAliases(&fusion);
EXPECT_TRUE(analysis.getRoot(out) == nullptr);
EXPECT_TRUE(analysis.getRoot(out) == in);
}

} // namespace nvfuser
2 changes: 1 addition & 1 deletion tests/cpp/test_multidevice_matmul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ TEST_F(DistributedMatmulTest, Matmul_LayoutTN_Allgather) {
executor_cache.getMostRecentKernelRuntime();
EXPECT_THAT(
kernel_runtime->fusionSegments()->groups(),
Contains(HeuristicIs(SchedulerType::ExprEval)).Times(2));
Contains(HeuristicIs(SchedulerType::ExprEval)).Times(3));
}

TEST_F(DistributedMatmulTest, Matmul_LayoutNT_AllReduce) {
Expand Down