From 09786f87d57d39ade78c9186729a707307e2061d Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Thu, 13 Jul 2023 09:54:01 -0400 Subject: [PATCH 1/5] Add test to repro #585 --- test/test_gpu3.cpp | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/test/test_gpu3.cpp b/test/test_gpu3.cpp index cd9ee4f7392..54fb4da2af4 100644 --- a/test/test_gpu3.cpp +++ b/test/test_gpu3.cpp @@ -9215,6 +9215,42 @@ TEST_F(NVFuserTest, FusionDebugStreamGuard_CUDA) { ASSERT_EQ(ss.str(), text); } +// Repro of https://github.com/NVIDIA/Fuser/issues/585 +TEST_F(NVFuserTest, FusionDanglingUnaryOp_CUDA) { + auto fusion = std::make_unique(); + FusionGuard fg(fusion.get()); + + // Create a segmented Fusion. We call segment_set here to ensure the whole + // Fusion cannot be scheduled. This triggers segmentation, so that + // forwardInputs() is called. The structure of this Fusion is not important; + // it is only important that it must be segmented. + auto size = IrBuilder::create(5); + auto tv0 = full({size}, fusion->zeroVal(), DataType::Int); + auto tv1 = segment_set(tv0); + fusion->addOutput(tv1); + + // Now take in an input that has a chain of UnaryOp uses that terminates in a + // Val with no uses. This triggers a segfault in forwardInputs(). + Val* alpha = IrBuilder::create(DataType::Int); + fusion->addInput(alpha); + neg(castOp(DataType::Float, alpha)); + + FusionExecutorCache executor_cache(std::move(fusion)); + + auto cg_outputs = executor_cache.runFusionWithInputs({11}); + + auto options = at::TensorOptions().dtype(at::kInt).device(at::kCUDA, 0); + auto aten_out = at::zeros({5}, options); + + testValidate( + executor_cache.fusion(), + cg_outputs, + {11}, + {aten_out}, + __LINE__, + __FILE__); +} + // Test file size should be up to 10K LoC. Create a new file for more tests. } // namespace nvfuser From 14a48c567100fcfd983cc88706494136e5c4c87e Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Thu, 13 Jul 2023 09:54:33 -0400 Subject: [PATCH 2/5] Continue when output_uses is empty --- csrc/fusion_segmenter.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/csrc/fusion_segmenter.cpp b/csrc/fusion_segmenter.cpp index 5a02962fb03..750c804da20 100644 --- a/csrc/fusion_segmenter.cpp +++ b/csrc/fusion_segmenter.cpp @@ -3399,12 +3399,22 @@ void SegmentCandidateFinder::forwardInputs() { continue; } - if (expr->output(0)->uses().size() > 1) { + // expr is a unary op so there is a single output. Here we look at that + // output's further uses + auto output_uses = expr->output(0)->uses(); + if (output_uses.empty()) { + // Unused outputs terminate here + continue; + } + + if (output_uses.size() > 1) { excluded_inp_unary_exprs_.pushBack(expr); forwarded_inputs.pushBack(expr->output(0)); continue; } + // If there is a single use, visit it to try and extend the chain of + // unaryOps to_visit.emplace_back(expr->output(0)->uses()[0]); } } From 02ca254177f026aa96f1b8195701c339a93908ad Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Thu, 13 Jul 2023 10:04:01 -0400 Subject: [PATCH 3/5] Change approach and treat unused outputs same as multi-use --- csrc/fusion_segmenter.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/csrc/fusion_segmenter.cpp b/csrc/fusion_segmenter.cpp index 750c804da20..874e19142a9 100644 --- a/csrc/fusion_segmenter.cpp +++ b/csrc/fusion_segmenter.cpp @@ -3402,20 +3402,18 @@ void SegmentCandidateFinder::forwardInputs() { // expr is a unary op so there is a single output. Here we look at that // output's further uses auto output_uses = expr->output(0)->uses(); - if (output_uses.empty()) { - // Unused outputs terminate here - continue; - } - if (output_uses.size() > 1) { + if (output_uses.size() == 1) { + // If there is a single use, visit it to try and extend the chain of + // unaryOps + to_visit.emplace_back(output_uses[0]); + } else { + // If there are either no more uses, or more than one use, we cannot + // extend the chain of unary Ops. In either case, finalize this chain by + // saving the expr and its output. excluded_inp_unary_exprs_.pushBack(expr); forwarded_inputs.pushBack(expr->output(0)); - continue; } - - // If there is a single use, visit it to try and extend the chain of - // unaryOps - to_visit.emplace_back(expr->output(0)->uses()[0]); } } From cfb80a7c6f0037565ab85115b28a5350c05a08be Mon Sep 17 00:00:00 2001 From: Jacob Hinkle <1454944+jacobhinkle@users.noreply.github.com> Date: Fri, 14 Jul 2023 07:14:36 -0400 Subject: [PATCH 4/5] Update csrc/fusion_segmenter.cpp Co-authored-by: Naoya Maruyama --- csrc/fusion_segmenter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/fusion_segmenter.cpp b/csrc/fusion_segmenter.cpp index 874e19142a9..ca2f370351a 100644 --- a/csrc/fusion_segmenter.cpp +++ b/csrc/fusion_segmenter.cpp @@ -3401,7 +3401,7 @@ void SegmentCandidateFinder::forwardInputs() { // expr is a unary op so there is a single output. Here we look at that // output's further uses - auto output_uses = expr->output(0)->uses(); + const auto& output_uses = expr->output(0)->uses(); if (output_uses.size() == 1) { // If there is a single use, visit it to try and extend the chain of From 07c9c0f82a1e4b7bd1192770c1f5ae26d7257543 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle <1454944+jacobhinkle@users.noreply.github.com> Date: Fri, 14 Jul 2023 07:14:45 -0400 Subject: [PATCH 5/5] Update csrc/fusion_segmenter.cpp Co-authored-by: Naoya Maruyama --- csrc/fusion_segmenter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/fusion_segmenter.cpp b/csrc/fusion_segmenter.cpp index ca2f370351a..3b4458ea8f9 100644 --- a/csrc/fusion_segmenter.cpp +++ b/csrc/fusion_segmenter.cpp @@ -3406,7 +3406,7 @@ void SegmentCandidateFinder::forwardInputs() { if (output_uses.size() == 1) { // If there is a single use, visit it to try and extend the chain of // unaryOps - to_visit.emplace_back(output_uses[0]); + to_visit.emplace_back(output_uses.at(0)); } else { // If there are either no more uses, or more than one use, we cannot // extend the chain of unary Ops. In either case, finalize this chain by