Skip to content

Commit d158799

Browse files
committed
Move some logic from MarkAliasPass to AliasAnalysis.
This prepares #1478 for landing. We'll need to use AliasAnalysis before segmentation and during scheduling, so it's good to move common logic into AliasAnalysis.
1 parent 805d917 commit d158799

File tree

4 files changed

+112
-66
lines changed

4 files changed

+112
-66
lines changed

csrc/optimization/alias_analysis.cpp

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ void AliasAnalysisResult::add(
352352
const Val* AliasAnalysisResult::findRoot(const Val* alias) const {
353353
const TensorView* root = dynamic_cast<const TensorView*>(alias);
354354
if (root == nullptr) {
355-
return nullptr;
355+
return alias;
356356
}
357357

358358
// This can be made faster by path compression at the cost of losing
@@ -363,6 +363,31 @@ const Val* AliasAnalysisResult::findRoot(const Val* alias) const {
363363
return root;
364364
}
365365

366+
const TensorView* AliasAnalysisResult::getRoot(
367+
const TensorView* fusion_out) const {
368+
const auto i = out_to_root_.find(fusion_out);
369+
return i == out_to_root_.end() ? nullptr : i->second;
370+
}
371+
372+
void AliasAnalysisResult::finalize(Fusion* fusion) {
373+
for (TensorView* out :
374+
ir_utils::filterByType<TensorView>(fusion->outputs())) {
375+
// Lazy move: we could check compatibility and only give up when
376+
// the allocation domain is incompatible with what we prefer for
377+
// aliasing.
378+
if (out->hasAllocation()) {
379+
continue;
380+
}
381+
382+
const Val* in = findRoot(out);
383+
if (!in->isFusionInput()) {
384+
continue;
385+
}
386+
387+
out_to_root_[out] = in->as<TensorView>();
388+
}
389+
}
390+
366391
Layout AliasAnalysisResult::preferredLayout(const Val* v) const {
367392
const TensorView* tv = dynamic_cast<const TensorView*>(v);
368393
NVF_CHECK(
@@ -378,11 +403,23 @@ Layout AliasAnalysisResult::preferredLayout(const Val* v) const {
378403

379404
std::string AliasAnalysisResult::toString(const int indent_size) const {
380405
std::stringstream ss;
406+
indent(ss, indent_size) << "All aliases:"
407+
<< (alias_to_source_.empty() ? " <empty>" : "")
408+
<< std::endl;
381409
for (const auto& [alias, source_and_layout] : alias_to_source_) {
382410
const auto& [source, layout] = source_and_layout;
383-
indent(ss, indent_size) << ir_utils::varName(alias) << " is an alias of "
384-
<< ir_utils::varName(source) << " if its layout is "
385-
<< layout.toString() << std::endl;
411+
indent(ss, indent_size + 1)
412+
<< ir_utils::varName(alias) << " is an alias of "
413+
<< ir_utils::varName(source) << " if its layout is "
414+
<< layout.toString() << std::endl;
415+
}
416+
indent(ss, indent_size) << "Output aliases only:"
417+
<< (out_to_root_.empty() ? " <empty>" : "")
418+
<< std::endl;
419+
for (const auto& [out, root] : out_to_root_) {
420+
indent(ss, indent_size + 1)
421+
<< ir_utils::varName(out) << " is a transitive alias of "
422+
<< ir_utils::varName(root) << std::endl;
386423
}
387424
return ss.str();
388425
}
@@ -399,6 +436,7 @@ AliasAnalysisResult findAliases(Fusion* fusion) {
399436
// results).
400437
finder.dispatch(expr);
401438
}
439+
analysis.finalize(fusion);
402440
return analysis;
403441
}
404442

csrc/optimization/alias_analysis.h

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,17 @@ struct Layout {
2121
std::string toString(int indent_size = 0) const;
2222
};
2323

24+
// Holds aliases found in a fusion. The expected user flow is
25+
//
26+
// ```
27+
// AliasAnalysisResult analysis;
28+
// analysis.add(...);
29+
// ...
30+
// analysis.add(...);
31+
// analysis.finalize(fusion);
32+
//
33+
// // The user can now call const methods to retrieve information.
34+
// ```
2435
class AliasAnalysisResult {
2536
public:
2637
AliasAnalysisResult() = default;
@@ -29,27 +40,35 @@ class AliasAnalysisResult {
2940
AliasAnalysisResult(AliasAnalysisResult&&) = default;
3041
AliasAnalysisResult& operator=(AliasAnalysisResult&&) = default;
3142

32-
// Returns itself if `alias` doesn't alias anything.
33-
const Val* findRoot(const Val* alias) const;
43+
// Marks `source` as the immediate aliasing source of `alias` and sets the
44+
// preferred layout.
45+
void add(const TensorView* alias, const TensorView* source, Layout&& layout);
46+
47+
void finalize(Fusion* fusion);
3448

3549
// Returns the preferred layout. If `alias` is not in `alias_to_source_`,
3650
// returns the `TensorView`'s initial layout.
3751
Layout preferredLayout(const Val* alias) const;
3852

39-
// Marks `source` as the immediate aliasing source of `alias` and sets the
40-
// preferred layout.
41-
void add(const TensorView* alias, const TensorView* source, Layout&& layout);
42-
4353
std::string toString(int indent_size) const;
4454

55+
const TensorView* getRoot(const TensorView* fusion_out) const;
56+
4557
private:
58+
// Walks up `alias_to_source_` to find the root of the chain. Returns itself
59+
// if `alias` doesn't alias anything.
60+
const Val* findRoot(const Val* alias) const;
61+
4662
// Maps aliases (e.g. the output of a View) to their direct sources (e.g. the
4763
// input of the same View). Also stores the preferred output layout for the
4864
// alias. Consider path compression, a common optimization used in
4965
// disjoint-set data structures, so it's easy to figure out the root of an
5066
// alias.
5167
std::unordered_map<const TensorView*, std::pair<const TensorView*, Layout>>
5268
alias_to_source_;
69+
70+
// Maps fusion outputs to their aliased fusion inputs.
71+
std::unordered_map<const TensorView*, const TensorView*> out_to_root_;
5372
};
5473

5574
// Finds aliases of the fusion inputs. The analysis should be conservative --

csrc/optimization/mark_alias.cpp

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,28 +14,16 @@
1414
namespace nvfuser::optimization {
1515

1616
void MarkAliasPass::runPass(Fusion* fusion) {
17-
const AliasAnalysisResult alias_analysis = findAliases(fusion);
17+
const AliasAnalysisResult analysis = findAliases(fusion);
1818
if (isDebugDumpEnabled(DebugDumpOption::PreSegmenterLogging)) {
1919
debug() << "Alias analysis result:" << std::endl;
20-
debug() << alias_analysis.toString(/*indent_size=*/1) << std::endl;
20+
debug() << analysis.toString(/*indent_size=*/1) << std::endl;
2121
}
2222

2323
for (TensorView* out :
2424
ir_utils::filterByType<TensorView>(fusion->outputs())) {
25-
// Lazy move: we could check compatibility and only give up when
26-
// the allocation domain is incompatible with what we prefer for
27-
// aliasing.
28-
if (out->hasAllocation()) {
29-
if (isDebugDumpEnabled(DebugDumpOption::PreSegmenterLogging)) {
30-
debug() << "MarkAliasPass skipped " << out->toString()
31-
<< " because it already has an allocation domain:" << std::endl
32-
<< out->domain()->toString(1, /*leaf_only=*/false) << std::endl;
33-
}
34-
continue;
35-
}
36-
37-
const Val* in = alias_analysis.findRoot(out);
38-
if (!in->isFusionInput()) {
25+
const Val* in = analysis.getRoot(out);
26+
if (in == nullptr) {
3927
continue;
4028
}
4129

@@ -55,7 +43,7 @@ void MarkAliasPass::runPass(Fusion* fusion) {
5543
continue;
5644
}
5745

58-
const Layout out_layout = alias_analysis.preferredLayout(out);
46+
const Layout out_layout = analysis.preferredLayout(out);
5947
if (isDebugDumpEnabled(DebugDumpOption::PreSegmenterLogging)) {
6048
debug() << "MarkAliasPass changed the layout of " << out->toString()
6149
<< std::endl;

test/test_alias.cpp

Lines changed: 40 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ TEST_F(AliasAnalysisTest, View_SymbolicTensor) {
4646

4747
optimization::AliasAnalysisResult alias_analysis =
4848
optimization::findAliases(&fusion);
49-
EXPECT_EQ(alias_analysis.findRoot(out), in);
49+
EXPECT_EQ(alias_analysis.getRoot(out), in);
5050
}
5151

5252
TEST_F(AliasAnalysisTest, ChainOfViews) {
@@ -65,7 +65,7 @@ TEST_F(AliasAnalysisTest, ChainOfViews) {
6565

6666
optimization::AliasAnalysisResult alias_analysis =
6767
optimization::findAliases(&fusion);
68-
EXPECT_EQ(alias_analysis.findRoot(out), in);
68+
EXPECT_EQ(alias_analysis.getRoot(out), in);
6969
}
7070

7171
TEST_F(AliasAnalysisTest, View_Contiguous) {
@@ -82,7 +82,7 @@ TEST_F(AliasAnalysisTest, View_Contiguous) {
8282

8383
optimization::AliasAnalysisResult alias_analysis =
8484
optimization::findAliases(&fusion);
85-
EXPECT_EQ(alias_analysis.findRoot(out), in);
85+
EXPECT_EQ(alias_analysis.getRoot(out), in);
8686
optimization::Layout preferred_layout = alias_analysis.preferredLayout(out);
8787
EXPECT_THAT(
8888
preferred_layout.allocation_domain,
@@ -108,7 +108,7 @@ TEST_F(AliasAnalysisTest, View_MergeNonContiguous) {
108108

109109
optimization::AliasAnalysisResult alias_analysis =
110110
optimization::findAliases(&fusion);
111-
EXPECT_EQ(alias_analysis.findRoot(out), out);
111+
EXPECT_EQ(alias_analysis.getRoot(out), nullptr);
112112
}
113113

114114
TEST_F(AliasAnalysisTest, Set) {
@@ -124,7 +124,7 @@ TEST_F(AliasAnalysisTest, Set) {
124124

125125
optimization::AliasAnalysisResult alias_analysis =
126126
optimization::findAliases(&fusion);
127-
EXPECT_EQ(alias_analysis.findRoot(out), in);
127+
EXPECT_EQ(alias_analysis.getRoot(out), in);
128128

129129
const std::vector<IterDomain*>& out_rfactor = out->getMaybeRFactorDomain();
130130
EXPECT_THAT(
@@ -145,7 +145,7 @@ TEST_F(AliasAnalysisTest, Permute) {
145145

146146
optimization::AliasAnalysisResult alias_analysis =
147147
optimization::findAliases(&fusion);
148-
EXPECT_EQ(alias_analysis.findRoot(out), in);
148+
EXPECT_EQ(alias_analysis.getRoot(out), in);
149149

150150
const std::vector<IterDomain*>& out_rfactor = out->getMaybeRFactorDomain();
151151
EXPECT_THAT(
@@ -157,47 +157,49 @@ TEST_F(AliasAnalysisTest, View_SplitExpandedBroadcast) {
157157
Fusion fusion;
158158
FusionGuard fg(&fusion);
159159

160-
TensorView* in = makeContigConcreteTensor({4, 5});
160+
TensorView* in = TensorViewBuilder()
161+
.ndims(3)
162+
.dtype(DataType::Float)
163+
.contiguity({true, true, std::nullopt})
164+
.shape({4, 5, 6})
165+
.expanded({false, false, true})
166+
.build();
161167
fusion.addInput(in);
162-
TensorView* broadcast_out = broadcast(in, {false, false, true});
163-
TensorView* expand_out = expand(
164-
broadcast_out,
165-
{IrBuilder::create<Val>(4),
166-
IrBuilder::create<Val>(5),
167-
IrBuilder::create<Val>(6)});
168168
// tryStaticReshape used to fail to get the expanded extent, which is 6.
169-
TensorView* out = reshape(
170-
expand_out, {IrBuilder::create<Val>(40), IrBuilder::create<Val>(3)});
169+
// Therefore, we use the `vector<Val*>` version of `reshape` as a regression
170+
// test.
171+
TensorView* out =
172+
reshape(in, {IrBuilder::create<Val>(40), IrBuilder::create<Val>(3)});
171173
fusion.addOutput(out);
172174

173175
optimization::AliasAnalysisResult alias_analysis =
174176
optimization::findAliases(&fusion);
175-
EXPECT_EQ(alias_analysis.findRoot(out), out);
177+
EXPECT_EQ(alias_analysis.getRoot(out), nullptr);
176178
}
177179

178180
TEST_F(AliasAnalysisTest, View_ForwardExpandedBroadcast) {
179181
Fusion fusion;
180182
FusionGuard fg(&fusion);
181183

182-
TensorView* in = makeContigConcreteTensor({4, 5});
184+
TensorView* in = TensorViewBuilder()
185+
.ndims(3)
186+
.dtype(DataType::Float)
187+
.contiguity({true, true, std::nullopt})
188+
.shape({4, 5, 6})
189+
.expanded({false, false, true})
190+
.build();
183191
fusion.addInput(in);
184-
TensorView* broadcast_out = broadcast(in, {false, false, true});
185-
TensorView* expand_out = expand(
186-
broadcast_out,
187-
{IrBuilder::create<Val>(4),
188-
IrBuilder::create<Val>(5),
189-
IrBuilder::create<Val>(6)});
190-
TensorView* out = reshape(expand_out, {4, 5, 6}, {20, -1});
192+
TensorView* out = reshape(in, {4, 5, 6}, {20, -1});
191193
fusion.addOutput(out);
192194

193195
optimization::AliasAnalysisResult alias_analysis =
194196
optimization::findAliases(&fusion);
195-
EXPECT_EQ(alias_analysis.findRoot(out), expand_out);
197+
EXPECT_EQ(alias_analysis.getRoot(out), in);
196198

197199
// Verify the last dimension isn't expanded physically.
198200
FusionExecutor fe;
199201
at::Tensor in_tensor =
200-
at::randn({4, 5}, at::dtype(at::kFloat).device(at::kCUDA, 0));
202+
at::randn({4, 5}).cuda().as_strided({4, 5, 6}, {5, 1, 0});
201203
fe.compileFusion(&fusion, {in_tensor});
202204
at::Tensor out_tensor = fe.runFusion({in_tensor})[0];
203205

@@ -208,20 +210,20 @@ TEST_F(AliasAnalysisTest, View_MergeExpandedBroadcast) {
208210
Fusion fusion;
209211
FusionGuard fg(&fusion);
210212

211-
TensorView* in = makeContigConcreteTensor({4, 5});
213+
TensorView* in = TensorViewBuilder()
214+
.ndims(3)
215+
.dtype(DataType::Float)
216+
.contiguity({true, true, std::nullopt})
217+
.shape({4, 5, 6})
218+
.expanded({false, false, true})
219+
.build();
212220
fusion.addInput(in);
213-
TensorView* broadcast_out = broadcast(in, {false, false, true});
214-
TensorView* expand_out = expand(
215-
broadcast_out,
216-
{IrBuilder::create<Val>(4),
217-
IrBuilder::create<Val>(5),
218-
IrBuilder::create<Val>(6)});
219-
TensorView* out = reshape(expand_out, {4, 5, 6}, {4, -1});
221+
TensorView* out = reshape(in, {4, 5, 6}, {4, -1});
220222
fusion.addOutput(out);
221223

222224
optimization::AliasAnalysisResult alias_analysis =
223225
optimization::findAliases(&fusion);
224-
EXPECT_EQ(alias_analysis.findRoot(out), out);
226+
EXPECT_EQ(alias_analysis.getRoot(out), nullptr);
225227
}
226228

227229
TEST_F(AliasAnalysisTest, TrivialSlice) {
@@ -236,7 +238,7 @@ TEST_F(AliasAnalysisTest, TrivialSlice) {
236238

237239
optimization::AliasAnalysisResult alias_analysis =
238240
optimization::findAliases(&fusion);
239-
EXPECT_EQ(alias_analysis.findRoot(out), in);
241+
EXPECT_EQ(alias_analysis.getRoot(out), in);
240242
}
241243

242244
TEST_F(AliasAnalysisTest, MergeTriviallySlicedDimensions) {
@@ -251,7 +253,7 @@ TEST_F(AliasAnalysisTest, MergeTriviallySlicedDimensions) {
251253

252254
optimization::AliasAnalysisResult alias_analysis =
253255
optimization::findAliases(&fusion);
254-
EXPECT_EQ(alias_analysis.findRoot(out), in);
256+
EXPECT_EQ(alias_analysis.getRoot(out), in);
255257
}
256258

257259
TEST_F(AliasAnalysisTest, MergeSlicedDimensions) {
@@ -266,8 +268,7 @@ TEST_F(AliasAnalysisTest, MergeSlicedDimensions) {
266268

267269
optimization::AliasAnalysisResult alias_analysis =
268270
optimization::findAliases(&fusion);
269-
EXPECT_EQ(alias_analysis.findRoot(out), out);
270-
EXPECT_EQ(alias_analysis.findRoot(slice_out), in);
271+
EXPECT_EQ(alias_analysis.getRoot(out), nullptr);
271272
}
272273

273274
using AliasTest = NVFuserTest;

0 commit comments

Comments
 (0)