-
Notifications
You must be signed in to change notification settings - Fork 79
Clip slice range expressions #460
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
30 commits
Select commit
Hold shift + click to select a range
f9def57
Add clipping to slice output extent expressions
jacobhinkle 15587bd
Merge branch 'main' into slice_clip
jacobhinkle 2c4af30
Add where to ExpressionEvaluator, handle negative in slice
jacobhinkle 2b4ef9a
Merge remote-tracking branch 'origin/main' into slice_clip
jacobhinkle 5dcf2c8
Support Set,Where, bool ops in NaiveValueMachine
jacobhinkle 7432adb
Silence clang-tidy in test_resize.cpp
jacobhinkle d96ee2e
Merge remote-tracking branch 'origin/main' into slice_clip
jacobhinkle 9856d93
Add simplifying comparison operators
jacobhinkle 8544650
Clean up normalized slice start/stop expressions
jacobhinkle eee9ff2
Fix wrong ostream in preseg ir dump
jacobhinkle efcb203
Remove debug print
jacobhinkle bf0a4b6
Handle NE in runBinaryOp
jacobhinkle 121a3b9
Fix simplified comparison ops
jacobhinkle 93f3737
Use maybeCastExpr in resize
jacobhinkle ec315a1
Update doc comment on slice op
jacobhinkle 98f3519
Add input range test
jacobhinkle b557e38
Reformat test
jacobhinkle 5b06adc
Merge branch 'main' into slice_clip
jacobhinkle 72493d2
Simplify clipping exprs, clean up op
jacobhinkle 553e63d
Merge remote-tracking branch 'origin/main' into slice_clip
jacobhinkle a9d9e58
Simplify maybe cast exprs
jacobhinkle 90f9616
Remove unneeded change to nodes.cpp
jacobhinkle ca3a674
Restore check for trivial slice
jacobhinkle 2fd5c23
Use SimplifyingIrBuilder
jacobhinkle 2bc4748
Cast extent first
jacobhinkle 2f714a8
Merge branch 'main' into slice_clip
jacobhinkle 2331989
Adding a reshape example (#944)
naoyam 74eb074
Remove manual refs and add FEC test
jacobhinkle d7a4b56
Change check for invalid extents to be >= 0
jacobhinkle 87dae14
Use same set of slice cases for all three tests
jacobhinkle File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -897,8 +897,8 @@ FusionKernelRuntime::FusionKernelRuntime( | |
| fusion.get()); | ||
|
|
||
| if (isDebugDumpEnabled(DebugDumpOption::FusionIrPreseg)) { | ||
| std::cout << "Fusion IR after pre-segmenter optimization passes:" | ||
| << std::endl; | ||
| debug() << "Fusion IR after pre-segmenter optimization passes:" | ||
| << std::endl; | ||
|
Comment on lines
-900
to
+901
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unrelated to this PR. Just found the wrong ostream in this debug dump. |
||
| fusion->printMath(); | ||
| } | ||
|
|
||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1124,6 +1124,132 @@ TEST_F(ResizeTest, FusionResizeSlice5) { | |
| testValidate(&fusion, cg_outputs, aten_inputs, {t2, t4}, __LINE__, __FILE__); | ||
| } | ||
|
|
||
| std::vector<std::pair<int64_t, int64_t>> slice_cases( | ||
| {{0, 5}, | ||
| {3, 9}, | ||
| {3, 4}, | ||
| {7, 5}, | ||
| {0, 11}, | ||
| {11, 13}, | ||
| {-3, 8}, | ||
| {-3, -1}, | ||
| {-3, -5}, | ||
| {13, -1}, | ||
| {-11, 9}, | ||
| {-11, 0}, | ||
| {-13, -11}}); | ||
|
|
||
| // Test slice with a variety of constant ranges | ||
| TEST_F(NVFuserTest, FusionResizeSliceConstantShmoo_CUDA) { | ||
| for (auto [start, stop] : slice_cases) { | ||
| Fusion fusion; | ||
| FusionGuard fg(&fusion); | ||
|
|
||
| std::vector<int64_t> shape({9}); | ||
|
|
||
| // concrete shapes to avoid dynamic Fusion | ||
| auto tv0 = makeConcreteTensor(shape); | ||
| fusion.addInput(tv0); | ||
|
|
||
| auto tv1 = slice( | ||
| tv0, {{IrBuilder::create<Val>(start), IrBuilder::create<Val>(stop)}}); | ||
| fusion.addOutput(tv1); | ||
|
|
||
| auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); | ||
|
|
||
| auto t0 = at::randn(shape, options); | ||
| std::vector<c10::IValue> aten_inputs({t0}); | ||
|
|
||
| FusionExecutor fe; | ||
| fe.compileFusion(&fusion, aten_inputs); | ||
| auto cg_outputs = fe.runFusion(aten_inputs); | ||
|
|
||
| testValidate(&fusion, cg_outputs, aten_inputs, __LINE__, __FILE__); | ||
| } | ||
| } | ||
|
|
||
| // Test slice with a variety of non-constant input ranges | ||
| TEST_F(NVFuserTest, FusionResizeSliceInputShmoo_CUDA) { | ||
| Fusion fusion; | ||
| FusionGuard fg(&fusion); | ||
|
|
||
| std::vector<int64_t> shape({9}); | ||
|
|
||
| // concrete shapes to avoid dynamic Fusion | ||
| auto tv0 = makeConcreteTensor(shape); | ||
| auto s0 = IrBuilder::create<Val>(DataType::Index); | ||
| auto s1 = IrBuilder::create<Val>(DataType::Index); | ||
| fusion.addInput(tv0); | ||
| fusion.addInput(s0); | ||
| fusion.addInput(s1); | ||
|
|
||
| auto tv1 = slice(tv0, {{s0, s1}}); | ||
| fusion.addOutput(tv1); | ||
|
|
||
| auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); | ||
|
|
||
| { | ||
| // Concretize so that we set output IterType as Iteration. We should now | ||
| // have expressions that work with any input range. | ||
| ExpressionEvaluator expr_eval; | ||
|
|
||
| expr_eval.bind(tv0->axis(0)->extent(), 9); | ||
| expr_eval.bind(s0, 0); | ||
| expr_eval.bind(s1, 9); | ||
|
|
||
| auto initial_info = DynamicTransform::getInitialInfo(&fusion); | ||
| auto info = DynamicTransformConcretizationInfo(&initial_info, &expr_eval); | ||
|
|
||
| DynamicTransform::concretizeFusion(&fusion, &info); | ||
| NVF_CHECK( | ||
| !fusion.hasDynamicTransform(), "Expected to have no dynamic transform"); | ||
| } | ||
|
|
||
| FusionExecutor fe; | ||
| fe.compileFusion(&fusion); | ||
|
|
||
| auto t0 = at::randn(shape, options); | ||
| for (auto [start, stop] : slice_cases) { | ||
| std::vector<c10::IValue> aten_inputs({t0, start, stop}); | ||
| auto cg_outputs = fe.runFusion(aten_inputs); | ||
|
|
||
| testValidate(&fusion, cg_outputs, aten_inputs, __LINE__, __FILE__); | ||
| } | ||
| } | ||
|
|
||
| // Same as FusionResizeSliceInputShmoo_CUDA but use FusionExecutorCache, which | ||
| // might re-concretize when output sizes change | ||
| TEST_F(NVFuserTest, FusionResizeSliceInputShmooFusionExecutorCache_CUDA) { | ||
| auto fusion_ptr = std::make_unique<Fusion>(); | ||
| auto fusion = fusion_ptr.get(); | ||
| FusionGuard fg(fusion); | ||
|
|
||
| std::vector<int64_t> shape({9}); | ||
|
|
||
| // concrete shapes to avoid dynamic Fusion | ||
| auto tv0 = makeConcreteTensor(shape); | ||
| auto s0 = IrBuilder::create<Val>(DataType::Index); | ||
| auto s1 = IrBuilder::create<Val>(DataType::Index); | ||
| fusion->addInput(tv0); | ||
| fusion->addInput(s0); | ||
| fusion->addInput(s1); | ||
|
|
||
| auto tv1 = slice(tv0, {{s0, s1}}); | ||
| fusion->addOutput(tv1); | ||
|
|
||
| auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); | ||
|
|
||
| FusionExecutorCache fec(std::move(fusion_ptr)); | ||
|
|
||
| auto t0 = at::randn(shape, options); | ||
| for (auto [start, stop] : slice_cases) { | ||
| std::vector<c10::IValue> aten_inputs({t0, start, stop}); | ||
| auto cg_outputs = fec.runFusionWithInputs(aten_inputs); | ||
|
|
||
| testValidate(fec.fusion(), cg_outputs, aten_inputs, __LINE__, __FILE__); | ||
| } | ||
| } | ||
|
|
||
zasdfgbnm marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // Auto scheduled version of Slice1 | ||
| TEST_F(ResizeTest, FusionResizeSliceScheduler1) { | ||
| auto fusion_ptr = std::make_unique<Fusion>(); | ||
|
|
@@ -2319,7 +2445,7 @@ TEST_F(ResizeTest, Slice1DVectorizeManual1) { | |
| FusionGuard fg(fusion_ptr.get()); | ||
|
|
||
| const int64_t slice_offset = 4; | ||
| const std::vector<int64_t> shape({1024 * 1024}); | ||
| const std::vector<int64_t> shape({1024L * 1024L}); | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Silencing clang-tidy. |
||
|
|
||
| // Using a concrete tensor to avoid dynamic reshape | ||
| auto tv0 = makeContigConcreteTensor(shape); | ||
|
|
@@ -2358,7 +2484,7 @@ TEST_F(ResizeTest, Slice1DVectorizeManual2) { | |
| FusionGuard fg(fusion_ptr.get()); | ||
|
|
||
| const int64_t slice_offset = 4; | ||
| const std::vector<int64_t> shape({1024 * 1024}); | ||
| const std::vector<int64_t> shape({1024L * 1024L}); | ||
|
|
||
| auto tv0 = makeContigConcreteTensor(shape); | ||
| fusion.addInput(tv0); | ||
|
|
@@ -2414,7 +2540,7 @@ TEST_F(ResizeTest, Slice1DVectorizeManual3) { | |
| FusionGuard fg(fusion_ptr.get()); | ||
|
|
||
| const int64_t slice_offset = 4; | ||
| const std::vector<int64_t> shape({1024 * 1024}); | ||
| const std::vector<int64_t> shape({1024L * 1024L}); | ||
|
|
||
| auto tv0 = makeContigConcreteTensor(shape); | ||
| fusion.addInput(tv0); | ||
|
|
@@ -2463,7 +2589,7 @@ TEST_F(ResizeTest, Slice1DVectorizeManual4) { | |
| auto& fusion = *fusion_ptr; | ||
| FusionGuard fg(fusion_ptr.get()); | ||
|
|
||
| const std::vector<int64_t> shape({1024 * 1024}); | ||
| const std::vector<int64_t> shape({1024L * 1024L}); | ||
|
|
||
| auto tv0 = makeContigConcreteTensor({shape[0] - 4}); | ||
| fusion.addInput(tv0); | ||
|
|
@@ -2505,7 +2631,7 @@ TEST_F(ResizeTest, Slice2DVectorizeManual1) { | |
| // The extent of the innermost domain is just 2, and the outer | ||
| // domain is sliced. This slicing should be vectorizable by a | ||
| // factor of 4 as the two domains can be merged and vectorized. | ||
| const std::vector<int64_t> shape({1024 * 1024, 2}); | ||
| const std::vector<int64_t> shape({1024L * 1024L, 2}); | ||
|
|
||
| auto tv0 = makeContigConcreteTensor(shape); | ||
| fusion.addInput(tv0); | ||
|
|
||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.