Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
9a0dc9e
host ir alias and prealloc output support
samnordmann Mar 26, 2025
9820d5a
harden and simplify allocation in for loop test
samnordmann Mar 26, 2025
8c49c95
Merge branch 'main' of github.com:NVIDIA/Fuser into host_irs/alias_su…
samnordmann Apr 11, 2025
2ad510d
refactor and clean host ir lowering and segmentation
samnordmann Mar 26, 2025
46c6717
lint
samnordmann Mar 26, 2025
73d5d7b
put back isResharding as the condition for lower to a standalone host…
samnordmann Mar 26, 2025
e35ddd0
minor comments
samnordmann Apr 11, 2025
4964680
lint
samnordmann Apr 11, 2025
ed8dc7c
add host ir support for set reduce and binary op
samnordmann Mar 26, 2025
85b7b75
move .contiguous to be in postScatter
samnordmann Apr 11, 2025
01e94a7
lint and build issue
samnordmann Apr 11, 2025
e1db518
reviews
samnordmann Apr 14, 2025
7e6cef6
Merge branch 'main' of github.com:NVIDIA/Fuser into host_irs/alias_su…
samnordmann Apr 14, 2025
59622ff
add comment
samnordmann Apr 15, 2025
25c618c
add comment
samnordmann Apr 15, 2025
eb46aef
minor comment
samnordmann Apr 16, 2025
5f161f5
lint
samnordmann Apr 16, 2025
b936420
Merge branch 'host_irs/refactor_lowering_and_segmentation' into host_…
samnordmann Apr 16, 2025
97b1743
Merge branch 'main' of github.com:NVIDIA/Fuser into host_irs/alias_su…
samnordmann Apr 16, 2025
684118f
Merge branch 'host_irs/alias_support' into host_irs/refactor_lowering…
samnordmann Apr 16, 2025
e29abe4
Merge branch 'host_irs/refactor_lowering_and_segmentation' into host_…
samnordmann Apr 16, 2025
d265584
Revert "move .contiguous to be in postScatter"
samnordmann Apr 16, 2025
a177cb4
add ParallelType::Stream lowering pass in host Ir for single device f…
samnordmann Mar 26, 2025
e886941
improve comments
samnordmann Mar 26, 2025
b6c54f2
fix rebase
samnordmann Apr 16, 2025
dffcd51
Merge branch 'main' of github.com:NVIDIA/Fuser into host_irs/LoadStor…
samnordmann Apr 16, 2025
55f510f
Merge branch 'host_irs/LoadStore_Reduction_binaryOp_support' into hos…
samnordmann Apr 16, 2025
32a8d55
temporarily disable stream pass also in the python test
samnordmann Apr 16, 2025
afbd020
lint
samnordmann Apr 16, 2025
165bd1b
move stream_parallel_type to host_ir/pass folder
samnordmann Apr 16, 2025
b55d4e7
minor comment
samnordmann Apr 18, 2025
1181eac
minor improvements and cleanup
samnordmann Apr 18, 2025
cad9bce
further refactor of stream pass
samnordmann Apr 18, 2025
7ae7c52
improve comments clarity
samnordmann Apr 18, 2025
6dd673f
more comments
samnordmann Apr 18, 2025
bc8c2cb
Merge branch 'host_irs/LoadStore_Reduction_binaryOp_support' into hos…
samnordmann Apr 18, 2025
db90ef0
add HirAliasSelect
samnordmann Apr 23, 2025
e32653a
replace SelectOp by HirAliasSelect in stream lowering
samnordmann Apr 23, 2025
a50b53c
add cache for tensor slicing
samnordmann Apr 23, 2025
d01c5a2
separate out tensor allocation logic
samnordmann Apr 23, 2025
25b7695
minor cleanup
samnordmann Apr 23, 2025
6b479de
lower as HIR only set without permute
samnordmann Apr 24, 2025
b2a76e9
add comment
samnordmann Apr 24, 2025
20204fc
Merge branch 'host_irs/LoadStore_Reduction_binaryOp_support' into hos…
samnordmann Apr 24, 2025
7f7caf5
change namespace of the optimization pass to hir
samnordmann Apr 27, 2025
b6213f3
Merge branch 'main' of github.com:NVIDIA/Fuser into host_irs/stream_l…
samnordmann Apr 27, 2025
7777fe0
lint
samnordmann Apr 27, 2025
e517bc3
fix merge
samnordmann Apr 27, 2025
35ff4da
empty commit to trigger the CI
samnordmann Apr 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ list(APPEND NVFUSER_SRCS
${NVFUSER_SRCS_DIR}/preseg_passes/remove_empty.cpp
${NVFUSER_SRCS_DIR}/preseg_passes/reorder_sharded_axis.cpp
${NVFUSER_SRCS_DIR}/preseg_passes/segment_inplace_update.cpp
${NVFUSER_SRCS_DIR}/host_ir/pass/stream_parallel_type.cpp
${NVFUSER_SRCS_DIR}/preseg_passes/translate_no_reduction_matmul_to_mul_squeeze.cpp
${NVFUSER_SRCS_DIR}/preseg_passes/translate_repeat_to_expand.cpp
${NVFUSER_SRCS_DIR}/rng.cpp
Expand Down Expand Up @@ -739,6 +740,7 @@ if(BUILD_TEST)
list(APPEND HOSTIR_TEST_SRCS
${NVFUSER_ROOT}/tests/cpp/test_host_irs.cpp
${NVFUSER_ROOT}/tests/cpp/test_host_ir_integration.cpp
${NVFUSER_ROOT}/tests/cpp/test_host_ir_stream_lowering.cpp
)
add_test(test_host_ir "${HOSTIR_TEST_SRCS}" "")
list(APPEND TEST_BINARIES test_host_ir)
Expand Down
10 changes: 6 additions & 4 deletions csrc/host_ir/container.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@ Stream* HostIrContainer::getDefaultStream() {
std::ostream& HostIrContainer::print(std::ostream& os) const {
IrMathPrinter op_exprs(os);
op_exprs.handle(this);
os << "Aliases:{";
for (const auto& alias : alias_) {
os << "\n " << alias.first << " -> " << alias.second;
if (alias_.size() > 0) {
os << "Aliases:{";
for (const auto& alias : alias_) {
os << "\n " << alias.first << " -> " << alias.second;
}
os << "\n}\n";
}
os << "\n}\n";
return os;
}

Expand Down
4 changes: 4 additions & 0 deletions csrc/host_ir/executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ class HostIrEvaluator final : public OptOutDispatch {
return container_->outputs();
}

// Returns the raw pointer obtained from container_.get().
// NOTE(review): presumably a non-owning view with container_ retaining
// ownership — confirm against the member's declaration.
auto* container() const {
return container_.get();
}

// Prints by delegating to container_->print(os) and returns the stream
// reference produced by that call.
std::ostream& print(std::ostream& os) const {
return container_->print(os);
};
Expand Down
10 changes: 10 additions & 0 deletions csrc/host_ir/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// clang-format on
#include <device_lower/utils.h>
#include <host_ir/lower.h>
#include <host_ir/pass/stream_parallel_type.h>
#include <ir/all_nodes.h>
#include <ir/builder.h>
#include <ir/interface_nodes.h>
Expand Down Expand Up @@ -735,6 +736,13 @@ std::unique_ptr<hir::HostIrContainer> HostIrLower::lower(
hic->addOutput(ir_cloner.clone(output));
}

for (auto tv : hic->allTvs()) {
// Set all host tensors to the global memory type. This must be the case by
// definition of a host tensor, and it is also required to prevent Allocate
// HIR nodes from throwing.
tv->setMemoryType(MemoryType::Global);
}

std::vector<Expr*> new_top_level_exprs;
for (auto top_level_expr : hic->topLevelExprs()) {
if (!isResharding(top_level_expr)) {
Expand All @@ -761,6 +769,8 @@ std::unique_ptr<hir::HostIrContainer> HostIrLower::lower(
}
hic->resetTopLevelExprs(new_top_level_exprs);

preseg_passes::OptimizationPass<hir::StreamParallelType>::runPass(hic.get());

return hic;
}

Expand Down
Loading