From 4fdf1d157e30bfd36ae60d7be03cabba218cf3de Mon Sep 17 00:00:00 2001 From: Eric Lunderberg Date: Tue, 30 May 2023 09:14:35 -0500 Subject: [PATCH 1/6] [Bugfix][TIR][VTA] Update host-side target, even without device func This resolves an issue introduced by the combination of https://github.com/apache/tvm/pull/14918 and https://github.com/apache/tvm/pull/14945. The bug occurred for targets that do not require device-side codegen, but do require a `device_type` other than `kDLCPU`. It wasn't caught by CI, as the issue only occurred with the combination of both PRs. 1. #14918 updated `SplitHostDevice` to only modify the `"target"` attribute when a device-side function has been extracted. 2. For VTA, there is no device-side function, as everything is done through host-side API calls. 3. From (1) and (2), the VTA examples kept the target `T.target("ext_dev", host="llvm")` after the `SplitHostDevice` pass, instead of being updated to `T.target("llvm")`. 4. #14945 restricted CombineContextCall to only apply to host-side passes. 5. From (4) and (5), the `CombineContextCall` pass was no longer applied to the VTA context calls. This PR fixes `SplitHostDevice`, updating the target from `T.target("ext_dev", host="llvm")` to `T.target("llvm")`, even if no device sections have been extracted from the function. --- src/tir/transforms/split_host_device.cc | 10 +++++----- .../test_tir_transform_split_host_device.py | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/tir/transforms/split_host_device.cc b/src/tir/transforms/split_host_device.cc index 9270b356ba22..2de831e8ad0c 100644 --- a/src/tir/transforms/split_host_device.cc +++ b/src/tir/transforms/split_host_device.cc @@ -108,12 +108,12 @@ PrimFunc SplitHostDevice(PrimFunc func, IRModule* device_mod, const GlobalVar& g HostDeviceSplitter splitter(device_mod, name_prefix); - auto body = splitter(func->body); - - if (!body.same_as(func->body)) { + if (auto body = splitter(func->body); !body.same_as(func->body)) { func.CopyOnWrite()->body = body; - auto target_host = target->GetHost().value_or(Target("llvm")); - func = WithAttr(std::move(func), tvm::attr::kTarget, target_host); + } + + if (auto target_host = target->GetHost()) { + func = WithAttr(std::move(func), tvm::attr::kTarget, target_host.value()); } return func; diff --git a/tests/python/unittest/test_tir_transform_split_host_device.py b/tests/python/unittest/test_tir_transform_split_host_device.py index cf866ae005c8..1599b9a031a0 100644 --- a/tests/python/unittest/test_tir_transform_split_host_device.py +++ b/tests/python/unittest/test_tir_transform_split_host_device.py @@ -168,5 +168,21 @@ def main_kernel(n: T.int32): return mod +class TestSplitHostDevice(BaseCompare): + """Like TestSplitHostDevice, but no device regions to extract + + Even if there are no device regions, the host-side function should + still have its "target" attribute updated. + """ + + def before(): + T.func_attr({"target": T.target("ext_dev", host="llvm")}) + T.evaluate(0) + + def expected(): + T.func_attr({"target": T.target("llvm")}) + T.evaluate(0) + + if __name__ == "__main__": tvm.testing.main() From b44aa65bda9ccbc34896f1d0a238bd2997b3c304 Mon Sep 17 00:00:00 2001 From: Eric Lunderberg Date: Fri, 24 Mar 2023 14:58:52 -0500 Subject: [PATCH 2/6] [TIR] Restrict tir.transform.LowerTVMBuiltin to host functions Previously, the `tir.transform.LowerTVMBuiltin` pass applied to all functions in an `IRModule`, but was only applied to modules that contain only host functions. This commit updates `tir.transform.LowerTVMBuiltin` to apply only to host functions. --- src/tir/transforms/lower_tvm_builtin.cc | 8 +- .../test_tir_transform_lower_tvm_builtin.py | 97 ++++++++++++++++++- 2 files changed, 99 insertions(+), 6 deletions(-) diff --git a/src/tir/transforms/lower_tvm_builtin.cc b/src/tir/transforms/lower_tvm_builtin.cc index ea418635bc2a..837a3e6d3587 100644 --- a/src/tir/transforms/lower_tvm_builtin.cc +++ b/src/tir/transforms/lower_tvm_builtin.cc @@ -629,9 +629,11 @@ namespace transform { Pass LowerTVMBuiltin() { auto pass_func = [](PrimFunc f, IRModule m, PassContext ctx) { - auto* n = f.CopyOnWrite(); - n->body = BuiltinLower().Build(n->body); - VLOG(2) << "LowerTVMBuiltin: " << f; + if (IsHostFunc(f).value_or(false)) { + auto global_symbol = f->GetAttr(tvm::attr::kGlobalSymbol); + f.CopyOnWrite()->body = BuiltinLower().Build(f->body); + VLOG(2) << "LowerTVMBuiltin: " << f; + } return f; }; return CreatePrimFuncPass(pass_func, 0, "tir.LowerTVMBuiltin", {}); diff --git a/tests/python/unittest/test_tir_transform_lower_tvm_builtin.py b/tests/python/unittest/test_tir_transform_lower_tvm_builtin.py index d224a688d298..2e0784cc3126 100644 --- a/tests/python/unittest/test_tir_transform_lower_tvm_builtin.py +++ b/tests/python/unittest/test_tir_transform_lower_tvm_builtin.py @@ -56,7 +56,7 @@ def check_packed_func(target="llvm"): # Construct a valid IRModule to be lowered: mod = tvm.IRModule.from_expr(tvm.tir.PrimFunc([a_buffer, b_buffer, c_buffer], stmt)) - target = tvm.target.Target(target) + target = tvm.target.Target(target, host="llvm") mod = tvm.tir.transform.Apply(lambda f: f.with_attr("target", target))(mod) mod = tvm.tir.transform.Apply(lambda f: f.with_attr("global_symbol", "main"))(mod) mod = tvm.tir.transform.MakePackedAPI()(mod) @@ -189,6 +189,97 @@ def variance4(rxplaceholder: T.Buffer((T.int64(1), T.int64(32), T.int64(25690112 tvm.build(func, target="llvm") # should not crash +class TestLowerDeviceAllocate(tvm.testing.CompareBeforeAfter): + """Device allocations are lowered to TVMBackend* calls + + This test validates the current behavior of LowerTVMBuiltin. This + unit test may be improved in the future by addressing: + + - The AttrStmt for "storage_alignment" occurs outside the LetStmt + that defines the pointer, which is currently required by + CodeGenLLVM. This fails to match when `map_free_vars=False` + (default), because the first occurrence is undefined. + + - The call to TVMBackendFreeWorkspace uses the allocated pointer, + but occurs outside the LetStmt. + + - TVMScript always produces "handle" dtype for + `T.tvm_throw_last_error`, while LowerTVMBuiltin outputs "int32" + dtype. + """ + + transform = tvm.tir.transform.LowerTVMBuiltin() + + def before(): + T.func_attr({"target": T.target("llvm")}) + T.attr("dummy", "device_type", 2) # kDLCuda + T.attr("dummy", "device_id", 0) + ptr = T.allocate([16], "float32") + buf = T.decl_buffer(16, "float32", data=ptr) + buf[0] = 0.0 + + def expected(): + T.func_attr({"target": T.target("llvm")}) + ptr = T.handle("float32", "global") + T.attr(ptr, "storage_alignment", 64) + with T.LetStmt(T.TVMBackendAllocWorkspace(2, 0, T.uint64(64), 2, 32), var=ptr): + if T.isnullptr(ptr): + T.Call("int32", "tir.tvm_throw_last_error", []) + buf = T.decl_buffer((16,), data=ptr) + buf[0] = T.float32(0) + if T.TVMBackendFreeWorkspace(2, 0, ptr) != 0: + T.Call("int32", "tir.tvm_throw_last_error", []) + + def test_compare(self, before, expected, transform): + after = transform(before) + tvm.ir.assert_structural_equal(after, expected, map_free_vars=True) + + +class TestLowerCPUAllocation(tvm.testing.CompareBeforeAfter): + """CPU allocations can be handled at codegen time""" + + transform = tvm.tir.transform.LowerTVMBuiltin() + + def before(): + T.func_attr({"target": T.target("llvm")}) + T.attr("dummy", "device_type", 1) # kDLCPU + T.attr("dummy", "device_id", 0) + ptr = T.allocate([16], "float32") + buf = T.decl_buffer(16, "float32", data=ptr) + buf[0] = 0.0 + + def expected(): + T.func_attr({"target": T.target("llvm")}) + ptr = T.allocate([16], "float32") + buf = T.decl_buffer(16, "float32", data=ptr) + buf[0] = 0.0 + + +class TestLowerAllocateRequiresDeviceID(tvm.testing.CompareBeforeAfter): + transform = tvm.tir.transform.LowerTVMBuiltin() + + def before(): + T.func_attr({"target": T.target("llvm")}) + T.attr("dummy", "device_id", 0) + ptr = T.allocate([16], "float32") + buf = T.decl_buffer(16, "float32", data=ptr) + buf[0] = 0.0 + + expected = tvm.TVMError + + +class TestLowerAllocateRequiresDeviceType(tvm.testing.CompareBeforeAfter): + transform = tvm.tir.transform.LowerTVMBuiltin() + + def before(): + T.func_attr({"target": T.target("llvm")}) + T.attr("dummy", "device_id", 0) + ptr = T.allocate([16], "float32") + buf = T.decl_buffer(16, "float32", data=ptr) + buf[0] = 0.0 + + expected = tvm.TVMError + + if __name__ == "__main__": - test_call_packed_return_non_i32() - test_lower_packed_func() + tvm.testing.main() From e249d351b5f5e81763c1b6ac8a67f32997be35dc Mon Sep 17 00:00:00 2001 From: Eric Lunderberg Date: Thu, 25 May 2023 07:47:24 -0500 Subject: [PATCH 3/6] Updated "stackvm" target to have "cpu" key. With the presence/absence of the "cpu" key in a target used to determine whether host-only calls should be run, should make sure to add it to "stackvm". --- src/target/target_kind.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc index 3c4e885ef9b5..7ca8ddc20cdb 100644 --- a/src/target/target_kind.cc +++ b/src/target/target_kind.cc @@ -424,7 +424,8 @@ TVM_REGISTER_TARGET_KIND("hexagon", kDLHexagon) .add_attr_option("vtcm-capacity") .set_default_keys({"hexagon"}); -TVM_REGISTER_TARGET_KIND("stackvm", kDLCPU); +TVM_REGISTER_TARGET_KIND("stackvm", kDLCPU) // line break + .set_default_keys({"cpu"}); TVM_REGISTER_TARGET_KIND("ext_dev", kDLExtDev); From 3310a956d1b359fb8f3488d738f078d143173091 Mon Sep 17 00:00:00 2001 From: Eric Lunderberg Date: Fri, 26 May 2023 06:33:51 -0500 Subject: [PATCH 4/6] Update IsHostFunc() to use "host" tag instead of "cpu" Current CI failures due to LowerTVMBuiltin not running on "hexagon" target, and would like to avoid conflating cpu/host. --- src/target/target_kind.cc | 8 ++++---- src/tir/transforms/ir_utils.cc | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc index 7ca8ddc20cdb..c88eb356142f 100644 --- a/src/target/target_kind.cc +++ b/src/target/target_kind.cc @@ -275,7 +275,7 @@ TVM_REGISTER_TARGET_KIND("llvm", kDLCPU) .add_attr_option("opt-level") // LLVM command line flags, see below .add_attr_option>("cl-opt") - .set_default_keys({"cpu"}) + .set_default_keys({"cpu", "host"}) // Force the external codegen kind attribute to be registered, even if no external // codegen targets are enabled by the TVM build. .set_attr(tvm::attr::kIsExternalCodegen, Bool(false)) @@ -308,7 +308,7 @@ TVM_REGISTER_TARGET_KIND("c", kDLCPU) .add_attr_option("march") .add_attr_option("workspace-byte-alignment") .add_attr_option("constants-byte-alignment") - .set_default_keys({"cpu"}) + .set_default_keys({"cpu", "host"}) .set_target_parser(tvm::target::parsers::cpu::ParseTarget); TVM_REGISTER_TARGET_KIND("cuda", kDLCUDA) @@ -422,10 +422,10 @@ TVM_REGISTER_TARGET_KIND("hexagon", kDLHexagon) .add_attr_option>("llvm-options") .add_attr_option("num-cores") .add_attr_option("vtcm-capacity") - .set_default_keys({"hexagon"}); + .set_default_keys({"hexagon", "host"}); TVM_REGISTER_TARGET_KIND("stackvm", kDLCPU) // line break - .set_default_keys({"cpu"}); + .set_default_keys({"host"}); TVM_REGISTER_TARGET_KIND("ext_dev", kDLExtDev); diff --git a/src/tir/transforms/ir_utils.cc b/src/tir/transforms/ir_utils.cc index 604dbed325ec..ef340623312a 100644 --- a/src/tir/transforms/ir_utils.cc +++ b/src/tir/transforms/ir_utils.cc @@ -696,7 +696,7 @@ std::optional IsHostFunc(const PrimFunc& func) { if (func->HasNonzeroAttr(tvm::tir::attr::kIsHostFunc)) { return true; } else if (auto target = func->GetAttr(tvm::attr::kTarget)) { - return target.value()->HasKey("cpu"); + return target.value()->HasKey("host"); } else { return std::nullopt; } From c9611338de31525bb4206676b6b4687df7c28bee Mon Sep 17 00:00:00 2001 From: Eric Lunderberg Date: Tue, 30 May 2023 10:57:28 -0500 Subject: [PATCH 5/6] Avoid "host" tag for now --- src/target/target_kind.cc | 8 ++++---- src/tir/transforms/ir_utils.cc | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc index c88eb356142f..44dee859d017 100644 --- a/src/target/target_kind.cc +++ b/src/target/target_kind.cc @@ -275,7 +275,7 @@ TVM_REGISTER_TARGET_KIND("llvm", kDLCPU) .add_attr_option("opt-level") // LLVM command line flags, see below .add_attr_option>("cl-opt") - .set_default_keys({"cpu", "host"}) + .set_default_keys({"cpu"}) // Force the external codegen kind attribute to be registered, even if no external // codegen targets are enabled by the TVM build. .set_attr(tvm::attr::kIsExternalCodegen, Bool(false)) @@ -308,7 +308,7 @@ TVM_REGISTER_TARGET_KIND("c", kDLCPU) .add_attr_option("march") .add_attr_option("workspace-byte-alignment") .add_attr_option("constants-byte-alignment") - .set_default_keys({"cpu", "host"}) + .set_default_keys({"cpu"}) .set_target_parser(tvm::target::parsers::cpu::ParseTarget); TVM_REGISTER_TARGET_KIND("cuda", kDLCUDA) @@ -422,10 +422,10 @@ TVM_REGISTER_TARGET_KIND("hexagon", kDLHexagon) .add_attr_option>("llvm-options") .add_attr_option("num-cores") .add_attr_option("vtcm-capacity") - .set_default_keys({"hexagon", "host"}); + .set_default_keys({"hexagon", "cpu"}); TVM_REGISTER_TARGET_KIND("stackvm", kDLCPU) // line break - .set_default_keys({"host"}); + .set_default_keys({"cpu"}); TVM_REGISTER_TARGET_KIND("ext_dev", kDLExtDev); diff --git a/src/tir/transforms/ir_utils.cc b/src/tir/transforms/ir_utils.cc index ef340623312a..604dbed325ec 100644 --- a/src/tir/transforms/ir_utils.cc +++ b/src/tir/transforms/ir_utils.cc @@ -696,7 +696,7 @@ std::optional IsHostFunc(const PrimFunc& func) { if (func->HasNonzeroAttr(tvm::tir::attr::kIsHostFunc)) { return true; } else if (auto target = func->GetAttr(tvm::attr::kTarget)) { - return target.value()->HasKey("host"); + return target.value()->HasKey("cpu"); } else { return std::nullopt; } From f001d9d933e55ab5cbe885d6d6ac859f401bcd67 Mon Sep 17 00:00:00 2001 From: Eric Lunderberg Date: Fri, 2 Jun 2023 11:39:26 -0500 Subject: [PATCH 6/6] Update HEXAGON_AOT_LLVM_TARGET to be recognized as host --- python/tvm/contrib/hexagon/pytest_plugin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/contrib/hexagon/pytest_plugin.py b/python/tvm/contrib/hexagon/pytest_plugin.py index 585a6cc3c5bb..bfbf398ec554 100644 --- a/python/tvm/contrib/hexagon/pytest_plugin.py +++ b/python/tvm/contrib/hexagon/pytest_plugin.py @@ -40,7 +40,7 @@ RNG_SEEDED = False HEXAGON_AOT_LLVM_TARGET = ( - "llvm -keys=hexagon " + "llvm -keys=hexagon,cpu " "-mattr=+hvxv68,+hvx-length128b,+hvx-qfloat,-hvx-ieee-fp " "-mcpu=hexagonv68 -mtriple=hexagon" )