diff --git a/python/tvm/contrib/hexagon/pytest_plugin.py b/python/tvm/contrib/hexagon/pytest_plugin.py index 585a6cc3c5bb..bfbf398ec554 100644 --- a/python/tvm/contrib/hexagon/pytest_plugin.py +++ b/python/tvm/contrib/hexagon/pytest_plugin.py @@ -40,7 +40,7 @@ RNG_SEEDED = False HEXAGON_AOT_LLVM_TARGET = ( - "llvm -keys=hexagon " + "llvm -keys=hexagon,cpu " "-mattr=+hvxv68,+hvx-length128b,+hvx-qfloat,-hvx-ieee-fp " "-mcpu=hexagonv68 -mtriple=hexagon" ) diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc index 3c4e885ef9b5..44dee859d017 100644 --- a/src/target/target_kind.cc +++ b/src/target/target_kind.cc @@ -422,9 +422,10 @@ TVM_REGISTER_TARGET_KIND("hexagon", kDLHexagon) .add_attr_option>("llvm-options") .add_attr_option("num-cores") .add_attr_option("vtcm-capacity") - .set_default_keys({"hexagon"}); + .set_default_keys({"hexagon", "cpu"}); -TVM_REGISTER_TARGET_KIND("stackvm", kDLCPU); +TVM_REGISTER_TARGET_KIND("stackvm", kDLCPU) // line break + .set_default_keys({"cpu"}); TVM_REGISTER_TARGET_KIND("ext_dev", kDLExtDev); diff --git a/src/tir/transforms/lower_tvm_builtin.cc b/src/tir/transforms/lower_tvm_builtin.cc index ea418635bc2a..837a3e6d3587 100644 --- a/src/tir/transforms/lower_tvm_builtin.cc +++ b/src/tir/transforms/lower_tvm_builtin.cc @@ -629,9 +629,11 @@ namespace transform { Pass LowerTVMBuiltin() { auto pass_func = [](PrimFunc f, IRModule m, PassContext ctx) { - auto* n = f.CopyOnWrite(); - n->body = BuiltinLower().Build(n->body); - VLOG(2) << "LowerTVMBuiltin: " << f; + if (IsHostFunc(f).value_or(false)) { + auto global_symbol = f->GetAttr(tvm::attr::kGlobalSymbol); + f.CopyOnWrite()->body = BuiltinLower().Build(f->body); + VLOG(2) << "LowerTVMBuiltin: " << f; + } return f; }; return CreatePrimFuncPass(pass_func, 0, "tir.LowerTVMBuiltin", {}); diff --git a/src/tir/transforms/split_host_device.cc b/src/tir/transforms/split_host_device.cc index 9270b356ba22..2de831e8ad0c 100644 --- a/src/tir/transforms/split_host_device.cc +++ b/src/tir/transforms/split_host_device.cc @@ -108,12 +108,12 @@ PrimFunc SplitHostDevice(PrimFunc func, IRModule* device_mod, const GlobalVar& g HostDeviceSplitter splitter(device_mod, name_prefix); - auto body = splitter(func->body); - - if (!body.same_as(func->body)) { + if (auto body = splitter(func->body); !body.same_as(func->body)) { func.CopyOnWrite()->body = body; - auto target_host = target->GetHost().value_or(Target("llvm")); - func = WithAttr(std::move(func), tvm::attr::kTarget, target_host); + } + + if (auto target_host = target->GetHost()) { + func = WithAttr(std::move(func), tvm::attr::kTarget, target_host.value()); } return func; diff --git a/tests/python/unittest/test_tir_transform_lower_tvm_builtin.py b/tests/python/unittest/test_tir_transform_lower_tvm_builtin.py index d224a688d298..2e0784cc3126 100644 --- a/tests/python/unittest/test_tir_transform_lower_tvm_builtin.py +++ b/tests/python/unittest/test_tir_transform_lower_tvm_builtin.py @@ -56,7 +56,7 @@ def check_packed_func(target="llvm"): # Construct a valid IRModule to be lowered: mod = tvm.IRModule.from_expr(tvm.tir.PrimFunc([a_buffer, b_buffer, c_buffer], stmt)) - target = tvm.target.Target(target) + target = tvm.target.Target(target, host="llvm") mod = tvm.tir.transform.Apply(lambda f: f.with_attr("target", target))(mod) mod = tvm.tir.transform.Apply(lambda f: f.with_attr("global_symbol", "main"))(mod) mod = tvm.tir.transform.MakePackedAPI()(mod) @@ -189,6 +189,97 @@ def variance4(rxplaceholder: T.Buffer((T.int64(1), T.int64(32), T.int64(25690112 tvm.build(func, target="llvm") # should not crash +class TestLowerDeviceAllocate(tvm.testing.CompareBeforeAfter): + """Device allocations are lowered to TVMBackend* calls + + This test validates the current behavior of LowerTVMBuiltin. This + unit test may be improved in the future by addressing: + + - The AttrStmt for "storage_alignment" occurs outside the LetStmt + that defines the pointer, which is currently required by + CodeGenLLVM. This fails to match when `map_free_vars=False` + (default), because the first occurrence is undefined. + + - The call to TVMBackendFreeWorkspace uses the allocated pointer, + but occurs outside the LetStmt. + + - TVMScript always produces "handle" dtype for + `T.tvm_throw_last_error`, while LowerTVMBuiltin outputs "int32" + dtype. + """ + + transform = tvm.tir.transform.LowerTVMBuiltin() + + def before(): + T.func_attr({"target": T.target("llvm")}) + T.attr("dummy", "device_type", 2) # kDLCuda + T.attr("dummy", "device_id", 0) + ptr = T.allocate([16], "float32") + buf = T.decl_buffer(16, "float32", data=ptr) + buf[0] = 0.0 + + def expected(): + T.func_attr({"target": T.target("llvm")}) + ptr = T.handle("float32", "global") + T.attr(ptr, "storage_alignment", 64) + with T.LetStmt(T.TVMBackendAllocWorkspace(2, 0, T.uint64(64), 2, 32), var=ptr): + if T.isnullptr(ptr): + T.Call("int32", "tir.tvm_throw_last_error", []) + buf = T.decl_buffer((16,), data=ptr) + buf[0] = T.float32(0) + if T.TVMBackendFreeWorkspace(2, 0, ptr) != 0: + T.Call("int32", "tir.tvm_throw_last_error", []) + + def test_compare(self, before, expected, transform): + after = transform(before) + tvm.ir.assert_structural_equal(after, expected, map_free_vars=True) + + +class TestLowerCPUAllocation(tvm.testing.CompareBeforeAfter): + """CPU allocations can be handled at codegen time""" + + transform = tvm.tir.transform.LowerTVMBuiltin() + + def before(): + T.func_attr({"target": T.target("llvm")}) + T.attr("dummy", "device_type", 1) # kDLCPU + T.attr("dummy", "device_id", 0) + ptr = T.allocate([16], "float32") + buf = T.decl_buffer(16, "float32", data=ptr) + buf[0] = 0.0 + + def expected(): + T.func_attr({"target": T.target("llvm")}) + ptr = T.allocate([16], "float32") + buf = T.decl_buffer(16, "float32", data=ptr) + buf[0] = 0.0 + + +class TestLowerAllocateRequiresDeviceID(tvm.testing.CompareBeforeAfter): + transform = tvm.tir.transform.LowerTVMBuiltin() + + def before(): + T.func_attr({"target": T.target("llvm")}) + T.attr("dummy", "device_id", 0) + ptr = T.allocate([16], "float32") + buf = T.decl_buffer(16, "float32", data=ptr) + buf[0] = 0.0 + + expected = tvm.TVMError + + +class TestLowerAllocateRequiresDeviceType(tvm.testing.CompareBeforeAfter): + transform = tvm.tir.transform.LowerTVMBuiltin() + + def before(): + T.func_attr({"target": T.target("llvm")}) + T.attr("dummy", "device_id", 0) + ptr = T.allocate([16], "float32") + buf = T.decl_buffer(16, "float32", data=ptr) + buf[0] = 0.0 + + expected = tvm.TVMError + + if __name__ == "__main__": - test_call_packed_return_non_i32() - test_lower_packed_func() + tvm.testing.main() diff --git a/tests/python/unittest/test_tir_transform_split_host_device.py b/tests/python/unittest/test_tir_transform_split_host_device.py index cf866ae005c8..1599b9a031a0 100644 --- a/tests/python/unittest/test_tir_transform_split_host_device.py +++ b/tests/python/unittest/test_tir_transform_split_host_device.py @@ -168,5 +168,21 @@ def main_kernel(n: T.int32): return mod +class TestSplitHostDevice(BaseCompare): + """Like TestSplitHostDevice, but no device regions to extract + + Even if there are no device regions, the host-side function should + still have its "target" attribute updated. + """ + + def before(): + T.func_attr({"target": T.target("ext_dev", host="llvm")}) + T.evaluate(0) + + def expected(): + T.func_attr({"target": T.target("llvm")}) + T.evaluate(0) + + if __name__ == "__main__": tvm.testing.main()