Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/tvm/contrib/hexagon/pytest_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
RNG_SEEDED = False

HEXAGON_AOT_LLVM_TARGET = (
"llvm -keys=hexagon "
"llvm -keys=hexagon,cpu "
"-mattr=+hvxv68,+hvx-length128b,+hvx-qfloat,-hvx-ieee-fp "
"-mcpu=hexagonv68 -mtriple=hexagon"
)
Expand Down
5 changes: 3 additions & 2 deletions src/target/target_kind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -422,9 +422,10 @@ TVM_REGISTER_TARGET_KIND("hexagon", kDLHexagon)
.add_attr_option<Array<String>>("llvm-options")
.add_attr_option<Integer>("num-cores")
.add_attr_option<Integer>("vtcm-capacity")
.set_default_keys({"hexagon"});
.set_default_keys({"hexagon", "cpu"});

TVM_REGISTER_TARGET_KIND("stackvm", kDLCPU);
TVM_REGISTER_TARGET_KIND("stackvm", kDLCPU) // line break
.set_default_keys({"cpu"});

TVM_REGISTER_TARGET_KIND("ext_dev", kDLExtDev);

Expand Down
8 changes: 5 additions & 3 deletions src/tir/transforms/lower_tvm_builtin.cc
Original file line number Diff line number Diff line change
Expand Up @@ -629,9 +629,11 @@ namespace transform {

Pass LowerTVMBuiltin() {
auto pass_func = [](PrimFunc f, IRModule m, PassContext ctx) {
auto* n = f.CopyOnWrite();
n->body = BuiltinLower().Build(n->body);
VLOG(2) << "LowerTVMBuiltin: " << f;
if (IsHostFunc(f).value_or(false)) {
auto global_symbol = f->GetAttr<String>(tvm::attr::kGlobalSymbol);
f.CopyOnWrite()->body = BuiltinLower().Build(f->body);
VLOG(2) << "LowerTVMBuiltin: " << f;
}
return f;
};
return CreatePrimFuncPass(pass_func, 0, "tir.LowerTVMBuiltin", {});
Expand Down
10 changes: 5 additions & 5 deletions src/tir/transforms/split_host_device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,12 @@ PrimFunc SplitHostDevice(PrimFunc func, IRModule* device_mod, const GlobalVar& g

HostDeviceSplitter splitter(device_mod, name_prefix);

auto body = splitter(func->body);

if (!body.same_as(func->body)) {
if (auto body = splitter(func->body); !body.same_as(func->body)) {
func.CopyOnWrite()->body = body;
auto target_host = target->GetHost().value_or(Target("llvm"));
func = WithAttr(std::move(func), tvm::attr::kTarget, target_host);
}

if (auto target_host = target->GetHost()) {
func = WithAttr(std::move(func), tvm::attr::kTarget, target_host.value());
}

return func;
Expand Down
97 changes: 94 additions & 3 deletions tests/python/unittest/test_tir_transform_lower_tvm_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def check_packed_func(target="llvm"):
# Construct a valid IRModule to be lowered:
mod = tvm.IRModule.from_expr(tvm.tir.PrimFunc([a_buffer, b_buffer, c_buffer], stmt))

target = tvm.target.Target(target)
target = tvm.target.Target(target, host="llvm")
mod = tvm.tir.transform.Apply(lambda f: f.with_attr("target", target))(mod)
mod = tvm.tir.transform.Apply(lambda f: f.with_attr("global_symbol", "main"))(mod)
mod = tvm.tir.transform.MakePackedAPI()(mod)
Expand Down Expand Up @@ -189,6 +189,97 @@ def variance4(rxplaceholder: T.Buffer((T.int64(1), T.int64(32), T.int64(25690112
tvm.build(func, target="llvm") # should not crash


class TestLowerDeviceAllocate(tvm.testing.CompareBeforeAfter):
"""Device allocations are lowered to TVMBackend* calls

This test validates the current behavior of LowerTVMBuiltin. This
unit test may be improved in the future by addressing:

- The AttrStmt for "storage_alignment" occurs outside the LetStmt
that defines the pointer, which is currently required by
CodeGenLLVM. This fails to match when `map_free_vars=False`
(default), because the first occurrence is undefined.

- The call to TVMBackendFreeWorkspace uses the allocated pointer,
but occurs outside the LetStmt.

- TVMScript always produces "handle" dtype for
`T.tvm_throw_last_error`, while LowerTVMBuiltin outputs "int32"
dtype.
"""

transform = tvm.tir.transform.LowerTVMBuiltin()

def before():
T.func_attr({"target": T.target("llvm")})
T.attr("dummy", "device_type", 2) # kDLCuda
T.attr("dummy", "device_id", 0)
ptr = T.allocate([16], "float32")
buf = T.decl_buffer(16, "float32", data=ptr)
buf[0] = 0.0

def expected():
T.func_attr({"target": T.target("llvm")})
ptr = T.handle("float32", "global")
T.attr(ptr, "storage_alignment", 64)
with T.LetStmt(T.TVMBackendAllocWorkspace(2, 0, T.uint64(64), 2, 32), var=ptr):
if T.isnullptr(ptr):
T.Call("int32", "tir.tvm_throw_last_error", [])
buf = T.decl_buffer((16,), data=ptr)
buf[0] = T.float32(0)
if T.TVMBackendFreeWorkspace(2, 0, ptr) != 0:
T.Call("int32", "tir.tvm_throw_last_error", [])

def test_compare(self, before, expected, transform):
after = transform(before)
tvm.ir.assert_structural_equal(after, expected, map_free_vars=True)


class TestLowerCPUAllocation(tvm.testing.CompareBeforeAfter):
"""CPU allocations can be handled at codegen time"""

transform = tvm.tir.transform.LowerTVMBuiltin()

def before():
T.func_attr({"target": T.target("llvm")})
T.attr("dummy", "device_type", 1) # kDLCPU
T.attr("dummy", "device_id", 0)
ptr = T.allocate([16], "float32")
buf = T.decl_buffer(16, "float32", data=ptr)
buf[0] = 0.0

def expected():
T.func_attr({"target": T.target("llvm")})
ptr = T.allocate([16], "float32")
buf = T.decl_buffer(16, "float32", data=ptr)
buf[0] = 0.0


class TestLowerAllocateRequiresDeviceID(tvm.testing.CompareBeforeAfter):
transform = tvm.tir.transform.LowerTVMBuiltin()

def before():
T.func_attr({"target": T.target("llvm")})
T.attr("dummy", "device_id", 0)
ptr = T.allocate([16], "float32")
buf = T.decl_buffer(16, "float32", data=ptr)
buf[0] = 0.0

expected = tvm.TVMError


class TestLowerAllocateRequiresDeviceType(tvm.testing.CompareBeforeAfter):
transform = tvm.tir.transform.LowerTVMBuiltin()

def before():
T.func_attr({"target": T.target("llvm")})
T.attr("dummy", "device_id", 0)
ptr = T.allocate([16], "float32")
buf = T.decl_buffer(16, "float32", data=ptr)
buf[0] = 0.0

expected = tvm.TVMError


if __name__ == "__main__":
test_call_packed_return_non_i32()
test_lower_packed_func()
tvm.testing.main()
16 changes: 16 additions & 0 deletions tests/python/unittest/test_tir_transform_split_host_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,5 +168,21 @@ def main_kernel(n: T.int32):
return mod


class TestSplitHostDevice(BaseCompare):
"""Like TestSplitHostDevice, but no device regions to extract

Even if there are no device regions, the host-side function should
still have its "target" attribute updated.
"""

def before():
T.func_attr({"target": T.target("ext_dev", host="llvm")})
T.evaluate(0)

def expected():
T.func_attr({"target": T.target("llvm")})
T.evaluate(0)


if __name__ == "__main__":
tvm.testing.main()