diff --git a/include/tvm/tir/transform.h b/include/tvm/tir/transform.h
index b1957d2a9cb8..2113d58f1ffa 100644
--- a/include/tvm/tir/transform.h
+++ b/include/tvm/tir/transform.h
@@ -212,6 +212,17 @@ TVM_DLL Pass InstrumentBoundCheckers();
  */
 TVM_DLL Pass MakePackedAPI(int num_unpacked_args);
 
+/*!
+ * \brief Transform the high-level PrimFunc to a C signature that can be used
+ * to call the operator directly.
+ *
+ * The main task of this function is to create code that maps the values in
+ * api_args to the Vars required by the body.
+ *
+ * \return The pass.
+ */
+TVM_DLL Pass MakeUnpackedAPI();
+
 /*!
  * \brief Remap the thread axis
  *
diff --git a/python/tvm/tir/transform/transform.py b/python/tvm/tir/transform/transform.py
index be55b48da71e..26b22f99c215 100644
--- a/python/tvm/tir/transform/transform.py
+++ b/python/tvm/tir/transform/transform.py
@@ -347,6 +347,17 @@ def MakePackedAPI(num_unpacked_params=0):
     return _ffi_api.MakePackedAPI(num_unpacked_params)
 
 
+def MakeUnpackedAPI():
+    """Transform the PrimFuncs in the module to a C API compatible with internal calls.
+
+    Returns
+    -------
+    fpass : tvm.transform.Pass
+        The result pass
+    """
+    return _ffi_api.MakeUnpackedAPI()
+
+
 def SplitHostDevice():
     """Split the function into a host function and device functions.
 
diff --git a/src/driver/driver_api.cc b/src/driver/driver_api.cc
index f30cecbf7f05..7c304727080e 100644
--- a/src/driver/driver_api.cc
+++ b/src/driver/driver_api.cc
@@ -200,8 +200,15 @@ std::pair<IRModule, IRModule> SplitDevHostFuncs(IRModule mod_mixed, const Target
   mixed_pass_list.push_back(tir::transform::ThreadSync("warp"));
   mixed_pass_list.push_back(tir::transform::InferFragment());
   mixed_pass_list.push_back(tir::transform::LowerThreadAllreduce());
-  mixed_pass_list.push_back(tir::transform::MakePackedAPI(0));
+
+  if (target->GetAttr<Bool>("unpacked-api").value_or(Bool(false))) {
+    mixed_pass_list.push_back(tir::transform::MakeUnpackedAPI());
+  } else {
+    mixed_pass_list.push_back(tir::transform::MakePackedAPI(0));
+  }
+
   mixed_pass_list.push_back(tir::transform::SplitHostDevice());
+
   auto opt_mixed = transform::Sequential(mixed_pass_list);
   mod_mixed = opt_mixed(std::move(mod_mixed));
diff --git a/src/relay/backend/aot_executor_codegen.cc b/src/relay/backend/aot_executor_codegen.cc
index a005247d424a..66294d1dd076 100644
--- a/src/relay/backend/aot_executor_codegen.cc
+++ b/src/relay/backend/aot_executor_codegen.cc
@@ -137,10 +137,17 @@ class AOTExecutorCodegen : public ExprVisitor {
       // Pack the sid inside the TVMValue
       auto sid_array = te::Var(MakeString("sid_", sid, "_value"), DataType::Handle());
       auto sid_value = sids_table_[sid];
-      tvm::PrimExpr set_tensor =
-          tvm::tir::Call(DataType::Handle(), tvm::tir::builtin::tvm_struct_set(),
-                         {sid_array, 0, tir::builtin::kArrData, sid_value});
-      stmts_.push_back(tir::LetStmt(sid_array, StackAlloca("array", 1), tir::Evaluate(set_tensor)));
+
+      if (!use_unpacked_api_) {
+        tvm::PrimExpr set_tensor =
+            tvm::tir::Call(DataType::Handle(), tvm::tir::builtin::tvm_struct_set(),
+                           {sid_array, 0, tir::builtin::kArrData, sid_value});
+        stmts_.push_back(
+            tir::LetStmt(sid_array, StackAlloca("array", 1), tir::Evaluate(set_tensor)));
+      } else {
+        stmts_.push_back(tir::LetStmt(sid_array, sid_value, tir::Evaluate(0)));
+      }
+
       sid_vars.push_back(sid_array);
     }
     return sid_vars;
@@ -161,16 +168,16 @@ class AOTExecutorCodegen : public ExprVisitor {
     auto param_handle = tvm::tir::Call(DataType::Handle(), tvm::tir::builtin::lookup_param(),
                                        {tir::StringImm(params_by_expr_[expr])});
-    tvm::PrimExpr set_param_array =
-        tvm::tir::Call(DataType::Handle(), tvm::tir::builtin::tvm_struct_set(),
-                       {param_array, 0, tir::builtin::kArrData, param_handle});
-    lookup_call.push_back(tir::Evaluate(set_param_array));
-
-    tir::Stmt lookup_body = tir::SeqStmt(lookup_call);
+    if (!use_unpacked_api_) {
+      tvm::PrimExpr set_param_array =
+          tvm::tir::Call(DataType::Handle(), tvm::tir::builtin::tvm_struct_set(),
+                         {param_array, 0, tir::builtin::kArrData, param_handle});
+      stmts_.push_back(
+          tir::LetStmt(param_array, StackAlloca("arg_value", 1), tir::Evaluate(set_param_array)));
+    } else {
+      stmts_.push_back(tir::LetStmt(param_array, param_handle, tir::Evaluate(0)));
+    }
 
-    // Allocate the DLTensors on the stack
-    lookup_body = tir::LetStmt(param_array, StackAlloca("arg_value", 1), lookup_body);
-    stmts_.push_back(lookup_body);
     return param_array;
   }
 
@@ -206,15 +213,20 @@ class AOTExecutorCodegen : public ExprVisitor {
     }
 
     auto ret_expr = Downcast<Expr>(call);
-    // Pack the return(s) value. A call node can produce multiple outputs
     for (const auto& var : PackSid(ret_expr)) {
       args.push_back(var);
     }
 
-    // Use tvm_call_packed to execute the function
-    create_func_call_stmts.push_back(tir::Evaluate(
-        tvm::tir::Call(DataType::Int(32), tvm::tir::builtin::tvm_call_cpacked(), args)));
+    // Use tvm_call_cpacked to execute the function unless we're calling directly
+    auto calling_pattern = tvm::tir::builtin::tvm_call_cpacked();
+    if (use_unpacked_api_) {
+      calling_pattern = tvm::tir::builtin::call_extern();
+    }
+
+    create_func_call_stmts.push_back(
+        tir::Evaluate(tvm::tir::Call(DataType::Int(32), calling_pattern, args)));
+
     tir::Stmt body = tir::SeqStmt(create_func_call_stmts);
     stmts_.push_back(body);
   }
@@ -226,16 +238,20 @@ class AOTExecutorCodegen : public ExprVisitor {
    * copy-on-write fashion.
    */
   void CopyToOutput(te::Var out, te::Var in, size_t size) {
-    auto retval_get = tvm::tir::Call(DataType::Handle(), tvm::tir::builtin::tvm_struct_get(),
-                                     {in, 0, tir::builtin::kArrData});
-
     // Define intermediate DLTensor to load/store the data
     auto tmp0 = te::Var("tmp0", DataType::Handle());
     auto tmp1 = te::Var("tmp1", DataType::Handle());
     te::Var loop_idx("i", DataType::Int(32));
     auto retval_i = tir::Load(DataType::UInt(8), tmp0, loop_idx, tir::const_true());
-    auto tostore = tvm::tir::Call(DataType::Handle(), tvm::tir::builtin::tvm_struct_get(),
-                                  {out, 0, tir::builtin::kArrData});
+
+    PrimExpr retval_get = tvm::tir::Call(DataType::Handle(), tvm::tir::builtin::tvm_struct_get(),
+                                         {in, 0, tir::builtin::kArrData});
+    PrimExpr tostore = tvm::tir::Call(DataType::Handle(), tvm::tir::builtin::tvm_struct_get(),
+                                      {out, 0, tir::builtin::kArrData});
+    if (use_unpacked_api_) {
+      retval_get = in;
+      tostore = out;
+    }
 
     // Copy the variable from the input to the output
     tir::Stmt copy = tir::For(
@@ -535,6 +551,15 @@ class AOTExecutorCodegen : public ExprVisitor {
   TargetsMap targets_;
   /*! \brief target host */
   Target target_host_;
+  /*!
+   * \brief unpacked api toggle
+   * When set to true, the generated code will use unpacked calls to functions:
+   * func(void* arg0, void* arg1)
+   * rather than packed calls:
+   * func(void* args)
+   * Defaults to the packed calling convention.
+   */
+  Bool use_unpacked_api_;
 
   /*!
    * \brief parameters (i.e. ConstantNodes found in the graph).
@@ -564,21 +589,20 @@ class AOTExecutorCodegen : public ExprVisitor {
 
  public:
   AOTExecutorCodegen(runtime::Module* mod, const TargetsMap& targets, Target target_host)
-      : mod_(mod), return_sid_() {
-    compile_engine_ = CompileEngine::Global();
-    targets_ = targets;
-    target_host_ = target_host;
-  }
+      : mod_(mod),
+        targets_(targets),
+        target_host_(target_host),
+        use_unpacked_api_(target_host->GetAttr<Bool>("unpacked-api").value_or(Bool(false))),
+        compile_engine_(CompileEngine::Global()) {}
 
   LoweredOutput Codegen(relay::Function func) {
     // Get the module, storage map and token sizes
     auto pf = GetPackedFunc("relay.backend.GraphPlanMemory");
     storage_device_map_ = (*pf)(func);
-    int input_index = 0;
     for (auto input : func->params) {
       input_vars_.push_back(input);
-      main_signature_.push_back(tir::Var(MakeString("input_", input_index), DataType::Handle()));
+      main_signature_.push_back(tir::Var("input", DataType::Handle()));
     }
 
     // Define the storage allocator ids
@@ -592,7 +616,7 @@ class AOTExecutorCodegen : public ExprVisitor {
     // Find the return sid
     return_sid_ = AotReturnSidVisitor(storage_device_map_).FindReturnSid(func);
     for (unsigned int output_index = 0; output_index < return_sid_.size(); output_index++) {
-      main_signature_.push_back(tir::Var(MakeString("output_", output_index), DataType::Handle()));
+      main_signature_.push_back(tir::Var("output", DataType::Handle()));
     }
 
     VisitExpr(func->body);
diff --git a/src/target/source/source_module.cc b/src/target/source/source_module.cc
index 661df9305036..992df61980f8 100644
--- a/src/target/source/source_module.cc
+++ b/src/target/source/source_module.cc
@@ -192,17 +192,59 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode {
        << "}\n";
   }
 
+  void GenerateEntrypointForUnpackedAPI() {
+    code_ << "TVM_DLL int32_t " << ::tvm::runtime::symbol::tvm_run_func_prefix << "(";
+    int total_args = (metadata_->num_inputs + metadata_->num_outputs);
+    for (int i = 0; i < total_args; ++i) {
+      code_ << "void* arg" << i;
+      if (i + 1 != total_args) {
+        code_ << ",";
+      }
+    }
+    code_ << ");\n";
+    code_ << "static int32_t " << ::tvm::runtime::symbol::tvm_module_main;
+    code_ << "(void* args, void* type_code, int num_args, void* out_value, void* "
+             "out_type_code, void* resource_handle) {\n";
+    code_ << "return " << ::tvm::runtime::symbol::tvm_run_func_prefix << "(";
+    for (int i = 0; i < metadata_->num_inputs; ++i) {
+      code_ << "((DLTensor*)(((TVMValue*)args)[" << i << "].v_handle))[0].data,";
+    }
+    for (int i = 0; i < metadata_->num_outputs; ++i) {
+      int j = metadata_->num_inputs + i;
+      code_ << "((DLTensor*)(((TVMValue*)args)[" << j << "].v_handle))[0].data";
+      if (i + 1 != metadata_->num_outputs) {
+        code_ << ",";
+      }
+    }
+    code_ << ");\n";
+    code_ << "}\n";
+  }
+
+  void GenerateEntrypointForPackedAPI() {
+    code_ << "TVM_DLL int32_t " << ::tvm::runtime::symbol::tvm_run_func_prefix;
+    code_ << "(void* args, void* type_code, int num_args, void* out_value, void* "
+             "out_type_code, void* resource_handle);\n";
+    code_ << "static int32_t " << ::tvm::runtime::symbol::tvm_module_main;
+    code_ << "(void* args, void* type_code, int num_args, void* out_value, void* "
+             "out_type_code, void* resource_handle) {\n";
+    code_ << "return " << ::tvm::runtime::symbol::tvm_run_func_prefix;
+    code_ << "(args, type_code, num_args, out_value, out_type_code, resource_handle);\n";
+    code_ << "}\n";
+  }
+
   void GenerateAOTDescriptor() {
     code_ << "#include \"tvm/runtime/crt/internal/aot_executor/aot_executor.h\"\n";
     code_ << "#include \"tvm/runtime/c_runtime_api.h\"\n";
     code_ << "#ifdef __cplusplus\n";
     code_ << "extern \"C\"\n";
     code_ << "#endif\n";
-    code_ << "TVM_DLL int32_t " << ::tvm::runtime::symbol::tvm_run_func_prefix;
-    code_ << "(void* args, void* type_code, int num_args, void* out_value, void* "
-             "out_type_code, void* resource_handle);\n";
+    if (target_->GetAttr<Bool>("unpacked-api").value_or(Bool(false))) {
+      GenerateEntrypointForUnpackedAPI();
+    } else {
+      GenerateEntrypointForPackedAPI();
+    }
     code_ << "const tvm_model_t network = {\n"
-          << "    .run_func = &" << ::tvm::runtime::symbol::tvm_run_func_prefix << ",\n"
+          << "    .run_func = &" << ::tvm::runtime::symbol::tvm_module_main << ",\n"
           << "    .num_input_tensors = " << metadata_->num_inputs << ",\n"
           << "    .num_output_tensors = " << metadata_->num_outputs << ", \n"
           << "};\n";
diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc
index 08e998e0f035..b2a813a874e6 100644
--- a/src/target/target_kind.cc
+++ b/src/target/target_kind.cc
@@ -258,6 +258,7 @@ TVM_REGISTER_TARGET_KIND("llvm", kDLCPU)
     .add_attr_option<Bool>("system-lib")
     .add_attr_option<String>("runtime")
     .add_attr_option<Bool>("link-params", Bool(false))
+    .add_attr_option<Bool>("unpacked-api")
     .set_default_keys({"cpu"});
 
 TVM_REGISTER_TARGET_KIND("c", kDLCPU)
@@ -268,6 +269,7 @@ TVM_REGISTER_TARGET_KIND("c", kDLCPU)
     .add_attr_option<String>("march")
     .add_attr_option<String>("executor")
     .add_attr_option<Integer>("workspace-byte-alignment")
+    .add_attr_option<Bool>("unpacked-api")
     .set_default_keys({"cpu"});
 
 TVM_REGISTER_TARGET_KIND("cuda", kDLCUDA)
diff --git a/src/tir/transforms/make_unpacked_api.cc b/src/tir/transforms/make_unpacked_api.cc
new file mode 100644
index 000000000000..154d0bfa5787
--- /dev/null
+++ b/src/tir/transforms/make_unpacked_api.cc
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file make_unpacked_api.cc Lower PrimFunc to a standard C function API.
+ */
+#include <tvm/runtime/device_api.h>
+#include <tvm/runtime/module.h>
+#include <tvm/runtime/registry.h>
+#include <tvm/target/target.h>
+#include <tvm/tir/analysis.h>
+#include <tvm/tir/buffer.h>
+#include <tvm/tir/builtin.h>
+#include <tvm/tir/expr.h>
+#include <tvm/tir/stmt_functor.h>
+#include <tvm/tir/transform.h>
+
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "arg_binder.h"
+#include "ir_utils.h"
+
+namespace tvm {
+namespace tir {
+
+PrimFunc MakeUnpackedAPI(PrimFunc&& func) {
+  auto global_symbol = func->GetAttr<String>(tvm::attr::kGlobalSymbol);
+  ICHECK(global_symbol) << "MakeUnpackedAPI: Expect PrimFunc to have the global_symbol attribute";
+
+  auto target = func->GetAttr<Target>(tvm::attr::kTarget);
+  ICHECK(target.defined()) << "MakeUnpackedAPI: Require the target attribute";
+
+  auto* func_ptr = func.CopyOnWrite();
+
+  // Setup device context
+  int target_device_type = target.value()->kind->device_type;
+  Integer device_type(target_device_type);
+  Integer device_id(0);
+  PrimExpr node = StringImm("default");
+  const Stmt nop = Evaluate(0);
+  std::vector<Stmt> device_init;
+
+  // Create arg to buffer binder
+  std::unordered_map<const VarNode*, PrimExpr> vmap;
+  ArgBinder binder(&vmap);
+
+  // Collect variables and buffers to map between
+  Array<Var> args;
+  std::vector<std::pair<Var, Var>> var_def;
+  std::vector<std::pair<Var, Buffer>> buffer_def;
+
+  for (int i = 0; i < static_cast<int>(func_ptr->params.size()); ++i) {
+    Var param = func_ptr->params[i];
+    Var v_arg = Var("arg" + std::to_string(i), param->dtype);
+
+    auto it = func_ptr->buffer_map.find(param);
+    if (it != func_ptr->buffer_map.end()) {
+      buffer_def.emplace_back(v_arg, (*it).second);
+    } else {
+      var_def.emplace_back(v_arg, param);
+    }
+
+    args.push_back(v_arg);
+  }
+
+  // Bind variables then bind buffers to them to ensure correct ordering
+  for (const auto& kv : var_def) {
+    binder.Bind(kv.second, kv.first, kv.first->name_hint, true);
+  }
+  for (const auto& kv : buffer_def) {
+    binder.Bind(kv.second->data, kv.first, kv.first->name_hint, true);
+  }
+
+  if (buffer_def.size()) {
+    device_init.push_back(AttrStmt(node, attr::device_id, device_id, nop));
+    device_init.push_back(AttrStmt(node, attr::device_type, device_type, nop));
+  }
+
+  func_ptr->body = MergeNest({device_init, binder.init_nest(), binder.asserts()}, func_ptr->body);
+  func_ptr->params = args;
+  func_ptr->ret_type = PrimType(DataType::Int(32));
+
+  // return the function.
+  return std::move(func);
+}
+
+namespace transform {
+
+Pass MakeUnpackedAPI() {
+  auto pass_func = [](IRModule m, PassContext ctx) {
+    IRModuleNode* mptr = m.CopyOnWrite();
+    std::vector<std::pair<GlobalVar, PrimFunc>> updates;
+
+    for (const auto& kv : mptr->functions) {
+      if (auto* n = kv.second.as<PrimFuncNode>()) {
+        PrimFunc func = GetRef<PrimFunc>(n);
+        if (func->GetAttr<Integer>(tvm::attr::kCallingConv, Integer(CallingConv::kDefault)) ==
+            CallingConv::kDefault) {
+          auto updated_func = MakeUnpackedAPI(std::move(func));
+          updates.push_back({kv.first, updated_func});
+        }
+      }
+    }
+
+    for (const auto& pair : updates) {
+      mptr->AddUnchecked(pair.first, pair.second);
+    }
+    return m;
+  };
+
+  return tvm::transform::CreateModulePass(pass_func, 0, "tir.MakeUnpackedAPI", {});
+}
+
+TVM_REGISTER_GLOBAL("tir.transform.MakeUnpackedAPI").set_body_typed(MakeUnpackedAPI);
+}  // namespace transform
+}  // namespace tir
+}  // namespace tvm
diff --git a/tests/python/relay/aot/aot_test_utils.py b/tests/python/relay/aot/aot_test_utils.py
index c1917674873d..a54ffb80f051 100644
--- a/tests/python/relay/aot/aot_test_utils.py
+++ b/tests/python/relay/aot/aot_test_utils.py
@@ -165,12 +165,18 @@ def extract_main_workspace_sizebytes(extract_dir):
 
 
 def compile_and_run(
-    mod, input_list, output_list, use_calculated_workspaces, params=None, workspace_byte_alignment=8
+    mod,
+    input_list,
+    output_list,
+    target_options,
+    use_calculated_workspaces,
+    params=None,
+    workspace_byte_alignment=8,
 ):
     """
     This method verifies the generated source
     """
-    target = f"c -runtime=c --link-params --executor=aot --workspace-byte-alignment={workspace_byte_alignment}"
+    target = f"c -runtime=c --link-params --executor=aot --workspace-byte-alignment={workspace_byte_alignment} {target_options}"
 
     cflags = f"-DTVM_RUNTIME_ALLOC_ALIGNMENT_BYTES={workspace_byte_alignment} "
 
     # The calculated workspaces will not account for stack allocator tags used for debugging
diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py
index 02b4de3a64f3..4f8de450d9f1 100644
--- a/tests/python/relay/aot/test_crt_aot.py
+++ b/tests/python/relay/aot/test_crt_aot.py
@@ -44,7 +44,8 @@
 
 
 @pytest.mark.parametrize("use_calculated_workspaces", [True, False])
-def test_conv_with_params(use_calculated_workspaces):
+@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"])
+def test_conv_with_params(use_calculated_workspaces, target_options):
     RELAY_MODEL = """
 #[version = "0.0.5"]
 def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5), int8]) {
@@ -73,11 +74,12 @@ def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5),
     output_list = generate_ref_data(mod, inputs, params)
     input_list = [input_data]
 
-    compile_and_run(mod, input_list, output_list, use_calculated_workspaces, params)
+    compile_and_run(mod, input_list, output_list, target_options, use_calculated_workspaces, params)
 
 
 @pytest.mark.parametrize("use_calculated_workspaces", [True, False])
-def test_add_with_params(use_calculated_workspaces):
+@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"])
+def test_add_with_params(use_calculated_workspaces, target_options):
     x = relay.var("x", shape=(1, 10))
     y = relay.var("y", shape=(1, 10))
     z = relay.add(x, y)
@@ -91,11 +93,14 @@ def test_add_with_params(use_calculated_workspaces):
     output_list = generate_ref_data(func, inputs, params)
     input_list = [y_in]
 
-    compile_and_run(func, input_list, output_list, use_calculated_workspaces, params)
+    compile_and_run(
+        func, input_list, output_list, 
target_options, use_calculated_workspaces, params + ) @pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -def test_conv2d(use_calculated_workspaces): +@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) +def test_conv2d(use_calculated_workspaces, target_options): """Test a subgraph with a single conv2d operator.""" def conv2d_direct(): @@ -137,11 +142,12 @@ def group_conv2d(): for mod, inputs, out_shape in [conv2d_direct(), group_conv2d()]: output_list = generate_ref_data(mod, inputs) input_list = [inputs["data"], inputs["weight"]] - compile_and_run(mod, input_list, output_list, use_calculated_workspaces) + compile_and_run(mod, input_list, output_list, target_options, use_calculated_workspaces) @pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -def test_concatenate(use_calculated_workspaces): +@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) +def test_concatenate(use_calculated_workspaces, target_options): dtype = "float32" x = relay.var("x", shape=(10, 5), dtype=dtype) y = relay.var("y", shape=(10, 5), dtype=dtype) @@ -157,11 +163,12 @@ def test_concatenate(use_calculated_workspaces): output_list = generate_ref_data(func, inputs) input_list = [inputs["x"], inputs["y"], inputs["z"]] - compile_and_run(func, input_list, output_list, use_calculated_workspaces) + compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces) @pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -def test_nested_tuples(use_calculated_workspaces): +@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) +def test_nested_tuples(use_calculated_workspaces, target_options): x = relay.var("x", shape=(10,)) x1 = x + relay.const(1.0) x2 = x1 + relay.const(1.0) @@ -174,39 +181,43 @@ def test_nested_tuples(use_calculated_workspaces): inputs = {"x": x_data} output_list = generate_ref_data(func, inputs) input_list = [x_data] - compile_and_run(func, input_list, output_list, use_calculated_workspaces) + compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces) @pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -def test_tuple_getitem(use_calculated_workspaces): +@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) +def test_tuple_getitem(use_calculated_workspaces, target_options): func = relay.Function([], relay.TupleGetItem(relay.Tuple([relay.const(1), relay.const(2)]), 0)) output_list = generate_ref_data(func, {}) input_list = [] - compile_and_run(func, input_list, output_list, use_calculated_workspaces) + compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces) @pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -def test_id(use_calculated_workspaces): +@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) +def test_id(use_calculated_workspaces, target_options): x = relay.var("x", "float32") ident = relay.Function([x], x) one = np.array(1.0, "float32") inputs = {"x": one} output_list = generate_ref_data(ident, inputs) input_list = [one] - compile_and_run(ident, input_list, output_list, use_calculated_workspaces) + compile_and_run(ident, input_list, output_list, target_options, use_calculated_workspaces) @pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -def test_add_const(use_calculated_workspaces): +@pytest.mark.parametrize("target_options", ["--unpacked-api=0", 
"--unpacked-api=1"]) +def test_add_const(use_calculated_workspaces, target_options): two = relay.add(relay.const(1), relay.const(1)) func = relay.Function([], two) output_list = generate_ref_data(func, {}) input_list = [] - compile_and_run(func, input_list, output_list, use_calculated_workspaces) + compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces) @pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -def test_mul_param(use_calculated_workspaces): +@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) +def test_mul_param(use_calculated_workspaces, target_options): x = relay.var("x", shape=(10, 10)) y = relay.var("y", shape=(1, 10)) func = relay.Function([x, y], relay.multiply(x, y)) @@ -215,11 +226,12 @@ def test_mul_param(use_calculated_workspaces): inputs = {"x": x_data, "y": y_data} output_list = generate_ref_data(func, inputs) input_list = [inputs["x"], inputs["y"]] - compile_and_run(func, input_list, output_list, use_calculated_workspaces) + compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces) @pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -def test_subtract(use_calculated_workspaces): +@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) +def test_subtract(use_calculated_workspaces, target_options): i = relay.var("i", shape=[], dtype="int32") sub = relay.subtract(i, relay.const(1, dtype="int32")) func = relay.Function([i], sub, ret_type=relay.TensorType([], "int32")) @@ -227,11 +239,12 @@ def test_subtract(use_calculated_workspaces): inputs = {"i": i_data} output_list = generate_ref_data(func, inputs) input_list = [inputs["i"]] - compile_and_run(func, input_list, output_list, use_calculated_workspaces) + compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces) @pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -def test_tuple_output(use_calculated_workspaces): +@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) +def test_tuple_output(use_calculated_workspaces, target_options): x = relay.var("x", shape=(6, 9)) y = relay.split(x, 3).astuple() a = relay.TupleGetItem(y, 0) @@ -243,15 +256,17 @@ def test_tuple_output(use_calculated_workspaces): inputs = {"x": x_data} output_list = generate_ref_data(func, inputs) input_list = [inputs["x"]] - compile_and_run(func, input_list, output_list, use_calculated_workspaces) + compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces) @pytest.mark.parametrize( "use_calculated_workspaces_and_alignment", [(True, 1), (True, 16), (False, 1)] ) -def test_mobilenet(use_calculated_workspaces_and_alignment): +@pytest.mark.parametrize("target_options", ["--unpacked-api"]) +def test_mobilenet(use_calculated_workspaces_and_alignment, target_options): use_calculated_workspaces = use_calculated_workspaces_and_alignment[0] workspace_byte_alignment = use_calculated_workspaces_and_alignment[1] + mod, params = testing.mobilenet.get_workload(batch_size=1) data_shape = [int(x) for x in mod["main"].checked_type.arg_types[0].shape] data = np.random.uniform(size=data_shape).astype("float32") @@ -259,7 +274,13 @@ def test_mobilenet(use_calculated_workspaces_and_alignment): output_list = generate_ref_data(mod, inputs, params) input_list = [inputs["data"]] compile_and_run( - mod, input_list, output_list, use_calculated_workspaces, params, workspace_byte_alignment + mod, + input_list, + 
output_list, + target_options, + use_calculated_workspaces, + params, + workspace_byte_alignment, ) @@ -318,7 +339,8 @@ def visit_call(self, call): @pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -def test_byoc_utvm(use_calculated_workspaces): +@pytest.mark.parametrize("target_options", [""]) +def test_byoc_utvm(use_calculated_workspaces, target_options): """This is a simple test case to check BYOC capabilities of AOT""" x = relay.var("x", shape=(10, 10)) w0 = relay.var("w0", shape=(10, 10)) @@ -361,7 +383,7 @@ def test_byoc_utvm(use_calculated_workspaces): output_list = generate_ref_data(mod, map_inputs) input_list = [map_inputs["x"]] input_list.extend([map_inputs["w{}".format(i)] for i in range(8)]) - compile_and_run(mod, input_list, output_list, use_calculated_workspaces) + compile_and_run(mod, input_list, output_list, target_options, use_calculated_workspaces) if __name__ == "__main__": diff --git a/tests/python/unittest/test_tir_transform_make_unpacked_api.py b/tests/python/unittest/test_tir_transform_make_unpacked_api.py new file mode 100644 index 000000000000..9d917466758b --- /dev/null +++ b/tests/python/unittest/test_tir_transform_make_unpacked_api.py @@ -0,0 +1,155 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+import pytest
+
+import tvm
+import tvm.testing
+
+
+@pytest.fixture
+def mod_without_attrs():
+    ib = tvm.tir.ir_builder.create()
+    A = tvm.tir.decl_buffer(name="A", shape=[1])
+    stmt = ib.get()
+    return tvm.IRModule.from_expr(tvm.tir.PrimFunc([A], stmt))
+
+
+@pytest.fixture
+def mod(mod_without_attrs):
+    mod = tvm.tir.transform.Apply(lambda f: f.with_attr("target", tvm.target.Target("llvm")))(
+        mod_without_attrs
+    )
+    mod = tvm.tir.transform.Apply(lambda f: f.with_attr("global_symbol", "main"))(mod)
+
+    return mod
+
+
+def test_fails_if_not_global_symbol(mod_without_attrs):
+    mod = tvm.tir.transform.Apply(lambda f: f.with_attr("target", tvm.target.Target("llvm")))(
+        mod_without_attrs
+    )
+    with pytest.raises(tvm.TVMError, match="Expect PrimFunc to have the global_symbol attribute"):
+        f = tvm.tir.transform.MakeUnpackedAPI()(mod)["main"]
+
+
+def test_fails_if_no_target(mod_without_attrs):
+    mod = tvm.tir.transform.Apply(lambda f: f.with_attr("global_symbol", "main"))(mod_without_attrs)
+    with pytest.raises(tvm.TVMError, match="Require the target attribute"):
+        f = tvm.tir.transform.MakeUnpackedAPI()(mod)["main"]
+
+
+@tvm.testing.parametrize_targets("c", "llvm", "cuda")
+def test_device_setup(mod, target, dev):
+    mod = tvm.tir.transform.Apply(lambda f: f.with_attr("target", tvm.target.Target(target)))(mod)
+    f = tvm.tir.transform.MakeUnpackedAPI()(mod)["main"]
+    assert len(f.params) == 1
+    assert f.params[0].name == "arg0"
+    assert f.body.node == "default"
+    assert f.body.attr_key == "device_id"
+    assert f.body.value == 0
+    assert f.body.body.node == "default"
+    assert f.body.body.attr_key == "device_type"
+    assert f.body.body.value == dev.device_type
+
+
+def test_no_buffers_no_device_setup():
+    ib = tvm.tir.ir_builder.create()
+    A = ib.pointer("float32", name="A")
+    stmt = ib.get()
+    mod = tvm.IRModule.from_expr(tvm.tir.PrimFunc([A], stmt))
+    mod = tvm.tir.transform.Apply(lambda f: f.with_attr("target", tvm.target.Target("llvm")))(mod)
+    mod = tvm.tir.transform.Apply(lambda f: f.with_attr("global_symbol", "main"))(mod)
+
+    f = tvm.tir.transform.MakeUnpackedAPI()(mod)["main"]
+    assert len(f.params) == 1
+    assert f.body.var.name == "A"
+    assert f.body.value.name == "arg0"
+
+
+def test_argument_mapping(mod):
+    f = tvm.tir.transform.MakeUnpackedAPI()(mod)["main"]
+    assert len(f.params) == 1
+    assert f.params[0].name == "arg0"
+    assert f.body.body.body.var.name == "A"
+    assert f.body.body.body.value.name == "arg0"
+
+
+def test_argument_mapping_multiple():
+    ib = tvm.tir.ir_builder.create()
+    A = tvm.tir.decl_buffer(name="A", shape=[1])
+    B = tvm.tir.decl_buffer(name="B", shape=[1])
+
+    stmt = ib.get()
+    mod = tvm.IRModule.from_expr(tvm.tir.PrimFunc([A, B], stmt))
+    mod = tvm.tir.transform.Apply(lambda f: f.with_attr("target", tvm.target.Target("llvm")))(mod)
+    mod = tvm.tir.transform.Apply(lambda f: f.with_attr("global_symbol", "main"))(mod)
+
+    f = tvm.tir.transform.MakeUnpackedAPI()(mod)["main"]
+    assert len(f.params) == 2
+    assert f.params[0].name == "arg0"
+    assert f.params[1].name == "arg1"
+    assert f.body.body.body.var.name == "A"
+    assert f.body.body.body.value.name == "arg0"
+    assert f.body.body.body.body.var.name == "B"
+    assert f.body.body.body.body.value.name == "arg1"
+
+
+def test_argument_mapping_multiple_matching():
+    ib = tvm.tir.ir_builder.create()
+    A = tvm.tir.decl_buffer(name="A", shape=[1])
+    B = tvm.tir.decl_buffer(name="B", shape=[1])
+    stmt = ib.get()
+    mod = tvm.IRModule.from_expr(tvm.tir.PrimFunc([A, A], stmt))
+    mod = 
tvm.tir.transform.Apply(lambda f: f.with_attr("target", tvm.target.Target("llvm")))(mod) + mod = tvm.tir.transform.Apply(lambda f: f.with_attr("global_symbol", "main"))(mod) + + f = tvm.tir.transform.MakeUnpackedAPI()(mod)["main"] + assert len(f.params) == 2 + assert f.params[0].name == "arg0" + assert f.params[1].name == "arg1" + assert f.body.body.body.var.name == "A" + assert f.body.body.body.value.name == "arg0" + assert f.body.body.body.body.condition.a.name == "A" + assert f.body.body.body.body.condition.b.name == "arg1" + + +def test_body(): + ib = tvm.tir.ir_builder.create() + A = tvm.tir.decl_buffer(name="A", shape=[1]) + B = tvm.tir.decl_buffer(name="B", shape=[1]) + C = ib.buffer_ptr(A) + + stmt = ib.get() + mod = tvm.IRModule.from_expr(tvm.tir.PrimFunc([A, B, C], stmt)) + mod = tvm.tir.transform.Apply(lambda f: f.with_attr("target", tvm.target.Target("llvm")))(mod) + mod = tvm.tir.transform.Apply(lambda f: f.with_attr("global_symbol", "main"))(mod) + f = tvm.tir.transform.MakeUnpackedAPI()(mod)["main"] + assert len(f.params) == 3 + assert f.params[0].name == "arg0" + assert f.params[1].name == "arg1" + assert f.params[2].name == "arg2" + assert f.body.body.body.var.name == "A" + assert f.body.body.body.value.name == "arg2" + assert f.body.body.body.body.var.name == "B" + assert f.body.body.body.body.value.name == "arg1" + assert f.body.body.body.body.body.condition.a.name == "A" + assert f.body.body.body.body.body.condition.b.name == "arg0" + + +if __name__ == "__main__": + pytest.main([__file__])
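Reviewer note: the unit tests above double as usage documentation. Below is a minimal sketch of driving the new pass by hand, mirroring the module setup in test_tir_transform_make_unpacked_api.py; the buffer name, "llvm" target, and "main" symbol are illustrative, and it assumes this patch is applied.

import tvm

# Build a trivial PrimFunc with one buffer argument, as the fixtures above do.
ib = tvm.tir.ir_builder.create()
A = tvm.tir.decl_buffer(name="A", shape=[1])
mod = tvm.IRModule.from_expr(tvm.tir.PrimFunc([A], ib.get()))

# MakeUnpackedAPI requires the "target" and "global_symbol" attributes to be set.
mod = tvm.tir.transform.Apply(lambda f: f.with_attr("target", tvm.target.Target("llvm")))(mod)
mod = tvm.tir.transform.Apply(lambda f: f.with_attr("global_symbol", "main"))(mod)

# The pass rewrites "main" to take one bare pointer per buffer ("arg0", "arg1", ...)
# in place of the packed (args, type_code, num_args, ...) signature.
f = tvm.tir.transform.MakeUnpackedAPI()(mod)["main"]
print([p.name for p in f.params])  # -> ["arg0"]

In the AOT flow the same behaviour is selected through the new target attribute, e.g. "c -runtime=c --link-params --executor=aot --unpacked-api=1", which swaps MakePackedAPI for MakeUnpackedAPI during lowering and makes the generated entrypoint unwrap each DLTensor to its data pointer before calling the operator functions.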