diff --git a/apps/microtvm/zephyr/aot_demo/src/main.c b/apps/microtvm/zephyr/aot_demo/src/main.c
index 43cc7b33987b..0c16572fc744 100644
--- a/apps/microtvm/zephyr/aot_demo/src/main.c
+++ b/apps/microtvm/zephyr/aot_demo/src/main.c
@@ -32,6 +32,7 @@
 #include "input_data.h"
 #include "output_data.h"
+#include "tvmgen_default.h"
 #include "zephyr_uart.h"
 
 #ifdef CONFIG_ARCH_POSIX
@@ -194,18 +195,18 @@ void main(void) {
   }
   TVMLogf("Zephyr AOT Runtime\n");
 
-  void* inputs[1] = {
-      input_data,
+  struct tvmgen_default_inputs inputs = {
+      .input_1 = input_data,
   };
-  void* outputs[1] = {
-      output_data,
+  struct tvmgen_default_outputs outputs = {
+      .output = output_data,
   };
 
   StackMemoryManager_Init(&app_workspace, g_aot_memory, WORKSPACE_SIZE);
 
   double elapsed_time = 0;
   TVMPlatformTimerStart();
-  int ret_val = tvm_runtime_run(&tvmgen_default_network, inputs, outputs);
+  int ret_val = tvmgen_default_run(&inputs, &outputs);
   TVMPlatformTimerStop(&elapsed_time);
 
   if (ret_val != 0) {
diff --git a/include/tvm/runtime/module.h b/include/tvm/runtime/module.h
index 9dd7423c6679..71be8d218d2d 100644
--- a/include/tvm/runtime/module.h
+++ b/include/tvm/runtime/module.h
@@ -230,8 +230,10 @@ constexpr const char* tvm_module_main = "__tvm_main__";
 constexpr const char* tvm_param_prefix = "__tvm_param__";
 /*! \brief A PackedFunc that looks up linked parameters by storage_id. */
 constexpr const char* tvm_lookup_linked_param = "_lookup_linked_param";
-/*! \brief The main AOT executor function */
+/*! \brief The main AOT executor function generated from TIR */
 constexpr const char* tvm_run_func_suffix = "run_model";
+/*! \brief Model entrypoint generated as an interface to the AOT function outside of TIR */
+constexpr const char* tvm_entrypoint_suffix = "run";
 }  // namespace symbol
 
 // implementations of inline functions.
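For a module named "default", the two suffixes above mangle into the pair of symbols sketched below. This is only an illustration of the naming scheme; the exact prototypes are emitted by `source_module.cc` later in this patch, and the struct types come from the generated interface header:

```c
#include <stdint.h>

struct tvmgen_default_inputs;
struct tvmgen_default_outputs;

/* TIR-generated AOT function ("run_model" suffix): one void* per tensor. */
int32_t tvmgen_default_run_model(void* arg0, void* arg1);

/* Generated interface entrypoint ("run" suffix): the symbol the Zephyr demo now calls. */
int32_t tvmgen_default_run(struct tvmgen_default_inputs* inputs,
                           struct tvmgen_default_outputs* outputs);
```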
+ +"""Defines functions for generating a C interface header""" + +import os + +from tvm.relay.backend.utils import mangle_module_name + + +def _emit_brief(header_file, module_name, description): + header_file.write("/*!\n") + header_file.write(f' * \\brief {description} for TVM module "{module_name}" \n') + header_file.write(" */\n") + + +def generate_c_interface_header(module_name, inputs, outputs, output_path): + """Generates a C interface header for a given modules inputs and outputs + + Parameters + ---------- + module_name : str + Name of the module to be used in defining structs and naming the header + inputs : list[str] + List of module input names to be placed in generated structs + outputs : list[str] + List of module output names to be placed in generated structs + output_path : str + Path to the output folder to generate the header into + """ + + mangled_name = mangle_module_name(module_name) + metadata_header = os.path.join(output_path, f"{mangled_name}.h") + with open(metadata_header, "w") as header_file: + header_file.write( + "#include \n" + f"#ifndef {mangled_name.upper()}_H_\n" + f"#define {mangled_name.upper()}_H_\n" + ) + + _emit_brief(header_file, module_name, "Input tensor pointers") + header_file.write(f"struct {mangled_name}_inputs {{\n") + for input_name in inputs: + header_file.write(f" void* {input_name};\n") + header_file.write("};\n\n") + + _emit_brief(header_file, module_name, "Output tensor pointers") + header_file.write(f"struct {mangled_name}_outputs {{\n") + for output_name in outputs: + header_file.write(f" void* {output_name};\n") + header_file.write("};\n\n") + + header_file.write( + "/*!\n" + f' * \\brief entrypoint function for TVM module "{module_name}"\n' + " * \\param inputs Input tensors for the module \n" + " * \\param outputs Output tensors for the module \n" + " */\n" + f"int32_t {mangled_name}_run(\n" + f" struct {mangled_name}_inputs* inputs,\n" + f" struct {mangled_name}_outputs* outputs\n" + ");\n" + ) + + header_file.write(f"#endif // {mangled_name.upper()}_H_\n") diff --git a/python/tvm/micro/model_library_format.py b/python/tvm/micro/model_library_format.py index ad49ee7d9578..5e682c72ed73 100644 --- a/python/tvm/micro/model_library_format.py +++ b/python/tvm/micro/model_library_format.py @@ -25,7 +25,9 @@ import tarfile import typing +from tvm.ir.type import TupleType from .._ffi import get_global_func +from .interface_api import generate_c_interface_header from ..contrib import utils from ..driver import build_module from ..runtime import ndarray as _nd @@ -55,7 +57,6 @@ def _populate_codegen_dir(mod, codegen_dir: str, module_name: str = None): """ dso_modules = mod._collect_dso_modules() - dso_module_handles = [m.handle.value for m in dso_modules] non_dso_modules = mod._collect_from_import_tree(lambda m: m not in dso_modules) if non_dso_modules: raise UnsupportedInModelLibraryFormatError( @@ -213,6 +214,39 @@ def _build_function_memory_map(function_metadata): return ret +def _get_main_relay_func(mod: executor_factory.ExecutorFactoryModule): + main_func = mod.function_metadata[MAIN_FUNC_NAME_STR] + target = list(main_func.relay_primfuncs.keys())[0] + return main_func.relay_primfuncs[target] + + +def _convert_tuple_to_outputs(ret_type, offset=0): + outputs = [] + added_fields = len(ret_type.fields) + for output_index in range(added_fields): + next_output = offset + len(outputs) + if isinstance(ret_type.fields[output_index], TupleType): + outputs.extend(_convert_tuple_to_outputs(ret_type.fields[output_index], next_output)) + else: + 
outputs.append(f"output{next_output}") + return outputs + + +def _get_inputs_and_outputs_from_module(mod): + main_func = _get_main_relay_func(mod) + inputs = [argument.name_hint for argument in main_func.params] + + outputs = ["output"] + if isinstance(main_func.ret_type, TupleType): + outputs = _convert_tuple_to_outputs(main_func.ret_type) + + return inputs, outputs + + +def _should_generate_interface_header(mod): + return any(target.attrs.get("interface-api") == "c" for target in mod.target.values()) + + def _make_tar(source_dir, tar_file_path): """Build a tar file from source_dir.""" with tarfile.open(tar_file_path, "w") as tar_f: @@ -260,6 +294,12 @@ def _export_graph_model_library_format( codegen_dir.mkdir() _populate_codegen_dir(mod.lib, codegen_dir, mod.libmod_name) + if _should_generate_interface_header(mod): + include_path = codegen_dir / "host" / "include" + include_path.mkdir() + inputs, outputs = _get_inputs_and_outputs_from_module(mod) + generate_c_interface_header(mod.libmod_name, inputs, outputs, include_path) + parameters_dir = tempdir / "parameters" parameters_dir.mkdir() param_filename = parameters_dir / f"{mod.libmod_name}.params" diff --git a/src/relay/backend/aot_executor_codegen.cc b/src/relay/backend/aot_executor_codegen.cc index fd6ee27eb6be..221df958a8cb 100644 --- a/src/relay/backend/aot_executor_codegen.cc +++ b/src/relay/backend/aot_executor_codegen.cc @@ -650,7 +650,7 @@ class AOTExecutorCodegen : public ExprVisitor { /*! \brief mod */ runtime::Module* mod_; /*! \brief list of input expressions (i.e., variable passed by the user) */ - std::vector input_vars_; + std::vector input_vars_; /*! \brief input and output variables belonging to the main function signature */ Array main_signature_; /*! \brief target device */ @@ -782,8 +782,12 @@ class AOTExecutorCodegen : public ExprVisitor { ret.lowered_funcs.Set(target_host_str, mod_run); } ret.function_metadata = std::move(function_metadata_); - ret.metadata = runtime::Metadata(input_vars_.size(), return_sid_.size(), - runtime::kTvmExecutorAot, mod_name); + + std::vector input_var_names(input_vars_.size()); + std::transform(input_vars_.begin(), input_vars_.end(), input_var_names.begin(), + [](Var input_var) -> String { return input_var->name_hint(); }); + ret.metadata = + runtime::Metadata(input_var_names, return_sid_.size(), runtime::kTvmExecutorAot, mod_name); return ret; } }; diff --git a/src/runtime/meta_data.h b/src/runtime/meta_data.h index 002012a1e1cc..66d9a44099da 100644 --- a/src/runtime/meta_data.h +++ b/src/runtime/meta_data.h @@ -54,8 +54,8 @@ inline String get_name_mangled(const String& module_name, const String& name) { */ class MetadataNode : public Object { public: - /*! \brief number of inputs of the main function */ - int num_inputs = 1; + /*! \brief input information for the main function */ + Array inputs; /*! \brief number of outputs of the main function */ int num_outputs = 1; /*! 
diff --git a/src/relay/backend/aot_executor_codegen.cc b/src/relay/backend/aot_executor_codegen.cc
index fd6ee27eb6be..221df958a8cb 100644
--- a/src/relay/backend/aot_executor_codegen.cc
+++ b/src/relay/backend/aot_executor_codegen.cc
@@ -650,7 +650,7 @@ class AOTExecutorCodegen : public ExprVisitor {
   /*! \brief mod */
   runtime::Module* mod_;
   /*! \brief list of input expressions (i.e., variable passed by the user) */
-  std::vector<tir::Var> input_vars_;
+  std::vector<Var> input_vars_;
   /*! \brief input and output variables belonging to the main function signature */
   Array<tir::Var> main_signature_;
   /*! \brief target device */
@@ -782,8 +782,12 @@ class AOTExecutorCodegen : public ExprVisitor {
       ret.lowered_funcs.Set(target_host_str, mod_run);
     }
     ret.function_metadata = std::move(function_metadata_);
-    ret.metadata = runtime::Metadata(input_vars_.size(), return_sid_.size(),
-                                     runtime::kTvmExecutorAot, mod_name);
+
+    std::vector<String> input_var_names(input_vars_.size());
+    std::transform(input_vars_.begin(), input_vars_.end(), input_var_names.begin(),
+                   [](Var input_var) -> String { return input_var->name_hint(); });
+    ret.metadata =
+        runtime::Metadata(input_var_names, return_sid_.size(), runtime::kTvmExecutorAot, mod_name);
     return ret;
   }
 };
diff --git a/src/runtime/meta_data.h b/src/runtime/meta_data.h
index 002012a1e1cc..66d9a44099da 100644
--- a/src/runtime/meta_data.h
+++ b/src/runtime/meta_data.h
@@ -54,8 +54,8 @@ inline String get_name_mangled(const String& module_name, const String& name) {
  */
 class MetadataNode : public Object {
  public:
-  /*! \brief number of inputs of the main function */
-  int num_inputs = 1;
+  /*! \brief input information for the main function */
+  Array<String> inputs;
   /*! \brief number of outputs of the main function */
   int num_outputs = 1;
   /*! \brief the executor to be used to run the model */
@@ -73,9 +73,9 @@ class MetadataNode : public Object {
  */
 class Metadata : public ObjectRef {
  public:
-  TVM_DLL Metadata(int num_inputs, int num_outputs, String executor, String mod_name) {
+  TVM_DLL Metadata(Array<String> inputs, int num_outputs, String executor, String mod_name) {
     auto n = make_object<MetadataNode>();
-    n->num_inputs = num_inputs;
+    n->inputs = inputs;
     n->num_outputs = num_outputs;
     n->executor = executor;
     n->mod_name = mod_name;
diff --git a/src/target/source/source_module.cc b/src/target/source/source_module.cc
index ac4d7e3666ea..7728773b13d7 100644
--- a/src/target/source/source_module.cc
+++ b/src/target/source/source_module.cc
@@ -192,25 +192,26 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode {
           << "}\n";
   }
 
-  void GenerateEntrypointForUnpackedAPI(const std::string& run_func) {
+  void GenerateEntrypointForUnpackedAPI(const std::string& entrypoint_name,
+                                        const std::string& run_func) {
     code_ << "TVM_DLL int32_t " << run_func << "(";
-    int total_args = (metadata_->num_inputs + metadata_->num_outputs);
-    for (int i = 0; i < total_args; ++i) {
-      code_ << "arg" << i;
+    unsigned int total_args = (metadata_->inputs.size() + metadata_->num_outputs);
+    for (unsigned int i = 0; i < total_args; ++i) {
+      code_ << "void* arg" << i;
       if (i + 1 != total_args) {
         code_ << ",";
       }
     }
     code_ << ");\n";
-    code_ << "static int32_t " << ::tvm::runtime::symbol::tvm_module_main;
+    code_ << "int32_t " << entrypoint_name;
     code_ << "(void* args, void* type_code, int num_args, void* out_value, void* "
              "out_type_code, void* resource_handle) {\n";
     code_ << "return " << run_func << "(";
-    for (int i = 0; i < metadata_->num_inputs; ++i) {
+    for (unsigned int i = 0; i < metadata_->inputs.size(); ++i) {
       code_ << "((DLTensor*)(((TVMValue*)args)[" << i << "].v_handle))[0].data,";
     }
     for (int i = 0; i < metadata_->num_outputs; ++i) {
-      int j = metadata_->num_inputs + i;
+      int j = metadata_->inputs.size() + i;
       code_ << "((DLTensor*)(((TVMValue*)args)[" << j << "].v_handle))[0].data";
       if (i + 1 != metadata_->num_outputs) {
         code_ << ",";
@@ -220,11 +221,12 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode {
     code_ << "}\n";
   }
 
-  void GenerateEntrypointForPackedAPI(const std::string& run_func) {
+  void GenerateEntrypointForPackedAPI(const std::string& entrypoint_name,
+                                      const std::string& run_func) {
     code_ << "TVM_DLL int32_t " << run_func;
     code_ << "(void* args, void* type_code, int num_args, void* out_value, void* "
              "out_type_code, void* resource_handle);\n";
-    code_ << "static int32_t " << ::tvm::runtime::symbol::tvm_module_main;
+    code_ << "int32_t " << entrypoint_name;
     code_ << "(void* args, void* type_code, int num_args, void* out_value, void* "
              "out_type_code, void* resource_handle) {\n";
     code_ << "return " << run_func;
@@ -232,25 +234,70 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode {
     code_ << "}\n";
   }
 
+  void GenerateCInterfaceEntrypoint(const std::string& entrypoint_name, const std::string& run_func,
+                                    const std::string& mod_name) {
+    code_ << "#include <" << mod_name << ".h>\n";
+    code_ << "TVM_DLL int32_t " << run_func << "(";
+    unsigned int total_args = (metadata_->inputs.size() + metadata_->num_outputs);
+    for (unsigned int i = 0; i < total_args; ++i) {
+      code_ << "void* arg" << i;
+      if (i + 1 != total_args) {
+        code_ << ",";
+      }
+    }
+    code_ << ");\n";
+    code_ << "int32_t " << entrypoint_name << "(";
+    code_ << "struct " << runtime::get_name_mangled(mod_name, "inputs") << "* inputs,"
+          << "struct " << runtime::get_name_mangled(mod_name, "outputs") << "* outputs"
+          << ") {";
+    code_ << "return " << run_func << "(";
+    for (const auto& input : metadata_->inputs) {
+      code_ << "inputs->" << input << ",";
+    }
+    if (metadata_->num_outputs == 1) {
+      code_ << "outputs->output";
+    } else {
+      for (int i = 0; i < metadata_->num_outputs; ++i) {
+        code_ << "outputs->output" << i;
+        if (i + 1 != metadata_->num_outputs) {
+          code_ << ",";
+        }
+      }
+    }
+    code_ << ");\n";
+    code_ << "}\n";
+  }
+
   void GenerateAOTDescriptor() {
-    const std::string run_func = ::tvm::runtime::symbol::tvm_run_func_suffix;
-    const std::string run_func_mangled = runtime::get_name_mangled(metadata_->mod_name, run_func);
+    const std::string run_func_suffix = ::tvm::runtime::symbol::tvm_run_func_suffix;
+    const std::string tvm_entrypoint_suffix = ::tvm::runtime::symbol::tvm_entrypoint_suffix;
+    const std::string run_func_mangled =
+        runtime::get_name_mangled(metadata_->mod_name, run_func_suffix);
+    const std::string entrypoint_mangled =
+        runtime::get_name_mangled(metadata_->mod_name, tvm_entrypoint_suffix);
     const std::string network_mangled = runtime::get_name_mangled(metadata_->mod_name, "network");
-    code_ << "#include \"tvm/runtime/crt/internal/aot_executor/aot_executor.h\"\n";
+
+    auto unpacked_api = target_->GetAttr<Bool>("unpacked-api").value_or(Bool(false));
+    auto interface_api = target_->GetAttr<String>("interface-api").value_or(String("packed"));
+
+    code_ << "#include \"tvm/runtime/c_runtime_api.h\"\n";
     code_ << "#ifdef __cplusplus\n";
-    code_ << "extern \"C\"\n";
+    code_ << "extern \"C\" {\n";
     code_ << "#endif\n";
-    if (target_->GetAttr<Bool>("unpacked-api").value_or(Bool(false))) {
-      GenerateEntrypointForUnpackedAPI(run_func_mangled);
+
+    if (unpacked_api) {
+      if (interface_api == "c") {
+        GenerateCInterfaceEntrypoint(entrypoint_mangled, run_func_mangled, metadata_->mod_name);
+      } else {
+        GenerateEntrypointForUnpackedAPI(entrypoint_mangled, run_func_mangled);
+      }
     } else {
-      GenerateEntrypointForPackedAPI(run_func_mangled);
+      ICHECK_EQ(interface_api, "packed") << "Packed interface required for packed operators";
+      GenerateEntrypointForPackedAPI(entrypoint_mangled, run_func_mangled);
     }
-    code_ << "const tvm_model_t " << network_mangled << " = {\n"
-          << "    .run_func = &" << ::tvm::runtime::symbol::tvm_module_main << ",\n"
-          << "    .num_input_tensors = " << metadata_->num_inputs << ",\n"
-          << "    .num_output_tensors = " << metadata_->num_outputs << ", \n"
-          << "};\n";
+
+    code_ << "#ifdef __cplusplus\n";
+    code_ << "}\n";
+    code_ << "#endif\n";
   }
 
   void CreateSource() {
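Tracing the stream writes in `GenerateCInterfaceEntrypoint` for the default module (single input `input_1`, single output `output`), the emitted entrypoint should come out roughly as below; whitespace is added here for readability, since the generator emits most of it on single lines, and `TVM_DLL` is provided by the `c_runtime_api.h` include written by `GenerateAOTDescriptor`:

```c
#include <tvmgen_default.h>

/* Declaration of the TIR-generated AOT function (unpacked API). */
TVM_DLL int32_t tvmgen_default_run_model(void* arg0, void* arg1);

/* Entrypoint: unpack the interface structs into run_model arguments. */
int32_t tvmgen_default_run(struct tvmgen_default_inputs* inputs,
                           struct tvmgen_default_outputs* outputs) {
  return tvmgen_default_run_model(inputs->input_1, outputs->output);
}
```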
diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc
index a56916248858..3ad04eb3d577 100644
--- a/src/target/target_kind.cc
+++ b/src/target/target_kind.cc
@@ -299,6 +299,7 @@ TVM_REGISTER_TARGET_KIND("llvm", kDLCPU)
     .add_attr_option<String>("runtime")
     .add_attr_option<Bool>("link-params", Bool(false))
     .add_attr_option<Bool>("unpacked-api")
+    .add_attr_option<String>("interface-api")
     .set_default_keys({"cpu"});
 
 TVM_REGISTER_TARGET_KIND("c", kDLCPU)
@@ -310,6 +311,7 @@ TVM_REGISTER_TARGET_KIND("c", kDLCPU)
     .add_attr_option<String>("executor")
     .add_attr_option<Integer>("workspace-byte-alignment")
     .add_attr_option<Bool>("unpacked-api")
+    .add_attr_option<String>("interface-api")
     .set_default_keys({"cpu"});
 
 TVM_REGISTER_TARGET_KIND("cuda", kDLCUDA)
diff --git a/tests/micro/zephyr/test_zephyr_aot.py b/tests/micro/zephyr/test_zephyr_aot.py
index 48bdc5d3a283..d1c9d393770a 100644
--- a/tests/micro/zephyr/test_zephyr_aot.py
+++ b/tests/micro/zephyr/test_zephyr_aot.py
@@ -35,6 +35,7 @@
 from tvm.micro.contrib import zephyr
 from tvm.contrib import utils
 from tvm.contrib.download import download_testdata
+from tvm.micro.interface_api import generate_c_interface_header
 
 import conftest
@@ -184,7 +185,7 @@ def test_tflite(platform, west_cmd, skip_build, tvm_debug):
     )
 
     target = tvm.target.target.micro(
-        model, options=["-link-params=1", "--executor=aot", "--unpacked-api=1"]
+        model, options=["-link-params=1", "--executor=aot", "--unpacked-api=1", "--interface-api=c"]
     )
     with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
         lowered = relay.build(relay_mod, target, params=params)
@@ -196,6 +197,7 @@ def test_tflite(platform, west_cmd, skip_build, tvm_debug):
     )
     sample = np.load(sample_path)
     model_files_path = os.path.join(runtime_path, "include")
+    generate_c_interface_header(lowered.libmod_name, ["input_1"], ["output"], model_files_path)
     _create_header_file((f"input_data"), sample, model_files_path)
     _create_header_file(
         "output_data", np.zeros(shape=output_shape, dtype="float32"), model_files_path
diff --git a/tests/python/relay/aot/aot_test.mk b/tests/python/relay/aot/aot_test.mk
index 2426d9fd2963..81e31762611f 100644
--- a/tests/python/relay/aot/aot_test.mk
+++ b/tests/python/relay/aot/aot_test.mk
@@ -34,7 +34,8 @@ PKG_CFLAGS = ${PKG_COMPILE_OPTS} \
 	-I$(DMLC_CORE)/include \
 	-I$(TVM_ROOT)/3rdparty/dlpack/include \
 	-I$(AOT_ROOT)\
-	-I$(build_dir)
+	-I$(build_dir) \
+	-I$(CODEGEN_ROOT)/host/include
 
 $(ifeq VERBOSE,1)
 QUIET ?=
diff --git a/tests/python/relay/aot/aot_test_utils.py b/tests/python/relay/aot/aot_test_utils.py
index 1c4dddc4c718..900eb67e2b48 100644
--- a/tests/python/relay/aot/aot_test_utils.py
+++ b/tests/python/relay/aot/aot_test_utils.py
@@ -16,24 +16,20 @@
 # under the License.
 
 import os
-import io
-import struct
-import numpy as np
+import itertools
 import pathlib
-import shutil
 import subprocess
-import tempfile
 import tarfile
 import json
+import pytest
+import numpy as np
 
 import tvm
 from tvm import relay
-from tvm.relay import transform
 from tvm.contrib import utils, graph_executor
 from tvm.relay.backend import compile_engine
 from tvm.relay.backend.utils import mangle_module_name
-from tvm.contrib import utils
 from tvm.micro import export_model_library_format
@@ -82,6 +78,26 @@ def convert_to_list(x):
     return mod, params
 
 
+def parametrize_aot_options(test):
+    """Parametrize over valid option combinations"""
+
+    interface_api = ["packed", "c"]
+    use_unpacked_api = [True, False]
+    use_calculated_workspaces = [True, False]
+
+    all_combinations = itertools.product(interface_api, use_unpacked_api, use_calculated_workspaces)
+    # Filter out packed operators with c interface
+    valid_combinations = filter(
+        lambda parameters: not (parameters[0] == "c" and parameters[1] == False),
+        all_combinations,
+    )
+
+    return pytest.mark.parametrize(
+        ["interface_api", "use_unpacked_api", "use_calculated_workspaces"],
+        valid_combinations,
+    )(test)
+
+
 def subprocess_with_stdout_and_log(cmd, cwd, logfile, stdout):
     """
     This method runs a process and logs the output to both a log file and stdout
@@ -102,10 +118,6 @@ def subprocess_with_stdout_and_log(cmd, cwd, logfile, stdout):
             print(text, end="")
 
 
-def emit_main_network_definition(main_file, mod_name):
-    main_file.write(f'extern tvm_model_t {mangle_name(mod_name,"network")};\n')
-
-
 def emit_main_prologue(main_file, workspace_bytes):
     # Add TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES because of memory alignment.
     main_file.write(
@@ -133,46 +145,121 @@ def emit_main_prologue(main_file, workspace_bytes):
     )
 
 
-def emit_main_data(main_file, input_list, output_list, mod_name):
-    for i in range(0, len(input_list)):
-        main_file.write(f'#include "{mangle_name(mod_name,"input_data")}{i}.h"\n')
+def emit_main_data(main_file, input_map, output_list, mod_name):
+    for key in input_map:
+        main_file.write(f'#include "{mangle_name(mod_name,"input_data")}_{key}.h"\n')
 
     for i in range(0, len(output_list)):
         main_file.write(f'#include "{mangle_name(mod_name,"expected_output_data")}{i}.h"\n')
         main_file.write(f'#include "{mangle_name(mod_name,"output_data")}{i}.h"\n')
 
 
-def emit_main_run(main_file, input_list, output_list, mod_name):
+def emit_main_data_structs(main_file, input_map, output_list, mod_name):
+    main_file.write(
+        f"struct {mangle_name(mod_name, 'inputs')} {mangle_name(mod_name, 'inputs')} = {{"
+    )
+    for key in input_map:
+        main_file.write(f"\t.{key} = {mangle_name(mod_name, 'input_data')}_{key},\n")
+    main_file.write("};\n")
+
+    main_file.write(
+        f"struct {mangle_name(mod_name, 'outputs')} {mangle_name(mod_name, 'outputs')} = {{"
+    )
     num_outputs = len(output_list)
-    num_inputs = len(input_list)
+    if num_outputs == 1:
+        main_file.write(f"\t.output = {mangle_name(mod_name, 'output_data')}0,\n")
+    else:
+        for i in range(0, num_outputs):
+            main_file.write(f"\t.output{i} = {mangle_name(mod_name, 'output_data')}{i},\n")
+    main_file.write("};\n")
 
-    main_file.write(f'void* {mangle_name(mod_name,"inputs")}[{num_inputs}] = {{ ')
-    for i in range(0, len(input_list)):
-        main_file.write(f'{mangle_name(mod_name,"input_data")}{i}, ')
+
+def emit_main_data_setup(main_file, input_map, output_list, mod_name):
+    num_outputs = len(output_list)
+    num_inputs = len(input_map)
+
+    main_file.write(f'void* {mangle_name(mod_name,"inputs")}[{num_inputs}] = {{ ')
+    for key in input_map:
+        main_file.write(f'{mangle_name(mod_name,"input_data")}_{key}, ')
     main_file.write("};\n")
 
     main_file.write(f'void* {mangle_name(mod_name,"outputs")}[{num_outputs}] = {{ ')
-    for i in range(0, len(output_list)):
+    for i in range(0, num_outputs):
         main_file.write(f'{mangle_name(mod_name,"output_data")}{i}, ')
     main_file.write("};\n")
+
+
+def emit_main_c_interface_call(main_file, mod_name):
+    main_file.write(
+        f'{mangle_name(mod_name,"run")}(&{mangle_name(mod_name,"inputs")}, &{mangle_name(mod_name,"outputs")});\n'
+    )
+
+
+def emit_main_fake_packed_values(main_file):
+    main_file.write(
+        """
+    static DLDevice fake_device = {kDLCPU, 0};
+    static int64_t fake_dims = 0;
+    static int64_t fake_shape = {0};
+    """
+    )
+
+
+def emit_main_packed_call(main_file, input_map, output_list, mod_name):
+    tensors_name = mangle_name(mod_name, "tensors")
+    values_name = mangle_name(mod_name, "values")
+    typeids_name = mangle_name(mod_name, "typeids")
+
+    def fake_tensor(source, source_index, packed_index):
+        main_file.write(
+            f"""
+        {tensors_name}[{packed_index}].device = fake_device;
+        {tensors_name}[{packed_index}].data = {source}[{source_index}];
+        {tensors_name}[{packed_index}].shape = &fake_shape;
+        {tensors_name}[{packed_index}].ndim = fake_dims;
+        {tensors_name}[{packed_index}].byte_offset = 0;
+        {tensors_name}[{packed_index}].strides = NULL;
+        {values_name}[{packed_index}].v_handle = &{tensors_name}[{packed_index}];
+        """
+        )
+
+    num_outputs = len(output_list)
+    num_inputs = len(input_map)
+    num_tensors = num_inputs + num_outputs
{mangle_name(mod_name,"outputs")});' + f""" + DLTensor {tensors_name}[{num_tensors}]; + TVMValue {values_name}[{num_tensors}]; + int32_t {typeids_name}[{num_tensors}]; + """ + ) + + for i in range(0, num_inputs): + fake_tensor(mangle_name(mod_name, "inputs"), i, i) + for i in range(0, num_outputs): + fake_tensor(mangle_name(mod_name, "outputs"), i, i + num_inputs) + + main_file.write( + f'{mangle_name(mod_name, "run")}({values_name}, {typeids_name}, 0, NULL, 0, NULL);\n' ) main_file.write("\n") def emit_main_compare(main_file, output_list, mod_name): - for i in range(0, len(output_list)): + num_outputs = len(output_list) + actual_data_name = mangle_name(mod_name, "output_data") + expected_data_name = mangle_name(mod_name, "expected_output_data") + + for i in range(0, num_outputs): is_float_dtype = output_list[i].dtype == "float32" - main_file.write(f'for (int i = 0; i<{mangle_name(mod_name,"output_data")}{i}_len; i++){{\n') + main_file.write(f"for (int i = 0; i<{actual_data_name}{i}_len; i++){{\n") if is_float_dtype: main_file.write( - f'if (fabs({mangle_name(mod_name,"output_data")}{i}[i]-{mangle_name(mod_name,"expected_output_data")}{i}[i]) > 0.001f){{\n\tprintf("ko\\n");\n\treturn -1;}}\n' + f'if (fabs({actual_data_name}{i}[i]-{expected_data_name}{i}[i]) > 0.001f){{\n\tprintf("ko\\n");\n\treturn -1;}}\n' ) else: main_file.write( - f'if ({mangle_name(mod_name,"output_data")}{i}[i]!={mangle_name(mod_name, "expected_output_data")}{i}[i]){{\n\tprintf("ko\\n");\n\treturn -1;}}\n' + f'if ({actual_data_name}{i}[i]!={expected_data_name}{i}[i]){{\n\tprintf("ko\\n");\n\treturn -1;}}\n' ) main_file.write("}\n") @@ -191,33 +278,48 @@ def emit_main_epilogue(main_file): def emit_main_common_includes(main_file): main_file.write("#include \n") main_file.write("#include \n") - main_file.write('#include "tvm/runtime/crt/internal/aot_executor/aot_executor.h"\n') + main_file.write('#include "tvm/runtime/c_runtime_api.h"\n') main_file.write('#include "tvm/runtime/crt/stack_allocator.h"\n') -def create_main(test_name, input_list_map, output_list_map, output_path, workspace_bytes): +def emit_main_micro_include(main_file, mod_name): + main_file.write(f"#include <{mangle_module_name(mod_name)}.h>\n") + + +def create_main(test_name, input_map, output_list_map, output_path, interface_api, workspace_bytes): file_path = pathlib.Path(f"{output_path}/" + test_name).resolve() # create header file raw_path = file_path.with_suffix(".c").resolve() with open(raw_path, "w") as main_file: emit_main_common_includes(main_file) - for k in input_list_map: - emit_main_network_definition(main_file, k) + if interface_api == "c": + for mod_name in input_map: + emit_main_micro_include(main_file, mod_name) emit_main_prologue(main_file, workspace_bytes) - - for k in input_list_map: - emit_main_data(main_file, input_list_map[k], output_list_map[k], k) - + for mod_name in input_map: + emit_main_data(main_file, input_map[mod_name], output_list_map[mod_name], mod_name) emit_main_init_memory_manager(main_file) - for k in input_list_map: - emit_main_run(main_file, input_list_map[k], output_list_map[k], k) - - for k in input_list_map: - emit_main_compare(main_file, output_list_map[k], k) - + if interface_api == "c": + for mod_name in input_map: + emit_main_data_structs( + main_file, input_map[mod_name], output_list_map[mod_name], mod_name + ) + emit_main_c_interface_call(main_file, mod_name) + else: + emit_main_fake_packed_values(main_file) + for mod_name in input_map: + emit_main_data_setup( + main_file, input_map[mod_name], 
+                emit_main_data_setup(
+                    main_file, input_map[mod_name], output_list_map[mod_name], mod_name
+                )
+                emit_main_packed_call(
+                    main_file, input_map[mod_name], output_list_map[mod_name], mod_name
+                )
+
+        for mod_name in input_map:
+            emit_main_compare(main_file, output_list_map[mod_name], mod_name)
         emit_main_epilogue(main_file)
@@ -258,19 +360,22 @@ def extract_main_workspace_sizebytes(extract_dir):
 
 def compile_and_run(
     mod,
-    input_list,
+    inputs,
     output_list,
-    target_options,
+    interface_api,
+    use_unpacked_api,
     use_calculated_workspaces,
     params=None,
     workspace_byte_alignment=8,
-    mod_name=None,
+    mod_name="default",
     enable_op_fusion=True,
 ):
     """
     This method verifies the generated source
     """
-    target = f"c -runtime=c --link-params --executor=aot --workspace-byte-alignment={workspace_byte_alignment} {target_options}"
+    base_target = "c -runtime=c --link-params --executor=aot"
+    extra_target = f"--workspace-byte-alignment={workspace_byte_alignment} --interface-api={interface_api} --unpacked-api={int(use_unpacked_api)}"
+    target = f"{base_target} {extra_target}"
 
     cflags = f"-DTVM_RUNTIME_ALLOC_ALIGNMENT_BYTES={workspace_byte_alignment} "
     # The calculated workspaces will not account for stack allocator tags used for debugging
@@ -300,8 +405,8 @@ def compile_and_run(
     else:
         workspace_bytes = 16384 * 1024
 
-    for i in range(len(input_list)):
-        create_header_file((f'{mangle_name(mod_name, "input_data")}{i}'), input_list[i], build_path)
+    for key in inputs:
+        create_header_file(f'{mangle_name(mod_name, "input_data")}_{key}', inputs[key], build_path)
 
     for i in range(len(output_list)):
         create_header_file(
@@ -314,16 +419,23 @@ def compile_and_run(
     )
 
     create_main(
-        "test.c", {mod_name: input_list}, {mod_name: output_list}, build_path, workspace_bytes
+        "test.c",
+        {mod_name: inputs},
+        {mod_name: output_list},
+        build_path,
+        interface_api,
+        workspace_bytes,
     )
 
     # Verify that compiles fine
     file_dir = os.path.dirname(os.path.abspath(__file__))
+    codegen_path = os.path.join(base_path, "codegen")
     makefile = os.path.join(file_dir, "aot_test.mk")
+ + f" CODEGEN_ROOT={codegen_path}" ) compile_log_path = os.path.join(build_path, "test_compile.log") @@ -337,12 +449,21 @@ def compile_and_run( def compile_and_run_multiple_models( - mod_map, input_list_map, output_list_map, target_options, param_map + mod_map, + input_list_map, + output_list_map, + interface_api, + use_unpacked_api, + use_calculated_workspaces, + param_map, + workspace_byte_alignment=8, ): """ This method verifies the generated source """ - target = f"c -runtime=c --link-params --executor=aot {target_options}" + base_target = "c -runtime=c --link-params --executor=aot" + extra_target = f"--workspace-byte-alignment={workspace_byte_alignment} --interface-api={interface_api} --unpacked-api={int(use_unpacked_api)}" + target = f"{base_target} {extra_target}" tmp_path = utils.tempdir() tmp_dir = tmp_path.temp_dir @@ -364,9 +485,9 @@ def compile_and_run_multiple_models( input_list = input_list_map[mod_name] output_list = output_list_map[mod_name] - for i in range(len(input_list_map[mod_name])): + for key in input_list: create_header_file( - (f'{mangle_name(mod_name,"input_data")}{i}'), input_list[i], build_path + (f'{mangle_name(mod_name,"input_data")}_{key}'), input_list[key], build_path ) for i in range(len(output_list_map[mod_name])): @@ -379,12 +500,25 @@ def compile_and_run_multiple_models( (f'{mangle_name(mod_name,"expected_output_data")}{i}'), output_list[i], build_path ) - create_main("test.c", input_list_map, output_list_map, build_path, workspace_bytes=16384 * 1024) + create_main( + "test.c", + input_list_map, + output_list_map, + build_path, + interface_api, + workspace_bytes=16384 * 1024, + ) # Verify that compiles fine file_dir = os.path.dirname(os.path.abspath(__file__)) + codegen_path = os.path.join(base_path, "codegen") makefile = os.path.join(file_dir, "aot_test.mk") - make_cmd = f"make -f {makefile} build_dir=" + build_path + f" TVM_ROOT={file_dir}/../../../.." + make_cmd = ( + f"make -f {makefile} build_dir=" + + build_path + + f" TVM_ROOT={file_dir}/../../../.." + + f" CODEGEN_ROOT={codegen_path}" + ) compile_log_path = os.path.join(build_path, "test_compile.log") ret = subprocess_with_stdout_and_log(make_cmd, ".", compile_log_path, False) diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py index 13cbfa71b6ae..26eca2688436 100644 --- a/tests/python/relay/aot/test_crt_aot.py +++ b/tests/python/relay/aot/test_crt_aot.py @@ -15,37 +15,48 @@ # specific language governing permissions and limitations # under the License. 
diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py
index 13cbfa71b6ae..26eca2688436 100644
--- a/tests/python/relay/aot/test_crt_aot.py
+++ b/tests/python/relay/aot/test_crt_aot.py
@@ -15,37 +15,48 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import os
-import io
-import struct
+from collections import OrderedDict
+
 import numpy as np
-import pathlib
-import shutil
-import subprocess
-import tempfile
-import tarfile
 import pytest
 
 import tvm
 from tvm import relay
-from tvm.relay import transform
-from tvm.relay.op.contrib import get_pattern_table
-from tvm.contrib import utils
-from tvm.relay.backend import compile_engine
-from tvm.contrib import utils
-from tvm.contrib import graph_executor
-from tvm.micro import export_model_library_format
-from tvm.relay import testing
+from tvm.relay import testing, transform
 from tvm.relay.op.annotation import compiler_begin, compiler_end
-from tvm.contrib import utils
 from tvm.relay.expr_functor import ExprMutator
+from aot_test_utils import (
+    generate_ref_data,
+    convert_to_relay,
+    compile_and_run,
+    compile_and_run_multiple_models,
+    parametrize_aot_options,
+)
 
-from aot_test_utils import *
 
+def test_error_c_interface_with_packed_api():
+    interface_api = "c"
+    use_unpacked_api = False
+    use_calculated_workspaces = True
 
-@pytest.mark.parametrize("use_calculated_workspaces", [True, False])
-@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"])
-def test_conv_with_params(use_calculated_workspaces, target_options):
+    two = relay.add(relay.const(1), relay.const(1))
+    func = relay.Function([], two)
+    output_list = generate_ref_data(func, {})
+    input_list = []
+
+    with pytest.raises(tvm.TVMError, match="Packed interface required for packed operators"):
+        compile_and_run(
+            func,
+            input_list,
+            output_list,
+            interface_api,
+            use_unpacked_api,
+            use_calculated_workspaces,
+        )
+
+
+@parametrize_aot_options
+def test_conv_with_params(interface_api, use_unpacked_api, use_calculated_workspaces):
     RELAY_MODEL = """
 #[version = "0.0.5"]
 def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5), int8]) {
@@ -73,13 +84,19 @@ def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5),
     inputs = {"data": input_data}
     output_list = generate_ref_data(mod, inputs, params)
 
-    input_list = [input_data]
-    compile_and_run(mod, input_list, output_list, target_options, use_calculated_workspaces, params)
+    compile_and_run(
+        mod,
+        inputs,
+        output_list,
+        interface_api,
+        use_unpacked_api,
+        use_calculated_workspaces,
+        params,
+    )
 
 
-@pytest.mark.parametrize("use_calculated_workspaces", [True, False])
-@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"])
-def test_add_with_params(use_calculated_workspaces, target_options):
+@parametrize_aot_options
+def test_add_with_params(interface_api, use_unpacked_api, use_calculated_workspaces):
     x = relay.var("x", shape=(1, 10))
     y = relay.var("y", shape=(1, 10))
     z = relay.add(x, y)
@@ -92,15 +109,19 @@ def test_add_with_params(use_calculated_workspaces, target_options):
     inputs = {"y": y_in}
     output_list = generate_ref_data(func, inputs, params)
 
-    input_list = [y_in]
     compile_and_run(
-        func, input_list, output_list, target_options, use_calculated_workspaces, params
+        func,
+        inputs,
+        output_list,
+        interface_api,
+        use_unpacked_api,
+        use_calculated_workspaces,
+        params,
     )
 
 
-@pytest.mark.parametrize("use_calculated_workspaces", [True, False])
-@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"])
-def test_conv2d(use_calculated_workspaces, target_options):
+@parametrize_aot_options
+def test_conv2d(use_calculated_workspaces, interface_api, use_unpacked_api):
     """Test a subgraph with a single conv2d operator."""
 
     def conv2d_direct():
@@ -119,7 +140,8 @@ def conv2d_direct():
         i_data = np.random.uniform(0, 1, ishape).astype(dtype)
         w1_data = np.random.uniform(0, 1, w1shape).astype(dtype)
 
-        return mod, {"data": i_data, "weight": w1_data}, (1, 32, 14, 14)
+        inputs = OrderedDict([("data", i_data), ("weight", w1_data)])
+        return mod, inputs, (1, 32, 14, 14)
 
     def group_conv2d():
         dtype = "float32"
@@ -137,17 +159,23 @@ def group_conv2d():
         i_data = np.random.uniform(0, 1, ishape).astype(dtype)
         w_data = np.random.uniform(0, 1, w2shape).astype(dtype)
 
-        return mod, {"data": i_data, "weight": w_data}, (1, 32, 14, 14)
+        inputs = OrderedDict([("data", i_data), ("weight", w_data)])
+        return mod, inputs, (1, 32, 14, 14)
 
     for mod, inputs, out_shape in [conv2d_direct(), group_conv2d()]:
         output_list = generate_ref_data(mod, inputs)
-        input_list = [inputs["data"], inputs["weight"]]
-        compile_and_run(mod, input_list, output_list, target_options, use_calculated_workspaces)
-
-
-@pytest.mark.parametrize("use_calculated_workspaces", [True, False])
-@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"])
-def test_concatenate(use_calculated_workspaces, target_options):
+        compile_and_run(
+            mod,
+            inputs,
+            output_list,
+            interface_api,
+            use_unpacked_api,
+            use_calculated_workspaces,
+        )
+
+
+@parametrize_aot_options
+def test_concatenate(interface_api, use_unpacked_api, use_calculated_workspaces):
     dtype = "float32"
     x = relay.var("x", shape=(10, 5), dtype=dtype)
     y = relay.var("y", shape=(10, 5), dtype=dtype)
@@ -159,16 +187,21 @@ def test_concatenate(use_calculated_workspaces, target_options):
     x_data = np.random.rand(10, 5).astype(dtype)
     y_data = np.random.rand(10, 5).astype(dtype)
     t_data = np.random.uniform(size=()).astype(dtype)
-    inputs = {"x": x_data, "y": y_data, "z": t_data}
+    inputs = OrderedDict([("x", x_data), ("y", y_data), ("z", t_data)])
 
     output_list = generate_ref_data(func, inputs)
-    input_list = [inputs["x"], inputs["y"], inputs["z"]]
-    compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces)
+    compile_and_run(
+        func,
+        inputs,
+        output_list,
+        interface_api,
+        use_unpacked_api,
+        use_calculated_workspaces,
+    )
 
 
-@pytest.mark.parametrize("use_calculated_workspaces", [True, False])
-@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"])
-def test_nested_tuples(use_calculated_workspaces, target_options):
+@parametrize_aot_options
+def test_nested_tuples(interface_api, use_unpacked_api, use_calculated_workspaces):
     x = relay.var("x", shape=(10,))
     x1 = x + relay.const(1.0)
     x2 = x1 + relay.const(1.0)
@@ -180,71 +213,109 @@ def test_nested_tuples(use_calculated_workspaces, target_options):
     x_data = np.random.uniform(size=(10,)).astype(np.float32)
     inputs = {"x": x_data}
     output_list = generate_ref_data(func, inputs)
-    input_list = [x_data]
-    compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces)
+    compile_and_run(
+        func,
+        inputs,
+        output_list,
+        interface_api,
+        use_unpacked_api,
+        use_calculated_workspaces,
+    )
 
 
-@pytest.mark.parametrize("use_calculated_workspaces", [True, False])
-@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"])
-def test_tuple_getitem(use_calculated_workspaces, target_options):
+@parametrize_aot_options
+def test_tuple_getitem(interface_api, use_unpacked_api, use_calculated_workspaces):
     func = relay.Function([], relay.TupleGetItem(relay.Tuple([relay.const(1), relay.const(2)]), 0))
     output_list = generate_ref_data(func, {})
-    input_list = []
-    compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces)
+    inputs = {}
+
+    compile_and_run(
+        func,
+        inputs,
+        output_list,
+        interface_api,
+        use_unpacked_api,
+        use_calculated_workspaces,
+    )
 
 
-@pytest.mark.parametrize("use_calculated_workspaces", [True, False])
-@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"])
-def test_id(use_calculated_workspaces, target_options):
+@parametrize_aot_options
+def test_id(interface_api, use_unpacked_api, use_calculated_workspaces):
     x = relay.var("x", "float32")
     ident = relay.Function([x], x)
     one = np.array(1.0, "float32")
     inputs = {"x": one}
     output_list = generate_ref_data(ident, inputs)
-    input_list = [one]
-    compile_and_run(ident, input_list, output_list, target_options, use_calculated_workspaces)
+    compile_and_run(
+        ident,
+        inputs,
+        output_list,
+        interface_api,
+        use_unpacked_api,
+        use_calculated_workspaces,
+    )
 
 
-@pytest.mark.parametrize("use_calculated_workspaces", [True, False])
-@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"])
-def test_add_const(use_calculated_workspaces, target_options):
+@parametrize_aot_options
+def test_add_const(interface_api, use_unpacked_api, use_calculated_workspaces):
     two = relay.add(relay.const(1), relay.const(1))
     func = relay.Function([], two)
     output_list = generate_ref_data(func, {})
-    input_list = []
-    compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces)
+    inputs = {}
+    compile_and_run(
+        func,
+        inputs,
+        output_list,
+        interface_api,
+        use_unpacked_api,
+        use_calculated_workspaces,
+    )
 
 
-@pytest.mark.parametrize("use_calculated_workspaces", [True, False])
-@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"])
-def test_mul_param(use_calculated_workspaces, target_options):
+@parametrize_aot_options
+def test_mul_param(interface_api, use_unpacked_api, use_calculated_workspaces):
     x = relay.var("x", shape=(10, 10))
     y = relay.var("y", shape=(1, 10))
     func = relay.Function([x, y], relay.multiply(x, y))
     x_data = np.random.rand(10, 10).astype("float32")
     y_data = np.random.rand(1, 10).astype("float32")
-    inputs = {"x": x_data, "y": y_data}
+
+    inputs = OrderedDict([("x", x_data), ("y", y_data)])
     output_list = generate_ref_data(func, inputs)
-    input_list = [inputs["x"], inputs["y"]]
-    compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces)
+    compile_and_run(
+        func,
+        inputs,
+        output_list,
+        interface_api,
+        use_unpacked_api,
+        use_calculated_workspaces,
+    )
 
 
-@pytest.mark.parametrize("use_calculated_workspaces", [True, False])
-@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"])
-def test_subtract(use_calculated_workspaces, target_options):
+@parametrize_aot_options
+def test_subtract(interface_api, use_unpacked_api, use_calculated_workspaces):
     i = relay.var("i", shape=[], dtype="int32")
     sub = relay.subtract(i, relay.const(1, dtype="int32"))
     func = relay.Function([i], sub, ret_type=relay.TensorType([], "int32"))
     i_data = np.array(1, dtype="int32")
     inputs = {"i": i_data}
     output_list = generate_ref_data(func, inputs)
-    input_list = [inputs["i"]]
-    compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces)
+    compile_and_run(
+        func,
+        inputs,
+        output_list,
+        interface_api,
+        use_unpacked_api,
+        use_calculated_workspaces,
+    )
 
 
-@pytest.mark.parametrize("use_calculated_workspaces", [True, False])
"--unpacked-api=1"]) -def test_tuple_output(use_calculated_workspaces, target_options): +@parametrize_aot_options +def test_tuple_output(interface_api, use_unpacked_api, use_calculated_workspaces): x = relay.var("x", shape=(6, 9)) y = relay.split(x, 3).astuple() a = relay.TupleGetItem(y, 0) @@ -255,29 +326,34 @@ def test_tuple_output(use_calculated_workspaces, target_options): x_data = np.random.rand(6, 9).astype("float32") inputs = {"x": x_data} output_list = generate_ref_data(func, inputs) - input_list = [inputs["x"]] - compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces) + compile_and_run( + func, + inputs, + output_list, + interface_api, + use_unpacked_api, + use_calculated_workspaces, + ) @pytest.mark.parametrize( - "use_calculated_workspaces_and_alignment", [(True, 1), (True, 16), (False, 1)] + ["use_calculated_workspaces", "workspace_byte_alignment"], [(True, 1), (True, 16), (False, 1)] ) -@pytest.mark.parametrize("target_options", ["--unpacked-api"]) -def test_mobilenet(use_calculated_workspaces_and_alignment, target_options): - use_calculated_workspaces = use_calculated_workspaces_and_alignment[0] - workspace_byte_alignment = use_calculated_workspaces_and_alignment[1] +def test_mobilenet(use_calculated_workspaces, workspace_byte_alignment): + use_unpacked_api = True + interface_api = "c" mod, params = testing.mobilenet.get_workload(batch_size=1) data_shape = [int(x) for x in mod["main"].checked_type.arg_types[0].shape] data = np.random.uniform(size=data_shape).astype("float32") inputs = {"data": data} output_list = generate_ref_data(mod, inputs, params) - input_list = [inputs["data"]] compile_and_run( mod, - input_list, + inputs, output_list, - target_options, + interface_api, + use_unpacked_api, use_calculated_workspaces, params, workspace_byte_alignment, @@ -339,9 +415,11 @@ def visit_call(self, call): @pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -@pytest.mark.parametrize("target_options", [""]) -def test_byoc_microtvm(use_calculated_workspaces, target_options): +def test_byoc_microtvm(use_calculated_workspaces): """This is a simple test case to check BYOC capabilities of AOT""" + use_unpacked_api = False + interface_api = "packed" + x = relay.var("x", shape=(10, 10)) w0 = relay.var("w0", shape=(10, 10)) w1 = relay.var("w1", shape=(10, 10)) @@ -379,18 +457,23 @@ def test_byoc_microtvm(use_calculated_workspaces, target_options): for _ in range(8): w_data.append(np.random.rand(10, 10).astype("float32")) - map_inputs = {"w{}".format(i): w_data[i] for i in range(8)} - map_inputs["x"] = x_data + map_inputs = OrderedDict([("x", x_data)] + [("w{}".format(i), w_data[i]) for i in range(8)]) output_list = generate_ref_data(mod, map_inputs) input_list = [map_inputs["x"]] input_list.extend([map_inputs["w{}".format(i)] for i in range(8)]) compile_and_run( - mod, input_list, output_list, target_options, use_calculated_workspaces, mod_name="my_mod" + mod, + map_inputs, + output_list, + interface_api, + use_unpacked_api, + use_calculated_workspaces, + mod_name="my_mod", ) -@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) -def test_add_name_mangling_with_params(target_options): +@parametrize_aot_options +def test_add_name_mangling_with_params(interface_api, use_unpacked_api, use_calculated_workspaces): x = relay.var("x", shape=(1, 10)) y = relay.var("y", shape=(1, 10)) z = relay.add(x, y) @@ -403,27 +486,26 @@ def test_add_name_mangling_with_params(target_options): inputs = {"y": y_in} output_list = 
     output_list = generate_ref_data(func, inputs, params)
 
-    input_list = [y_in]
     compile_and_run(
         func,
-        input_list,
+        inputs,
         output_list,
-        target_options,
-        use_calculated_workspaces=False,
+        interface_api,
+        use_unpacked_api,
+        use_calculated_workspaces,
         params=params,
         mod_name="my_mod",
     )
 
 
-@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"])
-def test_multiple_models(target_options):
+@parametrize_aot_options
+def test_multiple_models(interface_api, use_unpacked_api, use_calculated_workspaces):
     # Identity model without params
     x = relay.var("x", "float32")
     mod1 = relay.Function([x], x)
     one = np.array(1.0, "float32")
     inputs1 = {"x": one}
     output_list1 = generate_ref_data(mod1, inputs1)
-    input_list1 = [one]
     params1 = None
 
     # Convolution model
@@ -453,15 +535,20 @@ def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5),
     params2 = {"weight": weight_data}
     inputs2 = {"data": input_data}
     output_list2 = generate_ref_data(mod2, inputs2, params2)
-    input_list2 = [input_data]
 
-    input_list_map = {"mod1": input_list1, "mod2": input_list2}
+    input_list_map = {"mod1": inputs1, "mod2": inputs2}
     output_list_map = {"mod1": output_list1, "mod2": output_list2}
     mod_map = {"mod1": mod1, "mod2": mod2}
     param_map = {"mod1": params1, "mod2": params2}
 
     compile_and_run_multiple_models(
-        mod_map, input_list_map, output_list_map, target_options, param_map
+        mod_map,
+        input_list_map,
+        output_list_map,
+        interface_api,
+        use_unpacked_api,
+        use_calculated_workspaces,
+        param_map,
    )
@@ -473,6 +560,10 @@ def test_quant_mobilenet_tfl():
 
     import tvm.relay.testing.tf as tf_testing
 
+    interface_api = "packed"
+    use_unpacked_api = False
+    use_calculated_workspaces = True
+
     tflite_model_file = tf_testing.get_workload_official(
         "https://storage.googleapis.com/download.tensorflow.org/"
         "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz",
@@ -486,12 +577,19 @@ def test_quant_mobilenet_tfl():
     mod, params = convert_to_relay(tflite_model_buf, data, "input")
     inputs = {"input": data}
     output_list = generate_ref_data(mod, inputs, params)
-    input_list = [inputs["input"]]
-    compile_and_run(mod, input_list, output_list, "--unpacked-api=0", True, params)
+    compile_and_run(
+        mod,
+        inputs,
+        output_list,
+        interface_api,
+        use_unpacked_api,
+        use_calculated_workspaces,
+        params=params,
+    )
 
 
-@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"])
-def test_transpose(target_options):
+@parametrize_aot_options
+def test_transpose(interface_api, use_unpacked_api, use_calculated_workspaces):
     """Test that non-inpleaceable operations (e.g., transpose) do not happen in-place."""
 
     dtype = "float32"
@@ -506,11 +604,18 @@ def test_transpose(target_options):
     x_data = np.random.rand(10, 5).astype(dtype)
     y_data = np.random.rand(10, 5).astype(dtype)
     t_data = np.random.uniform(size=()).astype(dtype)
-    inputs = {"x": x_data, "y": y_data, "z": t_data}
+    inputs = {"x": x_data, "y": y_data, "z": t_data}
 
     output_list = generate_ref_data(func, inputs)
-    input_list = [inputs["x"], inputs["y"], inputs["z"]]
-    compile_and_run(func, input_list, output_list, target_options, True, enable_op_fusion=False)
+    compile_and_run(
+        func,
+        inputs,
+        output_list,
+        interface_api,
+        use_unpacked_api,
+        use_calculated_workspaces,
+        enable_op_fusion=False,
+    )
 
 
 if __name__ == "__main__":
diff --git a/tests/python/unittest/test_micro_model_library_format.py b/tests/python/unittest/test_micro_model_library_format.py
index a15e37925eea..5a32385632fc 100644
--- a/tests/python/unittest/test_micro_model_library_format.py
+++ b/tests/python/unittest/test_micro_model_library_format.py
@@ -102,14 +102,20 @@ def validate_graph_json(extract_dir, factory):
 
 @tvm.testing.requires_micro
 @pytest.mark.parametrize(
-    "target",
+    "executor,target,should_generate_interface",
     [
-        ("graph", tvm.target.target.micro("host")),
-        ("aot", tvm.target.target.micro("host", options="-executor=aot")),
+        ("graph", tvm.target.target.micro("host"), False),
+        ("aot", tvm.target.target.micro("host", options="-executor=aot"), False),
+        (
+            "aot",
+            tvm.target.target.micro(
+                "host", options="-executor=aot --unpacked-api=1 --interface-api=c"
+            ),
+            True,
+        ),
     ],
 )
-def test_export_model_library_format_c(target):
-    executor, _target = target
+def test_export_model_library_format_c(executor, target, should_generate_interface):
     with utils.TempDirectory.set_keep_for_debug(True):
         with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
             relay_mod = tvm.parser.fromtext(
@@ -122,8 +128,8 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[
             )
             factory = tvm.relay.build(
                 relay_mod,
-                _target,
-                target_host=_target,
+                target,
+                target_host=target,
                 mod_name="add",
                 params={"c": numpy.array([[2.0, 4.0]], dtype="float32")},
             )
@@ -147,7 +153,7 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[
                 metadata["export_datetime"], "%Y-%m-%d %H:%M:%SZ"
             )
             assert (datetime.datetime.now() - export_datetime) < datetime.timedelta(seconds=60 * 5)
-            assert metadata["target"] == {"1": str(_target)}
+            assert metadata["target"] == {"1": str(target)}
             if executor == "graph":
                 assert metadata["memory"]["sids"] == [
                     {"storage_id": 0, "size_bytes": 2, "input_binding": "a"},
@@ -173,6 +179,9 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[
 
             assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "add_lib0.c"))
             assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "add_lib1.c"))
+            assert should_generate_interface == os.path.exists(
+                os.path.join(extract_dir, "codegen", "host", "include", "tvmgen_add.h")
+            )
 
             if executor == "graph":
                 validate_graph_json(extract_dir, factory)
@@ -265,13 +274,9 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[
 @tvm.testing.requires_micro
 @pytest.mark.parametrize(
     "target",
-    [
-        ("graph", tvm.target.target.micro("host")),
-        ("aot", tvm.target.target.micro("host", options="-executor=aot")),
-    ],
+    [tvm.target.target.micro("host"), tvm.target.target.micro("host", options="-executor=aot")],
 )
 def test_export_model_library_format_workspace(target):
-    executor, _target = target
     with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
         relay_mod = tvm.parser.fromtext(
             """
@@ -285,7 +290,7 @@ def @main(%p0: Tensor[(1, 56, 56, 128), int16], %p1: Tensor[(3, 3, 128, 1), int1
             }
         """
         )
-        factory = tvm.relay.build(relay_mod, _target, target_host=_target, mod_name="qnn_conv2d")
+        factory = tvm.relay.build(relay_mod, target, target_host=target, mod_name="qnn_conv2d")
 
     temp_dir = utils.tempdir()
     mlf_tar_path = temp_dir.relpath("lib.tar")
@@ -306,7 +311,7 @@ def @main(%p0: Tensor[(1, 56, 56, 128), int16], %p1: Tensor[(3, 3, 128, 1), int1
             metadata["export_datetime"], "%Y-%m-%d %H:%M:%SZ"
         )
         assert (datetime.datetime.now() - export_datetime) < datetime.timedelta(seconds=60 * 5)
-        assert metadata["target"] == {"1": str(_target)}
+        assert metadata["target"] == {"1": str(target)}
metadata["memory"]["functions"]["main"] == [ { "constants_size_bytes": 0, @@ -327,9 +332,6 @@ def @main(%p0: Tensor[(1, 56, 56, 128), int16], %p1: Tensor[(3, 3, 128, 1), int1 @tvm.testing.requires_micro def test_export_non_dso_exportable(): module = tvm.support.FrontendTestModule() - factory = executor_factory.GraphExecutorFactoryModule( - None, tvm.target.target.micro("host"), '"graph_json"', module, "test_module", {}, {} - ) temp_dir = utils.tempdir() import tvm.micro as micro