diff --git a/apps/microtvm/arduino/template_project/src/example_project/model.c b/apps/microtvm/arduino/template_project/src/example_project/model.c index 553665191b14..25d609dacce1 100644 --- a/apps/microtvm/arduino/template_project/src/example_project/model.c +++ b/apps/microtvm/arduino/template_project/src/example_project/model.c @@ -86,7 +86,7 @@ tvm_crt_error_t TVMPlatformGenerateRandom(uint8_t* buffer, size_t num_bytes) { void TVMInitialize() { StackMemoryManager_Init(&app_workspace, g_aot_memory, WORKSPACE_SIZE); } void TVMExecute(void* input_data, void* output_data) { - int ret_val = tvmgen_default_run_model(input_data, output_data); + int ret_val = tvmgen_default___tvm_main__(input_data, output_data); if (ret_val != 0) { TVMPlatformAbort(kTvmErrorPlatformCheckFailure); } diff --git a/apps/microtvm/zephyr_cmsisnn/src/main.c b/apps/microtvm/zephyr_cmsisnn/src/main.c index 274bd63d3ea5..31f6cd0cc1d0 100644 --- a/apps/microtvm/zephyr_cmsisnn/src/main.c +++ b/apps/microtvm/zephyr_cmsisnn/src/main.c @@ -34,7 +34,7 @@ extern float output_storage[12]; extern const size_t output_len; -static uint8_t g_crt_workspace[TVMGEN_DEFAULT_WORKSPACE_SIZE + 512]; +static uint8_t g_crt_workspace[TVMGEN_DEFAULT_WORKSPACE_SIZE]; tvm_workspace_t app_workspace; void TVMLogf(const char* msg, ...) { diff --git a/include/tvm/runtime/module.h b/include/tvm/runtime/module.h index 7b5326a44921..2e2a79b1ca53 100644 --- a/include/tvm/runtime/module.h +++ b/include/tvm/runtime/module.h @@ -235,8 +235,6 @@ constexpr const char* tvm_module_main = "__tvm_main__"; constexpr const char* tvm_param_prefix = "__tvm_param__"; /*! \brief A PackedFunc that looks up linked parameters by storage_id. */ constexpr const char* tvm_lookup_linked_param = "_lookup_linked_param"; -/*! \brief The main AOT executor function generated from TIR */ -constexpr const char* tvm_run_func_suffix = "run_model"; /*! \brief Model entrypoint generated as an interface to the AOT function outside of TIR */ constexpr const char* tvm_entrypoint_suffix = "run"; } // namespace symbol diff --git a/src/relay/backend/aot_executor_codegen.cc b/src/relay/backend/aot_executor_codegen.cc index 2168ea74a0ff..a25ef458906c 100644 --- a/src/relay/backend/aot_executor_codegen.cc +++ b/src/relay/backend/aot_executor_codegen.cc @@ -658,8 +658,7 @@ class AOTExecutorCodegen : public MixedModeVisitor { // Define the PrimFunc attributes Map dict_attrs; - String run_func_name = - runtime::get_name_mangled(mod_name, runtime::symbol::tvm_run_func_suffix); + String run_func_name = runtime::get_name_mangled(mod_name, runtime::symbol::tvm_module_main); dict_attrs.Set("global_symbol", run_func_name); dict_attrs.Set("runner_function", Bool(true)); dict_attrs.Set(tvm::attr::kTarget, target_host_); @@ -702,6 +701,35 @@ class AOTExecutorCodegen : public MixedModeVisitor { } } + /*! + * brief Calculate workspace sizes for PrimFuncs in the IRModule + */ + Map CalculateWorkspaceSizes( + const IRModule& lowered_mod, const Map& function_metadata) { + Executor executor_config = lowered_mod->GetAttr(tvm::attr::kExecutor).value(); + Integer workspace_byte_alignment = + executor_config->GetAttr("workspace-byte-alignment").value_or(16); + Map updated_function_metadata; + for (const auto& kv : lowered_mod->functions) { + GlobalVar global_var = kv.first; + BaseFunc base_func = kv.second; + if (base_func->IsInstance()) { + tir::PrimFunc pfunc = Downcast(base_func); + Target tgt = pfunc->GetAttr(tvm::attr::kTarget).value(); + const auto& ws = CalculateWorkspaceBytes(pfunc, workspace_byte_alignment); + if (function_metadata.count(global_var->name_hint)) { + updated_function_metadata.Set(global_var->name_hint, + function_metadata[global_var->name_hint]); + updated_function_metadata[global_var->name_hint]->workspace_sizes.Set(tgt, ws); + } else { + FunctionInfo finfo{{{tgt, ws}}, {}, {}, {{tgt, pfunc}}, {}}; + updated_function_metadata.Set(global_var->name_hint, finfo); + } + } + } + return updated_function_metadata; + } + /*! * brief Run USMP to plan memory for lowered IRModule */ @@ -710,17 +738,8 @@ class AOTExecutorCodegen : public MixedModeVisitor { Integer workspace_byte_alignment = executor_config->GetAttr("workspace-byte-alignment").value_or(16); IRModule lowered_mod = mod->ShallowCopy(); + function_metadata_ = CalculateWorkspaceSizes(lowered_mod, function_metadata_); lowered_mod = tir::transform::UnifiedStaticMemoryPlanner()(lowered_mod); - // Update workspace size based on the pool allocations. - for (const auto& kv : function_metadata_) { - if (lowered_mod->ContainGlobalVar(kv.first) && - lowered_mod->Lookup(kv.first)->IsInstance()) { - tir::PrimFunc pfunc = Downcast(lowered_mod->Lookup(kv.first)); - Target tgt = pfunc->GetAttr(tvm::attr::kTarget).value(); - const auto& ws = CalculateWorkspaceBytes(pfunc, workspace_byte_alignment); - kv.second->workspace_sizes.Set(tgt, ws); - } - } Optional> allocated_pool_infos = lowered_mod->GetAttr>(tvm::attr::kPoolArgs); backend::FunctionInfo main_func_info = @@ -752,17 +771,18 @@ class AOTExecutorCodegen : public MixedModeVisitor { Integer workspace_byte_alignment = executor_config->GetAttr("workspace-byte-alignment").value_or(16); IRModule lowered_mod = mod->ShallowCopy(); + function_metadata_ = CalculateWorkspaceSizes(lowered_mod, function_metadata_); // Running StorageRewrite just on the main function tir::PrimFunc tir_main_func = - Downcast(lowered_mod->Lookup(::tvm::runtime::symbol::tvm_run_func_suffix)); + Downcast(lowered_mod->Lookup(::tvm::runtime::symbol::tvm_module_main)); IRModule main_func_mod; - main_func_mod->Update(lowered_mod->GetGlobalVar(::tvm::runtime::symbol::tvm_run_func_suffix), + main_func_mod->Update(lowered_mod->GetGlobalVar(::tvm::runtime::symbol::tvm_module_main), tir_main_func); main_func_mod = tir::transform::StorageRewrite()(main_func_mod); - lowered_mod->Update(lowered_mod->GetGlobalVar(::tvm::runtime::symbol::tvm_run_func_suffix), - main_func_mod->Lookup(::tvm::runtime::symbol::tvm_run_func_suffix)); + lowered_mod->Update(lowered_mod->GetGlobalVar(::tvm::runtime::symbol::tvm_module_main), + main_func_mod->Lookup(::tvm::runtime::symbol::tvm_module_main)); tir_main_func = - Downcast(lowered_mod->Lookup(::tvm::runtime::symbol::tvm_run_func_suffix)); + Downcast(lowered_mod->Lookup(::tvm::runtime::symbol::tvm_module_main)); // Use the PrimFunc to calculate the workspace required to service the allocates Integer main_workspace_size_bytes = CalculateWorkspaceBytes(tir_main_func, workspace_byte_alignment); @@ -920,7 +940,7 @@ class AOTExecutorCodegen : public MixedModeVisitor { // function and replacing it with its TIR version. We should try to make this a Pass. lowered_mod->Remove(lowered_mod->GetGlobalVar("main")); auto prim_func = CreateMainFunc(mod_name, lowered_main_func->params.size()); - lowered_mod->Update(GlobalVar(::tvm::runtime::symbol::tvm_run_func_suffix), prim_func); + lowered_mod->Update(GlobalVar(::tvm::runtime::symbol::tvm_module_main), prim_func); // Parallel for loops are not supported in AoT codegen. lowered_mod = tir::transform::ConvertForLoopsToSerial()(lowered_mod); @@ -960,7 +980,7 @@ class AOTExecutorCodegen : public MixedModeVisitor { Map pool_var_info; std::vector pool_vars; tir::PrimFunc tir_main_func = - Downcast(lowered_mod->Lookup(::tvm::runtime::symbol::tvm_run_func_suffix)); + Downcast(lowered_mod->Lookup(::tvm::runtime::symbol::tvm_module_main)); Optional> allocated_pool_infos = tir_main_func->GetAttr>(tvm::attr::kPoolArgs); if (allocated_pool_infos) { diff --git a/src/target/source/source_module.cc b/src/target/source/source_module.cc index 907eb061303f..97461ca2091f 100644 --- a/src/target/source/source_module.cc +++ b/src/target/source/source_module.cc @@ -474,7 +474,7 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode { } void GenerateAOTDescriptor() { - const std::string run_func_suffix = ::tvm::runtime::symbol::tvm_run_func_suffix; + const std::string run_func_suffix = ::tvm::runtime::symbol::tvm_module_main; const std::string tvm_entrypoint_suffix = ::tvm::runtime::symbol::tvm_entrypoint_suffix; const std::string run_func_mangled = runtime::get_name_mangled(metadata_->mod_name, run_func_suffix); diff --git a/src/tir/usmp/transform/assign_pool_info.cc b/src/tir/usmp/transform/assign_pool_info.cc index 9d8e36137c37..a2304f3b9e3d 100644 --- a/src/tir/usmp/transform/assign_pool_info.cc +++ b/src/tir/usmp/transform/assign_pool_info.cc @@ -42,7 +42,7 @@ class PoolInfoAssigner : public StmtExprMutator { public: explicit PoolInfoAssigner(const IRModule& module) { PrimFunc main_func = - Downcast(module->Lookup(::tvm::runtime::symbol::tvm_run_func_suffix)); + Downcast(module->Lookup(::tvm::runtime::symbol::tvm_module_main)); ICHECK(main_func.defined()) << "main function is not in the module"; Optional target_host = main_func->GetAttr(tvm::attr::kTarget); ICHECK(target_host) << "main function does not have a target attr"; @@ -79,7 +79,7 @@ class PoolInfoAssigner : public StmtExprMutator { PoolInfo PoolInfoAssigner::CreateDefaultMemoryPool(const tvm::IRModule& module) { Map target_access; tir::PrimFunc tir_main_func = - Downcast(module->Lookup(::tvm::runtime::symbol::tvm_run_func_suffix)); + Downcast(module->Lookup(::tvm::runtime::symbol::tvm_module_main)); Target target_host = tir_main_func->GetAttr(tvm::attr::kTarget).value(); for (const auto& kv : module->functions) { BaseFunc func = kv.second; diff --git a/src/tir/usmp/transform/convert_pool_allocations_to_offsets.cc b/src/tir/usmp/transform/convert_pool_allocations_to_offsets.cc index 999ca37d2128..6abc48c31be0 100644 --- a/src/tir/usmp/transform/convert_pool_allocations_to_offsets.cc +++ b/src/tir/usmp/transform/convert_pool_allocations_to_offsets.cc @@ -331,7 +331,7 @@ PrimExpr PoolAllocationToOffsetConverter::VisitExpr_(const LoadNode* op) { } IRModule PoolAllocationToOffsetConverter::operator()() { - GlobalVar gv = module_->GetGlobalVar(::tvm::runtime::symbol::tvm_run_func_suffix); + GlobalVar gv = module_->GetGlobalVar(::tvm::runtime::symbol::tvm_module_main); PrimFunc main_func = Downcast(module_->Lookup(gv)); ScopeInfo si = UpdateFunctionScopeInfo(main_func); this->scope_stack.push(si); diff --git a/src/tir/usmp/unified_static_memory_planner.cc b/src/tir/usmp/unified_static_memory_planner.cc index 3b941d3cc021..e848440f029e 100644 --- a/src/tir/usmp/unified_static_memory_planner.cc +++ b/src/tir/usmp/unified_static_memory_planner.cc @@ -51,7 +51,7 @@ static std::unordered_map( IRModule PlanMemory(const IRModule& mod, String algo) { VLOG(1) << "workspace required = " << CalculateModuleWorkspaceSize(mod); - PrimFunc main_func = Downcast(mod->Lookup(::tvm::runtime::symbol::tvm_run_func_suffix)); + PrimFunc main_func = Downcast(mod->Lookup(::tvm::runtime::symbol::tvm_module_main)); BufferInfoAnalysis buffer_info_analysis = ExtractBufferInfo(main_func, mod); Array buffer_info_arr = CreateArrayBufferInfo(buffer_info_analysis->buffer_info_stmts); @@ -63,7 +63,7 @@ IRModule PlanMemory(const IRModule& mod, String algo) { buffer_info_analysis->buffer_info_stmts, buffer_info_pool_allocations); IRModule ret = transform::ConvertPoolAllocationsToOffsets(stmt_pool_allocations)(mod); tir::PrimFunc tir_main_func = - Downcast(ret->Lookup(::tvm::runtime::symbol::tvm_run_func_suffix)); + Downcast(ret->Lookup(::tvm::runtime::symbol::tvm_module_main)); Optional> allocated_pool_infos = tir_main_func->GetAttr>(tvm::attr::kPoolArgs); if (allocated_pool_infos) { diff --git a/src/tir/usmp/utils.cc b/src/tir/usmp/utils.cc index 5c95f7d7a7be..03fac325905c 100644 --- a/src/tir/usmp/utils.cc +++ b/src/tir/usmp/utils.cc @@ -181,7 +181,7 @@ class ModuleWorkspaceSizeCalculator : public StmtExprVisitor { for (const auto& gv_func : mod_->functions) { functions_.Set(gv_func.first->name_hint, Downcast(gv_func.second)); } - main_func_ = Downcast(module->Lookup(::tvm::runtime::symbol::tvm_run_func_suffix)); + main_func_ = Downcast(module->Lookup(::tvm::runtime::symbol::tvm_module_main)); ICHECK(main_func_.defined()) << "main function is not in the module"; Optional target_host = main_func_->GetAttr(tvm::attr::kTarget); ICHECK(target_host) << "main function does not have a target attr"; diff --git a/tests/python/contrib/test_ethosu/infra.py b/tests/python/contrib/test_ethosu/infra.py index 4bdaef7a74ca..b355c440e006 100644 --- a/tests/python/contrib/test_ethosu/infra.py +++ b/tests/python/contrib/test_ethosu/infra.py @@ -242,12 +242,13 @@ def build_source( def verify_source( models: List[AOTCompiledTestModel], accel="ethos-u55-256", + enable_usmp=True, ): """ This method verifies the generated source from an NPU module by building it and running on an FVP. """ interface_api = "c" - test_runner = create_test_runner(accel) + test_runner = create_test_runner(accel, enable_usmp) run_and_check( models, test_runner, diff --git a/tests/python/contrib/test_ethosu/test_networks.py b/tests/python/contrib/test_ethosu/test_networks.py index e9c6da5be18a..7e3140ff514a 100644 --- a/tests/python/contrib/test_ethosu/test_networks.py +++ b/tests/python/contrib/test_ethosu/test_networks.py @@ -71,7 +71,7 @@ def test_forward_mobilenet_v1(accel_type, enable_usmp): compiled_models = infra.build_source( mod, input_data, output_data, accel_type, output_tolerance=10, enable_usmp=enable_usmp ) - infra.verify_source(compiled_models, accel_type) + infra.verify_source(compiled_models, accel_type, enable_usmp=enable_usmp) if __name__ == "__main__": diff --git a/tests/python/relay/aot/aot_test_utils.py b/tests/python/relay/aot/aot_test_utils.py index b7021e5a8984..63817fc4b965 100644 --- a/tests/python/relay/aot/aot_test_utils.py +++ b/tests/python/relay/aot/aot_test_utils.py @@ -265,21 +265,29 @@ def emit_data_linkage(output_file, data_linkage): def emit_main_prologue( - main_file, custom_prologue, workspace_bytes, data_linkage, compiled_models, interface_api + main_file, + custom_prologue, + workspace_bytes, + data_linkage, + compiled_models, + interface_api, + use_stack_allocator=True, ): - # Add TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES because of memory alignment. - workspace_define = f"#define WORKSPACE_SIZE ({workspace_bytes}" - if interface_api == "c": - for compiled_model in compiled_models: - model = compiled_model.model - workspace_define += f" + TVMGEN_{model.name.upper()}_WORKSPACE_SIZE" - workspace_define += " + TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)\n" - main_file.write(workspace_define) - emit_data_linkage(main_file, data_linkage) - main_file.write("static uint8_t g_aot_memory[WORKSPACE_SIZE];\n") - main_file.write("tvm_workspace_t app_workspace;\n") - main_file.write( - """ + if use_stack_allocator: + workspace_define = f"#define WORKSPACE_SIZE ({workspace_bytes}" + if interface_api == "c": + for compiled_model in compiled_models: + model = compiled_model.model + workspace_define += f" + TVMGEN_{model.name.upper()}_WORKSPACE_SIZE" + # Add TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES because of memory alignment. + workspace_define += " + TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)\n" + main_file.write(workspace_define) + emit_data_linkage(main_file, data_linkage) + main_file.write("static uint8_t g_aot_memory[WORKSPACE_SIZE];\n") + main_file.write("tvm_workspace_t app_workspace;\n") + main_file.write( + """ + tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { return StackMemoryManager_Allocate(&app_workspace, num_bytes, out_ptr); } @@ -287,7 +295,26 @@ def emit_main_prologue( tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { return StackMemoryManager_Free(&app_workspace,ptr); } + """ + ) + else: + # An implementation is not needed for these if the stack allocator is not used + main_file.write( + """ + +tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { + return kTvmErrorFunctionCallNotImplemented; +} +tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { + return kTvmErrorFunctionCallNotImplemented; +} + + """ + ) + main_file.write( + """ + void TVMPlatformAbort(tvm_crt_error_t code) { exit(-1); } void TVMLogf(const char* msg, ...) { @@ -296,10 +323,10 @@ def emit_main_prologue( vfprintf(stdout, msg, args); va_end(args); } - + TVM_DLL int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) {} int main(){\n -""" + """ ) main_file.write(custom_prologue) @@ -511,6 +538,7 @@ def create_main( data_linkage, interface_api, workspace_bytes, + use_stack_allocator=True, ): file_path = pathlib.Path(f"{output_path}/" + test_name).resolve() # create header file @@ -533,8 +561,10 @@ def create_main( data_linkage, compiled_models, interface_api, + use_stack_allocator, ) - emit_main_init_memory_manager(main_file) + if use_stack_allocator: + emit_main_init_memory_manager(main_file) if interface_api == "c": for compiled_model in compiled_models: @@ -709,11 +739,14 @@ def run_and_check( t = tarfile.open(tar_file) t.extractall(base_path) - workspace_bytes = model.extra_memory_in_bytes - use_usmp = runner.pass_config.get("tir.usmp.enable", False) - if interface_api == "packed" and not use_usmp: + # Interface C APIs does not need compiler generated + # workspace to generate the test application, because + # workspace size is codegen'd as a macro to + # tvmgen_.h. + if interface_api != "c": workspace_bytes += mlf_extract_workspace_size_bytes(tar_file) + workspace_bytes += model.extra_memory_in_bytes for key in model.inputs: sanitized_tensor_name = re.sub(r"\W", "_", key) create_header_file( @@ -738,6 +771,10 @@ def run_and_check( data_linkage, ) + use_usmp = runner.pass_config.get("tir.usmp.enable", False) + # We only need the stack allocator if USMP is not used + use_stack_allocator = not use_usmp + create_main( "test.c", models, @@ -748,6 +785,7 @@ def run_and_check( data_linkage, interface_api, workspace_bytes, + use_stack_allocator, ) # Verify that compiles fine @@ -868,3 +906,22 @@ def generate_ref_data(mod, input_data, params=None, target="llvm"): output_tensor_names = main.attrs["output_tensor_names"] return dict(zip(output_tensor_names, out)) + + +def create_relay_module_and_inputs_from_tflite_file(tflite_model_file): + """A helper function to create a Relay IRModule with inputs + and params from a tflite file""" + with open(tflite_model_file, "rb") as f: + tflite_model_buf = f.read() + mod, params = convert_to_relay(tflite_model_buf) + + inputs = dict() + for param in mod["main"].params: + name = str(param.name_hint) + data_shape = [int(i) for i in param.type_annotation.shape] + dtype = str(param.type_annotation.dtype) + in_min, in_max = (np.iinfo(dtype).min, np.iinfo(dtype).max) + data = np.random.randint(in_min, high=in_max, size=data_shape, dtype=dtype) + inputs[name] = data + + return mod, inputs, params diff --git a/tests/python/relay/aot/test_c_device_api.py b/tests/python/relay/aot/test_c_device_api.py index d369fd0a4a30..8252ee68ade8 100644 --- a/tests/python/relay/aot/test_c_device_api.py +++ b/tests/python/relay/aot/test_c_device_api.py @@ -93,7 +93,7 @@ def compile_to_main_func(interface_api="c", use_unpacked_api=True): pass_config=test_runner.pass_config, ) main_ir_module = compiled_models[0].executor_factory.lowered_ir_mods.items()[0][1] - main_func = main_ir_module["run_model"] + main_func = main_ir_module["__tvm_main__"] return main_func return compile_to_main_func @@ -124,7 +124,7 @@ def compile_to_main_func(interface_api="c", use_unpacked_api=True): pass_config=test_runner.pass_config, ) main_ir_module = list(compiled_models[0].executor_factory.lowered_ir_mods.values())[0] - main_func = main_ir_module["run_model"] + main_func = main_ir_module["__tvm_main__"] return main_func return compile_to_main_func diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py index f4f0806dca52..0147b8cf755a 100644 --- a/tests/python/relay/aot/test_crt_aot.py +++ b/tests/python/relay/aot/test_crt_aot.py @@ -28,6 +28,7 @@ from tvm.relay.testing import byoc from tvm.relay.op.annotation import compiler_begin, compiler_end from tvm.relay.backend import Executor, Runtime +from tvm.micro import model_library_format as mlf from aot_test_utils import ( AOTTestModel, AOT_DEFAULT_RUNNER, @@ -36,6 +37,7 @@ compile_and_run, compile_models, parametrize_aot_options, + create_relay_module_and_inputs_from_tflite_file, ) @@ -541,13 +543,7 @@ def test_quant_mobilenet_tfl(): "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz", "mobilenet_v1_1.0_224_quant.tflite", ) - with open(tflite_model_file, "rb") as f: - tflite_model_buf = f.read() - data_shape = (1, 224, 224, 3) - in_min, in_max = (0, 255) - data = np.random.randint(in_min, high=in_max, size=data_shape, dtype="uint8") - mod, params = convert_to_relay(tflite_model_buf) - inputs = {"input": data} + mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file) output_list = generate_ref_data(mod, inputs, params) compile_and_run( AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params), @@ -843,5 +839,75 @@ def representative_dataset(): assert output_name in source +@pytest.mark.parametrize( + "workspace_byte_alignment,main_workspace_size", + [ + (8, 14880), + (16, 14880), + (256, 15616), + ], +) +def test_workspace_calculation(workspace_byte_alignment, main_workspace_size): + mod, params = tvm.relay.testing.synthetic.get_workload() + target = "c" + runtime = Runtime("crt") + executor = Executor( + "aot", + { + "workspace-byte-alignment": workspace_byte_alignment, + }, + ) + with tvm.transform.PassContext( + opt_level=3, + config={ + "tir.disable_vectorize": True, + }, + ): + lib = tvm.relay.build(mod, target, executor=executor, runtime=runtime, params=params) + + mlf_memory_map = mlf._build_function_memory_map(lib.function_metadata) + assert mlf_memory_map["main"][0]["workspace_size_bytes"] == main_workspace_size + + +@tvm.testing.requires_package("tflite") +@tvm.testing.requires_cmsisnn +def test_workspace_calculation_cmsis_nn(): + """This tests cmsis_nn codegen for workspace calculation. + This is tested specially because cmsis-nn codegen creates + multiple PrimFuncs per offloaded relay function in a non + -hierarchical manner.""" + pytest.importorskip("tflite") + + from tvm.relay.op.contrib import cmsisnn + from tvm.contrib.download import download_testdata + + target = "c" + runtime = Runtime("crt") + executor = Executor( + "aot", + { + "workspace-byte-alignment": 16, + "interface-api": "c", + "unpacked-api": True, + }, + ) + + base_url = "https://github.com/ARM-software/ML-zoo/raw/48a22ee22325d15d2371a6df24eb7d67e21dcc97/models/keyword_spotting/cnn_small/tflite_int8" + file_to_download = "cnn_s_quantized.tflite" + file_saved = "cnn_s_quantized_15Dec2021.tflite" + model_file = download_testdata("{}/{}".format(base_url, file_to_download), file_saved) + mod, _, params = create_relay_module_and_inputs_from_tflite_file(model_file) + mod = cmsisnn.partition_for_cmsisnn(mod, params) + with tvm.transform.PassContext( + opt_level=3, + config={ + "tir.disable_vectorize": True, + }, + ): + lib = tvm.relay.build(mod, target, executor=executor, runtime=runtime, params=params) + mlf_memory_map = mlf._build_function_memory_map(lib.function_metadata) + assert mlf_memory_map["main"][0]["workspace_size_bytes"] == 9904 + + if __name__ == "__main__": sys.exit(pytest.main([__file__] + sys.argv[1:])) diff --git a/tests/python/relay/aot/test_crt_aot_usmp.py b/tests/python/relay/aot/test_crt_aot_usmp.py index a27609cc07ad..73b34700ee27 100644 --- a/tests/python/relay/aot/test_crt_aot_usmp.py +++ b/tests/python/relay/aot/test_crt_aot_usmp.py @@ -39,6 +39,7 @@ compile_models, parametrize_aot_options, run_and_check, + create_relay_module_and_inputs_from_tflite_file, ) @@ -202,23 +203,6 @@ def test_byoc_microtvm(merge_compiler_regions): ) -def _get_relay_module_and_inputs_from_tflite_file(tflite_model_file): - with open(tflite_model_file, "rb") as f: - tflite_model_buf = f.read() - mod, params = convert_to_relay(tflite_model_buf) - - inputs = dict() - for param in mod["main"].params: - name = str(param.name_hint) - data_shape = [int(i) for i in param.type_annotation.shape] - dtype = str(param.type_annotation.dtype) - in_min, in_max = (np.iinfo(dtype).min, np.iinfo(dtype).max) - data = np.random.randint(in_min, high=in_max, size=data_shape, dtype=dtype) - inputs[name] = data - - return mod, inputs, params - - MOBILENET_V1_URL = ( "https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz", "mobilenet_v1_1.0_224_quant.tflite", @@ -253,7 +237,7 @@ def test_tflite_model_u1_usecase(model_url, usmp_algo, workspace_size): model_url[0], model_url[1], ) - mod, inputs, params = _get_relay_module_and_inputs_from_tflite_file(tflite_model_file) + mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file) output_list = generate_ref_data(mod, inputs, params) compiled_test_mods = compile_models( @@ -324,7 +308,7 @@ def test_tflite_model_u3_usecase_single_external_pool(model_url, usmp_algo): model_url[0], model_url[1], ) - mod, inputs, params = _get_relay_module_and_inputs_from_tflite_file(tflite_model_file) + mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file) output_list = generate_ref_data(mod, inputs, params) compiled_test_mods = compile_models( @@ -384,7 +368,7 @@ def test_tflite_model_u3_usecase_two_external_pools(model_url, usmp_algo): model_url[0], model_url[1], ) - mod, inputs, params = _get_relay_module_and_inputs_from_tflite_file(tflite_model_file) + mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file) output_list = generate_ref_data(mod, inputs, params) compiled_test_mods = compile_models( @@ -438,14 +422,14 @@ def test_tflite_model_u2_usecase_two_models_with_a_single_external_pool(model_ur model_urls[0][0], model_urls[0][1], ) - mod1, inputs1, params1 = _get_relay_module_and_inputs_from_tflite_file(tflite_model_file1) + mod1, inputs1, params1 = create_relay_module_and_inputs_from_tflite_file(tflite_model_file1) output_list1 = generate_ref_data(mod1, inputs1, params1) tflite_model_file2 = tf_testing.get_workload_official( model_urls[1][0], model_urls[1][1], ) - mod2, inputs2, params2 = _get_relay_module_and_inputs_from_tflite_file(tflite_model_file2) + mod2, inputs2, params2 = create_relay_module_and_inputs_from_tflite_file(tflite_model_file2) output_list2 = generate_ref_data(mod2, inputs2, params2) compiled_test_mods = compile_models( diff --git a/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py b/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py index ab40c646391c..07e31a989874 100644 --- a/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py +++ b/tests/python/unittest/test_tir_usmp_transform_convert_pool_allocations_to_offsets.py @@ -122,9 +122,9 @@ def tvmgen_default_fused_nn_max_pool2d_cast(placeholder_28: T.handle, T_cast_6: T.store(T_cast_7.data, (((ax0_ax1_fused_5*3584) + (ax2_5*64)) + ax3_3), T.cast(T.load("uint8", tensor_2, (((ax0_ax1_fused_5*3584) + (ax2_5*64)) + ax3_3)), "int16"), True) @T.prim_func - def run_model(input: T.handle, output: T.handle) -> None: + def __tvm_main__(input: T.handle, output: T.handle) -> None: # function attr dict - T.func_attr({"global_symbol": "run_model", "runner_function": True}) + T.func_attr({"global_symbol": "__tvm_main__", "runner_function": True}) # body T.attr("default", "device_id", 0) T.attr("default", "device_type", 1) @@ -140,7 +140,7 @@ def run_model(input: T.handle, output: T.handle) -> None: @tvm.script.ir_module class LinearStructurePlanned: @T.prim_func - def run_model(input: T.handle, fast_memory_0_var: T.handle, slow_memory_1_var: T.handle, output: T.handle) -> None: + def __tvm_main__(input: T.handle, fast_memory_0_var: T.handle, slow_memory_1_var: T.handle, output: T.handle) -> None: fast_memory_0_buffer_var = T.match_buffer(fast_memory_0_var, [200704], dtype="uint8", strides=[1], elem_offset=1, align=16) slow_memory_1_buffer_var = T.match_buffer(slow_memory_1_var, [1418528], dtype="uint8", strides=[1], elem_offset=1, align=16) # body @@ -217,7 +217,7 @@ def test_mobilenet_subgraph(): tir_mod = assign_poolinfos_to_allocates_in_irmodule( tir_mod, [fast_memory_pool, slow_memory_pool] ) - main_func = tir_mod["run_model"] + main_func = tir_mod["__tvm_main__"] buffer_analysis = tvm.tir.usmp.analysis.extract_buffer_info(main_func, tir_mod) buffer_info_map = buffer_analysis.buffer_info_stmts @@ -328,9 +328,9 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_add_clip_cast_cast_s T.store(T_cast_7.data, ax0_ax1_fused_ax2_fused_3 * 256 + ax3_outer_2 * 64 + ax3_inner_4, T.cast(T.max(T.min(T.q_multiply_shift(T.cast(T.cast(T.max(T.min(T.q_multiply_shift(T.load("int32", Conv2dOutput_3, ax3_inner_4) + T.load("int32", placeholder_26.data, ax3_outer_2 * 64 + ax3_inner_4), 1343014664, 31, -8, dtype="int32") + 136, 255), 0), "uint8"), "int32") - 136, 1073903788, 31, 1, dtype="int32") + T.load("int32", placeholder_28.data, ax0_ax1_fused_ax2_fused_3 * 256 + ax3_outer_2 * 64 + ax3_inner_4), 255), 0), "uint8"), True) @T.prim_func - def run_model(input: T.handle, output: T.handle) -> None: + def __tvm_main__(input: T.handle, output: T.handle) -> None: # function attr dict - T.func_attr({"global_symbol": "run_model", "runner_function": True}) + T.func_attr({"global_symbol": "__tvm_main__", "runner_function": True}) # body T.attr("default", "device_id", 0) T.attr("default", "device_type", 1) @@ -464,7 +464,7 @@ def tvmgen_default_fused_nn_conv2d_add_fixed_point_multiply_clip_cast_cast_1(pla T.store(T_cast_5.data, ax0_ax1_fused_ax2_fused_1 * 64 + ax3_inner_2, T.cast(T.cast(T.max(T.min(T.q_multiply_shift(T.load("int32", Conv2dOutput_1_let, ax3_inner_2) + T.load("int32", placeholder_15.data, ax3_inner_2), 1608879842, 31, -7, dtype="int32"), 255), 0), "uint8"), "int16"), True) @T.prim_func - def run_model(input: T.handle, global_workspace_0_var: T.handle, output: T.handle) -> None: + def __tvm_main__(input: T.handle, global_workspace_0_var: T.handle, output: T.handle) -> None: global_workspace_0_buffer_var = T.match_buffer(global_workspace_0_var, [7920256], dtype="uint8", strides=[1], elem_offset=1, align=16) # body T.attr("default", "device_id", 0) @@ -491,7 +491,7 @@ def test_resnet_subgraph(): tir_mod = ResnetStructure tir_mod = _assign_targets_to_primfuncs_irmodule(tir_mod, target) tir_mod = assign_poolinfos_to_allocates_in_irmodule(tir_mod, [global_workspace_pool]) - main_func = tir_mod["run_model"] + main_func = tir_mod["__tvm_main__"] buffer_analysis = tvm.tir.usmp.analysis.extract_buffer_info(main_func, tir_mod) buffer_info_map = buffer_analysis.buffer_info_stmts diff --git a/tests/scripts/task_demo_microtvm.sh b/tests/scripts/task_demo_microtvm.sh index 9ed9c671acc0..b5c18ec9e757 100755 --- a/tests/scripts/task_demo_microtvm.sh +++ b/tests/scripts/task_demo_microtvm.sh @@ -19,8 +19,7 @@ set -euxo pipefail pushd apps/microtvm/zephyr_cmsisnn -# Demo tests are disabled here due to https://github.com/apache/tvm/issues/10312 -# timeout 5m ./run_demo.sh + timeout 5m ./run_demo.sh popd pushd apps/microtvm/ethosu @@ -28,6 +27,6 @@ FVP_PATH="/opt/arm/FVP_Corstone_SSE-300_Ethos-U55" CMAKE_PATH="/opt/arm/cmake/bin/cmake" FREERTOS_PATH="/opt/freertos/FreeRTOSv202112.00" -# timeout 5m ./run_demo.sh --fvp_path $FVP_PATH --cmake_path $CMAKE_PATH -# timeout 5m ./run_demo.sh --fvp_path $FVP_PATH --cmake_path $CMAKE_PATH --freertos_path $FREERTOS_PATH + timeout 5m ./run_demo.sh --fvp_path $FVP_PATH --cmake_path $CMAKE_PATH + timeout 5m ./run_demo.sh --fvp_path $FVP_PATH --cmake_path $CMAKE_PATH --freertos_path $FREERTOS_PATH popd