apache · lhutton1 · Nov 9, 2022 · Aug 22, 2022
diff --git a/python/tvm/relay/backend/aot.py b/python/tvm/relay/backend/aot.py
@@ -19,6 +19,7 @@
 from typing import Dict
 
 from tvm import IRModule
+from tvm.relay.backend import Executor
 from tvm.ir.transform import Pass
 from .utils import CallType
 
@@ -67,3 +68,36 @@ def CreateFunctionMetadata(
 
     """
     return _aot.CreateFunctionMetadata(mod, workspace_byte_alignment, constant_byte_alignment)
+
+
+def CreateExecutorMetadata(
+    mod: IRModule,
+    mod_name: str,
+    executor: Executor,
+    workspace_byte_alignment: int,
+    constant_byte_alignment: int,
+) -> object:
+    """Create the executor metadata from an AOT module.
+
+    Parameters
+    ----------
+    mod : IRModule
+        The IRModule.
+    mod_name : str
+        The name of the module.
+    executor : Executor
+        The executor configuration.
+    workspace_byte_alignment : int
+        The alignment of the workspace buffer in bytes.
+    constant_byte_alignment : int
+        The alignment of the constant buffer in bytes.
+
+    Returns
+    -------
+    ExecutorCodegenMetadata
+        The executor metadata.
+
+    """
+    return _aot.CreateExecutorMetadata(
+        mod, mod_name, executor, workspace_byte_alignment, constant_byte_alignment
+    )
diff --git a/src/relay/backend/aot/aot_lower_main.cc b/src/relay/backend/aot/aot_lower_main.cc
@@ -493,6 +493,11 @@ class AOTMainLowerer : public MixedModeVisitor {
         Array<tir::Var>(main_signature_.begin() + input_vars_.size(),
                         main_signature_.begin() + input_vars_.size() + return_sid_.size());
     dict_attrs.Set("output_vars", output_vars);
+    Array<String> device_names;
+    for (const auto& it : devices_) {
+      device_names.push_back(it.first);
+    }
+    dict_attrs.Set("devices", device_names);
 
     tir::Stmt device_activations = GenerateAllDeviceHook("Activate");
     tir::Stmt device_deactivations = GenerateAllDeviceHook("Deactivate");

diff --git a/src/relay/backend/aot/create_executor_metadata.cc b/src/relay/backend/aot/create_executor_metadata.cc
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/backend/aot/create_executor_metadata.cc
+ * \brief Create the ExecutorCodegenMetadata from a compiled IRModule.
+ */
+
+#include "./create_executor_metadata.h"
+
+#include "../utils.h"
+
+namespace tvm {
+namespace relay {
+namespace backend {
+namespace aot {
+
+ExecutorCodegenMetadata CreateExecutorMetadata(const IRModule& mod, String mod_name,
+                                               Executor executor, Integer workspace_byte_alignment,
+                                               Integer constant_byte_alignment) {
+  // Get relevant executor config information
+  std::string interface_api = executor->GetAttr<String>("interface-api").value_or("packed");
+  bool unpacked_api = executor->GetAttr<Bool>("unpacked-api").value_or(Bool(false));
+  // Get the input vars
+  auto tir_main_func = Downcast<tir::PrimFunc>(mod->Lookup(runtime::symbol::tvm_module_main));
+  Array<tir::Var> inputs = tir_main_func->GetAttr<Array<tir::Var>>("input_vars").value();
+  Array<TensorType> input_tensor_types;
+  for (const auto& input : inputs) {
+    auto buffer = tir_main_func->buffer_map.Get(input).value();
+    input_tensor_types.push_back(TensorType(buffer->shape, buffer->dtype));
+  }
+  // Extract USMP metadata to pass onto metadata sources
+  Map<tir::Var, tir::usmp::AllocatedPoolInfo> pool_var_info;
+  std::vector<tir::Var> pool_vars;
+  Optional<Array<tir::usmp::AllocatedPoolInfo>> allocated_pool_infos =
+      tir_main_func->GetAttr<Array<tir::usmp::AllocatedPoolInfo>>(tvm::attr::kPoolArgs);
+  if (allocated_pool_infos) {
+    for (const tir::usmp::AllocatedPoolInfo& allocated_pool_info : allocated_pool_infos.value()) {
+      int pool_var_index = allocated_pool_info->pool_var_idx.value()->value;
+      pool_vars.push_back(tir_main_func->params[pool_var_index]);
+      pool_var_info.Set(tir_main_func->params[pool_var_index], allocated_pool_info);
+    }
+  }
+  Map<String, tir::usmp::PoolAllocation> io_pool_allocations =
+      mod->GetAttr<Map<String, tir::usmp::PoolAllocation>>(tvm::attr::kIOTensorPoolAllocations)
+          .value_or({});
+
+  Array<tir::Var> outputs = tir_main_func->GetAttr<Array<tir::Var>>("output_vars").value();
+  Array<TensorType> output_tensor_types;
+  std::vector<String> output_var_names;
+  for (const auto& output : outputs) {
+    auto buffer = tir_main_func->buffer_map.Get(output).value();
+    output_tensor_types.push_back(TensorType(buffer->shape, buffer->dtype));
+    output_var_names.push_back(output->name_hint);
+  }
+  auto devices = tir_main_func->GetAttr<Array<String>>("devices").value_or({});
+
+  return ExecutorCodegenMetadata(inputs, input_tensor_types, output_var_names, output_tensor_types,
+                                 pool_vars, devices, runtime::kTvmExecutorAot, mod_name,
+                                 interface_api, unpacked_api, workspace_byte_alignment,
+                                 constant_byte_alignment, pool_var_info, io_pool_allocations);
+}
+
+TVM_REGISTER_GLOBAL("relay.backend.aot.CreateExecutorMetadata")
+    .set_body_typed(CreateExecutorMetadata);
+
+}  // namespace aot
+}  // namespace backend
+}  // namespace relay
+}  // namespace tvm
diff --git a/src/relay/backend/aot/create_executor_metadata.h b/src/relay/backend/aot/create_executor_metadata.h
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#ifndef TVM_RELAY_BACKEND_AOT_CREATE_EXECUTOR_METADATA_H_
+#define TVM_RELAY_BACKEND_AOT_CREATE_EXECUTOR_METADATA_H_
+
+#include <tvm/ir/module.h>
+#include <tvm/relay/executor.h>
+#include <tvm/runtime/container/string.h>
+
+#include "../utils.h"
+
+namespace tvm {
+namespace relay {
+namespace backend {
+namespace aot {
+
+/*! \brief Create ExecutorCodegenMetadata needed for AOT execution.
+ * \param mod The module.
+ * \param mod_name The module name.
+ * \param executor The executor configuration.
+ * \param workspace_byte_alignment The alignment of the workspace pool.
+ * \param constant_byte_alignment The alignment of the constant pool.
+ * \return The ExecutorCodegenMetadata.
+ */
+ExecutorCodegenMetadata CreateExecutorMetadata(const IRModule& mod, String mod_name,
+                                               Executor executor, Integer workspace_byte_alignment,
+                                               Integer constant_byte_alignment);
+
+}  // namespace aot
+}  // namespace backend
+}  // namespace relay
+}  // namespace tvm
+
+#endif  // TVM_RELAY_BACKEND_AOT_CREATE_EXECUTOR_METADATA_H_
diff --git a/src/relay/backend/utils.h b/src/relay/backend/utils.h
@@ -168,11 +168,13 @@ class ExecutorCodegenMetadataNode : public Object {
     v->Visit("pools", &pools);
     v->Visit("devices", &devices);
     v->Visit("executor", &executor);
+    v->Visit("interface_api", &interface_api);
     v->Visit("unpacked_api", &unpacked_api);
     v->Visit("workspace_alignment", &workspace_alignment);
     v->Visit("constant_alignment", &constant_alignment);
     v->Visit("pool_inputs", &pool_inputs);
     v->Visit("io_pool_allocations", &io_pool_allocations);
+    v->Visit("mod_name", &mod_name);
   }
 
   static constexpr const char* _type_key = "MetadataObj";

diff --git a/tests/python/relay/aot/test_aot_create_executor_metadata.py b/tests/python/relay/aot/test_aot_create_executor_metadata.py
@@ -0,0 +1,176 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=line-too-long,missing-class-docstring,missing-module-docstring,missing-function-docstring,no-self-argument,unused-argument,invalid-name
+import numpy as np
+
+import tvm
+import tvm.testing
+from tvm.script import tir as T
+from tvm.runtime.ndarray import array
+from tvm.relay.backend import Executor
+from tvm.relay.backend.aot import CreateExecutorMetadata
+from tvm.relay import TensorType
+from tvm.tir.usmp.utils import PoolAllocation
+from tvm.ir.memory_pools import AllocatedPoolInfo, ConstantPoolInfo, WorkspacePoolInfo, ConstantInfo
+
+
+def _check_executor_metadata(executor_metadata, expected_metadata):
+    assert list(executor_metadata.inputs) == expected_metadata["inputs"]
+    assert list(executor_metadata.input_tensor_types) == expected_metadata["input_tensor_types"]
+    assert list(executor_metadata.outputs) == expected_metadata["outputs"]
+    assert list(executor_metadata.output_tensor_types) == expected_metadata["output_tensor_types"]
+    assert list(executor_metadata.pools) == expected_metadata["pools"]
+    assert executor_metadata.devices == expected_metadata["devices"]
+    assert executor_metadata.executor == expected_metadata["executor"]
+    assert executor_metadata.mod_name == expected_metadata["mod_name"]
+    assert executor_metadata.interface_api == expected_metadata["interface_api"]
+    assert executor_metadata.unpacked_api == expected_metadata["unpacked_api"]
+    assert executor_metadata.workspace_alignment == expected_metadata["workspace_alignment"]
+    assert executor_metadata.constant_alignment == expected_metadata["constant_alignment"]
+    assert set(executor_metadata.pool_inputs.keys()) == set(expected_metadata["pool_inputs"].keys())
+    assert set(executor_metadata.io_pool_allocations.keys()) == set(
+        expected_metadata["io_pool_allocations"].keys()
+    )
+
+
+def test_create_executor_metadata_single_func():
+    # fmt: off
+    @tvm.script.ir_module
+    class Module:
+        @T.prim_func
+        def __tvm_main__(
+            a: T.handle, output: T.handle, workspace: T.Ptr[T.uint8], constants: T.Ptr[T.uint8]
+        ) -> None:
+            # function attr dict
+            T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind": "llvm", "tag": "", "keys": ["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": ["test_device"]})
+            a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16)
+            output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16)
+            # body
+            sid_3 = T.allocate([140], "int8", "global.workspace")
+            sid_2 = T.allocate([140], "int8", "global.workspace")
+            sid_1 = T.allocate([140], "int8", "global.workspace")
+            constant_0 = T.allocate_const([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "float32", [5, 7])
+            T.evaluate(T.tvm_call_cpacked("test_fused_add_0", a_buffer.data, sid_1, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32"))
+            T.evaluate(T.tvm_call_cpacked("test_fused_add_0", sid_1, constant_0, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32"))
+            T.evaluate(T.tvm_call_cpacked("test_fused_add_0", sid_2, sid_3, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32"))
+            T.evaluate(T.tvm_call_cpacked("test_fused_add_1", sid_2, sid_3, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32"))
+    # fmt: on
+
+    target = Module["__tvm_main__"].attrs["target"]
+    executor = Executor("aot", {"interface-api": "c"})
+    workspace_pool_info = AllocatedPoolInfo(
+        WorkspacePoolInfo("sram", [target]),
+        256,
+        3,
+    )
+    constant_pool_info = AllocatedPoolInfo(
+        ConstantPoolInfo(
+            "flash",
+            [target],
+            [ConstantInfo("a", 0, array(np.array([0])))],
+        ),
+        512,
+        2,
+    )
+    io_pool_allocations = {
+        "a": PoolAllocation(WorkspacePoolInfo("sram", [target]), 0),
+        "output": PoolAllocation(WorkspacePoolInfo("sram", [target]), 0),
+    }
+    mod = Module.with_attr("io_tensor_pool_allocations", io_pool_allocations)
+    mod["__tvm_main__"] = mod["__tvm_main__"].with_attr(
+        "pool_args",
+        [
+            constant_pool_info,
+            workspace_pool_info,
+        ],
+    )
+    f = mod["__tvm_main__"]
+    expected_metadata = {
+        "inputs": [f.params[0]],
+        "input_tensor_types": [TensorType((5, 7), "float32")],
+        "outputs": ["output"],
+        "output_tensor_types": [TensorType((5, 7), "float32")],
+        "pools": f.params[2:],
+        "devices": f.attrs["devices"],
+        "executor": "aot",
+        "mod_name": "test_mod",
+        "interface_api": "c",
+        "unpacked_api": False,
+        "workspace_alignment": 16,
+        "constant_alignment": 1,
+        "pool_inputs": {
+            f.params[2]: workspace_pool_info,
+            f.params[3]: constant_pool_info,
+        },
+        "io_pool_allocations": io_pool_allocations,
+    }
+
+    executor_metadata = CreateExecutorMetadata(mod, "test_mod", executor, 16, 1)
+
+    _check_executor_metadata(executor_metadata, expected_metadata)
+
+
+def test_create_executor_metadata_no_usmp():
+    # fmt: off
+    @tvm.script.ir_module
+    class Module:
+        @T.prim_func
+        def __tvm_main__(
+            a: T.handle, output: T.handle
+        ) -> None:
+            # function attr dict
+            T.func_attr({"global_symbol": "test_mod___tvm_main__", "runner_function": True, "target": T.target({"kind": "llvm", "tag": "", "keys": ["cpu"]}), "input_vars": [a], "output_vars": [output], "devices": ["test_device"]})
+            a_buffer = T.match_buffer(a, [5, 7], dtype="float32", align=16)
+            output_buffer = T.match_buffer(output, [5, 7], dtype="float32", align=16)
+            # body
+            sid_3 = T.allocate([140], "int8", "global.workspace")
+            sid_2 = T.allocate([140], "int8", "global.workspace")
+            sid_1 = T.allocate([140], "int8", "global.workspace")
+            constant_0 = T.allocate_const([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "float32", [5, 7])
+            T.evaluate(T.tvm_call_cpacked("test_fused_add_0", a_buffer.data, sid_1, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32"))
+            T.evaluate(T.tvm_call_cpacked("test_fused_add_0", sid_1, constant_0, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32"))
+            T.evaluate(T.tvm_call_cpacked("test_fused_add_0", sid_2, sid_3, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32"))
+            T.evaluate(T.tvm_call_cpacked("test_fused_add_1", sid_2, sid_3, output_buffer.data, T.reinterpret(T.uint64(0), dtype="handle"), dtype="int32"))
+    # fmt: on
+
+    executor = Executor("aot", {"interface-api": "c"})
+    mod = Module
+    f = mod["__tvm_main__"]
+    expected_metadata = {
+        "inputs": [f.params[0]],
+        "input_tensor_types": [TensorType((5, 7), "float32")],
+        "outputs": ["output"],
+        "output_tensor_types": [TensorType((5, 7), "float32")],
+        "pools": f.params[2:],
+        "devices": f.attrs["devices"],
+        "executor": "aot",
+        "mod_name": "test_mod",
+        "interface_api": "c",
+        "unpacked_api": False,
+        "workspace_alignment": 16,
+        "constant_alignment": 1,
+        "pool_inputs": {},
+        "io_pool_allocations": {},
+    }
+
+    executor_metadata = CreateExecutorMetadata(mod, "test_mod", executor, 16, 1)
+
+    _check_executor_metadata(executor_metadata, expected_metadata)
+
+
+if __name__ == "__main__":
+    tvm.testing.main()