diff --git a/python/tvm/contrib/hexagon/build.py b/python/tvm/contrib/hexagon/build.py index a40903b822ba..16d3a30fd643 100644 --- a/python/tvm/contrib/hexagon/build.py +++ b/python/tvm/contrib/hexagon/build.py @@ -266,8 +266,7 @@ def get_aot_executor(self, module_name: Union[str, pathlib.Path], session: Sessi aot_module : AotModule Runtime AOT module that can be used to execute. """ - aot_mod = self.load_module(module_name, session) - return tvm.runtime.executor.AotModule(aot_mod["default"](session.device)) + return session.get_aot_executor(module_name) class HexagonLauncherAndroid(HexagonLauncherRPC): diff --git a/python/tvm/contrib/hexagon/session.py b/python/tvm/contrib/hexagon/session.py index 44c4d145555c..783e1cd3a014 100644 --- a/python/tvm/contrib/hexagon/session.py +++ b/python/tvm/contrib/hexagon/session.py @@ -24,6 +24,12 @@ import tvm from tvm import rpc as _rpc +import tvm.contrib.hexagon as hexagon +from tvm.relay.backend.executor_factory import ( + ExecutorFactoryModule, + AOTExecutorFactoryModule, + GraphExecutorFactoryModule, +) class Session: @@ -101,6 +107,9 @@ def upload(self, local_path: Union[str, pathlib.Path], remote_filename: str): def load_module(self, module: Union[str, pathlib.Path, tvm.runtime.Module]): """Load TVM module. + The session must be established (via __enter__) prior to + calling this function. + Parameters ---------- module : Union[str, pathlib.Path, tvm.runtime.Module] @@ -115,16 +124,16 @@ def load_module(self, module: Union[str, pathlib.Path, tvm.runtime.Module]): the file must already have been uploaded to the remote, and be placed in the remote workspace. - session : Session - - Remote session. The session must be established (via __enter__) - prior to calling this function. - Returns ------- TVMModule : TVM module object. 
""" + + assert ( + self.device is not None + ), "Hexagon session must be started using __enter__ prior to use" + if isinstance(module, tvm.runtime.Module): with tempfile.TemporaryDirectory() as temp_dir: temp_dir = pathlib.Path(temp_dir) @@ -136,3 +145,160 @@ def load_module(self, module: Union[str, pathlib.Path, tvm.runtime.Module]): assert isinstance(module, (str, pathlib.Path)), "Invalid path type:" + str(type(module)) return self._rpc.get_function("tvm.hexagon.load_module")(str(module)) + + def get_graph_executor( + self, + graph_json: str, + module_name: Union[str, pathlib.Path], + ): + """Create a local GraphModule which consumes a remote libmod. + + The session must be established (via __enter__) prior to + calling this function. + + Parameters + ---------- + + module_name : Union[str, pathlib.Path] + + The remote module filename, following the same restrictions + as `load_module`. + + graph_json : str + + The string with the graph JSON. + + Returns + ------- + GraphModule : + Runtime graph module that can be used to execute the graph. + + """ + + graph_mod = self.load_module(module_name) + return tvm.contrib.graph_executor.create(graph_json, graph_mod, self.device) + + def get_aot_executor( + self, + module_name: Union[str, pathlib.Path], + ): + """Create a local AotModule which consumes a remote libmod. + + The session must be established (via __enter__) prior to + calling this function. + + Parameters + ---------- + + module_name : Union[str, pathlib.Path] + + The remote module filename, following the same restrictions + as `load_module`. + + Returns + ------- + AotModule : + Runtime AOT module that can be used to execute. + + """ + + aot_mod = self.load_module(module_name) + return tvm.runtime.executor.AotModule(aot_mod["default"](self.device)) + + def get_executor_from_factory(self, module: ExecutorFactoryModule): + """Create a local executor module (graph or AOT) which consumes a remote libmod.
+ + Parameters + ---------- + + module : ExecutorFactoryModule + + The module to upload to the remote + session and load. + """ + if isinstance(module, AOTExecutorFactoryModule): + return self._aot_executor_from_factory(module) + if isinstance(module, GraphExecutorFactoryModule): + return self._graph_executor_from_factory(module) + + raise TypeError(f"Unsupported executor type: {type(module)}") + + def _graph_executor_from_factory( + self, + module: Union[str, pathlib.Path, GraphExecutorFactoryModule], + ): + """Create a local GraphModule which consumes a remote libmod. + + The session must be established (via __enter__) prior to + calling this function. + + Parameters + ---------- + + module : GraphExecutorFactoryModule + + The graph executor module to upload to the remote and load. + This will typically be the output of `tvm.relay.build`, + when passing `executor=Executor("graph")`. + + Returns + ------- + GraphModule : + Runtime graph module that can be used to execute the graph. + + """ + + graph_json = module.get_graph_json() + graph_mod = self.load_module(module.get_lib()) + + return tvm.contrib.graph_executor.create(graph_json, graph_mod, self.device) + + def _aot_executor_from_factory( + self, + module: Union[str, pathlib.Path, AOTExecutorFactoryModule], + ): + """Create a local AotModule which consumes a remote libmod. + + The session must be established (via __enter__) prior to + calling this function. + + Parameters + ---------- + + module : AOTExecutorFactoryModule + + The AOT executor module to upload to the remote and load. + This will typically be the output of `tvm.relay.build`, + when passing `executor=Executor("aot")`. + + Returns + ------- + AotModule : + Runtime AOT module that can be used to execute.
+ + """ + + hexagon_arch = set( + target.mcpu.replace("hexagon", "") + for target in module.target.values() + if "hexagon" in target.keys + ) + assert hexagon_arch, "No hexagon target architecture found" + assert len(hexagon_arch) == 1, f"Inconsistent hexagon architecture found, {hexagon_arch}" + hexagon_arch = hexagon_arch.pop() + + with tempfile.TemporaryDirectory() as temp_dir: + temp_dir = pathlib.Path(temp_dir) + binary_name = "test_binary.so" + binary_path = temp_dir / binary_name + + module.export_library( + str(binary_path), + fcompile=hexagon.create_aot_shared, + hexagon_arch=hexagon_arch, + ) + + self.upload(binary_path, binary_name) + + aot_mod = self.load_module(binary_name) + return tvm.runtime.executor.AotModule(aot_mod["default"](self.device)) diff --git a/python/tvm/contrib/hexagon/tools.py b/python/tvm/contrib/hexagon/tools.py index 5e241a990fe2..edf2821d3136 100644 --- a/python/tvm/contrib/hexagon/tools.py +++ b/python/tvm/contrib/hexagon/tools.py @@ -160,6 +160,19 @@ def create_aot_shared(so_name: Union[str, pathlib.Path], files, hexagon_arch: st + "HEXAGON_SDK_PATH in your environment." ) + # The AOT C codegen uses TVM runtime functions + # (e.g. TVMBackendAllocWorkspace) directly. On Hexagon these calls + # should be made using function pointers provided as __TVM* + # variables in the provided context. This workaround allows the + # TVM runtime symbols to be visible to the compiled shared + # library. + # + # This workaround can be removed when AOT codegen can be done with + # LLVM codegen. + workaround_link_flags = os.environ.get("HEXAGON_SHARED_LINK_FLAGS") + if workaround_link_flags: + options.extend(workaround_link_flags.split()) + tvm_dir = pathlib.Path(os.path.dirname(os.path.realpath(__file__))) / ".." / ".." / ".." / ".."
compute_arch = f"compute{hexagon_arch}" compile_options = [ diff --git a/tests/python/contrib/test_hexagon/test_launcher.py b/tests/python/contrib/test_hexagon/test_launcher.py index 3e72c38f1909..ec892c1c7a9d 100644 --- a/tests/python/contrib/test_hexagon/test_launcher.py +++ b/tests/python/contrib/test_hexagon/test_launcher.py @@ -144,7 +144,7 @@ def test_matmul(self, hexagon_session, M, N, K): @requires_hexagon_toolchain -def test_graph_executor(hexagon_launcher, hexagon_session): +def test_graph_executor(hexagon_session): dtype = "float32" data = relay.var("data", relay.TensorType((1, 64, 64, 3), dtype)) weight = relay.var("weight", relay.TensorType((5, 5, 3, 8), dtype)) @@ -170,10 +170,6 @@ def test_graph_executor(hexagon_launcher, hexagon_session): params = {"weight": weight_in} inputs = {"data": data_in} - temp = utils.tempdir() - dso_binary = "test_binary.so" - dso_binary_path = temp.relpath(dso_binary) - with tvm.transform.PassContext(opt_level=3): lowered = tvm.relay.build( relay_mod, @@ -181,16 +177,11 @@ def test_graph_executor(hexagon_launcher, hexagon_session): runtime=runtime, executor=executor, ) - lowered.get_lib().save(dso_binary_path) if hexagon_session is None: pytest.skip(msg="Skip hardware test since ANDROID_SERIAL_NUMBER is not set.") - hexagon_launcher.upload(dso_binary_path, dso_binary) - - graph_mod = hexagon_launcher.get_graph_executor( - lowered.get_graph_json(), dso_binary, hexagon_session - ) + graph_mod = hexagon_session.get_executor_from_factory(lowered) graph_mod.set_input(**params) graph_mod.run(**inputs) hexagon_output = graph_mod.get_output(0).numpy() @@ -212,7 +203,7 @@ def test_graph_executor(hexagon_launcher, hexagon_session): @requires_hexagon_toolchain -def test_graph_executor_multiple_conv2d(hexagon_launcher, hexagon_session): +def test_graph_executor_multiple_conv2d(hexagon_session): dtype = "float32" input_shape = (1, 8, 8, 3) w1_shape = (5, 5, 3, 1) @@ -246,10 +237,6 @@ def 
test_graph_executor_multiple_conv2d(hexagon_launcher, hexagon_session): runtime = Runtime("cpp") executor = Executor("graph") - temp = utils.tempdir() - dso_binary = "test_binary.so" - dso_binary_path = temp.relpath(dso_binary) - with tvm.transform.PassContext(opt_level=3): lowered = tvm.relay.build( relay_mod, @@ -257,13 +244,10 @@ def test_graph_executor_multiple_conv2d(hexagon_launcher, hexagon_session): runtime=runtime, executor=executor, ) - lowered.get_lib().save(dso_binary_path) if hexagon_session is None: pytest.skip(msg="Skip hardware test since ANDROID_SERIAL_NUMBER is not set.") - hexagon_launcher.upload(dso_binary_path, dso_binary) - weight1_data = np.random.rand(w1_shape[0], w1_shape[1], w1_shape[2], w1_shape[3]).astype( dtype=dtype ) @@ -277,9 +261,7 @@ def test_graph_executor_multiple_conv2d(hexagon_launcher, hexagon_session): params = {"weight1": weight1_data, "weight2": weight2_data} inputs = {"data": input_data} - graph_mod = hexagon_launcher.get_graph_executor( - lowered.get_graph_json(), dso_binary, hexagon_session - ) + graph_mod = hexagon_session.get_executor_from_factory(lowered) graph_mod.set_input(**params) graph_mod.run(**inputs) hexagon_output = graph_mod.get_output(0).numpy() @@ -312,7 +294,7 @@ def _workaround_create_aot_shared(): @requires_hexagon_toolchain -def test_aot_executor(hexagon_launcher, hexagon_session): +def test_aot_executor(hexagon_session): dtype = "float32" input_shape = (1, 128, 128, 3) w_shape = (5, 5, 3, 8) @@ -332,9 +314,6 @@ def test_aot_executor(hexagon_launcher, hexagon_session): relay_mod = relay.transform.InferType()(relay_mod) target_hexagon = tvm.target.hexagon("v68") - temp = utils.tempdir() - dso_binary = "test_binary.so" - dso_binary_path = temp / dso_binary weight_data = np.random.rand(w_shape[0], w_shape[1], w_shape[2], w_shape[3]).astype(dtype=dtype) input_data = np.random.rand( @@ -352,20 +331,11 @@ def test_aot_executor(hexagon_launcher, hexagon_session): runtime=Runtime("cpp"), 
executor=Executor("aot", {"unpacked-api": False, "interface-api": "c"}), ) - # Uncomment this once the workaround is not needed. - # lowered.export_library( - # dso_binary_path, fcompile=hexagon.create_aot_shared, hexagon_arch="v68" - # ) - lowered.export_library( - dso_binary_path, fcompile=_workaround_create_aot_shared(), hexagon_arch="v68" - ) if hexagon_session is None: pytest.skip(msg="Skip hardware test, ANDROID_SERIAL_NUMBER is not set.") - hexagon_launcher.upload(dso_binary_path, dso_binary) - - aot_mod = hexagon_launcher.get_aot_executor(dso_binary, hexagon_session) + aot_mod = hexagon_session.get_executor_from_factory(lowered) aot_mod.set_input(**inputs) aot_mod.run() hexagon_output = aot_mod.get_output(0).numpy() @@ -388,7 +358,7 @@ def test_aot_executor(hexagon_launcher, hexagon_session): @requires_hexagon_toolchain -def test_aot_executor_multiple_conv2d(hexagon_launcher, hexagon_session): +def test_aot_executor_multiple_conv2d(hexagon_session): dtype = "float32" input_shape = (1, 8, 8, 3) w1_shape = (5, 5, 3, 1) @@ -419,9 +389,6 @@ def test_aot_executor_multiple_conv2d(hexagon_launcher, hexagon_session): relay_mod = relay.transform.InferType()(relay_mod) target_hexagon = tvm.target.hexagon("v68") - temp = utils.tempdir() - dso_binary = "test_binary.so" - dso_binary_path = temp / dso_binary weight1_data = np.random.rand(w1_shape[0], w1_shape[1], w1_shape[2], w1_shape[3]).astype( dtype=dtype @@ -444,20 +411,11 @@ def test_aot_executor_multiple_conv2d(hexagon_launcher, hexagon_session): runtime=Runtime("cpp"), executor=Executor("aot", {"unpacked-api": False, "interface-api": "c"}), ) - # Uncomment this once the workaround is not needed. 
- # lowered.export_library( - # dso_binary_path, fcompile=hexagon.create_aot_shared, hexagon_arch="v68" - # ) - lowered.export_library( - dso_binary_path, fcompile=_workaround_create_aot_shared(), hexagon_arch="v68" - ) if hexagon_session is None: pytest.skip(msg="Skip hardware test, ANDROID_SERIAL_NUMBER is not set.") - hexagon_launcher.upload(dso_binary_path, dso_binary) - - aot_mod = hexagon_launcher.get_aot_executor(dso_binary, hexagon_session) + aot_mod = hexagon_session.get_executor_from_factory(lowered) aot_mod.set_input(**inputs) aot_mod.run() hexagon_output = aot_mod.get_output(0).numpy()