Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
9ce7ce3
[Hexagon] Add support for instrumentation based profiling for Hexagon
jverma-quic Oct 3, 2022
5d39093
Fix typos
jverma-quic Oct 3, 2022
4d0a141
Merge branch 'main' into hexagon-instr-profiling
jverma-quic Oct 5, 2022
ca5e6f3
Update python/tvm/contrib/hexagon/build.py
jverma-quic Oct 5, 2022
0213cd6
Merge branch 'main' into hexagon-instr-profiling
jverma-quic Oct 13, 2022
8b8e3b9
Address review comments
jverma-quic Oct 13, 2022
2b6b15d
Ignore profile builtins if llvm version < 15.0
jverma-quic Oct 14, 2022
470d047
Merge branch 'apache:main' into hexagon-instr-profiling
jverma-quic Oct 18, 2022
b9bd7c3
Add src/runtime/hexagon/profiler/lwp_handler.S to allowed list
jverma-quic Oct 18, 2022
9ab2ccd
Address reformatting issues
jverma-quic Oct 18, 2022
fd52d05
Fix pylint errors
jverma-quic Oct 19, 2022
1754d2d
Merge branch 'main' into hexagon-instr-profiling
jverma-quic Oct 19, 2022
39204ae
Address remaining linter failures
jverma-quic Oct 19, 2022
d1aae57
clang-format issue
jverma-quic Oct 19, 2022
c93cc23
Fix builtin names
jverma-quic Oct 19, 2022
a34689f
Merge branch 'apache:main' into hexagon-instr-profiling
jverma-quic Oct 20, 2022
514f8cf
Resolve test failure for the simulator run
jverma-quic Oct 20, 2022
0960de6
Merge branch 'apache:main' into hexagon-instr-profiling
jverma-quic Oct 20, 2022
80353b3
Allow for the tests to provide .so name
jverma-quic Oct 20, 2022
e7f708c
Merge branch 'apache:main' into hexagon-instr-profiling
jverma-quic Oct 21, 2022
774f61b
Merge branch 'apache:main' into hexagon-instr-profiling
jverma-quic Oct 25, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions apps/hexagon_launcher/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,46 @@ lowered = tvm.relay.build(
lowered.export_library("model-aot.so", tvm.contrib.hexagon.link_shared)
```


## Profiling using hexagon launcher

### Enabling lightweight profiling (LWP) instrumentation

This profiling option can be used to get function- and loop-level processor cycle counts.
It must be enabled explicitly when compiling a model. For example:

```
with tvm.transform.PassContext(config={'tir.instrument_lwp':True} ):
lib = relay.build(...)
```

Here, `tir.instrument_lwp` enables the TIR pass that instruments the code with the profiling builtin calls.

During codegen, profiling builtin calls can be replaced with a target specific handler to record runtime
information into a buffer. This buffer is written into a JSON file which is processed to construct
function and loop level profiling information.

To generate the LWP JSON file, add the `--gen_lwp_json` flag to `launcher_android`:

```
./launcher_android --in_config input.json --out_config output.json --gen_lwp_json
```

Please note that the `--gen_lwp_json` flag by itself doesn't enable profiling; it is only used to dump
the profiling data into a JSON file called `lwp.json`. This file is created on the device, in the
directory from which `launcher_android` is executed. To generate the data, profiling instrumentation
must be enabled while compiling the model, as described above.

Use this command to pull `lwp.json` from the device:

```
adb -s <DEVICE-ID> pull /path/to/lwp.json
```

**Note:** Please refer to `src/runtime/hexagon/profiler/README.md` for information on how
to enable profiling using the Hexagon RPC launcher, and to learn about additional profiling-related
config options.

# Disclaimer

The launcher does not perform any correctness verification. In order to verify
Expand Down
5 changes: 4 additions & 1 deletion apps/hexagon_launcher/cmake/hexagon/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# under the License.

cmake_minimum_required(VERSION 3.2)
project(HexagonLauncherRPCSkel C CXX)
project(HexagonLauncherRPCSkel C CXX ASM)

include("${CMAKE_CURRENT_SOURCE_DIR}/../HexagonLauncher.cmake")
# From the include above get
Expand Down Expand Up @@ -68,11 +68,14 @@ set(SKEL_SRCS
"${LAUNCHER_SRC}/launcher_core.cc"
"${LAUNCHER_SRC}/launcher_hexagon.cc"
)
set(PROFILER_DIR "${TVM_SOURCE_DIR}/src/runtime/hexagon/profiler")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
set(PROFILER_DIR "${TVM_SOURCE_DIR}/src/runtime/hexagon/profiler")
set(HEXAGON_PROFILER_DIR "${TVM_SOURCE_DIR}/src/runtime/hexagon/profiler")


add_library(launcher_rpc_skel SHARED
"${LAUNCHER_RPC_H}"
"${LAUNCHER_RPC_SKEL_C}"
"${SKEL_SRCS}"
"${PROFILER_DIR}/prof_utils.cc"
"${PROFILER_DIR}/lwp_handler.S"
)

ExternalProject_Add(static_hexagon_tvm_runtime
Expand Down
9 changes: 5 additions & 4 deletions apps/hexagon_launcher/launcher_android.cc
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ AEEResult set_remote_stack_size(int size) {
}

struct RPCChannel : public ExecutionSession {
explicit RPCChannel(const std::string& uri) {
explicit RPCChannel(const std::string& uri, bool gen_lwp_json = false)
: ExecutionSession(gen_lwp_json) {
enable_unsigned_pd(true);
set_remote_stack_size(128 * 1024);

Expand Down Expand Up @@ -127,7 +128,7 @@ struct RPCChannel : public ExecutionSession {
}

bool run(uint64_t* pcycles, uint64_t* usecs) override {
AEEResult rc = launcher_rpc_run(handle, pcycles, usecs);
AEEResult rc = launcher_rpc_run(handle, pcycles, usecs, gen_lwp_json);
if (rc != AEE_SUCCESS) {
std::cout << "error running model: " << std::hex << rc << '\n';
}
Expand Down Expand Up @@ -158,8 +159,8 @@ struct RPCChannel : public ExecutionSession {
std::vector<void*> allocations;
};

ExecutionSession* create_execution_session() {
auto* session = new RPCChannel(launcher_rpc_URI CDSP_DOMAIN);
ExecutionSession* create_execution_session(bool gen_lwp_json) {
auto* session = new RPCChannel(launcher_rpc_URI CDSP_DOMAIN, gen_lwp_json);
if (session->handle == -1) {
delete session;
session = nullptr;
Expand Down
3 changes: 3 additions & 0 deletions apps/hexagon_launcher/launcher_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ struct Model {
};

struct ExecutionSession {
explicit ExecutionSession(bool lwp_json = false) : gen_lwp_json(lwp_json) {}

template <typename T>
T* alloc(size_t bytes, size_t align = 1) {
return reinterpret_cast<T*>(alloc_mem(bytes, align));
Expand All @@ -111,6 +113,7 @@ struct ExecutionSession {
virtual bool get_num_outputs(int* num_outputs) = 0;
virtual bool get_output(int output_idx, tensor_meta* output_meta, int meta_size,
void* output_data, int data_size) = 0;
bool gen_lwp_json = false;
};

bool read_model_config(const std::string& file_name, ModelConfig* model_config);
Expand Down
10 changes: 9 additions & 1 deletion apps/hexagon_launcher/launcher_hexagon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ extern "C" {
#include "launcher_rpc.h"

static std::unique_ptr<Model> TheModel;
bool WriteLWPOutput(const std::string&);

static AEEResult error_too_small(const std::string& func_name, const std::string& value_name,
int given, int needed) {
Expand Down Expand Up @@ -203,7 +204,7 @@ AEEResult __QAIC_HEADER(launcher_rpc_get_output)(remote_handle64 handle, int out
}

AEEResult __QAIC_HEADER(launcher_rpc_run)(remote_handle64 handle, uint64_t* pcycles,
uint64_t* usecs) {
uint64_t* usecs, int gen_lwp_json) {
if (!TheModel) {
// No model created.
LOG(ERROR) << __func__ << ": no model created";
Expand All @@ -220,5 +221,12 @@ AEEResult __QAIC_HEADER(launcher_rpc_run)(remote_handle64 handle, uint64_t* pcyc
*pcycles = pc_end - pc_begin;
*usecs = us_end - us_begin;

if (gen_lwp_json) {
if (!WriteLWPOutput("lwp.json")) {
LOG(ERROR) << "ERROR: failed to generate lwp json file";
return AEE_EFAILED;
}
}

return AEE_SUCCESS;
}
14 changes: 10 additions & 4 deletions apps/hexagon_launcher/launcher_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,14 @@
#include "launcher_core.h"
#include "launcher_util.h"

ExecutionSession* create_execution_session();
ExecutionSession* create_execution_session(bool gen_lwp_json);

int parse_command_line(int argc, char* argv[], std::string* in_path, std::string* out_path) {
int parse_command_line(int argc, char* argv[], std::string* in_path, std::string* out_path,
bool* gen_lwp_json) {
// Long options accepted by the launcher. The visible `case 1` / `case 2` handlers line up
// with positions in this table, so keep the entry order stable when adding options.
static option long_options[] = {
    {"in_config", required_argument, nullptr, 0},
    {"out_config", required_argument, nullptr, 0},
    // The handler for this entry only sets the flag; any attached value is not read.
    {"gen_lwp_json", optional_argument, nullptr, 0},
    // getopt_long(3) requires the table to end with an all-zero entry; without it an
    // unrecognized long option makes the lookup read past the end of the array.
    {nullptr, 0, nullptr, 0},
};

bool show_usage = false;
Expand All @@ -49,6 +51,9 @@ int parse_command_line(int argc, char* argv[], std::string* in_path, std::string
case 1:
*out_path = std::string(optarg);
break;
case 2:
*gen_lwp_json = true;
break;
}
}
if (in_path->empty() || out_path->empty() || show_usage) {
Expand All @@ -61,7 +66,8 @@ int parse_command_line(int argc, char* argv[], std::string* in_path, std::string

int main(int argc, char* argv[]) {
std::string in_path, out_path;
if (parse_command_line(argc, argv, &in_path, &out_path) != 0) {
// Initialize explicitly: the option handler only assigns this when --gen_lwp_json is
// parsed, so without a default it would be read uninitialized when the flag is absent.
bool gen_lwp_json = false;
if (parse_command_line(argc, argv, &in_path, &out_path, &gen_lwp_json) != 0) {
return 1;
}

Expand All @@ -70,7 +76,7 @@ int main(int argc, char* argv[]) {
return 1;
}

ExecutionSession* session_ptr = create_execution_session();
ExecutionSession* session_ptr = create_execution_session(gen_lwp_json);
if (session_ptr == nullptr) {
return 1;
}
Expand Down
2 changes: 1 addition & 1 deletion apps/hexagon_launcher/launcher_rpc.idl
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ interface launcher_rpc : remote_handle64 {
AEEResult set_input(in long input_idx, in buffer input_meta, in buffer input_value);
AEEResult get_num_outputs(rout long num_outputs);
AEEResult get_output(in long output_idx, rout buffer output_meta, rout buffer output_value);
AEEResult run(rout uint64_t pcycles, rout uint64_t usecs);
AEEResult run(rout uint64_t pcycles, rout uint64_t usecs, in long gen_lwp_json);
};
6 changes: 6 additions & 0 deletions cmake/modules/Hexagon.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -281,10 +281,14 @@ if(USE_HEXAGON_RPC)
# TODO(masahi): Remove rpc_local_session.cc after verifying that things work without it
"${TVMRT_SOURCE_DIR}/rpc/rpc_local_session.cc"
)
set(HEXAGON_PROFILER_DIR "${TVMRT_SOURCE_DIR}/hexagon/profiler")
# Add the hardware-specific RPC code into the skel library.
set_property(SOURCE ${HEXAGON_PROFILER_DIR}/lwp_handler.S PROPERTY LANGUAGE C)
add_library(hexagon_rpc_skel SHARED
"${TVMRT_SOURCE_DIR}/hexagon/rpc/hexagon/rpc_server.cc"
"${TVMRT_SOURCE_DIR}/hexagon/rpc/hexagon_rpc_skel.c"
"${HEXAGON_PROFILER_DIR}/prof_utils.cc"
"${HEXAGON_PROFILER_DIR}/lwp_handler.S"
)
target_include_directories(hexagon_rpc_skel
SYSTEM PRIVATE "${TVMRT_SOURCE_DIR}/hexagon/rpc"
Expand All @@ -293,6 +297,8 @@ if(USE_HEXAGON_RPC)
# executed via run_main_on_sim.
add_library(hexagon_rpc_sim SHARED
"${TVMRT_SOURCE_DIR}/hexagon/rpc/simulator/rpc_server.cc"
"${HEXAGON_PROFILER_DIR}/prof_utils.cc"
"${HEXAGON_PROFILER_DIR}/lwp_handler.S"
)
target_link_libraries(hexagon_rpc_sim
-Wl,--whole-archive tvm_runtime -Wl,--no-whole-archive
Expand Down
6 changes: 6 additions & 0 deletions cmake/modules/HexagonSDK.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,15 @@ function(_get_hexagon_sdk_property_impl
if(_property STREQUAL "SDK_INCLUDE")
set(_dirs "${_hexagon_sdk_root}/incs" "${_hexagon_sdk_root}/incs/stddef")
elseif(_property STREQUAL "QURT_INCLUDE")
# Set the Hexagon arch directory for runtime linker.
set(_rtld_dir "hexagon_toolv84_${_hexagon_arch}")
if(_hexagon_arch STREQUAL "v69")
set(_rtld_dir "hexagon_toolv84_v68") # Use hexagon_toolv84_v68 for v69
endif()
set(_dirs
"${_hexagon_sdk_root}/rtos/qurt/${_hexarch_dir}/include/posix"
"${_hexagon_sdk_root}/rtos/qurt/${_hexarch_dir}/include/qurt"
"${_hexagon_sdk_root}/ipc/fastrpc/rtld/ship/${_rtld_dir}"
)
elseif(_property STREQUAL "QURT_LIB")
set(_dirs "${_hexagon_sdk_root}/rtos/qurt/${_hexarch_dir}/lib/pic")
Expand Down
10 changes: 10 additions & 0 deletions include/tvm/tir/builtin.h
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,16 @@ TVM_DLL const Op& assume();
*/
TVM_DLL const Op& undef();

/*!
 * \brief Profiling intrinsic (start marker).
 * \note NOTE(review): presumably marks the beginning of an instrumented region and is
 *       paired with end_profile_intrinsic(); arguments are not visible here -- confirm
 *       against the op registration.
 */
TVM_DLL const Op& start_profile_intrinsic();

/*!
 * \brief Profiling intrinsic (end marker).
 * \note NOTE(review): presumably marks the end of an instrumented region and is
 *       paired with start_profile_intrinsic(); arguments are not visible here -- confirm
 *       against the op registration.
 */
TVM_DLL const Op& end_profile_intrinsic();

/*! \brief The kind of structure field info used in intrinsic */
enum TVMStructFieldKind : int {
// array head address
Expand Down
6 changes: 6 additions & 0 deletions include/tvm/tir/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,12 @@ TVM_DLL Pass RemoveWeightLayoutRewriteBlock(bool skip_ndarray_rewrite = false);
*/
TVM_DLL Pass ManifestSharedMemoryLocalStage();

/*!
 * \brief Insert intrinsic calls to instrument function and loop level profiling.
 * \note NOTE(review): the Hexagon launcher README enables this instrumentation through the
 *       `tir.instrument_lwp` PassContext option -- confirm that is the only switch.
 * \return The pass.
 */
TVM_DLL Pass InstrumentProfileIntrinsics();

} // namespace transform
} // namespace tir
} // namespace tvm
Expand Down
71 changes: 71 additions & 0 deletions python/tvm/contrib/hexagon/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from typing import Union

import tvm
from tvm.contrib.hexagon.hexagon_profiler import HexagonProfiler
from ..._ffi import libinfo
from .session import Session

Expand Down Expand Up @@ -336,6 +337,29 @@ def get_graph_debug_executor(
"""
return session.get_graph_debug_executor(graph_json, module, dump_root=dump_root)

@abc.abstractmethod
def get_profile_output(
    self,
    hex_profiler: HexagonProfiler,
    session: Session,
) -> str:
    """Extract profile output.

    Parameters
    ----------
    hex_profiler : HexagonProfiler
        HexagonProfiler object that contains the profiling related information.
    session : Session
        Remote session. The session must be established (via __enter__)
        prior to calling this function.

    Returns
    -------
    profile_data : str
        Path of the profiling data file. Implementations may return an empty
        string when the selected profiling mode produces no single data file.
    """
    ...


class HexagonLauncherAndroid(HexagonLauncherRPC):
"""Hexagon Launcher for Android."""
Expand Down Expand Up @@ -392,6 +416,7 @@ def _copy_to_remote(
self, local_path: Union[str, pathlib.Path], remote_path: Union[str, pathlib.Path]
):
"""Abstract method implementation. See description in HexagonLauncherRPC."""

_check_call_verbose(self._adb_device_sub_cmd + ["push", str(local_path), str(remote_path)])

def _create_remote_directory(self, remote_path: Union[str, pathlib.Path]) -> pathlib.Path:
Expand Down Expand Up @@ -629,6 +654,32 @@ def stop_server(self):
if not self._hexagon_debug:
self.cleanup_directory()

def get_profile_output(
    self,
    hex_profiler: HexagonProfiler,
    session: Session,
) -> str:
    """Abstract method implementation. See description in HexagonLauncherRPC.

    When LWP profiling is enabled, asks the remote session to produce ``lwp.json``
    and pulls it from the profiler's remote path into the profiler's temp directory
    using the adb sub-command. When the profiling mode is ``"etm"``, delegates to
    ``hex_profiler.pull_files_for_etm_processing``.

    Returns
    -------
    profile_data : str
        Local path of the pulled ``lwp.json`` for LWP profiling; an empty string
        for ETM profiling.

    Raises
    ------
    RuntimeError
        If the profiler has no temp directory, if the session reports that the
        profile output could not be generated, or if profiling is not enabled.
    """
    profile_data = ""
    if hex_profiler.is_lwp_enabled():
        temp_dir = hex_profiler.get_temp_dir()
        remote_path = hex_profiler.get_remote_path()
        if not temp_dir:
            raise RuntimeError("tempdir not passed")
        fname = "lwp.json"
        # Location of the file on the device and the local destination for the pull.
        out_path = os.path.join(remote_path, fname)
        profile_data = temp_dir.relpath(fname)
        ret = session.get_profile_output(hex_profiler.get_mode(), fname)
        if ret:
            subprocess.check_call(self._adb_device_sub_cmd + ["pull", out_path, profile_data])
        else:
            raise RuntimeError("Error generating profile output")
    elif hex_profiler.profiling_mode == "etm":
        # profile_data stays "" on this path; nothing else is returned to the caller.
        hex_profiler.pull_files_for_etm_processing(self._workspace)
    else:
        raise RuntimeError("Profiling not enabled")
    return profile_data


class HexagonLauncherSimulator(HexagonLauncherRPC):
"""Hexagon Launcher for Hexagon simulator."""
Expand Down Expand Up @@ -735,6 +786,26 @@ def stop_server(self):
"""Abstract method implementation. See description in HexagonLauncherRPC."""
self._server_process.terminate()

def get_profile_output(
    self,
    hex_profiler: HexagonProfiler,
    session: Session,
) -> str:
    """Abstract method implementation. See description in HexagonLauncherRPC.

    When LWP profiling is enabled, asks the session to produce ``lwp.json`` and
    returns its path under this launcher's workspace. No file is copied here.

    Returns
    -------
    profile_data : str
        ``<workspace>/lwp.json``.

    Raises
    ------
    RuntimeError
        If the session reports that the profile output could not be generated,
        if ETM profiling is requested (not supported on the simulator), or if
        profiling is not enabled.
    """
    profile_data = ""
    if hex_profiler.is_lwp_enabled():
        fname = "lwp.json"
        profile_data = f"{self._workspace}/{fname}"
        ret = session.get_profile_output(hex_profiler.get_mode(), fname)
        if not ret:
            raise RuntimeError("Error generating profile output")
    elif hex_profiler.profiling_mode == "etm":
        raise RuntimeError("ETM Profiling not supported on the simulator")
    else:
        raise RuntimeError("Profiling not enabled")

    return profile_data


# https://stackoverflow.com/a/52872579/2689797
def _is_port_in_use(port: int) -> bool:
Expand Down
Loading