From b4e5960bb4f804e42c0abe60045621ae1d075f27 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 24 Jun 2019 12:00:45 -0500 Subject: [PATCH] [RUNTIME] Initial implementation of Hexagon runtime support This Contribution is being provided by Qualcomm Innovation Center, Inc. See NOTICE file for more details. --- CMakeLists.txt | 3 + NOTICE | 14 + cmake/config.cmake | 3 + cmake/modules/Hexagon.cmake | 104 ++++ include/tvm/runtime/c_runtime_api.h | 1 + include/tvm/runtime/device_api.h | 5 +- python/tvm/__init__.py | 2 +- python/tvm/_ffi/runtime_ctypes.py | 2 + python/tvm/ndarray.py | 15 + python/tvm/rpc/client.py | 4 + python/tvm/target.py | 12 + src/codegen/build_module.cc | 7 +- .../device/fastrpc/tvm_hexagon_remote.h | 137 ++++++ .../device/fastrpc/tvm_hexagon_remote_nd.h | 101 ++++ .../hexagon/device/hexagon_device_log.h | 55 +++ .../hexagon/device/hexagon_device_target.cc | 456 +++++++++++++++++ .../hexagon/device/hexagon_dsprpcapi.cc | 113 +++++ .../hexagon/device/hexagon_dsprpcapi.h | 206 ++++++++ src/runtime/hexagon/device/hexagon_stubapi.cc | 112 +++++ src/runtime/hexagon/device/hexagon_stubapi.h | 278 +++++++++++ src/runtime/hexagon/hexagon_device_api.cc | 173 +++++++ src/runtime/hexagon/hexagon_module.cc | 343 +++++++++++++ src/runtime/hexagon/hexagon_module.h | 152 ++++++ src/runtime/hexagon/sim/driver/sim_device.cc | 422 ++++++++++++++++ src/runtime/hexagon/sim/hexagon_device_sim.cc | 461 ++++++++++++++++++ src/runtime/hexagon/sim/hexagon_sim_proto.h | 88 ++++ 26 files changed, 3264 insertions(+), 5 deletions(-) create mode 100644 cmake/modules/Hexagon.cmake create mode 100644 src/runtime/hexagon/device/fastrpc/tvm_hexagon_remote.h create mode 100644 src/runtime/hexagon/device/fastrpc/tvm_hexagon_remote_nd.h create mode 100644 src/runtime/hexagon/device/hexagon_device_log.h create mode 100644 src/runtime/hexagon/device/hexagon_device_target.cc create mode 100644 src/runtime/hexagon/device/hexagon_dsprpcapi.cc create mode 100644 
src/runtime/hexagon/device/hexagon_dsprpcapi.h create mode 100644 src/runtime/hexagon/device/hexagon_stubapi.cc create mode 100644 src/runtime/hexagon/device/hexagon_stubapi.h create mode 100644 src/runtime/hexagon/hexagon_device_api.cc create mode 100644 src/runtime/hexagon/hexagon_module.cc create mode 100644 src/runtime/hexagon/hexagon_module.h create mode 100644 src/runtime/hexagon/sim/driver/sim_device.cc create mode 100644 src/runtime/hexagon/sim/hexagon_device_sim.cc create mode 100644 src/runtime/hexagon/sim/hexagon_sim_proto.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 6500ba013e28..3dcc084e9ac1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,8 @@ tvm_option(USE_OPENGL "Build with OpenGL" OFF) tvm_option(USE_METAL "Build with Metal" OFF) tvm_option(USE_ROCM "Build with ROCM" OFF) tvm_option(ROCM_PATH "The path to rocm" /opt/rocm) +tvm_option(USE_HEXAGON "Build with Hexagon" OFF) +tvm_option(USE_HEXAGON_SDK "Path to the Hexagon SDK root (required for Hexagon support)" /path/to/sdk) tvm_option(USE_RPC "Build with RPC" ON) tvm_option(USE_LLVM "Build with LLVM, can be set to specific llvm-config path" OFF) tvm_option(USE_STACKVM_RUNTIME "Include stackvm into the runtime" OFF) @@ -201,6 +203,7 @@ endif(USE_GRAPH_RUNTIME) # Module rules include(cmake/modules/VTA.cmake) include(cmake/modules/CUDA.cmake) +include(cmake/modules/Hexagon.cmake) include(cmake/modules/OpenCL.cmake) include(cmake/modules/OpenGL.cmake) include(cmake/modules/Vulkan.cmake) diff --git a/NOTICE b/NOTICE index 45468c50ba1b..28fe178033ec 100644 --- a/NOTICE +++ b/NOTICE @@ -1 +1,15 @@ TVM End to End Deep Learning Compiler Stack: https://tvm.ai/ + +This product includes contributions provided by Qualcomm Technologies, Inc., +a Delaware corporation, or its subsidiary Qualcomm Innovation Center, Inc., +a California corporation, under certain additional terms and conditions +pursuant to Section 5 of the Apache 2.0 license. 
In this regard, with +respect to these Contributions, the term "Work" in Section 1 of the +Apache 2.0 license means only the specific subdirectory within the TVM repo +(currently at https://github.com/dmlc/tvm) to which these Contribution were +made. +In any case, these submissions are "Not a Contribution" with respect to its +permitted use with any of the "vta" and "verilog" subdirectories in the TVM +repo. +Qualcomm Technologies, Inc. and Qualcomm Innovation Center, Inc. retain +copyright of their respective Contributions. diff --git a/cmake/config.cmake b/cmake/config.cmake index 6239bc4e6dce..98d8311f37c6 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -135,3 +135,6 @@ set(USE_ANTLR OFF) # Whether use Relay debug mode set(USE_RELAY_DEBUG OFF) +# Whether to use hexagon device +set(USE_HEXAGON OFF) +set(USE_HEXAGON_SDK /path/to/sdk) diff --git a/cmake/modules/Hexagon.cmake b/cmake/modules/Hexagon.cmake new file mode 100644 index 000000000000..bac376683068 --- /dev/null +++ b/cmake/modules/Hexagon.cmake @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# This Contribution is being provided by Qualcomm Technologies, Inc., +# a Delaware corporation, or its subsidiary Qualcomm Innovation Center, Inc., +# a California corporation, under certain additional terms and conditions +# pursuant to Section 5 of the Apache 2.0 license. In this regard, with +# respect to this Contribution, the term "Work" in Section 1 of the +# Apache 2.0 license means only the specific subdirectory within the TVM repo +# (currently at https://github.com/dmlc/tvm) to which this Contribution is +# made. +# In any case, this submission is "Not a Contribution" with respect to its +# permitted use with any of the "vta" and "verilog" subdirectories in the TVM +# repo. +# Qualcomm Technologies, Inc. and Qualcomm Innovation Center, Inc. retain +# copyright of their respective Contributions. + +function(find_hexagon_toolchain) + if (NOT "${USE_HEXAGON_TOOLCHAIN}" STREQUAL "") + set(TRY_PATH "${USE_HEXAGON_TOOLCHAIN}") + else() + set(TRY_PATH "${USE_HEXAGON_SDK}") + endif() + message(STATUS "Looking for Hexagon toolchain in ${TRY_PATH}") + file(GLOB_RECURSE HEXAGON_CLANG "${TRY_PATH}/*/hexagon-clang++") + if(HEXAGON_CLANG) + # The path is ${HEXAGON_TOOLCHAIN}/bin/hexagon-clang++. + get_filename_component(HEXAGON_TMP0 "${HEXAGON_CLANG}" DIRECTORY) + get_filename_component(HEXAGON_TMP1 "${HEXAGON_TMP0}" DIRECTORY) + set(HEXAGON_TOOLCHAIN "${HEXAGON_TMP1}" CACHE PATH + "Path to the Hexagon toolchain") + else(HEXAGON_CLANG) + message(SEND_ERROR "Cannot find Hexagon toolchain in ${TRY_PATH}") + endif() +endfunction() + +function(find_hexagon_sdk_root) + message(STATUS "Checking Hexagon SDK root: ${USE_HEXAGON_SDK}") + file(GLOB_RECURSE HEXAGON_AEESTDDEF "${USE_HEXAGON_SDK}/*/AEEStdDef.h") + if(HEXAGON_AEESTDDEF) + # The path is ${HEXAGON_SDK_ROOT}/incs/stddef/AEEStdDef.h. 
+ get_filename_component(HEXAGON_TMP0 "${HEXAGON_AEESTDDEF}" DIRECTORY) + get_filename_component(HEXAGON_TMP1 "${HEXAGON_TMP0}" DIRECTORY) + get_filename_component(HEXAGON_TMP2 "${HEXAGON_TMP1}" DIRECTORY) + set(HEXAGON_SDK_ROOT "${HEXAGON_TMP2}" CACHE PATH + "Root directory of Hexagon SDK") + else(HEXAGON_AEESTDDEF) + message(SEND_ERROR "Cannot validate Hexagon SDK in ${USE_HEXAGON_SDK}") + endif() +endfunction() + +if(USE_HEXAGON STREQUAL "OFF") + return() +elseif(NOT USE_HEXAGON STREQUAL "sim" AND + NOT USE_HEXAGON STREQUAL "device") + message(SEND_ERROR "USE_HEXAGON must be one of [OFF|sim|device]") + return() +endif() +# If USE_HEXAGON is set to a valid value, make sure that USE_HEXAGON_SDK +# is defined. +if (NOT USE_HEXAGON_SDK) + message(SEND_ERROR "Please set USE_HEXAGON_SDK to the Hexagon SDK root") + return() +endif() + +if(USE_HEXAGON STREQUAL "sim") + find_hexagon_toolchain() + message(STATUS "Hexagon toolchain: ${HEXAGON_TOOLCHAIN}") + add_definitions("-DHEXAGON_TOOLCHAIN=\"${HEXAGON_TOOLCHAIN}\"") + file(GLOB RUNTIME_HEXAGON_SIM_SRCS src/runtime/hexagon/sim/*.cc) + include_directories("${HEXAGON_TOOLCHAIN}/include/iss") + link_directories("${HEXAGON_TOOLCHAIN}/lib/iss") + list(APPEND TVM_RUNTIME_LINKER_LIBS "-lwrapper") +elseif(USE_HEXAGON STREQUAL "device") + find_hexagon_sdk_root() + find_hexagon_toolchain() + message(STATUS "Hexagon SDK: ${HEXAGON_SDK_ROOT}") + add_definitions("-DHEXAGON_TOOLCHAIN=\"${HEXAGON_TOOLCHAIN}\"") + file(GLOB RUNTIME_HEXAGON_DEVICE_SRCS src/runtime/hexagon/device/*.cc) + include_directories("${HEXAGON_SDK_ROOT}/incs/stddef") + include_directories("${HEXAGON_SDK_ROOT}/libs/common/rpcmem/inc") + include_directories("${HEXAGON_SDK_ROOT}/libs/common/remote/ship") + include_directories("${HEXAGON_TOOLCHAIN}/include/iss") + list(APPEND TVM_RUNTIME_LINKER_LIBS "-ldl") +endif() + +add_definitions("-DDMLC_LOG_FATAL_THROW=0") +file(GLOB RUNTIME_HEXAGON_SRCS src/runtime/hexagon/*.cc) +list(APPEND RUNTIME_SRCS 
${RUNTIME_HEXAGON_SRCS} ${RUNTIME_HEXAGON_SIM_SRCS} + ${RUNTIME_HEXAGON_DEVICE_SRCS}) diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h index ba2c0d2291b6..c1479415ac3f 100644 --- a/include/tvm/runtime/c_runtime_api.h +++ b/include/tvm/runtime/c_runtime_api.h @@ -82,6 +82,7 @@ typedef enum { kDLSDAccel = 6, kOpenGL = 11, // AddExtraTVMType which is not in DLPack here + kDLHexagon = 13, } TVMDeviceExtType; /*! diff --git a/include/tvm/runtime/device_api.h b/include/tvm/runtime/device_api.h index 6986e62475fd..8319675b06d5 100644 --- a/include/tvm/runtime/device_api.h +++ b/include/tvm/runtime/device_api.h @@ -46,10 +46,10 @@ enum DeviceAttrKind : int { }; /*! \brief Number of bytes each allocation must align to */ -constexpr int kAllocAlignment = 64; +constexpr int kAllocAlignment = 128; /*! \brief Number of bytes each allocation must align to in temporary allocation */ -constexpr int kTempAllocaAlignment = 64; +constexpr int kTempAllocaAlignment = 128; /*! \brief Maximum size that can be allocated on stack */ constexpr int kMaxStackAlloca = 1024; @@ -215,6 +215,7 @@ inline const char* DeviceName(int type) { case kDLROCM: return "rocm"; case kOpenGL: return "opengl"; case kDLExtDev: return "ext_dev"; + case kDLHexagon: return "hexagon"; default: LOG(FATAL) << "unknown type =" << type; return "Unknown"; } } diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py index 5765eed0ad8b..539449ff31a8 100644 --- a/python/tvm/__init__.py +++ b/python/tvm/__init__.py @@ -42,7 +42,7 @@ from . 
import ndarray as nd from .ndarray import context, cpu, gpu, opencl, cl, vulkan, metal, mtl -from .ndarray import vpi, rocm, opengl, ext_dev +from .ndarray import vpi, rocm, opengl, ext_dev, hexagon from ._ffi.runtime_ctypes import TypeCode, TVMType from ._ffi.ndarray import TVMContext diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py index 72cff1a10ead..e6dd1055d989 100644 --- a/python/tvm/_ffi/runtime_ctypes.py +++ b/python/tvm/_ffi/runtime_ctypes.py @@ -143,6 +143,7 @@ class TVMContext(ctypes.Structure): 10: 'rocm', 11: 'opengl', 12: 'ext_dev', + 13: 'hexagon', } STR2MASK = { 'llvm': 1, @@ -163,6 +164,7 @@ class TVMContext(ctypes.Structure): 'rocm': 10, 'opengl': 11, 'ext_dev': 12, + 'hexagon': 13, } def __init__(self, device_type, device_id): super(TVMContext, self).__init__() diff --git a/python/tvm/ndarray.py b/python/tvm/ndarray.py index e6c911576e64..c4b4fc341931 100644 --- a/python/tvm/ndarray.py +++ b/python/tvm/ndarray.py @@ -188,6 +188,21 @@ def ext_dev(dev_id=0): """ return TVMContext(12, dev_id) +def hexagon(dev_id=0): + """Construct a Hexagon device + + Parameters + ---------- + dev_id : int, optional + The integer device id + + Returns + ------- + ctx : TVMContext + The created context + """ + return TVMContext(13, dev_id) + cl = opencl mtl = metal diff --git a/python/tvm/rpc/client.py b/python/tvm/rpc/client.py index 9c0dea5b0863..d11997cfd610 100644 --- a/python/tvm/rpc/client.py +++ b/python/tvm/rpc/client.py @@ -175,6 +175,10 @@ def ext_dev(self, dev_id=0): """Construct extension device.""" return self.context(12, dev_id) + def hexagon(self, dev_id=0): + """Construct Hexagon device.""" + return self.context(13, dev_id) + class LocalSession(RPCSession): """RPCSession interface backed by local environment. 
diff --git a/python/tvm/target.py b/python/tvm/target.py index 4548ffac4c88..ba48d20901e2 100644 --- a/python/tvm/target.py +++ b/python/tvm/target.py @@ -495,6 +495,18 @@ def vta(model='unknown', options=None): ret = _api_internal._TargetCreate("ext_dev", *opts) return ret +def hexagon(options=None): + """Returns a Hexagon target. + + Parameters + ---------- + options : str or list of str + Additional options + """ + # No extra options, but pass an empty list instead of None to avoid + # an assertion in packed_func.h in conversion to std::string(). + opts = _merge_opts([], options) + return _api_internal._TargetCreate("hexagon", *opts) def create(target_str): """Get a target given target string. diff --git a/src/codegen/build_module.cc b/src/codegen/build_module.cc index 6917200ff920..461119e9483b 100644 --- a/src/codegen/build_module.cc +++ b/src/codegen/build_module.cc @@ -44,8 +44,8 @@ TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable) /*! * \brief Construct a Target node from the given name and options. * \param target_name The major target name. 
Should be one of -* {"aocl", "aocl_sw_emu", "c", "cuda", "ext_dev", "hybrid", "llvm", "metal", -* "nvptx", "opencl", "opengl", "rocm", "sdaccel", "stackvm", "vulkan"} +* {"aocl", "aocl_sw_emu", "c", "cuda", "ext_dev", "hexagon", "hybrid", "llvm", +* "metal","nvptx", "opencl", "opengl", "rocm", "sdaccel", "stackvm", "vulkan"} * \param options Additional options appended to the target * \return The constructed Target */ @@ -132,6 +132,9 @@ Target CreateTarget(const std::string& target_name, t->device_type = kDLExtDev; } else if (target_name == "hybrid") { t->device_type = kDLCPU; + } else if (target_name == "hexagon") { + t->keys_array.push_back(ir::StringImm::make("hexagon")); + t->device_type = kDLHexagon; } else { LOG(ERROR) << "Unknown target name " << target_name; return target::stackvm(); diff --git a/src/runtime/hexagon/device/fastrpc/tvm_hexagon_remote.h b/src/runtime/hexagon/device/fastrpc/tvm_hexagon_remote.h new file mode 100644 index 000000000000..8d608be8bb67 --- /dev/null +++ b/src/runtime/hexagon/device/fastrpc/tvm_hexagon_remote.h @@ -0,0 +1,137 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * This Contribution is being provided by Qualcomm Technologies, Inc., + * a Delaware corporation, or its subsidiary Qualcomm Innovation Center, Inc., + * a California corporation, under certain additional terms and conditions + * pursuant to Section 5 of the Apache 2.0 license. In this regard, with + * respect to this Contribution, the term "Work" in Section 1 of the + * Apache 2.0 license means only the specific subdirectory within the TVM repo + * (currently at https://github.com/dmlc/tvm) to which this Contribution is + * made. + * In any case, this submission is "Not a Contribution" with respect to its + * permitted use with any of the "vta" and "verilog" subdirectories in the TVM + * repo. + * Qualcomm Technologies, Inc. and Qualcomm Innovation Center, Inc. retain + * copyright of their respective Contributions. + */ +#ifndef TVM_RUNTIME_HEXAGON_DEVICE_FASTRPC_TVM_HEXAGON_REMOTE_H_ +#define TVM_RUNTIME_HEXAGON_DEVICE_FASTRPC_TVM_HEXAGON_REMOTE_H_ +/// @file tvm_hexagon_remote.idl +/// IDL to offload TVM kernels to Hexagon from APPS for multi-domains +#include +#include +#ifndef __QAIC_HEADER +#define __QAIC_HEADER(ff) ff +#endif // __QAIC_HEADER + +#ifndef __QAIC_HEADER_EXPORT +#define __QAIC_HEADER_EXPORT +#endif // __QAIC_HEADER_EXPORT + +#ifndef __QAIC_HEADER_ATTRIBUTE +#define __QAIC_HEADER_ATTRIBUTE +#endif // __QAIC_HEADER_ATTRIBUTE + +#ifndef __QAIC_IMPL +#define __QAIC_IMPL(ff) ff +#endif // __QAIC_IMPL + +#ifndef __QAIC_IMPL_EXPORT +#define __QAIC_IMPL_EXPORT +#endif // __QAIC_IMPL_EXPORT + +#ifndef __QAIC_IMPL_ATTRIBUTE +#define __QAIC_IMPL_ATTRIBUTE +#endif // __QAIC_IMPL_ATTRIBUTE +#ifdef __cplusplus +extern "C" { +#endif +/** + * Opens the handle in the specified domain. If this is the first + * handle, this creates the session. 
Typically this means opening + * the device, aka open("/dev/adsprpc-smd"), then calling ioctl + * device APIs to create a PD on the DSP to execute our code in, + * then asking that PD to dlopen the .so and dlsym the skel function. + * + * @param uri, <interface>_URI"&_dom=aDSP" + * <interface>_URI is a QAIC generated uri, or + * "file:///<sofilename>?<interface>_skel_handle_invoke&_modver=1.0" + * If the _dom parameter is not present, _dom=DEFAULT is assumed + * but not forwarded. + * Reserved uri keys: + * [0]: first unnamed argument is the skel invoke function + * _dom: execution domain name, _dom=mDSP/aDSP/DEFAULT + * _modver: module version, _modver=1.0 + * _*: any other key name starting with an _ is reserved + * Unknown uri keys/values are forwarded as is. + * @param h, resulting handle + * @retval, 0 on success + */ +__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_open)( + const char* uri, remote_handle64* h) __QAIC_HEADER_ATTRIBUTE; +/** + * Closes a handle. If this is the last handle to close, the session + * is closed as well, releasing all the allocated resources. 
+ + * @param h, the handle to close + * @retval, 0 on success, should always succeed + */ +__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_close)( + remote_handle64 h) __QAIC_HEADER_ATTRIBUTE; +typedef struct _tvm_hexagon_remote_buffer__seq_octet + _tvm_hexagon_remote_buffer__seq_octet; +typedef _tvm_hexagon_remote_buffer__seq_octet tvm_hexagon_remote_buffer; +struct _tvm_hexagon_remote_buffer__seq_octet { + unsigned char* data; + int dataLen; +}; +typedef unsigned int tvm_hexagon_remote_handle_t; +typedef uint64 tvm_hexagon_remote_scalar_t; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_load_library)( + remote_handle64 _h, const char* soname, int sonameLen, const char* code, + int codeLen, + tvm_hexagon_remote_handle_t* module_ptr) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_get_symbol)( + remote_handle64 _h, tvm_hexagon_remote_handle_t module_ptr, + const char* name, int nameLen, + tvm_hexagon_remote_handle_t* sym_ptr) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_kernel)( + remote_handle64 _h, tvm_hexagon_remote_handle_t module_ptr, + tvm_hexagon_remote_handle_t symbol, int* scalar, int scalarLen, int* stack, + int stackLen, const tvm_hexagon_remote_buffer* scalar_in_octet, + int scalar_in_octetLen, tvm_hexagon_remote_buffer* scalar_out_octet, + int scalar_out_octetLen, const tvm_hexagon_remote_buffer* stack_in_octet, + int stack_in_octetLen, tvm_hexagon_remote_buffer* stack_out_octet, + int stack_out_octetLen, uint64* pcycles, + uint64* time_usec) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_release_library)( + remote_handle64 _h, + tvm_hexagon_remote_handle_t module_ptr) __QAIC_HEADER_ATTRIBUTE; +#ifndef tvm_hexagon_remote_URI +#define tvm_hexagon_remote_URI \ + "file:///" \ + "libtvm_hexagon_remote_skel.so?tvm_hexagon_remote_skel_handle_invoke&_" \ + "modver=1.0" +#endif /*tvm_hexagon_remote_URI*/ +#ifdef __cplusplus +} 
+#endif +#endif // TVM_RUNTIME_HEXAGON_DEVICE_FASTRPC_TVM_HEXAGON_REMOTE_H_ diff --git a/src/runtime/hexagon/device/fastrpc/tvm_hexagon_remote_nd.h b/src/runtime/hexagon/device/fastrpc/tvm_hexagon_remote_nd.h new file mode 100644 index 000000000000..8127d715dc19 --- /dev/null +++ b/src/runtime/hexagon/device/fastrpc/tvm_hexagon_remote_nd.h @@ -0,0 +1,101 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * This Contribution is being provided by Qualcomm Technologies, Inc., + * a Delaware corporation, or its subsidiary Qualcomm Innovation Center, Inc., + * a California corporation, under certain additional terms and conditions + * pursuant to Section 5 of the Apache 2.0 license. In this regard, with + * respect to this Contribution, the term "Work" in Section 1 of the + * Apache 2.0 license means only the specific subdirectory within the TVM repo + * (currently at https://github.com/dmlc/tvm) to which this Contribution is + * made. + * In any case, this submission is "Not a Contribution" with respect to its + * permitted use with any of the "vta" and "verilog" subdirectories in the TVM + * repo. + * Qualcomm Technologies, Inc. and Qualcomm Innovation Center, Inc. 
retain + * copyright of their respective Contributions. + */ +#ifndef TVM_RUNTIME_HEXAGON_DEVICE_FASTRPC_TVM_HEXAGON_REMOTE_ND_H_ +#define TVM_RUNTIME_HEXAGON_DEVICE_FASTRPC_TVM_HEXAGON_REMOTE_ND_H_ +/// @file tvm_hexagon_remote_nd.idl +/// IDL to offload TVM kernels to Hexagon from APPS for non-domains +#include +#include +#ifndef __QAIC_HEADER +#define __QAIC_HEADER(ff) ff +#endif // __QAIC_HEADER + +#ifndef __QAIC_HEADER_EXPORT +#define __QAIC_HEADER_EXPORT +#endif // __QAIC_HEADER_EXPORT + +#ifndef __QAIC_HEADER_ATTRIBUTE +#define __QAIC_HEADER_ATTRIBUTE +#endif // __QAIC_HEADER_ATTRIBUTE + +#ifndef __QAIC_IMPL +#define __QAIC_IMPL(ff) ff +#endif // __QAIC_IMPL + +#ifndef __QAIC_IMPL_EXPORT +#define __QAIC_IMPL_EXPORT +#endif // __QAIC_IMPL_EXPORT + +#ifndef __QAIC_IMPL_ATTRIBUTE +#define __QAIC_IMPL_ATTRIBUTE +#endif // __QAIC_IMPL_ATTRIBUTE +#ifdef __cplusplus +extern "C" { +#endif +typedef struct _tvm_hexagon_remote_nd_buffer__seq_octet + _tvm_hexagon_remote_nd_buffer__seq_octet; +typedef _tvm_hexagon_remote_nd_buffer__seq_octet tvm_hexagon_remote_nd_buffer; +struct _tvm_hexagon_remote_nd_buffer__seq_octet { + unsigned char* data; + int dataLen; +}; +typedef unsigned int tvm_hexagon_remote_nd_handle_t; +typedef uint64 tvm_hexagon_remote_nd_scalar_t; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_open)(void) + __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_close)(void) + __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_load_library)( + const char* soname, int sonameLen, const char* code, int codeLen, + tvm_hexagon_remote_nd_handle_t* module_ptr) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_get_symbol)( + tvm_hexagon_remote_nd_handle_t module_ptr, const char* name, int nameLen, + tvm_hexagon_remote_nd_handle_t* sym_ptr) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_kernel)( + 
tvm_hexagon_remote_nd_handle_t module_ptr, + tvm_hexagon_remote_nd_handle_t symbol, int* scalar, int scalarLen, + int* stack, int stackLen, + const tvm_hexagon_remote_nd_buffer* scalar_in_octet, + int scalar_in_octetLen, tvm_hexagon_remote_nd_buffer* scalar_out_octet, + int scalar_out_octetLen, + const tvm_hexagon_remote_nd_buffer* stack_in_octet, int stack_in_octetLen, + tvm_hexagon_remote_nd_buffer* stack_out_octet, int stack_out_octetLen, + uint64* pcycles, uint64* time_usec) __QAIC_HEADER_ATTRIBUTE; +__QAIC_HEADER_EXPORT int __QAIC_HEADER(tvm_hexagon_remote_nd_release_library)( + tvm_hexagon_remote_nd_handle_t module_ptr) __QAIC_HEADER_ATTRIBUTE; +#ifdef __cplusplus +} +#endif +#endif // TVM_RUNTIME_HEXAGON_DEVICE_FASTRPC_TVM_HEXAGON_REMOTE_ND_H_ diff --git a/src/runtime/hexagon/device/hexagon_device_log.h b/src/runtime/hexagon/device/hexagon_device_log.h new file mode 100644 index 000000000000..552ffb6446db --- /dev/null +++ b/src/runtime/hexagon/device/hexagon_device_log.h @@ -0,0 +1,55 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * This Contribution is being provided by Qualcomm Technologies, Inc., + * a Delaware corporation, or its subsidiary Qualcomm Innovation Center, Inc., + * a California corporation, under certain additional terms and conditions + * pursuant to Section 5 of the Apache 2.0 license. In this regard, with + * respect to this Contribution, the term "Work" in Section 1 of the + * Apache 2.0 license means only the specific subdirectory within the TVM repo + * (currently at https://github.com/dmlc/tvm) to which this Contribution is + * made. + * In any case, this submission is "Not a Contribution" with respect to its + * permitted use with any of the "vta" and "verilog" subdirectories in the TVM + * repo. + * Qualcomm Technologies, Inc. and Qualcomm Innovation Center, Inc. retain + * copyright of their respective Contributions. + */ +#ifndef TVM_RUNTIME_HEXAGON_DEVICE_HEXAGON_DEVICE_LOG_H_ +#define TVM_RUNTIME_HEXAGON_DEVICE_HEXAGON_DEVICE_LOG_H_ +#ifdef __ANDROID__ + +#include + +#define TVM_LOGV(...) \ + __android_log_print(ANDROID_LOG_VERBOSE, "TVM", ##__VA_ARGS__) +#define TVM_LOGD(...) \ + __android_log_print(ANDROID_LOG_DEBUG, "TVM", ##__VA_ARGS__) +#define TVM_LOGI(...) \ + __android_log_print(ANDROID_LOG_INFO, "TVM", ##__VA_ARGS__) +#define TVM_LOGW(...) \ + __android_log_print(ANDROID_LOG_WARN, "TVM", ##__VA_ARGS__) +#define TVM_LOGE(...) \ + __android_log_print(ANDROID_LOG_ERROR, "TVM", ##__VA_ARGS__) +#define TVM_LOGF(...) \ + __android_log_print(ANDROID_LOG_FATAL, "TVM", ##__VA_ARGS__) + +#endif // __ANDROID__ +#endif // TVM_RUNTIME_HEXAGON_DEVICE_HEXAGON_DEVICE_LOG_H_ diff --git a/src/runtime/hexagon/device/hexagon_device_target.cc b/src/runtime/hexagon/device/hexagon_device_target.cc new file mode 100644 index 000000000000..82929389a825 --- /dev/null +++ b/src/runtime/hexagon/device/hexagon_device_target.cc @@ -0,0 +1,456 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * This Contribution is being provided by Qualcomm Technologies, Inc., + * a Delaware corporation, or its subsidiary Qualcomm Innovation Center, Inc., + * a California corporation, under certain additional terms and conditions + * pursuant to Section 5 of the Apache 2.0 license. In this regard, with + * respect to this Contribution, the term "Work" in Section 1 of the + * Apache 2.0 license means only the specific subdirectory within the TVM repo + * (currently at https://github.com/dmlc/tvm) to which this Contribution is + * made. + * In any case, this submission is "Not a Contribution" with respect to its + * permitted use with any of the "vta" and "verilog" subdirectories in the TVM + * repo. + * Qualcomm Technologies, Inc. and Qualcomm Innovation Center, Inc. retain + * copyright of their respective Contributions. 
+ */ +#ifdef __ANDROID__ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "../hexagon_module.h" +#include "fastrpc/tvm_hexagon_remote.h" +#include "hexagon_device_log.h" +#include "hexagon_dsprpcapi.h" +#include "hexagon_stubapi.h" + +#define RPCMEM_HEAP 25 + +// All log messages start with "HexagonTarget::%s", where %s is replaced +// with the function name, so create macros that add that to avoid repetition. +// The downside is that the format string must be given as a string literal, +// but it seems to be a minor issue. +#define VA_EXPANDER(...) , ##__VA_ARGS__ +#define TVM_LOGD_HT(fmt, ...) \ + TVM_LOGD("HexagonTarget::%s: " fmt, __func__ VA_EXPANDER(__VA_ARGS__)) +#define TVM_LOGE_HT(fmt, ...) \ + TVM_LOGE("HexagonTarget::%s: " fmt, __func__ VA_EXPANDER(__VA_ARGS__)) + +namespace tvm { +namespace runtime { +namespace hexagon { + +class HexagonTarget : public tvm::runtime::hexagon::Device { + public: + HexagonTarget() {} + ~HexagonTarget() final {} + void* Alloc(unsigned size, unsigned align) final; + void Free(void* ptr) final; + void CopyDeviceToDevice(void* dst, const void* src, unsigned len) final; + void CopyDeviceToHost(void* host_dst, const void* src, unsigned len) final; + void CopyHostToDevice(void* dst, const void* host_src, unsigned len) final; + void* Load(const std::string& data, const std::string& fmt) final; + void Unload(void* mod) final; + void* Resolve(const std::string& sym) final; + void Call(void* func, uint32_t* scalar, unsigned sc_num, uint32_t* stack, + unsigned st_num) final; + + private: + std::pair AddAddrMapping(const void* dsp_addr, + void* apps_addr, size_t size); + std::pair GetAppsAddr(const void* dsp_addr, bool exact) const; + void RemoveAddrMapping(const void* dsp_addr); + + int OpenDomainChannel(bool set_unsigned_pd); + int CloseDomainChannel(); + void ReleaseLibrary(); + + // Mapping from a DSP address to a pair . 
+ // Using void* pointers is ok, since DSP pointers will always fit + // in apps's pointers, i.e. sizeof_dsp(void*) <= sizeof_apps(void*). + std::map> dsp_to_apps_; + remote_handle64 domain_channel_handle_ = AEE_EUNKNOWN; + tvm_hexagon_remote_handle_t module_pointer_ = AEE_EUNKNOWN; + uint64_t count_channel_open_ = 0; + // Global lock, used for all critical sections. This can be refined + // in the future. + mutable std::mutex crit_section_; +}; + +std::unique_ptr CreateHexagonTarget() { + // C++11 does not have std::make_unique. + return std::unique_ptr(new HexagonTarget()); +} + +std::pair HexagonTarget::AddAddrMapping(const void* dsp_addr, + void* apps_addr, + size_t size) { + crit_section_.lock(); + auto p = dsp_to_apps_.insert({dsp_addr, {apps_addr, size}}); + crit_section_.unlock(); + if (!p.second) { + TVM_LOGE_HT( + "failed to insert address mapping: dsp:%p -> apps:%p, size:%zu", + dsp_addr, apps_addr, size); + return std::make_pair(nullptr, 0); + } + TVM_LOGD_HT("added address mapping: dsp:%p -> apps:%p, size:%zu", dsp_addr, + apps_addr, size); + return p.first->second; +} + +void HexagonTarget::RemoveAddrMapping(const void* dsp_addr) { + crit_section_.lock(); + auto f = dsp_to_apps_.find(dsp_addr); + bool error = f == dsp_to_apps_.end(); + dsp_to_apps_.erase(f); + crit_section_.unlock(); + + if (error) { + TVM_LOGE_HT("failed to remove address mapping for dsp:%p", dsp_addr); + } +} + +std::pair HexagonTarget::GetAppsAddr(const void* dsp_addr, + bool exact) const { + struct AutoUnlock { + explicit AutoUnlock(std::mutex& m) : m(m) {} + ~AutoUnlock() { m.unlock(); } + std::mutex& m; + }; + + crit_section_.lock(); + AutoUnlock u(crit_section_); + + // If the address is in the map, simply return the result. + auto f = dsp_to_apps_.find(dsp_addr); + if (f != dsp_to_apps_.end()) return f->second; + // If exact mapping is requested, then it hasn't been found. 
+ if (exact) return std::make_pair(nullptr, 0); + + // If the address is not in the map, maybe it points to somewhere in the + // interior of a mapped buffer. + uintptr_t dsp_v = reinterpret_cast(dsp_addr); + for (const auto& v : dsp_to_apps_) { + uintptr_t dsp_k = reinterpret_cast(v.first); + size_t size = v.second.second; + if (dsp_v >= dsp_k && dsp_v < dsp_k + size) { + uintptr_t apps_k = reinterpret_cast(v.second.first); + size_t offset = dsp_v - dsp_k; + uintptr_t apps_v = apps_k + offset; + return std::make_pair(reinterpret_cast(apps_v), size - offset); + } + } + TVM_LOGE_HT("failed to locate apps address for dsp:%p", dsp_addr); + return std::make_pair(nullptr, 0); +} + +int HexagonTarget::OpenDomainChannel(bool use_unsigned_pd) { + if (domain_channel_handle_ != AEE_EUNKNOWN) return AEE_SUCCESS; + + const DspRpcAPI* dsp_api = DspRpcAPI::Global(); + if (auto* init_p = dsp_api->rpcmem_init_ptr(true)) init_p(); + + if (use_unsigned_pd) { + if (auto* rsc_ptr = dsp_api->remote_session_control_ptr(true)) { + remote_rpc_control_unsigned_module data; + data.enable = 1; + data.domain = CDSP_DOMAIN_ID; + int rc = rsc_ptr(DSPRPC_CONTROL_UNSIGNED_MODULE, &data, sizeof(data)); + if (rc != AEE_SUCCESS) { + TVM_LOGD_HT("remote_session_control failed rc=0x%x for unsigned PD", + rc); + } + } else { + TVM_LOGD_HT("no unsigned PD support available"); + } + } + + const StubAPI* stub_api = StubAPI::Global(); + int rc = stub_api->tvm_hexagon_remote_open( + tvm_hexagon_remote_URI "&_dom=cdsp", &domain_channel_handle_); + if (rc != AEE_SUCCESS) { + TVM_LOGE_HT("failed to open channel rc=0x%x", rc); + } else { + count_channel_open_++; + TVM_LOGD_HT("channel open success and rpcmem_init done"); + } + return rc; +} + +int HexagonTarget::CloseDomainChannel() { + if (domain_channel_handle_ == AEE_EUNKNOWN) return AEE_SUCCESS; + + const DspRpcAPI* dsp_api = DspRpcAPI::Global(); + const StubAPI* stub_api = StubAPI::Global(); + + int rc = 
stub_api->tvm_hexagon_remote_close(domain_channel_handle_); + if (rc == AEE_SUCCESS) { + domain_channel_handle_ = AEE_EUNKNOWN; + if (auto* deinit_p = dsp_api->rpcmem_deinit_ptr(true)) deinit_p(); + TVM_LOGD_HT("channel close success and rpcmem_deinit done"); + } else { + TVM_LOGE_HT("failed to close domain channel rc=0x%x", rc); + } + return rc; +} + +void HexagonTarget::ReleaseLibrary() { + crit_section_.lock(); + if (module_pointer_ != AEE_EUNKNOWN) { + const StubAPI* stub_api = StubAPI::Global(); + int rc = stub_api->tvm_hexagon_remote_release_library( + domain_channel_handle_, module_pointer_); + if (rc != AEE_SUCCESS) { + TVM_LOGE_HT("failed to unload device library rc=0x%x", rc); + } else { + module_pointer_ = AEE_EUNKNOWN; + } + } + crit_section_.unlock(); +} + +// Allocates an rpcmem buffer of `size` bytes, maps it into the DSP address +// space and records the dsp->apps mapping. Returns the DSP-side address, or +// nullptr if the channel cannot be opened, allocation fails, or mapping fails. +// `align` is only used in the failure log message. +void* HexagonTarget::Alloc(unsigned size, unsigned align) { + const DspRpcAPI* dsp_api = DspRpcAPI::Global(); + + // Opening the domain channel should be done once. + crit_section_.lock(); + int rc_oc = OpenDomainChannel(/*use_unsigned_pd*/ true); + crit_section_.unlock(); + if (rc_oc != AEE_SUCCESS) { + TVM_LOGE_HT("mem alloc failed: unable to open domain channel"); + return nullptr; + } + + void* mem = + dsp_api->rpcmem_alloc_ptr()(RPCMEM_HEAP, RPCMEM_DEFAULT_FLAGS, size); + if (mem == nullptr) { + TVM_LOGE_HT("mem alloc failed for size=0x%x alignment=0x%x", size, align); + return nullptr; + } + int mem_fd = dsp_api->rpcmem_to_fd_ptr()(mem); + uintptr_t dsp_va = 0; + int rc = dsp_api->remote_mmap64_ptr()( + mem_fd, 0, reinterpret_cast(mem), size, &dsp_va); + if (rc != AEE_SUCCESS) { + TVM_LOGE_HT("buffer mapping failed for remote_map64 fd=0x%x rc=0x%x", + mem_fd, rc); + // The host buffer was never registered in dsp_to_apps_, so Free() could + // not reclaim it later; release it here to avoid leaking it. + dsp_api->rpcmem_free_ptr()(mem); + return nullptr; + } + + void* dsp_addr = reinterpret_cast(dsp_va); + AddAddrMapping(dsp_addr, mem, size); + return dsp_addr; +} + +void HexagonTarget::Free(void* ptr) { + const DspRpcAPI* dsp_api = DspRpcAPI::Global(); + + auto aa = GetAppsAddr(ptr, true); + if (aa.first == nullptr) return; + + int rc =
dsp_api->remote_munmap64_ptr()(reinterpret_cast(ptr), + aa.second); + if (rc != AEE_SUCCESS) { + TVM_LOGE_HT("buffer unmapping failed rc=0x%x", rc); + } + RemoveAddrMapping(ptr); + dsp_api->rpcmem_free_ptr()(aa.first); +} + +// Copies `len` bytes between two DSP-side addresses. Both addresses are +// translated to their apps-side mappings (interior pointers are accepted) and +// the copy is done through host memory. The copy is truncated to the smaller +// of the two remaining buffer sizes; nothing is copied if either address is +// not mapped. +void HexagonTarget::CopyDeviceToDevice(void* dst, const void* src, + unsigned len) { + auto aa_src = GetAppsAddr(src, false); + auto aa_dst = GetAppsAddr(dst, false); + if (!aa_src.first || !aa_dst.first) { + TVM_LOGE_HT("copy failed, dsp:%p -> dsp:%p, len:%u", src, dst, len); + return; + } + if (aa_src.second < len || aa_dst.second < len) { + size_t avail = + aa_src.second < aa_dst.second ? aa_src.second : aa_dst.second; + TVM_LOGE_HT( + "specified length:%u larger than buffer size:%zu, copy truncated", len, + avail); + len = avail; + } + TVM_LOGD_HT("copy, dsp:%p(apps:%p) -> dsp:%p(apps:%p), len:%u", src, + aa_src.first, dst, aa_dst.first, len); + // Source and destination may lie in the same buffer, so allow overlap. + std::memmove(aa_dst.first, aa_src.first, len); +} + +void HexagonTarget::CopyDeviceToHost(void* host_dst, const void* src, + unsigned len) { + auto aa = GetAppsAddr(src, false); + if (!aa.first) { + TVM_LOGE_HT("copy failed, dsp:%p -> apps:%p, len:%u", src, host_dst, len); + return; + } + if (aa.second < len) { + TVM_LOGE_HT( + "specified length:%u larger than buffer size:%zu, copy truncated", len, + aa.second); + len = aa.second; + } + TVM_LOGD_HT("copy, dsp:%p(apps:%p) -> apps:%p, len:%u", src, aa.first, + host_dst, len); + std::memcpy(host_dst, aa.first, len); +} + +void HexagonTarget::CopyHostToDevice(void* dst, const void* host_src, + unsigned len) { + auto aa = GetAppsAddr(dst, false); + if (!aa.first) { + TVM_LOGE_HT("copy failed, dsp:%p <- apps:%p, len:%u", dst, host_src, len); + return; + } + if (aa.second < len) { + TVM_LOGE_HT( + "specified length:%u larger than buffer size:%zu, copy truncated", len, + aa.second); + len = aa.second; + } + TVM_LOGD_HT("copy, dsp:%p(apps:%p) <- apps:%p, len:%u", dst, aa.first, + host_src, len); + std::memcpy(aa.first, host_src, len); +} + +void* HexagonTarget::Load(const std::string& data, const std::string& fmt) { + crit_section_.lock(); + int rc_oc = OpenDomainChannel(/*use_unsigned_pd*/ true); + crit_section_.unlock(); + if (rc_oc != AEE_SUCCESS) { + TVM_LOGE_HT("loading of %s failed: unable to open domain channel", + data.c_str()); + return nullptr; + } + + if (domain_channel_handle_ == AEE_EUNKNOWN) return nullptr; + ReleaseLibrary(); + + crit_section_.lock(); + TVM_LOGD_HT("loading library %s ", data.c_str()); + const StubAPI* stub_api = StubAPI::Global(); + int rc = stub_api->tvm_hexagon_remote_load_library( + domain_channel_handle_, data.c_str(), data.size()
+ 1, data.c_str(), + data.size() + 1, &module_pointer_); + if (rc != AEE_SUCCESS) { + TVM_LOGE_HT("failed to load device library rc=0x%x", rc); + } + crit_section_.unlock(); + + if (module_pointer_ != AEE_EUNKNOWN) { + return reinterpret_cast(module_pointer_); + } else { + return nullptr; + } +} + +void HexagonTarget::Unload(void* mod) { + ReleaseLibrary(); + if (module_pointer_ != AEE_EUNKNOWN) return; + + crit_section_.lock(); + count_channel_open_--; + if (count_channel_open_ == 0) CloseDomainChannel(); + crit_section_.unlock(); +} + +void* HexagonTarget::Resolve(const std::string& sym) { + const StubAPI* stub_api = StubAPI::Global(); + + tvm_hexagon_remote_handle_t pf; + TVM_LOGD_HT("resolving symbol %s", sym.c_str()); + int rc = stub_api->tvm_hexagon_remote_get_symbol( + domain_channel_handle_, module_pointer_, sym.c_str(), sym.size() + 1, + &pf); + if (rc != AEE_SUCCESS) { + TVM_LOGE_HT("failed to get symbol from CDSP rc=0x%x", rc); + return nullptr; + } + void* addr = reinterpret_cast(pf); + TVM_LOGD_HT("resolved %s -> %p", sym.c_str(), addr); + return addr; +} + +void HexagonTarget::Call(void* func, uint32_t* scalar, unsigned sc_num, + uint32_t* stack, unsigned st_num) { + // XXX: Workaround to fix FastRPC reporting BADPARAM + unsigned scalar_num = std::max(sc_num, 1u); + unsigned stack_num = std::max(st_num, 1u); + + uint64 pcycles = 0, execution_time_usec = 0; + auto scalar_octet = std::unique_ptr( + new tvm_hexagon_remote_buffer[scalar_num]); + auto stack_octet = std::unique_ptr( + new tvm_hexagon_remote_buffer[stack_num]); + TVM_LOGD_HT("scalars=%p, stack=%p", scalar, stack); + + if (scalar_octet == nullptr || stack_octet == nullptr) { + TVM_LOGE_HT("mem alloc failed for scalar/stack octets"); + return; + } + std::memset(scalar_octet.get(), 0, + scalar_num * sizeof(tvm_hexagon_remote_buffer)); + std::memset(stack_octet.get(), 0, + stack_num * sizeof(tvm_hexagon_remote_buffer)); + + auto ProcessInputs = [this](uint32_t* inputs, + 
tvm_hexagon_remote_buffer* buffers, + unsigned num) { + for (unsigned i = 0; i != num; ++i) { + void* ptr = reinterpret_cast(static_cast(inputs[i])); + auto aa = GetAppsAddr(ptr, false); + if (aa.first) { + buffers[i].data = static_cast(aa.first); + buffers[i].dataLen = aa.second; + } + } + }; + + ProcessInputs(scalar, scalar_octet.get(), sc_num); + ProcessInputs(stack, stack_octet.get(), st_num); + + auto ToString = [](const char* title, uint32_t* data, unsigned num) { + std::ostringstream log; + log << " " << title << ':' << num << " {" << std::hex; + for (unsigned i = 0; i != num; ++i) log << ' ' << data[i]; + log << " }"; + return log.str(); + }; + + TVM_LOGD_HT("%s", ToString("scalars", scalar, sc_num).c_str()); + TVM_LOGD_HT("%s", ToString(" stack", stack, st_num).c_str()); + + const StubAPI* stub_api = StubAPI::Global(); + int rc = stub_api->tvm_hexagon_remote_kernel( + domain_channel_handle_, module_pointer_, + static_cast( + reinterpret_cast(func)), + reinterpret_cast(scalar), sc_num, reinterpret_cast(stack), + st_num, scalar_octet.get(), scalar_num, scalar_octet.get(), scalar_num, + stack_octet.get(), stack_num, stack_octet.get(), stack_num, &pcycles, + &execution_time_usec); + + if (rc != AEE_SUCCESS) { + TVM_LOGE_HT("failed to run kernel on CDSP rc=0x%x", rc); + } else { + TVM_LOGD_HT("kernel execution: %llu pcycles, %llu usec, sc_num=%d", + pcycles, execution_time_usec, sc_num); + } +} + +} // namespace hexagon +} // namespace runtime +} // namespace tvm + +#endif // #ifdef __ANDROID__ diff --git a/src/runtime/hexagon/device/hexagon_dsprpcapi.cc b/src/runtime/hexagon/device/hexagon_dsprpcapi.cc new file mode 100644 index 000000000000..b633ebc19661 --- /dev/null +++ b/src/runtime/hexagon/device/hexagon_dsprpcapi.cc @@ -0,0 +1,113 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * This Contribution is being provided by Qualcomm Technologies, Inc., + * a Delaware corporation, or its subsidiary Qualcomm Innovation Center, Inc., + * a California corporation, under certain additional terms and conditions + * pursuant to Section 5 of the Apache 2.0 license. In this regard, with + * respect to this Contribution, the term "Work" in Section 1 of the + * Apache 2.0 license means only the specific subdirectory within the TVM repo + * (currently at https://github.com/dmlc/tvm) to which this Contribution is + * made. + * In any case, this submission is "Not a Contribution" with respect to its + * permitted use with any of the "vta" and "verilog" subdirectories in the TVM + * repo. + * Qualcomm Technologies, Inc. and Qualcomm Innovation Center, Inc. retain + * copyright of their respective Contributions. 
+ */ +#ifdef __ANDROID__ +#include "hexagon_dsprpcapi.h" + +#include +#include +#include + +#include "hexagon_device_log.h" + +namespace tvm { +namespace runtime { + +namespace hexagon { + +DspRpcAPI::DspRpcAPI() { + CHECK(lib_handle_ = dlopen(rpc_lib_name_, RTLD_LAZY | RTLD_LOCAL)); + +#define RESOLVE(n) n##_ = GetSymbol(#n) + RESOLVE(remote_handle_close); + RESOLVE(remote_handle_control); + RESOLVE(remote_handle_invoke); + RESOLVE(remote_handle_open); + RESOLVE(remote_mmap); + RESOLVE(remote_munmap); + + RESOLVE(remote_handle64_close); + RESOLVE(remote_handle64_control); + RESOLVE(remote_handle64_invoke); + RESOLVE(remote_handle64_open); + RESOLVE(remote_mmap64); + RESOLVE(remote_munmap64); + + RESOLVE(remote_register_buf); + RESOLVE(remote_register_buf_attr); + RESOLVE(remote_register_dma_handle); + RESOLVE(remote_register_dma_handle_attr); + RESOLVE(remote_register_fd); + + RESOLVE(remote_session_control); + RESOLVE(remote_set_mode); + + RESOLVE(rpcmem_init); + RESOLVE(rpcmem_deinit); + RESOLVE(rpcmem_alloc); + RESOLVE(rpcmem_free); + RESOLVE(rpcmem_to_fd); +#undef RESOLVE +} + +DspRpcAPI::~DspRpcAPI() { + if (lib_handle_) dlclose(lib_handle_); +} + +template +T DspRpcAPI::GetSymbol(const char* sym) { + if (!lib_handle_) { + TVM_LOGE("error looking up symbol \"%s\": library not loaded", sym); + return nullptr; + } + dlerror(); // Clear any previous error conditions. + if (T ret = reinterpret_cast(dlsym(lib_handle_, sym))) return ret; + + const char* err = dlerror(); + const char* err_txt = err ?
err : "symbol not found"; + TVM_LOGD("error looking up symbol \"%s\": %s", sym, err_txt); + return nullptr; +} + +const DspRpcAPI* DspRpcAPI::Global() { + static const DspRpcAPI dsp_api; + return &dsp_api; +} + +} // namespace hexagon + +} // namespace runtime +} // namespace tvm + +#endif // __ANDROID__ diff --git a/src/runtime/hexagon/device/hexagon_dsprpcapi.h b/src/runtime/hexagon/device/hexagon_dsprpcapi.h new file mode 100644 index 000000000000..11f3c03e8fec --- /dev/null +++ b/src/runtime/hexagon/device/hexagon_dsprpcapi.h @@ -0,0 +1,206 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * This Contribution is being provided by Qualcomm Technologies, Inc., + * a Delaware corporation, or its subsidiary Qualcomm Innovation Center, Inc., + * a California corporation, under certain additional terms and conditions + * pursuant to Section 5 of the Apache 2.0 license. In this regard, with + * respect to this Contribution, the term "Work" in Section 1 of the + * Apache 2.0 license means only the specific subdirectory within the TVM repo + * (currently at https://github.com/dmlc/tvm) to which this Contribution is + * made. 
+ * In any case, this submission is "Not a Contribution" with respect to its + * permitted use with any of the "vta" and "verilog" subdirectories in the TVM + * repo. + * Qualcomm Technologies, Inc. and Qualcomm Innovation Center, Inc. retain + * copyright of their respective Contributions. + */ +#ifndef TVM_RUNTIME_HEXAGON_DEVICE_HEXAGON_DSPRPCAPI_H_ +#define TVM_RUNTIME_HEXAGON_DEVICE_HEXAGON_DSPRPCAPI_H_ + +#ifdef __ANDROID__ +#include +#include +#include +#include +#include + +namespace tvm { +namespace runtime { + +namespace hexagon { + +/*! + * Encapsulation of the API of lib(a|c)dsprpc.so (loaded via dlopen), allowing + * for having versions of the library that do not implement all of the + * functions. + * + * Functions defined in the DSP RPC library: + * remote_handle_close + * remote_handle_control + * remote_handle_invoke + * remote_handle_open + * remote_mmap + * remote_munmap + * + * remote_handle64_close + * remote_handle64_control + * remote_handle64_invoke + * remote_handle64_open + * remote_mmap64 + * remote_munmap64 + * + * remote_register_buf + * remote_register_buf_attr + * remote_register_dma_handle + * remote_register_dma_handle_attr + * remote_register_fd + * + * remote_session_control + * remote_set_mode + * + * rpcmem_init + * rpcmem_deinit + * rpcmem_alloc + * rpcmem_free + * rpcmem_to_fd + */ +class DspRpcAPI { + public: + DspRpcAPI(); + ~DspRpcAPI(); + + using remote_handle = ::remote_handle; + using remote_handle64 = ::remote_handle64; + +#define DECLTYPE(ty) using ty##_t = decltype(::ty); + DECLTYPE(remote_handle_close) + DECLTYPE(remote_handle_control) + DECLTYPE(remote_handle_invoke) + DECLTYPE(remote_handle_open) + DECLTYPE(remote_mmap) + DECLTYPE(remote_munmap) + + DECLTYPE(remote_handle64_close) + DECLTYPE(remote_handle64_control) + DECLTYPE(remote_handle64_invoke) + DECLTYPE(remote_handle64_open) + DECLTYPE(remote_mmap64) + DECLTYPE(remote_munmap64) + + DECLTYPE(remote_register_buf) + DECLTYPE(remote_register_buf_attr) + 
DECLTYPE(remote_register_dma_handle) + DECLTYPE(remote_register_dma_handle_attr) + DECLTYPE(remote_register_fd) + + DECLTYPE(remote_session_control) + DECLTYPE(remote_set_mode) + + DECLTYPE(rpcmem_init) + DECLTYPE(rpcmem_deinit) + DECLTYPE(rpcmem_alloc) + DECLTYPE(rpcmem_free) + DECLTYPE(rpcmem_to_fd) +#undef DECLTYPE + +#define DECLFUNC(fn) \ + fn##_t* fn##_ptr(bool allow_nullptr = false) const { \ + if (!allow_nullptr) CHECK(fn##_ != nullptr); \ + return fn##_; \ + } + DECLFUNC(remote_handle_close) + DECLFUNC(remote_handle_control) + DECLFUNC(remote_handle_invoke) + DECLFUNC(remote_handle_open) + DECLFUNC(remote_mmap) + DECLFUNC(remote_munmap) + + DECLFUNC(remote_handle64_close) + DECLFUNC(remote_handle64_control) + DECLFUNC(remote_handle64_invoke) + DECLFUNC(remote_handle64_open) + DECLFUNC(remote_mmap64) + DECLFUNC(remote_munmap64) + + DECLFUNC(remote_register_buf) + DECLFUNC(remote_register_buf_attr) + DECLFUNC(remote_register_dma_handle) + DECLFUNC(remote_register_dma_handle_attr) + DECLFUNC(remote_register_fd) + + DECLFUNC(remote_session_control) + DECLFUNC(remote_set_mode) + + DECLFUNC(rpcmem_init) + DECLFUNC(rpcmem_deinit) + DECLFUNC(rpcmem_alloc) + DECLFUNC(rpcmem_free) + DECLFUNC(rpcmem_to_fd) +#undef DECLFUNC + + static const DspRpcAPI* Global(); + + private: + static constexpr const char* rpc_lib_name_ = "libadsprpc.so"; + void* lib_handle_ = nullptr; + +#define DECLPTR(p) p##_t* p##_ = nullptr; + DECLPTR(remote_handle_close) + DECLPTR(remote_handle_control) + DECLPTR(remote_handle_invoke) + DECLPTR(remote_handle_open) + DECLPTR(remote_mmap) + DECLPTR(remote_munmap) + + DECLPTR(remote_handle64_close) + DECLPTR(remote_handle64_control) + DECLPTR(remote_handle64_invoke) + DECLPTR(remote_handle64_open) + DECLPTR(remote_mmap64) + DECLPTR(remote_munmap64) + + DECLPTR(remote_register_buf) + DECLPTR(remote_register_buf_attr) + DECLPTR(remote_register_dma_handle) + DECLPTR(remote_register_dma_handle_attr) + DECLPTR(remote_register_fd) + + 
DECLPTR(remote_session_control) + DECLPTR(remote_set_mode) + + DECLPTR(rpcmem_init) + DECLPTR(rpcmem_deinit) + DECLPTR(rpcmem_alloc) + DECLPTR(rpcmem_free) + DECLPTR(rpcmem_to_fd) +#undef DECLPTR + + template + T GetSymbol(const char* sym); +}; + +} // namespace hexagon + +} // namespace runtime +} // namespace tvm + +#endif // __ANDROID__ +#endif // TVM_RUNTIME_HEXAGON_DEVICE_HEXAGON_DSPRPCAPI_H_ diff --git a/src/runtime/hexagon/device/hexagon_stubapi.cc b/src/runtime/hexagon/device/hexagon_stubapi.cc new file mode 100644 index 000000000000..166680c4f1aa --- /dev/null +++ b/src/runtime/hexagon/device/hexagon_stubapi.cc @@ -0,0 +1,112 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * This Contribution is being provided by Qualcomm Technologies, Inc., + * a Delaware corporation, or its subsidiary Qualcomm Innovation Center, Inc., + * a California corporation, under certain additional terms and conditions + * pursuant to Section 5 of the Apache 2.0 license. 
In this regard, with + * respect to this Contribution, the term "Work" in Section 1 of the + * Apache 2.0 license means only the specific subdirectory within the TVM repo + * (currently at https://github.com/dmlc/tvm) to which this Contribution is + * made. + * In any case, this submission is "Not a Contribution" with respect to its + * permitted use with any of the "vta" and "verilog" subdirectories in the TVM + * repo. + * Qualcomm Technologies, Inc. and Qualcomm Innovation Center, Inc. retain + * copyright of their respective Contributions. + */ +#ifdef __ANDROID__ +#include "hexagon_stubapi.h" + +#include +#include +#include +#include + +#include "hexagon_device_log.h" + +namespace tvm { +namespace runtime { +namespace hexagon { + +StubAPI::StubAPI() { + struct stat sb; + if (!stat("/dev/subsys_cdsp", &sb)) { + enable_domains_ = true; + TVM_LOGD("CDSP subsystem present"); + } else if (!stat("/dev/subsys_adsp", &sb)) { + enable_domains_ = false; + TVM_LOGD("ADSP subsystem present"); + } + + constexpr auto domain_lib_name = "libtvm_hexagon_remote_stub.so"; + constexpr auto nondomain_lib_name = "libtvm_hexagon_remote_nd_stub.so"; + + const char* lib_name = + enable_domains_ ? 
domain_lib_name : nondomain_lib_name; + CHECK(lib_handle_ = dlopen(lib_name, RTLD_LAZY | RTLD_LOCAL)); + +#define RESOLVE(fn) p##fn##_ = GetSymbol(#fn) + if (enable_domains_) { + RESOLVE(tvm_hexagon_remote_load_library); + RESOLVE(tvm_hexagon_remote_release_library); + RESOLVE(tvm_hexagon_remote_get_symbol); + RESOLVE(tvm_hexagon_remote_kernel); + RESOLVE(tvm_hexagon_remote_open); + RESOLVE(tvm_hexagon_remote_close); + } else { + RESOLVE(tvm_hexagon_remote_nd_load_library); + RESOLVE(tvm_hexagon_remote_nd_release_library); + RESOLVE(tvm_hexagon_remote_nd_get_symbol); + RESOLVE(tvm_hexagon_remote_nd_kernel); + RESOLVE(tvm_hexagon_remote_nd_open); + } +#undef RESOLVE +} + +StubAPI::~StubAPI() { + if (lib_handle_) dlclose(lib_handle_); +} + +template +T StubAPI::GetSymbol(const char* sym) { + if (!lib_handle_) { + TVM_LOGE("error looking up symbol \"%s\": library not loaded", sym); + return nullptr; + } + dlerror(); // Clear any previous errror conditions. + if (T ret = reinterpret_cast(dlsym(lib_handle_, sym))) return ret; + + const char* err = dlerror(); + const char* err_txt = err ? err : "symbol not found"; + TVM_LOGE("error looking up symbol \"%s\": %s", sym, err_txt); + return nullptr; +} + +const StubAPI* StubAPI::Global() { + static const StubAPI stub_api; + return &stub_api; +} + +} // namespace hexagon +} // namespace runtime +} // namespace tvm + +#endif // __ANDROID__ diff --git a/src/runtime/hexagon/device/hexagon_stubapi.h b/src/runtime/hexagon/device/hexagon_stubapi.h new file mode 100644 index 000000000000..f0867aced9c5 --- /dev/null +++ b/src/runtime/hexagon/device/hexagon_stubapi.h @@ -0,0 +1,278 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * This Contribution is being provided by Qualcomm Technologies, Inc., + * a Delaware corporation, or its subsidiary Qualcomm Innovation Center, Inc., + * a California corporation, under certain additional terms and conditions + * pursuant to Section 5 of the Apache 2.0 license. In this regard, with + * respect to this Contribution, the term "Work" in Section 1 of the + * Apache 2.0 license means only the specific subdirectory within the TVM repo + * (currently at https://github.com/dmlc/tvm) to which this Contribution is + * made. + * In any case, this submission is "Not a Contribution" with respect to its + * permitted use with any of the "vta" and "verilog" subdirectories in the TVM + * repo. + * Qualcomm Technologies, Inc. and Qualcomm Innovation Center, Inc. retain + * copyright of their respective Contributions. + */ +#ifndef TVM_RUNTIME_HEXAGON_DEVICE_HEXAGON_STUBAPI_H_ +#define TVM_RUNTIME_HEXAGON_DEVICE_HEXAGON_STUBAPI_H_ + +#ifdef __ANDROID__ +#include +#include +#include + +#include + +#include "fastrpc/tvm_hexagon_remote.h" +#include "fastrpc/tvm_hexagon_remote_nd.h" + +namespace tvm { +namespace runtime { + +namespace hexagon { + +/*! + * Unify the handling of domain and non-domain functions. + * + * In most cases, for a function "foo", the domain version will be called + * "tvm_hexagon_remote_foo", and the non-domain version will have "nd_foo". 
+ * The interfaces will be the same, except: + * - the domain version will take "remote_handle64" as the first parameter, + * while the non-domain version will not: + * int tvm_hexagon_remote_foo (remote_handle64 h, param1, param2, ...); + * int tvm_hexagon_remote_nd_foo (param1, param2, ...); + * - any parameter of type "buffer" in the IDL, will be converted into a + * type "tvm_hexagon_remote_buffer" for domain functions, and into + * "tvm_hexagon_remote_nd_buffer" for non-domain functions. These two + * types are identical, but since they are declared in two different IDLs, + * they get different names. + * + * For any function, only a pointer to the "buffer" type is passed, but + * since the pointee types are different, this is enough to create a + * difference in the function signatures even if the "remote_handle64" + * parameter is ignored. For this reason, in all function types, the + * types "tvm_hexagon_remote_buffer *" and "tvm_hexagon_remote_nd_buffer *", + * both const and non-const, are replaced with "void *", with the + * corresponding const-qualification. This is done by the templates + * "replace_pointee_type" and "map_tuple_element" below. + * + * The following functions are subject to the uniform handling: + * + * tvm_hexagon_remote_load_library (remote_handle64 h, p1, p2, ...) + * tvm_hexagon_remote_release_library + * tvm_hexagon_remote_get_symbol + * tvm_hexagon_remote_kernel + * tvm_hexagon_remote_close + * + * tvm_hexagon_remote_nd_load_library (p1, p2, ...) + * tvm_hexagon_remote_nd_release_library + * tvm_hexagon_remote_nd_get_symbol + * tvm_hexagon_remote_nd_kernel + * tvm_hexagon_remote_nd_close + * + * The "open" functions differ in their parameters in different ways, and + * need to be handled individually. + * + * tvm_hexagon_remote_open + * tvm_hexagon_remote_nd_open + */ + +namespace { +/*! + * replace_pointee_type + * + * If T is a pointer to a potentially const-qualified M, then replace + * M in T with V. 
Otherwise, leave T unchanged. + */ +template +struct replace_pointee_type { + using type = T; +}; + +template +struct replace_pointee_type { + using type = V*; +}; + +template +struct replace_pointee_type { + using type = const V*; +}; + +/*! + * map_tuple_elements> + * + * From given tuple , form another tuple where for each A in As, + * if A contains a pointer to M, the pointer is replaced with a pointer + * to V, leaving other types unchanged. + */ +template +struct map_tuple_elements; + +template +struct map_tuple_elements> { + using type = std::tuple::type...>; +}; + +/*! + * map_func_type + * + * Given function type F = R(As...), form another function type by replacing + * each pointer to M with a pointer to V. + */ +template +struct map_func_type { + template + struct func_to_tuple; + template + struct func_to_tuple { + using args = std::tuple; + using ret = R; + }; + + template + struct tuple_to_func; + template + struct tuple_to_func> { + using func = R(As...); + }; + + using arg_tuple = typename func_to_tuple::args; + using ret_type = typename func_to_tuple::ret; + using mapped_args = typename map_tuple_elements::type; + using type = typename tuple_to_func::func; +}; +} // namespace + +class StubAPI { + public: + StubAPI(); + ~StubAPI(); + + private: + // Create types for each remote function. For functions that take + // a pointer to tvm_hexagon_remote_buffer or tvm_hexagon_remote_nd_buffer, + // replace that pointer with pointer to void to make pointers to these + // two types identical in the function types created below. + // For example, int foo(tvm_hexagon_remote_buffer*) and + // int bar(tvm_hexagon_remote_nd_buffer*) should both have the same type. 
+#define MAPTYPE(fn, ty) \ + using fn##_t = typename map_func_type::type; + MAPTYPE(tvm_hexagon_remote_load_library, tvm_hexagon_remote_buffer) + MAPTYPE(tvm_hexagon_remote_release_library, tvm_hexagon_remote_buffer) + MAPTYPE(tvm_hexagon_remote_get_symbol, tvm_hexagon_remote_buffer) + MAPTYPE(tvm_hexagon_remote_kernel, tvm_hexagon_remote_buffer) + MAPTYPE(tvm_hexagon_remote_close, tvm_hexagon_remote_buffer) + + MAPTYPE(tvm_hexagon_remote_nd_load_library, tvm_hexagon_remote_nd_buffer) + MAPTYPE(tvm_hexagon_remote_nd_release_library, tvm_hexagon_remote_nd_buffer) + MAPTYPE(tvm_hexagon_remote_nd_get_symbol, tvm_hexagon_remote_nd_buffer) + MAPTYPE(tvm_hexagon_remote_nd_kernel, tvm_hexagon_remote_nd_buffer) + MAPTYPE(tvm_hexagon_remote_nd_close, tvm_hexagon_remote_buffer) +#undef MAPTYPE + + // For remote functions whose prototypes differ significantly between + // the domain and non-domain versions, create the types directly. +#define DECLTYPE(fn) using fn##_t = decltype(::fn); + DECLTYPE(tvm_hexagon_remote_open) + DECLTYPE(tvm_hexagon_remote_nd_open) +#undef DECLTYPE + + public: + template + int invoke(Fd func_d, Fnd func_nd, remote_handle64 handle, + Ts... args) const { + if (enable_domains_) { + return func_d(handle, args...); + } + return func_nd(args...); + } + +#define CONCAT_STR_FOR_REAL(a, b) a##b +#define CONCAT_STR(a, b) CONCAT_STR_FOR_REAL(a, b) + +#define FUNC(name) CONCAT_STR(tvm_hexagon_remote_, name) +#define FUNC_D(name) CONCAT_STR(tvm_hexagon_remote_, name) +#define FUNC_ND(name) CONCAT_STR(tvm_hexagon_remote_nd_, name) +#define PTRNAME(fn) CONCAT_STR(p, CONCAT_STR(fn, _)) + +#define DECLFUNC(name) \ + template \ + int FUNC(name)(remote_handle64 handle, Ts... 
args) const { \ + return invoke(PTRNAME(FUNC_D(name)), PTRNAME(FUNC_ND(name)), handle, \ + args...); \ + } + + DECLFUNC(load_library) + DECLFUNC(release_library) + DECLFUNC(get_symbol) + DECLFUNC(kernel) + DECLFUNC(close) +#undef DECLFUNC + + int tvm_hexagon_remote_open(const char* uri, remote_handle64* handle) const { + if (enable_domains_) { + return PTRNAME(tvm_hexagon_remote_open)(uri, handle); + } + return PTRNAME(tvm_hexagon_remote_nd_open)(); + } + + static const StubAPI* Global(); + + private: + bool enable_domains_ = true; + void* lib_handle_ = nullptr; + +#define DECLPTR(fn) fn##_t* PTRNAME(fn) = nullptr + DECLPTR(tvm_hexagon_remote_load_library); + DECLPTR(tvm_hexagon_remote_release_library); + DECLPTR(tvm_hexagon_remote_get_symbol); + DECLPTR(tvm_hexagon_remote_kernel); + DECLPTR(tvm_hexagon_remote_open); + DECLPTR(tvm_hexagon_remote_close); + + DECLPTR(tvm_hexagon_remote_nd_load_library); + DECLPTR(tvm_hexagon_remote_nd_release_library); + DECLPTR(tvm_hexagon_remote_nd_get_symbol); + DECLPTR(tvm_hexagon_remote_nd_kernel); + DECLPTR(tvm_hexagon_remote_nd_open); + DECLPTR(tvm_hexagon_remote_nd_close); +#undef DECLPTR + +#undef PTRNAME +#undef FUNC_ND +#undef FUNC_D +#undef FUNC +#undef CONCAT_STR +#undef CONCAT_STR_FOR_REAL + + template + T GetSymbol(const char* sym); +}; + +} // namespace hexagon + +} // namespace runtime +} // namespace tvm + +#endif // __ANDROID__ +#endif // TVM_RUNTIME_HEXAGON_DEVICE_HEXAGON_STUBAPI_H_ diff --git a/src/runtime/hexagon/hexagon_device_api.cc b/src/runtime/hexagon/hexagon_device_api.cc new file mode 100644 index 000000000000..77e1db72b535 --- /dev/null +++ b/src/runtime/hexagon/hexagon_device_api.cc @@ -0,0 +1,173 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * This Contribution is being provided by Qualcomm Technologies, Inc., + * a Delaware corporation, or its subsidiary Qualcomm Innovation Center, Inc., + * a California corporation, under certain additional terms and conditions + * pursuant to Section 5 of the Apache 2.0 license. In this regard, with + * respect to this Contribution, the term "Work" in Section 1 of the + * Apache 2.0 license means only the specific subdirectory within the TVM repo + * (currently at https://github.com/dmlc/tvm) to which this Contribution is + * made. + * In any case, this submission is "Not a Contribution" with respect to its + * permitted use with any of the "vta" and "verilog" subdirectories in the TVM + * repo. + * Qualcomm Technologies, Inc. and Qualcomm Innovation Center, Inc. retain + * copyright of their respective Contributions. 
+ */ +#include +#include +#include + +#include + +#include "hexagon_module.h" + +#ifdef __ANDROID__ +#include +#endif + +namespace tvm { +namespace runtime { + +class HexagonDeviceAPI : public DeviceAPI { + public: + void SetDevice(TVMContext ctx) final; + void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) final; + void* AllocDataSpace(TVMContext ctx, size_t nbytes, size_t alignment, + TVMType type_hint) final; + void FreeDataSpace(TVMContext ctx, void* ptr) final; + void CopyDataFromTo(const void* from, size_t from_offset, void* to, + size_t to_offset, size_t num_bytes, TVMContext ctx_from, + TVMContext ctx_to, TVMType type_hint, + TVMStreamHandle stream) final; + void StreamSync(TVMContext ctx, TVMStreamHandle stream) final; + + static const std::shared_ptr& Global() { + static std::shared_ptr inst = + std::make_shared(); + return inst; + } +}; + +// Debugging helpers. + +std::string DeviceAttrName(DeviceAttrKind kind) { + switch (kind) { + case kExist: + return "kExist"; + case kMaxThreadsPerBlock: + return "kMaxThreadsPerBlock"; + case kWarpSize: + return "kWarpSize"; + case kMaxSharedMemoryPerBlock: + return "kMaxSharedMemoryPerBlock"; + case kComputeVersion: + return "kComputeVersion"; + case kDeviceName: + return "kDeviceName"; + case kMaxClockRate: + return "kMaxClockRate"; + case kMultiProcessorCount: + return "kMultiProcessorCount"; + case kMaxThreadDimensions: + return "kMaxThreadDimensions"; + default: + break; + } + + std::stringstream ss; + ss << "Alloc(nbytes, alignment); +} + +inline void HexagonDeviceAPI::FreeDataSpace(TVMContext ctx, void* ptr) { + CHECK(hexagon::Device::ValidateDeviceId(ctx.device_id)); + hexagon::Device::Global()->Free(ptr); +} + +inline void HexagonDeviceAPI::CopyDataFromTo( + const void* from, size_t from_offset, void* to, size_t to_offset, + size_t num_bytes, TVMContext ctx_from, TVMContext ctx_to, + TVMType type_hint, TVMStreamHandle stream) { + const char* src = static_cast(from) + from_offset; + char* dst = 
static_cast(to) + to_offset; + + auto Is32bit = [](const void* p) { + return p == reinterpret_cast(uint32_t(uintptr_t(p))); + }; + (void)Is32bit; + + if (ctx_from.device_type == ctx_to.device_type) { + if (ctx_from.device_type == kDLCPU) { + memmove(dst, src, num_bytes); + } else if (static_cast(ctx_from.device_type) == kDLHexagon) { + CHECK(hexagon::Device::ValidateDeviceId(ctx_from.device_id)); + CHECK_EQ(ctx_from.device_id, ctx_to.device_id); + CHECK(Is32bit(dst) && Is32bit(src)); + hexagon::Device::Global()->CopyDeviceToDevice(dst, src, num_bytes); + } + } else { + if (ctx_from.device_type == kDLCPU) { + CHECK_EQ(static_cast(ctx_to.device_type), kDLHexagon); + CHECK(Is32bit(dst)); + CHECK(hexagon::Device::ValidateDeviceId(ctx_to.device_id)); + hexagon::Device::Global()->CopyHostToDevice(dst, src, num_bytes); + } else { + CHECK_EQ(static_cast(ctx_from.device_type), kDLHexagon); + CHECK_EQ(ctx_to.device_type, kDLCPU); + CHECK(Is32bit(src)); + CHECK(hexagon::Device::ValidateDeviceId(ctx_from.device_id)); + hexagon::Device::Global()->CopyDeviceToHost(dst, src, num_bytes); + } + } +} + +inline void HexagonDeviceAPI::StreamSync(TVMContext ctx, + TVMStreamHandle stream) {} + +TVM_REGISTER_GLOBAL("device_api.hexagon") + .set_body([](TVMArgs args, TVMRetValue* rv) { + DeviceAPI* ptr = HexagonDeviceAPI::Global().get(); + *rv = ptr; + }); +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/hexagon/hexagon_module.cc b/src/runtime/hexagon/hexagon_module.cc new file mode 100644 index 000000000000..266b96202463 --- /dev/null +++ b/src/runtime/hexagon/hexagon_module.cc @@ -0,0 +1,343 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * This Contribution is being provided by Qualcomm Technologies, Inc., + * a Delaware corporation, or its subsidiary Qualcomm Innovation Center, Inc., + * a California corporation, under certain additional terms and conditions + * pursuant to Section 5 of the Apache 2.0 license. In this regard, with + * respect to this Contribution, the term "Work" in Section 1 of the + * Apache 2.0 license means only the specific subdirectory within the TVM repo + * (currently at https://github.com/dmlc/tvm) to which this Contribution is + * made. + * In any case, this submission is "Not a Contribution" with respect to its + * permitted use with any of the "vta" and "verilog" subdirectories in the TVM + * repo. + * Qualcomm Technologies, Inc. and Qualcomm Innovation Center, Inc. retain + * copyright of their respective Contributions. + */ +#include "hexagon_module.h" + +#ifdef __ANDROID__ +#include +#endif +#include +#include + +#include +#include +#include +#include + +#include "../file_util.h" +#include "../meta_data.h" + +namespace tvm { +namespace runtime { + +hexagon::Device::~Device() {} + +namespace hexagon { + +/*! + * \brief Function argument locations according to the Hexagon ABI. 
+ * + * In order to invoke a function whose arguments are in TVMArgs list, at + * some point before branching to the function's address, these arguments + * need to be loaded into locations (registers or stack) specified by the + * corresponding ABI. + * When a host wants to call a function on Hexagon, the host will identify + * how each element of the TVMArgs list will be passed to the Hexagon + * function. This class is a description of which values should go into + * registers, and which values should be on stack. Right before the call + * this class will be serialized and transferred over to the Hexagon side. + * The code running on Hexagon will then execute the argument placement + * and invoke the function. + */ +struct ArgLayout { + std::vector Scalar; /*!< Values going into registers, maximum */ + /*!< 6, including dummy values for skipped */ + /*!< registers. */ + std::vector Stack; /*!< Values going on stack, including */ + /*!< dummy values for padding. */ + // There are no vector types at this time. + + /*! + * \brief Alignment of type T on Hexagon. + */ + template + static constexpr unsigned align_of(); + /*! + * \brief Size of type T on Hexagon. + */ + template + static constexpr unsigned size_of(); + + /*! + * \brief Add a value of type T to the layout. + */ + template + void Push(const T& v); + + private: + /*! + * \brief Add raw data to the layout. + * \param v Pointer to the raw data as an array of 32-bit words. + * \param t_size Number of bytes to add. + * \param t_align Required alignment of the data on Hexagon. 
+ */ + void Push(uint32_t* v, unsigned t_size, unsigned t_align); +}; + +template <> +constexpr unsigned ArgLayout::align_of() { + return 4; +} +template <> +constexpr unsigned ArgLayout::align_of() { + return 4; +} +template <> +constexpr unsigned ArgLayout::align_of() { + return 4; +} +template <> +constexpr unsigned ArgLayout::align_of() { + return 4; +} +template <> +constexpr unsigned ArgLayout::align_of() { + return 8; +} +template <> +constexpr unsigned ArgLayout::align_of() { + return 8; +} +template <> +constexpr unsigned ArgLayout::align_of() { + return 8; +} + +template +constexpr unsigned ArgLayout::align_of() { + // The static_assertion should depend on T so that it's only checked + // after instantiation. + static_assert((sizeof(T), false), "Implement align_of for this type"); + return 0; +} + +template +constexpr unsigned ArgLayout::size_of() { + return ArgLayout::align_of(); +} + +template +void ArgLayout::Push(const T& v) { + static_assert(std::is_scalar::value, "T must be a scalar"); + constexpr unsigned T_size = size_of(); + // The reason for this assertion is to avoid sign-extensions here: + // an extra bit of information would be required to determine whether + // a sign- or a zero-extension is needed. + static_assert(T_size >= 4, "Type should be of size that is at least 4"); + union { + uint32_t v[(T_size + 3) / 4]; + T t; + } u; + + u.t = v; + Push(u.v, T_size, align_of()); +} + +// Place the raw 32-bit words of one argument: into the Scalar (register) +// list while there is room for it, otherwise into the Stack list. +void ArgLayout::Push(uint32_t* v, unsigned t_size, unsigned t_align) { + // t_size == 4 and t_size == 8 can be passed in scalar registers. + bool InReg = false; + if (t_size == 4) { + if (Scalar.size() < 6) { + Scalar.push_back(v[0]); + InReg = true; + } + } else if (t_size == 8) { + // Round the register count up to the next even number; the skipped + // register receives a dummy value. 
+ unsigned cs = Scalar.size(); + if (cs <= 4) { + // There is room in the scalar registers. + if (cs & 1) Scalar.push_back(0u); + Scalar.push_back(v[0]); + Scalar.push_back(v[1]); + InReg = true; + } + } + + if (!InReg) { + // Allocate on stack. 
+ CHECK_EQ((t_align & (t_align - 1)), 0) + << "Alignment should be a power of 2"; + CHECK_GE(t_align, 4) << "Alignment should be at least 4"; + // Stack.size(), s_align and pad are all counted in 32-bit words. + unsigned s_size = Stack.size(); + unsigned s_align = t_align / 4; // Alignment of T in words on the stack. + unsigned pad = ((s_size + s_align - 1) / s_align) * s_align - s_size; + // Insert the padding words themselves (pad is already a word count), so + // that the value starts at an offset aligned to t_align. + Stack.insert(Stack.end(), pad, 0u); + Stack.insert(Stack.end(), v, v + t_size / 4); + } +} + +} // namespace hexagon + +class HexagonModuleNode final : public runtime::ModuleNode { + public: + HexagonModuleNode(std::string data, std::string fmt, + std::unordered_map fmap) + : data_(data), fmt_(fmt), fmap_(fmap) { + dl_handle_ = hexagon::Device::Global()->Load(data, fmt); + } + ~HexagonModuleNode() { + if (dl_handle_) { + hexagon::Device::Global()->Unload(dl_handle_); + } + } + + PackedFunc GetFunction( + const std::string& name, + const std::shared_ptr& sptr_to_self) final; + const char* type_key() const final; + void SaveToFile(const std::string& file_name, + const std::string& format) final { + std::string meta_file = GetMetaFilePath(file_name); + SaveMetaDataToFile(meta_file, fmap_); + // NOTE(review): data_ and file_name are pasted unquoted into a shell + // command; paths with spaces or shell metacharacters will misbehave. + std::string c = "cp " + data_ + " " + file_name; + CHECK(std::system(c.c_str()) == 0) << "Cannot create " + file_name; + } + + private: + hexagon::ArgLayout BuildArgLayout(const TVMArgs& Aa) const; + + void* dl_handle_ = nullptr; + std::string data_; + std::string fmt_; + std::unordered_map fmap_; +}; + +const char* HexagonModuleNode::type_key() const { return "hexagon"; } + +PackedFunc HexagonModuleNode::GetFunction( + const std::string& name, const std::shared_ptr& sptr_to_self) { + auto f = fmap_.find(name); + if (f == fmap_.end()) return PackedFunc(nullptr); + + // Get function pointer from device. 
+ void* pf = hexagon::Device::Global()->Resolve(name); + + auto func = [pf, sptr_to_self](TVMArgs args, TVMRetValue* rv) { + auto m = std::static_pointer_cast(sptr_to_self); + hexagon::ArgLayout As = m->BuildArgLayout(args); + hexagon::Device* HD = hexagon::Device::Global(); + HD->Call(pf, As.Scalar.data(), As.Scalar.size(), As.Stack.data(), + As.Stack.size()); + }; + return PackedFunc(func); +} + +hexagon::ArgLayout HexagonModuleNode::BuildArgLayout(const TVMArgs& As) const { + hexagon::ArgLayout Args; + + for (unsigned i = 0, e = As.size(); i != e; ++i) { + const TVMArgValue& A = As[i]; + unsigned TC = A.type_code(); + switch (TC) { + // Treat all integers as 32-bit values. + case kDLInt: + case kDLUInt: + // KLUDGE: There is no distinction between 32- and 64-bit integer + // types, so there is no way to tell if the value being passed needs + // one or two registers. Assume that all integers are 32-bit, and + // simply abort if the actual value does not fit. + CHECK_EQ(static_cast(A), static_cast(A)); + Args.Push(static_cast(A)); + break; + // 64-bit values + case kDLFloat: + Args.Push(static_cast(A)); + break; + + case kHandle: + case kNull: + case kArrayHandle: + case kNodeHandle: + case kModuleHandle: + case kFuncHandle: + Args.Push(static_cast(A)); + break; + + default: + LOG(FATAL) << "Unhandled type code" << TC; + break; + } + } + + return Args; +} + +Module HexagonModuleCreate( + std::string data, std::string fmt, + std::unordered_map fmap) { + return Module(std::make_shared(data, fmt, fmap)); +} + +// Load module from file. +Module HexagonModuleLoadFile(const std::string& file_name, + const std::string& format) { + std::string data = file_name; + std::unordered_map fmap; + std::string fmt = GetFileFormat(file_name, format); + std::string meta_file = GetMetaFilePath(file_name); + LoadMetaDataFromFile(meta_file, &fmap); + return HexagonModuleCreate(data, fmt, fmap); +} + +namespace hexagon { + +Device* Device::Global() { + // Declare device constructors. 
+#ifdef __ANDROID__ + std::unique_ptr CreateHexagonTarget(void); +#else + std::unique_ptr CreateHexagonSimulator(void); +#endif + + static std::unique_ptr dev( +#ifdef __ANDROID__ + CreateHexagonTarget() +#else + CreateHexagonSimulator() +#endif + ); // NOLINT + + return dev.get(); +} + +} // namespace hexagon + +TVM_REGISTER_GLOBAL("module.loadfile_hexagon") + .set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = HexagonModuleLoadFile(args[0], args[1]); + }); + +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/hexagon/hexagon_module.h b/src/runtime/hexagon/hexagon_module.h new file mode 100644 index 000000000000..4516409d3828 --- /dev/null +++ b/src/runtime/hexagon/hexagon_module.h @@ -0,0 +1,152 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * This Contribution is being provided by Qualcomm Technologies, Inc., + * a Delaware corporation, or its subsidiary Qualcomm Innovation Center, Inc., + * a California corporation, under certain additional terms and conditions + * pursuant to Section 5 of the Apache 2.0 license. 
In this regard, with + * respect to this Contribution, the term "Work" in Section 1 of the + * Apache 2.0 license means only the specific subdirectory within the TVM repo + * (currently at https://github.com/dmlc/tvm) to which this Contribution is + * made. + * In any case, this submission is "Not a Contribution" with respect to its + * permitted use with any of the "vta" and "verilog" subdirectories in the TVM + * repo. + * Qualcomm Technologies, Inc. and Qualcomm Innovation Center, Inc. retain + * copyright of their respective Contributions. + */ +#ifndef TVM_RUNTIME_HEXAGON_HEXAGON_MODULE_H_ +#define TVM_RUNTIME_HEXAGON_HEXAGON_MODULE_H_ + +#include +#include + +#include +#include +#include +#include + +#include "../meta_data.h" + +namespace tvm { +namespace runtime { + +/*! + * \brief Create a Hexagon module from data. + * \param data The module data. + * \param fmt The format of the data, can be "obj". + * \param fmap The map function information map of each function. + */ +Module HexagonModuleCreate(std::string data, std::string fmt, + std::unordered_map fmap); + +namespace hexagon { + +/*! + * \brief Low-level interface for communicating with Hexagon devices. + */ +class Device { + public: + /*! + * \brief Allocate memory on device. + * \param size Requested size. + * \param align Requested alignment. + * \return Pointer (local to the device) of the allocated memory, + * or nullptr if allocation failed. + */ + virtual void* Alloc(unsigned size, unsigned align) = 0; + /*! + * \brief Release allocated memory on device. + * \param ptr Pointer to memory previously allocated by \ref Alloc. + */ + virtual void Free(void* ptr) = 0; + /*! + * \brief Copy a block of data on device to another location on the device. + * \param dst Pointer (local to device) to the destination buffer. + * \param src Pointer (local to device) of the source buffer. + * \param len Number of bytes to copy. 
+ */ + virtual void CopyDeviceToDevice(void* dst, const void* src, + unsigned len) = 0; + /*! + * \brief Copy a block of data from device to host. + * \param host_dst Pointer (local to host) to the destination buffer. + * \param src Pointer (local to device) to the source buffer. + * \param len Number of bytes to copy. + */ + virtual void CopyDeviceToHost(void* host_dst, const void* src, + unsigned len) = 0; + /*! + * \brief Copy a block of data from host to device. + * \param dst Pointer (local to device) to the destination buffer. + * \param host_src Pointer (local to host) to the source buffer. + * \param len Number of bytes to copy. + */ + virtual void CopyHostToDevice(void* dst, const void* host_src, + unsigned len) = 0; + /*! + * \brief Load a module (typically a shared library) into device. + * \param data Name of the shared library. + * \param fmt Format of the library (currently ignored). + * \return Pointer to the loaded module. + * \note Currently only one module can be loaded at any given time. + */ + virtual void* Load(const std::string& data, const std::string& fmt) = 0; + /*! + * \brief Unload a module from device. + * \param mod Pointer to a loaded module returned by \ref Load. + */ + virtual void Unload(void* mod) = 0; + /*! + * \brief Find the address of an object in the currently loaded module. + * \param sym Name of the object. + * \return Address of the located object, or nullptr if object was + * not found. + */ + virtual void* Resolve(const std::string& sym) = 0; + /*! + * \brief Invoke a function on device with given arguments. + * \param func Address (local to device) of the function to call. + * \param scalar Pointer to an array of 32-bit values that will be + * passed via consecutive registers: r0..r5. This array + * includes dummy values for skipped registers. + * \param sc_num Number of values in the "scalar" array. + * \param stack Pointer to an array of 32-bit values that will be + * passed on the stack. 
This array includes dummy values + * for padding. + * \param st_num Number of values in the "stack" array. + */ + virtual void Call(void* func, uint32_t* scalar, unsigned sc_num, + uint32_t* stack, unsigned st_num) = 0; + + virtual ~Device() = 0; + + static Device* Global(); + static bool ValidateDeviceId(decltype(DLContext::device_id) device_id) { + // Only supporting a single device for now. + return device_id == 0; + } +}; + +} // namespace hexagon + +} // namespace runtime +} // namespace tvm +#endif // TVM_RUNTIME_HEXAGON_HEXAGON_MODULE_H_ diff --git a/src/runtime/hexagon/sim/driver/sim_device.cc b/src/runtime/hexagon/sim/driver/sim_device.cc new file mode 100644 index 000000000000..4376bc003118 --- /dev/null +++ b/src/runtime/hexagon/sim/driver/sim_device.cc @@ -0,0 +1,422 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * This Contribution is being provided by Qualcomm Technologies, Inc., + * a Delaware corporation, or its subsidiary Qualcomm Innovation Center, Inc., + * a California corporation, under certain additional terms and conditions + * pursuant to Section 5 of the Apache 2.0 license. 
In this regard, with + * respect to this Contribution, the term "Work" in Section 1 of the + * Apache 2.0 license means only the specific subdirectory within the TVM repo + * (currently at https://github.com/dmlc/tvm) to which this Contribution is + * made. + * In any case, this submission is "Not a Contribution" with respect to its + * permitted use with any of the "vta" and "verilog" subdirectories in the TVM + * repo. + * Qualcomm Technologies, Inc. and Qualcomm Innovation Center, Inc. retain + * copyright of their respective Contributions. + */ + +// Build with: +/* + hexagon-clang++ -O2 sim_device.cc -o sim_dev -G0 -ldl -stdlib=libstdc++ \ + -Wl,--force-dynamic -Wl,-E -Wl,--whole-archive -lm \ + -Isrc/runtime/hexagon/sim +*/ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "hexagon_sim_proto.h" + +std::string time_now() { + char str[11]; // [hh:mm:ss] + time_t time_value = time(NULL); + tm now; + tm* pnow = localtime_r(&time_value, &now); + + snprintf(str, sizeof(str), "[%02d:%02d:%02d]", pnow->tm_hour, pnow->tm_min, + pnow->tm_sec); + return std::string(str); +} + +#define LOG(FMT, ...) \ + fprintf(stderr, "%s %s:%d: " FMT "\n", time_now().c_str(), __FILE__, \ + __LINE__, ##__VA_ARGS__) + +extern "C" { +// Type definition copied from include/tvm/runtime/c_backend_api.h. +typedef struct { + /*! + * \brief Auxiliary used for synchronization + */ + void* sync_handle; + /*! \brief total amount of task */ + int32_t num_task; +} TVMParallelGroupEnv; + +/*! + * \brief The callback function to execute a parallel lambda + * \param task_id the task id of the function. + * \param penv The parallel environment backs the execution. + * \param cdata The supporting closure data. 
+ */ +typedef int (*FTVMParallelLambda)(int task_id, TVMParallelGroupEnv* penv, + void* cdata); + +int TVMBackendParallelLaunch(FTVMParallelLambda kernel, void* cdata, + int num_task); +int TVMBackendParallelBarrier(int task_id, TVMParallelGroupEnv* penv); +} + +/*! + * The implementation of the parallel runtime for execution on simulator. + * The simulator environment does not support running multiple threads, + * so the runtime is trivial: the maximum number of threads that it + * supports it 1 (i.e. the main thread only). + */ +int TVMBackendParallelLaunch(FTVMParallelLambda kernel, void* cdata, + int num_task) { + TVMParallelGroupEnv penv{nullptr, 1}; + LOG("%s(kernel=%p, cdata=%p, num_task=%d)", __func__, kernel, cdata, + num_task); + kernel(0, &penv, cdata); + return 0; +} + +int TVMBackendParallelBarrier(int task_id, TVMParallelGroupEnv* penv) { + LOG("%s(task_id=%d, penv=%p)", __func__, task_id, penv); + assert(task_id == 0 && penv->num_task == 1 && "Expecting single task"); + return 0; +} + +struct allocator { + private: + struct block { + block(void* p, size_t s) : ptr_(p), size_(s) {} + bool operator<(const block& b) const { + return uintptr_t(ptr_) < uintptr_t(b.ptr_); + } + void* ptr_; + size_t size_; + }; + + using vector_type = std::vector; + using iterator = vector_type::iterator; + vector_type allocations_; + + public: + void* alloc(unsigned size, size_t align); + void free(void* p); +}; + +void* allocator::alloc(unsigned size, size_t align) { + void* ptr = aligned_alloc(align, size); + if (ptr == nullptr) { + perror("device: error allocating memory:"); + return ptr; + } + + block b(ptr, size); + iterator i = std::lower_bound(allocations_.begin(), allocations_.end(), b); + iterator w = allocations_.insert(i, b); + if (w != allocations_.begin()) { + iterator pw = w - 1; + assert(uintptr_t(pw->ptr_) + pw->size_ < uintptr_t(w->ptr_)); + } + if (w + 1 != allocations_.end()) { + iterator nw = w + 1; + assert(uintptr_t(w->ptr_) + w->size_ <= 
uintptr_t(nw->ptr_)); + } + + LOG("device: allocated %d bytes aligned at %d: %p", size, align, ptr); + return ptr; +} + +void allocator::free(void* ptr) { + LOG("device: freeing %p", ptr); + iterator i = std::lower_bound(allocations_.begin(), allocations_.end(), + block(ptr, 0)); + assert(i != allocations_.end()); + assert(i->ptr_ == ptr); + ::free(i->ptr_); + allocations_.erase(i); +} + +static void print_msg_call(const MsgCall& mc) { + auto to_dec_string = [](int v) { + char tmp[11]; + snprintf(tmp, sizeof(tmp), "%d", v); + return std::string(tmp); + }; + auto to_hex_string = [](uint32_t v) { + char tmp[9]; + snprintf(tmp, sizeof(tmp), "%lx", v); + return std::string(tmp); + }; + std::string str = "device: launching " + to_hex_string(mc.func_va) + + " sc:" + to_dec_string(mc.scalar_num) + " {"; + for (unsigned i = 0; i != mc.scalar_num; ++i) { + str += ' ' + to_hex_string(mc.data[i]); + if (i + 1 != mc.scalar_num) str += ','; + } + str += " }, st:" + to_dec_string(mc.stack_num) + " {"; + for (unsigned i = 0; i != mc.stack_num; ++i) { + str += ' ' + to_hex_string(mc.data[i + mc.scalar_num]); + if (i + 1 != mc.stack_num) str += ','; + } + str += " }"; + LOG("%s", str.c_str()); +} + +static std::vector task_queue; + +struct environment { + allocator alloc; + void* dl_handle = nullptr; +}; + +extern "C" { +volatile Message message_buffer; +int dispatch(environment* env) __attribute__((noinline)); +} + +static volatile unsigned char payload_buffer[4096]; + +void set_msg(uint32_t code, uint32_t len, uint32_t va) { + message_buffer.code = code; + message_buffer.len = len; + message_buffer.va = va; +} + +inline void* pointer(uint32_t v) { + return reinterpret_cast(static_cast(v)); +} + +inline uint32_t va(const volatile void* p) { + return static_cast(reinterpret_cast(p)); +} + +__attribute__((naked)) uint32_t launcher(volatile MsgCall* mc, uint64_t* pcc) { + __asm__( + "// This function is intentionally written to be readable, \n" + "// rather than fast. 
\n" + "// r0 = value of 'volatile MsgCall *mc' \n" + "// r1 = address where to store the program cycle count \n" + "{ memd(r29+#-16) = r21:20 \n" + " allocframe(#24) } \n" + "{ memd(r29+#0) = r17:16 \n" + " memd(r29+#8) = r19:18 } \n" + "{ r17:16 = combine(r1,r0) \n" + " r18 = r29 \n" + " r1 = memw(r0+#4) // scalar_num \n" + " r2 = memw(r0+#8) } // stack_num \n" + "// If there are no stack values, skip the stack setup. \n" + "{ p0 = cmp.eq(r2,#0) \n" + " if (p0.new) jump:t .Llauncher1 } \n" + + "// Allocate space on the stack. Let r2 = needed space \n" + "// rounded up to a multiple of 8. \n" + "{ loop0(.Llauncher0,r2) \n" + " r2 = asl(r2,#2) } \n" + "{ r2 = add(r2,#4) } \n" + "{ r2 = clrbit(r2,#2) } \n" + "{ r29 = sub(r29,r2) } \n" + + "// Copy stack contents onto the stack. Stack contents start \n" + "// at r3 = r0 + offsetof(data) + scalar_num*4 \n" + "{ r3 = addasl(r0,r1,#2) \n" + " r4 = r29 } \n" + "{ r3 = add(r3,#12) } // offsetof(data) \n" + ".Llauncher0: \n" + "{ r5 = memw(r3++#4) \n" + " memw(r4++#4) = r5.new } :endloop0 \n" + + "// Load registers. Some of the loaded data may actually be \n" + "// values from the stack part of 'data', but it's not an issue.\n" + ".Llauncher1: \n" + "{ r0 = memw(r16+#12) // mc + offsetof(data) \n" + " r1 = memw(r16+#16) } \n" + "{ r2 = memw(r16+#20) \n" + " r3 = memw(r16+#24) } \n" + "{ r4 = memw(r16+#28) \n" + " r5 = memw(r16+#32) } \n" + + "// Call. \n" + "{ r6 = memw(r16+#0) \n" + " r21:20 = upcycle } \n" + "{ callr r6 } \n" + + "// Restore stack pointer (free up r18), calculate cycle count. \n" + "{ r29 = r18 \n" + " r19:18 = upcycle } \n" + "{ r19:18 = sub(r19:18, r21:20) } \n" + + "// Store pcount, restore non-volatile registers, and return. 
\n" + "{ memd(r17+#0) = r19:18 \n" + " r21:20 = memd(r29+#16) } \n" + "{ r19:18 = memd(r29+#8) \n" + " r17:16 = memd(r29+#0) } \n" + "{ dealloc_return } // implicit-use r1:0 \n"); +} + +int dispatch(environment* env) { + uint32_t code = message_buffer.code; + // Special handling of MsgReq. + if (code == kMsgReq) { + // XXX: Enable fprintfs for MsqReg under #define. + // LOG("device: {MsgReq, %lu, %lx}", message_buffer.len, + // message_buffer.va); + // XXX: Implement handling of longer messages. + assert(message_buffer.len <= sizeof(payload_buffer)); + set_msg(kMsgAck, sizeof(payload_buffer), va(payload_buffer)); + return 0; + } + + switch (code) { + case kAlloc: { + LOG("device: {kAlloc, %lu, %lx}", message_buffer.len, message_buffer.va); + assert(message_buffer.len == sizeof(MsgAlloc)); + auto* ma = reinterpret_cast(message_buffer.va); + void* p = env->alloc.alloc(ma->size, ma->align); + reinterpret_cast(payload_buffer)->va = va(p); + set_msg(kNone, sizeof(MsgPointer), va(payload_buffer)); + break; + } + case kFree: { + LOG("device: {kFree, %lu, %lx}", message_buffer.len, message_buffer.va); + assert(message_buffer.len == sizeof(MsgPointer)); + auto* mp = reinterpret_cast(message_buffer.va); + env->alloc.free(pointer(mp->va)); + set_msg(kNone, 0u, 0u); + break; + } + case kCopy: { + LOG("device: {kCopy, %lu, %lx}", message_buffer.len, message_buffer.va); + assert(message_buffer.len == sizeof(MsgCopy)); + auto* mc = reinterpret_cast(message_buffer.va); + memcpy(pointer(mc->dst), pointer(mc->src), mc->len); + set_msg(kNone, 0u, 0u); + break; + } + case kLoad: { + // LOG("device: {kLoad, %lu, %lx}", + // message_buffer.len, message_buffer.va); + if (env->dl_handle != nullptr) dlclose(env->dl_handle); + const char* name = static_cast(pointer(message_buffer.va)); + // LOG(stderr, "device: dlopen(%s)", name); + env->dl_handle = dlopen(name, RTLD_LAZY); + if (env->dl_handle == nullptr) LOG("dlopen: %s\n", dlerror()); + assert(env->dl_handle != nullptr); + 
reinterpret_cast(payload_buffer)->va = + va(env->dl_handle); + set_msg(kNone, sizeof(MsgPointer), va(payload_buffer)); + break; + } + case kUnload: { + // LOG("device: {kUnload, %lu, %lx}", + // message_buffer.len, message_buffer.va); + assert(env->dl_handle != nullptr); + assert(message_buffer.len == sizeof(MsgPointer)); + auto* mp = reinterpret_cast(message_buffer.va); + assert(pointer(mp->va) == env->dl_handle); + dlclose(env->dl_handle); + env->dl_handle = nullptr; + set_msg(kNone, 0u, 0u); + break; + } + case kResolve: { + LOG("device: {kResolve, %lu, %lx}", message_buffer.len, + message_buffer.va); + assert(env->dl_handle != nullptr); + dlerror(); + const char* name = static_cast(pointer(message_buffer.va)); + void* s = dlsym(env->dl_handle, name); + reinterpret_cast(payload_buffer)->va = va(s); + set_msg(kNone, sizeof(MsgPointer), va(payload_buffer)); + break; + } + case kCall: { + LOG("device: {kCall, %lu, %lx}", message_buffer.len, message_buffer.va); + // Add the task to the queue. + auto* mc = reinterpret_cast(message_buffer.va); + uint32_t size = 4 * (3 + mc->scalar_num + mc->stack_num); + MsgCall* t = static_cast(malloc(size)); + memcpy(t, mc, size); + task_queue.push_back(t); + // Return 0. + *reinterpret_cast(payload_buffer) = 0; + set_msg(kNone, sizeof(uint32_t), va(payload_buffer)); + break; + } + case kFlush: { + LOG("device: {kFlush}"); + LOG("device: %d tasks in the queue", task_queue.size()); + // Execute all tasks from the queue and release memory buffers + // for as long as the return values are 0. Upon receiving a non-zero + // return value, continue freeing memory but no longer execute + // any tasks. The task queue will be cleared in any case. + uint32_t rv = 0; + uint64_t pcc; // Pcycle counter, will be 0 under simulator (upcycle). 
+ for (MsgCall* t : task_queue) { + if (rv == 0) { + print_msg_call(*t); + rv = launcher(t, &pcc); + } + free(t); + } + task_queue.clear(); + *reinterpret_cast(payload_buffer) = rv; + set_msg(kNone, sizeof(uint32_t), va(payload_buffer)); + break; + } + default: + LOG("device: unknown code: %lu", message_buffer.code); + abort(); + break; + } + return 0; +} + +extern "C" { +int acquire_vector_unit(int); +void release_vector_unit(); +} + +int main() { + environment env; + acquire_vector_unit(0); + + const char* builtin[] = {"libgcc.so", "libc.so"}; + dlinit(2, const_cast(builtin)); + + while (!dispatch(&env)) { + } + + release_vector_unit(); + return 0; +} diff --git a/src/runtime/hexagon/sim/hexagon_device_sim.cc b/src/runtime/hexagon/sim/hexagon_device_sim.cc new file mode 100644 index 000000000000..46d5c60cc52a --- /dev/null +++ b/src/runtime/hexagon/sim/hexagon_device_sim.cc @@ -0,0 +1,461 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * This Contribution is being provided by Qualcomm Technologies, Inc., + * a Delaware corporation, or its subsidiary Qualcomm Innovation Center, Inc., + * a California corporation, under certain additional terms and conditions + * pursuant to Section 5 of the Apache 2.0 license. In this regard, with + * respect to this Contribution, the term "Work" in Section 1 of the + * Apache 2.0 license means only the specific subdirectory within the TVM repo + * (currently at https://github.com/dmlc/tvm) to which this Contribution is + * made. + * In any case, this submission is "Not a Contribution" with respect to its + * permitted use with any of the "vta" and "verilog" subdirectories in the TVM + * repo. + * Qualcomm Technologies, Inc. and Qualcomm Innovation Center, Inc. retain + * copyright of their respective Contributions. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "../hexagon_module.h" +#include "hexagon_sim_proto.h" + +namespace tvm { +namespace runtime { +namespace hexagon { + +static_assert(sizeof(HEX_VA_t) == sizeof(uint32_t), + "Hexagon VA must be uint32"); + +template +struct unalign { + using type = T __attribute__((aligned(1))); +}; + +template +struct uint { + using type = void; +}; + +template <> +struct uint<8> { + using type = uint64_t; +}; +template <> +struct uint<4> { + using type = uint32_t; +}; +template <> +struct uint<2> { + using type = uint16_t; +}; +template <> +struct uint<1> { + using type = uint8_t; +}; + +class HexagonSimulator : public tvm::runtime::hexagon::Device { + public: + explicit HexagonSimulator(bool enable_queuing); + ~HexagonSimulator() final {} + void* Alloc(unsigned size, unsigned align) final; + void Free(void* ptr) final; + void CopyDeviceToDevice(void* dst, const void* src, unsigned len) final; + void CopyDeviceToHost(void* host_dst, const void* src, unsigned len) final; + void CopyHostToDevice(void* dst, const void* 
host_src, unsigned len) final; + void* Load(const std::string& data, const std::string& fmt) final; + void Unload(void* mod) final; + void* Resolve(const std::string& sym) final; + void Call(void* func, uint32_t* scalar, unsigned sc_num, uint32_t* stack, + unsigned st_num) final; + + private: + static HEX_VA_t p2va(const void* p); + static void* va2p(HEX_VA_t va); + + void CopyFromV(void* host_dst, HEX_VA_t src, unsigned len); + void CopyToV(HEX_VA_t dst, const void* host_src, unsigned len); + + template + void CopyNToV(HEX_VA_t dst, const void* host_src); + template + void CopyNFromV(void* host_dst, HEX_VA_t src); + + // NOLINTNEXTLINE(runtime/references) + void SendMsg(Message& m, const void* data, bool show_dbg); + + std::unique_ptr sim_; + HEX_VA_t dispatch_v_, message_buffer_v_; + bool task_queuing_; +}; + +inline HEX_VA_t HexagonSimulator::p2va(const void* p) { + uintptr_t u = reinterpret_cast(p); + HEX_VA_t va = static_cast(u); + CHECK_EQ(static_cast(va), u); + return va; +} + +inline void* HexagonSimulator::va2p(HEX_VA_t va) { + return reinterpret_cast(static_cast(va)); +} + +template +constexpr bool is_multiple_of() { + return (N / A) * A == N; +} + +std::unique_ptr CreateHexagonSimulator() { + // C++11 does not have std::make_unique. 
+ return llvm::make_unique(/*enable_queuing=*/true); +} + +template +void HexagonSimulator::CopyNToV(HEX_VA_t dst, const void* host_src) { + using src_uint_t = typename unalign::type>::type; + auto* ps = reinterpret_cast(host_src); + CHECK_EQ(sim_->WriteVirtual(dst, -1u, N, *ps), HEX_STAT_SUCCESS); +} + +template +void HexagonSimulator::CopyNFromV(void* host_dst, HEX_VA_t src) { + typename uint::type v; + CHECK_EQ(sim_->ReadVirtual(src, -1u, N, &v), HEX_STAT_SUCCESS); + + using dst_uint_t = typename unalign::type>::type; + auto* pd = reinterpret_cast(host_dst); + *pd = v; +} + +void HexagonSimulator::CopyToV(HEX_VA_t dst, const void* host_src, + unsigned len) { + const uint8_t* src = static_cast(host_src); + + while (len >= 8) { + CopyNToV<8>(dst, src); + dst += 8; + src += 8; + len -= 8; + } + if (len >= 4) { + CopyNToV<4>(dst, src); + dst += 4; + src += 4; + len -= 4; + } + if (len >= 2) { + CopyNToV<2>(dst, src); + dst += 2; + src += 2; + len -= 2; + } + if (len >= 1) { + CopyNToV<1>(dst, src); + dst++; + src++; + len--; + } + CHECK_EQ(len, 0); +} + +void HexagonSimulator::CopyFromV(void* host_dst, HEX_VA_t src, unsigned len) { + uint8_t* dst = static_cast(host_dst); + + while (len >= 8) { + CopyNFromV<8>(dst, src); + dst += 8; + src += 8; + len -= 8; + } + if (len >= 4) { + CopyNFromV<4>(dst, src); + dst += 4; + src += 4; + len -= 4; + } + if (len >= 2) { + CopyNFromV<2>(dst, src); + dst += 2; + src += 2; + len -= 2; + } + if (len >= 1) { + CopyNFromV<1>(dst, src); + dst++; + src++; + len--; + } + CHECK_EQ(len, 0); +} + +void HexagonSimulator::SendMsg(Message& m, const void* data, bool show_dbg) { + auto run = [this](bool report_cycles) { + HEXAPI_CoreState core = HEX_CORE_RESET; + HEX_4u_t result; + HEX_8u_t cycles0, cycles1; + if (report_cycles) + CHECK_EQ(sim_->GetSimulatedCycleCount(&cycles0), HEX_STAT_SUCCESS); + core = sim_->Run(&result); + CHECK_EQ(core, HEX_CORE_BREAKPOINT); + if (report_cycles) { + CHECK_EQ(sim_->GetSimulatedCycleCount(&cycles1), 
HEX_STAT_SUCCESS); + LOG(INFO) << "host: execution took " << (cycles1 - cycles0) << " cycles"; + } + }; + + // Send the message request. + Message r = {kMsgReq, m.len, 0u}; + CopyToV(message_buffer_v_, &r, sizeof(r)); + run(false); + + // Receive the acknowledgement with the address for the payload. + CopyFromV(&r, message_buffer_v_, sizeof(r)); + CHECK_EQ(r.code, kMsgAck); + CHECK_GE(r.len, m.len); + + // Send the actual message. + m.va = r.va; + CopyToV(message_buffer_v_, &m, sizeof(m)); + if (m.len > 0) CopyToV(r.va, data, m.len); + run(show_dbg); + + // Receive the return data. + CopyFromV(&m, message_buffer_v_, sizeof(m)); + CHECK_EQ(m.code, kNone); +} + +HexagonSimulator::HexagonSimulator(bool enable_queuing) + : sim_(new HexagonWrapper(HEX_CPU_V66)), task_queuing_(enable_queuing) { + HEXAPI_Status status = HEX_STAT_SUCCESS; + + // Locate the sim_dev binary in PATH, or in the current working directory. + llvm::StringRef sim_dev = "sim_dev"; + llvm::Optional path_sim_dev = + llvm::sys::Process::FindInEnvPath("PATH", sim_dev); + if (!path_sim_dev) { + if (!llvm::sys::fs::exists(sim_dev)) { + LOG(ERROR) << "Cannot find sim_dev in PATH."; + exit(1); + } + path_sim_dev = sim_dev; + } + + status = sim_->ConfigureExecutableBinary(path_sim_dev->c_str()); + if (status != HEX_STAT_SUCCESS) + LOG(FATAL) << "HexagonSimulator: ConfigureExecutableBinary failed " + "with code=" + << static_cast(status); + + status = sim_->EndOfConfiguration(); + if (status != HEX_STAT_SUCCESS) + LOG(FATAL) << "HexagonSimulator: EndOfConfiguration failed with " + "code=" + << static_cast(status); + + status = sim_->LoadExecutableBinary(); + if (status != HEX_STAT_SUCCESS) + LOG(FATAL) << "HexagonSimulator: LoadExecutableBinary failed with " + "code=" + << static_cast(status); + + status = sim_->ReadSymbolValue("dispatch", &dispatch_v_); + if (status != HEX_STAT_SUCCESS) + LOG(FATAL) << "HexagonSimulator: ReadSymbolValue(\"dispatch\") " + "failed with code=" + << static_cast(status); + + 
status = sim_->ReadSymbolValue("message_buffer", &message_buffer_v_); + if (status != HEX_STAT_SUCCESS) + LOG(FATAL) << "HexagonSimulator: ReadSymbolValue(\"message_buffer\") " + "failed with code=" + << static_cast(status); + + status = sim_->SetBreakpoint(dispatch_v_); + if (status != HEX_STAT_SUCCESS) + LOG(FATAL) << "HexagonSimulator: SetBreakpoint failed with " + "code=" + << static_cast(status); + + HEXAPI_CoreState core = HEX_CORE_RESET; + + HEX_4u_t result; + core = sim_->Run(&result); + if (core != HEX_CORE_BREAKPOINT) + LOG(FATAL) << "HexagonSimulator: Run not stopped on breakpoint, " + "code=" + << static_cast(core); +} + +void* HexagonSimulator::Alloc(unsigned size, unsigned align) { + LOG(INFO) << "HexagonSimulator::Alloc(size=" << size << ", align=" << align + << ')'; + Message m = {kAlloc, sizeof(MsgAlloc), 0u}; + MsgAlloc ma = {size, align}; + SendMsg(m, &ma, true); + + CHECK_EQ(sizeof(MsgPointer), m.len); + MsgPointer mp; + CopyFromV(&mp, m.va, m.len); + + LOG(INFO) << "HexagonSimulator::Alloc -> " << std::hex << mp.va << std::dec; + CHECK_NE(mp.va, 0); + return va2p(mp.va); +} + +void HexagonSimulator::Free(void* ptr) { + LOG(INFO) << "HexagonSimulator::Free(ptr=" << std::hex << ptr << std::dec + << ')'; + if (task_queuing_) { + Message mf = {kFlush, 0, 0}; + SendMsg(mf, 0, true); + } + Message m = {kFree, sizeof(MsgPointer), 0u}; + MsgPointer mp = {p2va(ptr)}; + SendMsg(m, &mp, true); +} + +void HexagonSimulator::CopyDeviceToDevice(void* dst, const void* src, + unsigned len) { + LOG(INFO) << "HexagonSimulator::CopyDeviceToDevice(dst=" << std::hex << dst + << ", src=" << src << ", len=" << std::dec << len << ')'; + CHECK(dst != nullptr && src != nullptr); + Message m = {kCopy, sizeof(MsgCopy), 0u}; + MsgCopy mc = {p2va(dst), p2va(src), len}; + SendMsg(m, &mc, true); +} + +void HexagonSimulator::CopyDeviceToHost(void* host_dst, const void* src, + unsigned len) { + LOG(INFO) << "HexagonSimulator::CopyDeviceToHost(host_dst=" << host_dst + << ", src=" 
<< src << ", len=" << len << ')'; + if (task_queuing_) { + Message mf = {kFlush, 0, 0}; + SendMsg(mf, 0, true); + } + CopyFromV(host_dst, p2va(src), len); +} + +void HexagonSimulator::CopyHostToDevice(void* dst, const void* host_src, + unsigned len) { + LOG(INFO) << "HexagonSimulator::CopyHostToDevice(dst=" << dst + << ", host_src=" << host_src << ", len=" << len << ')'; + CopyToV(p2va(dst), host_src, len); +} + +void* HexagonSimulator::Load(const std::string& data, const std::string& fmt) { + // Load the shared library. + Message m = {kLoad, static_cast(data.size() + 1), 0u}; + SendMsg(m, data.c_str(), false); + + CHECK_EQ(sizeof(MsgPointer), m.len); + MsgPointer mp; + CopyFromV(&mp, m.va, sizeof(mp)); + + return va2p(mp.va); +} + +void HexagonSimulator::Unload(void* mod) { + CHECK(mod); + Message m = {kUnload, sizeof(MsgPointer), 0u}; + MsgPointer mp = {p2va(mod)}; + SendMsg(m, &mp, false); +} + +void* HexagonSimulator::Resolve(const std::string& sym) { + LOG(INFO) << "HexagonSimulator::Resolve(sym=" << sym << ')'; + Message m = {kResolve, static_cast(sym.size() + 1), 0u}; + SendMsg(m, sym.c_str(), true); + + CHECK_EQ(sizeof(MsgPointer), m.len); + MsgPointer mp; + CopyFromV(&mp, m.va, sizeof(mp)); + + LOG(INFO) << "HexagonSimulator::Resolve -> " << std::hex << mp.va + << std::dec; + return va2p(mp.va); +} + +void HexagonSimulator::Call(void* func, uint32_t* scalar, unsigned sc_num, + uint32_t* stack, unsigned st_num) { + LOG(INFO) << "HexagonSimulator::Call(func=" << std::hex << func + << ", scalar=" << scalar << ", sc_num=" << std::dec + << sc_num + // NOLINTNEXTLINE(build/include_what_you_use) + << ", stack=" << std::hex << stack << ", st_num=" << std::dec + << st_num; + + std::vector data; + + // Copy the MsgCall contents into the data vector as a sequence of uints. 
+ MsgCall me = {p2va(func), sc_num, st_num}; + + CHECK((is_multiple_of())); + for (unsigned i = 0, e = sizeof(me) / sizeof(uint32_t); i != e; ++i) + data.push_back(reinterpret_cast(&me)[i]); + + // Append the scalar (register) arguments. + for (unsigned i = 0; i != sc_num; ++i) data.push_back(scalar[i]); + // Append the stack contents. + for (unsigned i = 0; i != st_num; ++i) data.push_back(stack[i]); + + std::ostringstream log_data; + log_data << "data: {" << std::hex; + for (unsigned i = 0, e = static_cast(data.size()); i != e; ++i) + log_data << ' ' << reinterpret_cast(data.data())[i]; + log_data << std::dec << " }" << std::flush; + LOG(INFO) << log_data.str(); + + Message m = {kCall, static_cast(data.size() * sizeof(uint32_t)), + 0u}; + SendMsg(m, data.data(), true); + + if (!task_queuing_) { + Message mf = {kFlush, 0, 0}; + SendMsg(mf, 0, true); + } + + std::vector rv(m.len); + CopyFromV(rv.data(), m.va, m.len); + + std::ostringstream log_rv; + log_rv << "HexagonSimulator::Call -> {" << std::hex; + for (unsigned i = 0, e = std::min(rv.size(), 4u); i != e; ++i) + log_rv << ' ' << std::setw(2) << std::setfill('0') << uint32_t(rv[i]); + if (rv.size() > 4) log_rv << "..."; + log_rv << std::dec << " }"; + LOG(INFO) << log_rv.str(); +} + +} // namespace hexagon +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/hexagon/sim/hexagon_sim_proto.h b/src/runtime/hexagon/sim/hexagon_sim_proto.h new file mode 100644 index 000000000000..ac839199abbe --- /dev/null +++ b/src/runtime/hexagon/sim/hexagon_sim_proto.h @@ -0,0 +1,88 @@ +/*! + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * This Contribution is being provided by Qualcomm Technologies, Inc., + * a Delaware corporation, or its subsidiary Qualcomm Innovation Center, Inc., + * a California corporation, under certain additional terms and conditions + * pursuant to Section 5 of the Apache 2.0 license. In this regard, with + * respect to this Contribution, the term "Work" in Section 1 of the + * Apache 2.0 license means only the specific subdirectory within the TVM repo + * (currently at https://github.com/dmlc/tvm) to which this Contribution is + * made. + * In any case, this submission is "Not a Contribution" with respect to its + * permitted use with any of the "vta" and "verilog" subdirectories in the TVM + * repo. + * Qualcomm Technologies, Inc. and Qualcomm Innovation Center, Inc. retain + * copyright of their respective Contributions. 
+ */ + +#ifndef TVM_RUNTIME_HEXAGON_SIM_HEXAGON_SIM_PROTO_H_ +#define TVM_RUNTIME_HEXAGON_SIM_HEXAGON_SIM_PROTO_H_ + +// Protocol: + +// Host >-- [ code:MsgReq, len:amount requested, va:_ ] --> Remote +// Host <-- [ code:MsqAck, len:amount provided, va:address ] --< Remote +// Host >-- [ code:message, len:payload length, va:address ] --> Remote +// Host <-- [ code:None, len:response length, va:address ] --< Remote + +enum : uint32_t { + kNone, + kMsgReq, + kMsgAck, + kAlloc, + kFree, + kCopy, + kLoad, + kUnload, + kResolve, + kCall, + kFlush +}; + +struct Message { + uint32_t code; + uint32_t len; + uint32_t va; +} __attribute__((packed)); + +struct MsgAlloc { + uint32_t size; + uint32_t align; +} __attribute__((packed)); + +struct MsgPointer { + uint32_t va; +} __attribute__((packed)); + +struct MsgCopy { + uint32_t dst; + uint32_t src; + uint32_t len; +} __attribute__((packed)); + +struct MsgCall { + uint32_t func_va; // offset: 0 + uint32_t scalar_num; // 4 + uint32_t stack_num; // 8 + uint32_t data[]; // 12 +} __attribute__((packed)); + +#endif // TVM_RUNTIME_HEXAGON_SIM_HEXAGON_SIM_PROTO_H_