From 40e5672507f3d26394e96a1f76ae0b6edb45ab23 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Thu, 12 Nov 2020 22:40:21 -0800 Subject: [PATCH 01/60] refactor RPCSessionContext utils --- include/tvm/runtime/device_api.h | 54 ++++++++++++++-- src/runtime/rpc/rpc_device_api.cc | 35 ++++------ src/runtime/rpc/rpc_endpoint.cc | 2 +- src/runtime/rpc/rpc_module.cc | 104 +++++++++++++++++------------- 4 files changed, 122 insertions(+), 73 deletions(-) diff --git a/include/tvm/runtime/device_api.h b/include/tvm/runtime/device_api.h index c6a2ce3d28d0..40989e4057e0 100644 --- a/include/tvm/runtime/device_api.h +++ b/include/tvm/runtime/device_api.h @@ -240,13 +240,57 @@ inline const char* DeviceName(int type) { } } +/*! + * \brief Return true if a TVMContext is owned by an RPC session. + */ +inline bool IsRPCSessionContext(TVMContext ctx) { + return (ctx.device_type / kRPCSessMask) > 0; +} + +/*! + * \brief Return the RPCSessTable index of the RPC Session that owns this context. + * \return the table index. + */ +inline int GetRPCSessionIndex(TVMContext ctx) { + ICHECK(IsRPCSessionContext(ctx)) << "GetRPCSessionIndex: ctx has no RPC session"; + return ctx.device_type / kRPCSessMask - 1; +} + +/*! + * \brief Remove the RPC session mask from a TVMContext. + * RPC clients typically do this when encoding a TVMContext for transmission to an RPC remote. + * On the wire, RPCContext are expected to be valid on the server without interpretation. + * \param ctx A TVMContext with non-zero RPC Session mask, valid on the RPC client. + * \return A TVMContext without any RPC Session mask, valid on the RPC server. + */ +inline TVMContext RemoveRPCSessionMask(TVMContext ctx) { + ctx.device_type = static_cast(ctx.device_type % kRPCSessMask); + return ctx; +} + +inline std::ostream& operator<<(std::ostream& os, DLContext ctx); + +/*! + * \brief Add a RPC session mask to a TVMContext. + * RPC clients typically do this when decoding a TVMContext received from a RPC remote. + * \param ctx A TVMContext without any RPC Session mask, valid on the RPC server. + * \param session_table_index Numeric index of the RPC session in the session table. + * \return A TVMContext with RPC session mask added, valid on the RPC client. 
+ */ +inline TVMContext AddRPCSessionMask(TVMContext ctx, int session_table_index) { + CHECK(!IsRPCSessionContext(ctx)) + << "AddRPCSessionMask: ctx already non-zero RPCSessionIndex: " << ctx; + ctx.device_type = static_cast( + ctx.device_type | (kRPCSessMask * (session_table_index + 1))); + return ctx; +} + inline std::ostream& operator<<(std::ostream& os, DLContext ctx) { // NOLINT(*) - int device_type = static_cast(ctx.device_type); - if (device_type > kRPCSessMask) { - os << "remote[" << (device_type / kRPCSessMask) << "]-"; - device_type = device_type % kRPCSessMask; + if (IsRPCSessionContext(ctx)) { + os << "remote[" << GetRPCSessionIndex(ctx) << "]-"; + ctx = RemoveRPCSessionMask(ctx); } - os << runtime::DeviceName(device_type) << "(" << ctx.device_id << ")"; + os << runtime::DeviceName(static_cast(ctx.device_type)) << "(" << ctx.device_id << ")"; return os; } } // namespace runtime diff --git a/src/runtime/rpc/rpc_device_api.cc b/src/runtime/rpc/rpc_device_api.cc index 943990fd9585..a1e96e92b4e0 100644 --- a/src/runtime/rpc/rpc_device_api.cc +++ b/src/runtime/rpc/rpc_device_api.cc @@ -34,19 +34,19 @@ namespace runtime { class RPCDeviceAPI final : public DeviceAPI { public: void SetDevice(TVMContext ctx) final { - auto remote_ctx = RemoveSessMask(ctx); + auto remote_ctx = RemoveRPCSessionMask(ctx); GetSess(ctx)->GetDeviceAPI(remote_ctx)->SetDevice(remote_ctx); } void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) final { - auto remote_ctx = RemoveSessMask(ctx); + auto remote_ctx = RemoveRPCSessionMask(ctx); GetSess(ctx)->GetDeviceAPI(remote_ctx)->GetAttr(remote_ctx, kind, rv); } void* AllocDataSpace(TVMContext ctx, size_t nbytes, size_t alignment, DLDataType type_hint) final { auto sess = GetSess(ctx); - auto remote_ctx = RemoveSessMask(ctx); + auto remote_ctx = RemoveRPCSessionMask(ctx); void* data = sess->GetDeviceAPI(remote_ctx)->AllocDataSpace(remote_ctx, nbytes, alignment, type_hint); @@ -57,7 +57,7 @@ class RPCDeviceAPI final : public DeviceAPI { } void FreeDataSpace(TVMContext ctx, void* ptr) final { RemoteSpace* space = static_cast(ptr); - auto remote_ctx = RemoveSessMask(ctx); + auto remote_ctx = RemoveRPCSessionMask(ctx); try { GetSess(ctx)->GetDeviceAPI(remote_ctx)->FreeDataSpace(remote_ctx, space->data); } catch (const dmlc::Error& e) { @@ -68,13 +68,11 @@ class RPCDeviceAPI final : public DeviceAPI { void CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset, size_t size, TVMContext ctx_from, TVMContext ctx_to, DLDataType type_hint, TVMStreamHandle stream) final { - int from_dev_type = ctx_from.device_type; - int to_dev_type = ctx_to.device_type; - if (from_dev_type > kRPCSessMask && to_dev_type > kRPCSessMask) { + if (IsRPCSessionContext(ctx_from) && IsRPCSessionContext(ctx_to)) { ICHECK(ctx_from.device_type == ctx_to.device_type) << "Cannot copy across two different remote session"; - auto remote_ctx_from = RemoveSessMask(ctx_from); - auto remote_ctx_to = RemoveSessMask(ctx_to); + auto remote_ctx_from = RemoveRPCSessionMask(ctx_from); + auto remote_ctx_to = RemoveRPCSessionMask(ctx_to); auto remote_ctx = remote_ctx_from; if (remote_ctx.device_type == kDLCPU) remote_ctx = remote_ctx_to; GetSess(ctx_from) @@ -82,12 +80,12 @@ class RPCDeviceAPI final : public DeviceAPI { ->CopyDataFromTo(static_cast(from)->data, from_offset, static_cast(to)->data, to_offset, size, remote_ctx_from, remote_ctx_to, type_hint, stream); - } else if (from_dev_type > kRPCSessMask && to_dev_type == kDLCPU) { - auto remote_ctx_from = 
RemoveSessMask(ctx_from); + } else if (IsRPCSessionContext(ctx_from) && ctx_to.device_type == kDLCPU) { + auto remote_ctx_from = RemoveRPCSessionMask(ctx_from); GetSess(ctx_from)->CopyFromRemote(static_cast(from)->data, from_offset, to, to_offset, size, remote_ctx_from, type_hint); - } else if (from_dev_type == kDLCPU && to_dev_type > kRPCSessMask) { - auto remote_ctx_to = RemoveSessMask(ctx_to); + } else if (ctx_from.device_type == kDLCPU && IsRPCSessionContext(ctx_to)) { + auto remote_ctx_to = RemoveRPCSessionMask(ctx_to); GetSess(ctx_to)->CopyToRemote(const_cast(from), from_offset, static_cast(to)->data, to_offset, size, remote_ctx_to, type_hint); @@ -97,22 +95,15 @@ class RPCDeviceAPI final : public DeviceAPI { } void StreamSync(TVMContext ctx, TVMStreamHandle stream) final { - auto remote_ctx = RemoveSessMask(ctx); + auto remote_ctx = RemoveRPCSessionMask(ctx); GetSess(ctx)->GetDeviceAPI(remote_ctx)->StreamSync(remote_ctx, stream); } private: std::shared_ptr GetSess(TVMContext ctx) { - int dev_type = ctx.device_type; - ICHECK_GE(dev_type, kRPCSessMask); - int tbl_index = dev_type / kRPCSessMask - 1; + int tbl_index = GetRPCSessionIndex(ctx); return RPCSession::Get(tbl_index); } - - static TVMContext RemoveSessMask(TVMContext ctx) { - ctx.device_type = static_cast(ctx.device_type % kRPCSessMask); - return ctx; - } }; TVM_REGISTER_GLOBAL("device_api.rpc").set_body([](TVMArgs args, TVMRetValue* rv) { diff --git a/src/runtime/rpc/rpc_endpoint.cc b/src/runtime/rpc/rpc_endpoint.cc index b8c2a3bb0b97..fbdd93fb4f62 100644 --- a/src/runtime/rpc/rpc_endpoint.cc +++ b/src/runtime/rpc/rpc_endpoint.cc @@ -178,7 +178,7 @@ class RPCEndpoint::EventHandler : public dmlc::Stream { << args[i].AsObjectRef()->GetTypeKey() << " is not supported by RPC"; } else if (tcode == kTVMContext) { DLContext ctx = args[i]; - ICHECK_LT(static_cast(ctx.device_type), kRPCSessMask) + ICHECK(!IsRPCSessionContext(ctx)) << "InternalError: cannot pass RPC context in the channel"; } } diff --git a/src/runtime/rpc/rpc_module.cc b/src/runtime/rpc/rpc_module.cc index a3d888e927ed..7f810a229887 100644 --- a/src/runtime/rpc/rpc_module.cc +++ b/src/runtime/rpc/rpc_module.cc @@ -36,6 +36,52 @@ namespace tvm { namespace runtime { + +// deleter of RPC remote array +static void RemoteNDArrayDeleter(Object* obj) { + auto* ptr = static_cast(obj); + RemoteSpace* space = static_cast(ptr->dl_tensor.data); + space->sess->FreeHandle(ptr->manager_ctx, kTVMNDArrayHandle); + delete space; + delete ptr; +} + +/*! + * \brief Build a local NDArray with remote backing storage. + * \param handle A pointer valid on the remote end which should form the `data` field of the + * underlying DLTensor. + * \param shape The shape field of this DLTensor. + * \param ndim The rank of this DLTensor. + * \param ctx Remote context used with this tensor. Must have non-zero RPCSessMask. + * \param deleter A function invoked when the local NDArray object is no longer used. If `handle` + * needs to be explicitly deleted after the NDArray is freed, this function should do that. + * \param deleter_ctx An opaque pointer passed to deleter to identify the tensor being deleted. 
+ */ +NDArray NDArrayFromRemoteOpaqueHandle(void* handle, int64_t* shape, int64_t ndim, DLContext* ctx, FDeleter deleter, void* deleter_ctx) { + NDArray::Container* data = new NDArray::Container(); + data->manager_ctx = deleter_ctx; + data->SetDeleter(deleter); + RemoteSpace* space = new RemoteSpace(); + space->sess = sess_; + space->data = tensor->data; + data->dl_tensor.data = space; + NDArray ret(GetObjectPtr(data)); + // RAII now in effect + data->shape_ = std::vector(tensor->shape, tensor->shape + tensor->ndim); + data->dl_tensor.shape = dmlc::BeginPtr(data->shape_); + data->dl_tensor.ndim = static_cast(data->shape_.size()); + // setup dtype + data->dl_tensor.dtype = tensor->dtype; + // setup ctx + data->dl_tensor.ctx = ctx; + // check strides. + ICHECK(tensor->strides == nullptr); + // setup byteoffset + data->dl_tensor.byte_offset = tensor->byte_offset; + return ret; +} + + /*! * \brief A wrapped remote function as a PackedFunc. */ @@ -108,47 +154,10 @@ class RPCWrappedFunc : public Object { // remove a remote session mask TVMContext RemoveSessMask(TVMContext ctx) const { - int dev_type = ctx.device_type; - ICHECK_EQ(dev_type / kRPCSessMask, sess_->table_index() + 1) - << "Can not pass in local context or context with a different remote session"; - ctx.device_type = static_cast(ctx.device_type % kRPCSessMask); - return ctx; - } - - // deleter of RPC remote array - static void RemoteNDArrayDeleter(Object* obj) { - auto* ptr = static_cast(obj); - RemoteSpace* space = static_cast(ptr->dl_tensor.data); - space->sess->FreeHandle(ptr->manager_ctx, kTVMNDArrayHandle); - delete space; - delete ptr; - } - - // wrap return value as remote NDArray. - NDArray WrapRemoteNDArray(DLTensor* tensor, void* nd_handle) const { - NDArray::Container* data = new NDArray::Container(); - data->manager_ctx = nd_handle; - data->SetDeleter(RemoteNDArrayDeleter); - RemoteSpace* space = new RemoteSpace(); - space->sess = sess_; - space->data = tensor->data; - data->dl_tensor.data = space; - NDArray ret(GetObjectPtr(data)); - // RAII now in effect - data->shape_ = std::vector(tensor->shape, tensor->shape + tensor->ndim); - data->dl_tensor.shape = dmlc::BeginPtr(data->shape_); - data->dl_tensor.ndim = static_cast(data->shape_.size()); - // setup dtype - data->dl_tensor.dtype = tensor->dtype; - // setup ctx, encode as remote session - data->dl_tensor.ctx.device_id = tensor->ctx.device_id; - data->dl_tensor.ctx.device_type = static_cast( - static_cast(tensor->ctx.device_type) + kRPCSessMask * (sess_->table_index() + 1)); - // check strides. - ICHECK(tensor->strides == nullptr); - // setup byteoffset - data->dl_tensor.byte_offset = tensor->byte_offset; - return ret; + ICHECK(IsRPCSessionContext(ctx)) << "Can not pass in local context"; + ICHECK_EQ(GetRPCSessionIndex(ctx), sess_->table_index()) + << "Can not pass in context with a different remote session"; + return RemoveRPCSessionMask(ctx); } }; @@ -189,10 +198,9 @@ class RPCModuleNode final : public ModuleNode { int min_repeat_ms, const std::string& f_preproc_name) { InitRemoteFunc(&remote_get_time_evaluator_, "runtime.RPCTimeEvaluator"); // Remove session mask because we pass ctx by parts. 
- int dev_type = ctx.device_type; - ICHECK_EQ(dev_type / kRPCSessMask, sess_->table_index() + 1) + ICHECK_EQ(GetRPCSessionIndex(ctx), sess_->table_index()) << "ValueError: Need to pass the matched remote context to RPCModule.GetTimeEvaluator"; - ctx.device_type = static_cast(ctx.device_type % kRPCSessMask); + ctx = RemoveRPCSessionMask(ctx); if (module_handle_ != nullptr) { return remote_get_time_evaluator_(GetRef(this), name, @@ -283,7 +291,7 @@ void RPCWrappedFunc::WrapRemoteReturnToValue(TVMArgs args, TVMRetValue* rv) cons ICHECK_EQ(args.size(), 3); DLTensor* tensor = args[1]; void* nd_handle = args[2]; - *rv = WrapRemoteNDArray(tensor, nd_handle); + *rv = NDArrayFromRemoteOpaqueHandle(tensor->data, tensor->shape, tensor->ndim, AddRPCSessionMask(ctx, sess_->table_index()), RemoteNDArrayDeleter, nd_handle); } else { ICHECK_EQ(args.size(), 2); *rv = args[1]; @@ -469,5 +477,11 @@ TVM_REGISTER_GLOBAL("rpc.SessTableIndex").set_body([](TVMArgs args, TVMRetValue* *rv = static_cast(m.operator->())->sess()->table_index(); }); +TVM_REGISTER_GLOBAL("tvm.rpc.wrap_remote_ndarray").set_body_typed([](void* remote_array, PackedFunc deleter) { + *rv = WrapRemoteNDArray(remote_array, [pf](Object* ctx) { + pf(); + }); +}); + } // namespace runtime } // namespace tvm From 99ef7e4496445c67a2be37f8399eb066fccc4339 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 13 Nov 2020 09:35:06 -0800 Subject: [PATCH 02/60] Make TVMLogf platform-independent. * Some platforms need to use an alternate printf() to support basic things like %zu. Since %zu is platform-specific, we prefer to use a printf() that supports it or allow the platform to fix it up as needed. --- include/tvm/runtime/crt/platform.h | 17 +++++++++++++++++ src/runtime/crt/host/main.cc | 5 +++++ src/runtime/crt/utvm_rpc_server/rpc_server.cc | 3 ++- tests/micro/qemu/zephyr-runtime/src/main.c | 5 +++++ 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/include/tvm/runtime/crt/platform.h b/include/tvm/runtime/crt/platform.h index 782060dfd000..3eac45f64e4e 100644 --- a/include/tvm/runtime/crt/platform.h +++ b/include/tvm/runtime/crt/platform.h @@ -25,6 +25,8 @@ #ifndef TVM_RUNTIME_CRT_PLATFORM_H_ #define TVM_RUNTIME_CRT_PLATFORM_H_ +#include +#include #include #ifdef __cplusplus @@ -39,6 +41,21 @@ extern "C" { */ void __attribute__((noreturn)) TVMPlatformAbort(tvm_crt_error_t code); +/*! \brief Called by the microTVM RPC server to implement TVMLogf. + * + * Not required to be implemented when the RPC server is not linked into the binary. This + * function's signature matches that of vsnprintf, so trivial implementations can just call + * vsnprintf. + * + * \param out_buf A char buffer where the formatted string should be written. + * \param out_buf_size_bytes Number of bytes available for writing in out_buf. + * \param fmt The printf-style formatstring. + * \param args extra arguments to be formatted. + * \return number of bytes written. 
+ */ +size_t TVMPlatformFormatMessage(char* out_buf, size_t out_buf_size_bytes, + const char* fmt, va_list args); + #ifdef __cplusplus } // extern "C" #endif diff --git a/src/runtime/crt/host/main.cc b/src/runtime/crt/host/main.cc index 5623b2515585..60797c39b41d 100644 --- a/src/runtime/crt/host/main.cc +++ b/src/runtime/crt/host/main.cc @@ -43,6 +43,11 @@ ssize_t UTvmWriteFunc(void* context, const uint8_t* data, size_t num_bytes) { return to_return; } +size_t TVMPlatformFormatMessage(char* out_buf, size_t out_buf_size_bytes, + const char* fmt, va_list args) { + return vsnprintf(out_buf, out_buf_size_bytes, fmt, args); +} + void TVMPlatformAbort(tvm_crt_error_t error_code) { std::cerr << "TVMPlatformAbort: " << error_code << std::endl; throw "Aborted"; diff --git a/src/runtime/crt/utvm_rpc_server/rpc_server.cc b/src/runtime/crt/utvm_rpc_server/rpc_server.cc index 34eff6a3270d..84930866367e 100644 --- a/src/runtime/crt/utvm_rpc_server/rpc_server.cc +++ b/src/runtime/crt/utvm_rpc_server/rpc_server.cc @@ -219,7 +219,8 @@ void TVMLogf(const char* format, ...) { va_list args; char log_buffer[256]; va_start(args, format); - size_t num_bytes_logged = vsnprintf(log_buffer, sizeof(log_buffer), format, args); + size_t num_bytes_logged = TVMPlatformFormatMessage( + log_buffer, sizeof(log_buffer), format, args); va_end(args); // Most header-based logging frameworks tend to insert '\n' at the end of the log message. diff --git a/tests/micro/qemu/zephyr-runtime/src/main.c b/tests/micro/qemu/zephyr-runtime/src/main.c index 19e72e1c076d..91b13de7d04d 100644 --- a/tests/micro/qemu/zephyr-runtime/src/main.c +++ b/tests/micro/qemu/zephyr-runtime/src/main.c @@ -57,6 +57,11 @@ ssize_t write_serial(void* unused_context, const uint8_t* data, size_t size) { return size; } +size_t TVMPlatformFormatMessage(char* out_buf, size_t out_buf_size_bytes, + const char* fmt, va_list args) { + return vsnprintk(out_buf, out_buf_size_bytes, fmt, args); +} + void TVMPlatformAbort(tvm_crt_error_t error) { sys_reboot(SYS_REBOOT_COLD); for (;;) From b9db1471303fc33cc02d69ad12a097b7130a2071 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 9 Nov 2020 09:14:29 -0800 Subject: [PATCH 03/60] test pass, make runtime part work (wip) --- include/tvm/runtime/module.h | 4 + python/tvm/relay/param_dict.py | 17 ++ src/relay/backend/build_module.cc | 29 +++ src/runtime/graph/graph_runtime.cc | 43 +++- src/runtime/graph/graph_runtime.h | 7 +- src/target/llvm/codegen_llvm.cc | 102 ++++++++++ src/target/llvm/codegen_params.cc | 184 ++++++++++++++++++ src/target/llvm/llvm_module.cc | 7 +- .../unittest/test_target_codegen_llvm.py | 3 + 9 files changed, 390 insertions(+), 6 deletions(-) create mode 100644 src/target/llvm/codegen_params.cc diff --git a/include/tvm/runtime/module.h b/include/tvm/runtime/module.h index 0e7cd2b08784..0e9266b17c74 100644 --- a/include/tvm/runtime/module.h +++ b/include/tvm/runtime/module.h @@ -226,6 +226,10 @@ constexpr const char* tvm_global_barrier_state = "__tvm_global_barrier_state"; constexpr const char* tvm_prepare_global_barrier = "__tvm_prepare_global_barrier"; /*! \brief Placeholder for the module's entry function. */ constexpr const char* tvm_module_main = "__tvm_main__"; +/*! \brief Prefix for parameter symbols emitted into the main program. */ +constexpr const char* tvm_param_prefix = "__tvm_param__"; +/*! \brief A PackedFunc that looks up linked parameters by storage_id. 
*/ +constexpr const char* tvm_lookup_linked_param = "__lookup_linked_param"; } // namespace symbol // implementations of inline functions. diff --git a/python/tvm/relay/param_dict.py b/python/tvm/relay/param_dict.py index 2d0398e20486..463eae51d7b8 100644 --- a/python/tvm/relay/param_dict.py +++ b/python/tvm/relay/param_dict.py @@ -16,6 +16,7 @@ # under the License. # pylint: disable=invalid-name """Helper utility to save parameter dicts.""" +import json import tvm import tvm._ffi @@ -76,3 +77,19 @@ def load_param_dict(param_bytes): param_bytes = bytearray(param_bytes) load_arr = _load_param_dict(param_bytes) return {v.name: v.array for v in load_arr} + + +def linkable_param_dict(graph_json, params, target): + graph = json.loads(graph_json) + data_by_sid = [None] * len(params) + for param_name, param in params.items(): + for node in graph['nodes']: + if node['name'] == param_name: + sid = node['storage_id'] + data_by_sid[sid] = param + + # GraphRuntimeCodegen is expected to allocated the first len(params) storage_ids to contain + # parameters. + assert all(d is not None for d in data_by_sid) + + data_ diff --git a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc index ddea5456585b..152bbde2ee46 100644 --- a/src/relay/backend/build_module.cc +++ b/src/relay/backend/build_module.cc @@ -443,6 +443,35 @@ class RelayBuildModule : public runtime::ModuleNode { auto lowered_funcs = graph_codegen_->GetIRModule(); + Target target_host = GetTargetHost(); + // If no target_host has been set, we choose a default one, which is + // llvm if "codegen.LLVMModuleCreate" is accessible. + const runtime::PackedFunc* pf = runtime::Registry::Get("codegen.LLVMModuleCreate"); + if (!target_host.defined()) + target_host = (pf != nullptr) ? Target("llvm") : Target("stackvm"); + + if (target_host->GetAttr("link-params").value_or(Bool(false))) { + CHECK(pf != nullptr) << "Unable to link-params with no target_host and no llvm codegen."; + auto param_ids = graph_codegen_->GetParamIds(); + auto link_params = Map(); + for (auto param : ret_.params) { + link_params.Set( + param.first, tir::LinkedParam(param_ids[param.first], param.second)); + } + + Map dict; + dict.Set(tvm::tir::attr::kLinkedParams, link_params); + dict.Set(tvm::attr::kGlobalSymbol, String(::tvm::target::packed_func::kLookupLinkedParam)); + DictAttrs attrs{dict}; + auto prim = tir::PrimFunc( + Array(), tir::SeqStmt(Array()), VoidType(), Map(), attrs); + if (lowered_funcs.find(target_host->str()) == lowered_funcs.end()) { + lowered_funcs.Set(target_host->str(), IRModule(Map({}))); + } + lowered_funcs[target_host->str()]->Add( + GlobalVar(::tvm::target::packed_func::kLookupLinkedParam), prim); + } + // When there is no lowered_funcs due to reasons such as optimization. 
if (lowered_funcs.size() == 0) { Target target_host = GetTargetHost(); diff --git a/src/runtime/graph/graph_runtime.cc b/src/runtime/graph/graph_runtime.cc index 45a36900b586..fdf4ee8baf8a 100644 --- a/src/runtime/graph/graph_runtime.cc +++ b/src/runtime/graph/graph_runtime.cc @@ -72,6 +72,7 @@ void GraphRuntime::Init(const std::string& graph_json, tvm::runtime::Module modu this->Load(&reader); module_ = module; ctxs_ = ctxs; + this->SetupLinkedParams(); this->SetupStorage(); this->SetupOpExecs(); for (size_t i = 0; i < input_nodes_.size(); i++) { @@ -244,7 +245,15 @@ void GraphRuntime::ShareParams(const GraphRuntime& other, dmlc::Stream* strm) { this->SetupOpExecs(); } +void GraphRuntime::PreAllocatedDeleter(void* ctx) { + delete ctx; +} + void GraphRuntime::SetupStorage() { + // Get pre-linked parameter lookup function, if it was generated. When pf == nullptr, no linked + // params are present. + tvm::runtime::PackedFunc pf = module_.GetFunction(::tvm::runtime::module::kLookupLinkedParam, true); + // Grab saved optimization plan from graph. std::vector vtype; for (const std::string& s_type : attrs_.dltype) { @@ -254,6 +263,8 @@ void GraphRuntime::SetupStorage() { // Size and device type of each storage pool entry. std::vector pool_entry; // Find the maximum space size. + int node_index = 0; + int node_output = 0; for (size_t i = 0; i < attrs_.shape.size(); ++i) { int storage_id = attrs_.storage_id[i]; // Use the fallback device if no device index is available. @@ -278,21 +289,41 @@ void GraphRuntime::SetupStorage() { ICHECK(pool_entry[sid].device_type == -1 || pool_entry[sid].device_type == device_type) << "The same pool entry cannot be assigned to multiple devices"; } + if (pf != nullptr && pool_entry[sid] == nullptr) { + try { + pool_entry[sid].pre_linked_param = pf(sid); + pool_entry[sid].param_data_entry = i; + } except (std::runtime_error& e) { + // Indicates this storage_id is not pre-linked. + } + } pool_entry[sid].size = std::max(pool_entry[sid].size, bytes); pool_entry[sid].device_type = device_type; } // Allocate the space. for (const auto& pit : pool_entry) { - std::vector shape; // This for loop is very fast since there are usually only a couple of // devices available on the same hardware. const auto& cit = std::find_if(ctxs_.begin(), ctxs_.end(), [&pit](const TVMContext& c) { return pit.device_type == static_cast(c.device_type); }); TVMContext ctx = cit == ctxs_.end() ? ctxs_[0] : *cit; - shape.push_back(static_cast(pit.size + 3) / 4); - storage_pool_.push_back(NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx)); + if (pit.pre_linked_param != nullptr) { + auto param_entry = data_entry_[pit.param_data_entry]; + DLTensor* param_tensor = new DLTensor{ + pit.preq_linked_param, ctx, vtype[pit.param_data_entry], + param_entry.size(), nullptr, 0}; + + storage_pool_.push_back( + NDArray::FromDLManagedTensor( + DLManagedTensor{param_tensor, param_tensor, PreAllocatedDeleter})); + + } else { + std::vector shape; + shape.push_back(static_cast(pit.size + 3) / 4); + storage_pool_.push_back(NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx)); + } } // Assign the pooled entries. 
A unified memory pool is used to simplifiy @@ -303,7 +334,11 @@ void GraphRuntime::SetupStorage() { for (size_t i = 0; i < data_entry_.size(); ++i) { int storage_id = attrs_.storage_id[i]; ICHECK_LT(static_cast(storage_id), storage_pool_.size()); - data_entry_[i] = storage_pool_[storage_id].CreateView(attrs_.shape[i], vtype[i]); + auto pool_entry = storage_pool_[storage_id].CreateView(attrs_.shape[i], vtype[i]); + if (pool_entry.get() != nullptr) { + data_entry_[i] = pool_entry.get(); + } + const DLTensor* tmp = data_entry_[i].operator->(); data_alignment_[i] = details::GetDataAlignment(*tmp); } diff --git a/src/runtime/graph/graph_runtime.h b/src/runtime/graph/graph_runtime.h index 810ff43fe97a..d687ab4b3615 100644 --- a/src/runtime/graph/graph_runtime.h +++ b/src/runtime/graph/graph_runtime.h @@ -182,7 +182,10 @@ class TVM_DLL GraphRuntime : public ModuleNode { struct PoolEntry { size_t size; int device_type; - PoolEntry(int s, int dev_type) : size(s), device_type(dev_type) {} + void* pre_linked_param; + int param_data_entry; + PoolEntry(int s, int dev_type, std::unique_ptr pre_linked_param) : + size(s), device_type(dev_type), pre_linked_param(std::move(pre_linked_param)) {} }; // Node entry struct NodeEntry { @@ -363,6 +366,8 @@ class TVM_DLL GraphRuntime : public ModuleNode { } ICHECK_EQ(bitmask, 1 | 2 | 4 | 8 | 16) << "invalid format"; } + /*! \brief Setup pre-linked parameters. */ + void SetupLinkedParams(); /*! \brief Setup the temporal storage */ void SetupStorage(); /*! \brief Setup the executors. */ diff --git a/src/target/llvm/codegen_llvm.cc b/src/target/llvm/codegen_llvm.cc index faa483d019c0..2604f5c50ddd 100644 --- a/src/target/llvm/codegen_llvm.cc +++ b/src/target/llvm/codegen_llvm.cc @@ -184,6 +184,108 @@ void CodeGenLLVM::AddFunctionInternal(const PrimFunc& f, bool ret_void) { } } +void CodeGenLLVM::LinkParameters(const Map params) { + // It would be nice to de-dupe these declarations frm src/tir/transforms/make_packed_api.cc, + // but they are at a different layer in the compiler... + std::vector param_types; + // args + param_types.push_back(t_void_->getPointerTo(GetGlobalAddressSpace())); + // tcodes + param_types.push_back(t_int_->getPointerTo(GetGlobalAddressSpace())); + // num_args + param_types.push_back(t_int64_); + // ret_args + param_types.push_back(t_void_->getPointerTo(GetGlobalAddressSpace())); + // ret_tcodes + param_types.push_back(t_int_->getPointerTo(GetGlobalAddressSpace())); + // resource_handle + param_types.push_back(t_void_->getPointerTo(GetGlobalAddressSpace())); + + // TODO(tvm-team): + // Update the function type to respect the ret_type field of f. + // Once we allow more flexibility in the PrimFunc. 
+ llvm::FunctionType* ftype = llvm::FunctionType::get(t_int_, param_types, false); + + llvm::Function* function = llvm::Function::Create( + ftype, llvm::Function::ExternalLinkage, + ::tvm::target::packed_func::kLookupLinkedParam, module_.get()); + function->setCallingConv(llvm::CallingConv::C); + function->setDLLStorageClass(llvm::GlobalValue::DLLStorageClassTypes::DLLExportStorageClass); + + llvm::BasicBlock* entry = llvm::BasicBlock::Create(*ctx_, "entry", function); + builder_->SetInsertPoint(entry); + std::vector zero_index_list{{llvm::ConstantInt::get(t_int32_, 0)}}; + auto args_array = builder_->CreateBitCast( + &function->arg_begin()[0], llvm::ArrayType::get(t_void_->getPointerTo(GetGlobalAddressSpace()), 1)); + llvm::Value* sid = + builder_->CreateBitCast( + builder_->CreateLoad(t_void_->getPointerTo(GetGlobalAddressSpace()), + builder_->CreateInBoundsGEP(args_array, zero_index_list)), t_int64_); + // +// builder_->CreateGEP(&function->arg_begin()[0], zero_index_list), t_int64_); + + llvm::BasicBlock* default_block = llvm::BasicBlock::Create(*ctx_, "default_block", function); + llvm::SwitchInst* switch_inst = builder_->CreateSwitch(sid, default_block, params.size() + 1); + + builder_->SetInsertPoint(default_block); + builder_->CreateRet(ConstInt32(kTvmErrorGeneratedInvalidStorageId)); + + llvm::raw_os_ostream os{std::cout}; + + for (auto kv : params) { + auto array = NDArrayToLLVMArray(ctx_, kv.second->param); + std::cout << "param " << kv.first << ": "; + array->print(os); + std::string symbol_name = std::string{::tvm::runtime::symbol::tvm_param_prefix} + kv.first; + llvm::GlobalVariable* param_symbol = new llvm::GlobalVariable( + *module_, array->getType(), true, llvm::GlobalValue::InternalLinkage, + array, symbol_name); + + llvm::BasicBlock* case_block = llvm::BasicBlock::Create(*ctx_, "case_" + symbol_name, function); + switch_inst->addCase( + llvm::cast(llvm::ConstantInt::get(t_int64_, kv.second->id)), + case_block); + builder_->SetInsertPoint(case_block); + auto retval_array = builder_->CreateBitCast( + &function->arg_begin()[3], llvm::ArrayType::get(t_void_->getPointerTo(GetGlobalAddressSpace()), 1)); + builder_->CreateStore( +// param_symbol, + builder_->CreatePointerCast(param_symbol, t_void_->getPointerTo(GetGlobalAddressSpace())), + builder_->CreateGEP(retval_array, zero_index_list)); + auto ret_types_array = builder_->CreateBitCast( + &function->arg_begin()[4], llvm::ArrayType::get(t_int_, 1)); + builder_->CreateStore( + llvm::ConstantInt::get(t_int_, kTVMOpaqueHandle), + builder_->CreateGEP(ret_types_array, zero_index_list)); + builder_->CreateRet(ConstInt32(0)); + } + + std::cout << "generated function: " << std::endl; + function->print(os); + + // llvm::Value* sid_start = module_->getGlobalVariable(module::tvm_param_array_sid_start); + // llvm::Value* cond = builder_->CreateAnd( + // builder_->CreateICmpSGE(sid, sid_start), + // builder_->CreateICmpSLT(sid, + // module_->getGlobalVariable(module::tvm_param_array_sid_end))); + + // BasicBlock* then_block = BasicBlock::Create(*ctx_, "if_then", function_); + // builder_->CreateCondBr(cond, then_block, else_block); + + // // SID valid block (fetch sid data pointer and write to ret_values). 
+ // builder_->SetInsertPoint(then_block); + // std::vector sid_index_list{builder_->CreateISub(sid, sid_start)}; + // builder_->CreateStore( + // builder_->CreateGEP(module_->getGlobalVariable(module::tvm_param_array), sid_index_list), + // builder_->CreateBitCast( + // builder_->CreateGEP(function->getArg(3), zero_index_list), t_int64_ty_)); + // NOTE: set ret_tcode[0] to kTVMOpaqueHandle because the 'data' pointer of a DLTensor is returned + // here, *not* a proper DLTensor. It is up to the caller to create a DLTensor that correctly + // describes the returned data pointer. + + // SID invalid block (return invalid SID error). +} + std::unique_ptr CodeGenLLVM::Finish() { this->AddStartupFunction(); for (size_t i = 0; i < link_modules_.size(); ++i) { diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc new file mode 100644 index 000000000000..7c160cf198e7 --- /dev/null +++ b/src/target/llvm/codegen_params.cc @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file codegen_blob.cc + */ +#ifdef TVM_LLVM_VERSION + +#include "codegen_params.h" + +namespace tvm { +namespace codegen { + +class DLManagedTensorDeleter { + public: + void operator()(DLManagedTensor* ptr) { + ptr->deleter(ptr); + } +}; + +llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime::NDArray arr) { + llvm::Type* element_type = nullptr; + + auto arr_type = arr.DataType(); + CHECK_EQ(arr_type.lanes(), 1) + << "CodegenParams: only support generating 1-lane parameters; saw " << arr_type.lanes(); + + auto shape = arr.Shape(); + int num_elements = 1; + for (auto shape_elem : shape) { + num_elements *= shape_elem; + } + + std::unique_ptr tensor(arr.ToDLPack()); + std::vector elements; + + switch (arr_type.code()) { + case runtime::DataType::kInt: + CHECK(arr_type.bits() == 8 || + arr_type.bits() == 16 || + arr_type.bits() == 32 || + arr_type.bits() == 64) + << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " + << arr_type.bits() << "-bit array"; + element_type = llvm::Type::getIntNTy(*ctx, arr_type.bits()); + + if (arr_type.bits() == 8) { + int8_t* data_buf = static_cast(tensor->dl_tensor.data); + for (int i = 0; i < num_elements; i++) { + std::cout << std::hex << +static_cast(data_buf[i]) << std::dec << " "; + if (((i + 1) % 16) == 0) { + std::cout << std::endl; + } + + elements.emplace_back(llvm::ConstantInt::getSigned(element_type, data_buf[i])); + } + } else if (arr_type.bits() == 16) { + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantInt::getSigned(element_type, ((int16_t*) tensor->dl_tensor.data)[i])); + } + } else if (arr_type.bits() == 32) { + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantInt::getSigned(element_type, ((int32_t*) tensor->dl_tensor.data)[i])); + } + } else if (arr_type.bits() == 64) { + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantInt::getSigned(element_type, ((int64_t*) tensor->dl_tensor.data)[i])); + } + } else { + CHECK(false) << "should not get here"; + } + break; + + case runtime::DataType::TypeCode::kUInt: + CHECK(arr_type.bits() == 8 || + arr_type.bits() == 16 || + arr_type.bits() == 32 || + arr_type.bits() == 64) + << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " + << arr_type.bits() << "-bit array"; + element_type = llvm::Type::getIntNTy(*ctx, arr_type.bits()); + + if (arr_type.bits() == 8) { + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantInt::get(element_type, ((int8_t*) tensor->dl_tensor.data)[i])); + } + } else if (arr_type.bits() == 16) { + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantInt::get(element_type, ((int16_t*) tensor->dl_tensor.data)[i])); + } + } else if (arr_type.bits() == 32) { + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantInt::get(element_type, ((int32_t*) tensor->dl_tensor.data)[i])); + } + } else if (arr_type.bits() == 64) { + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantInt::get(element_type, ((int64_t*) tensor->dl_tensor.data)[i])); + } + } else { + CHECK(false) << "should not get here"; + } + break; + + case runtime::DataType::TypeCode::kFloat: + if (arr_type.bits() == 32) { + element_type = llvm::Type::getFloatTy(*ctx); + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantFP::get(element_type, ((float*) tensor->dl_tensor.data)[i])); + } + } else 
if (arr_type.bits() == 64) { + element_type = llvm::Type::getDoubleTy(*ctx); + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantFP::get(element_type, ((double*) tensor->dl_tensor.data)[i])); + } + } else { + CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " + << arr_type.bits() << "-bit array"; + } + break; + + default: + CHECK(false) << "Data type not supported"; + } + + return llvm::cast( + llvm::ConstantArray::get(llvm::ArrayType::get(element_type, num_elements), + llvm::ArrayRef(elements))); +} + +// void LLVMCodeGenParams(llvm::LLVMContext* ctx, +// llvm::Module* module, +// int64_t storage_id_offset, +// const Map& params, +// const std::unordered_map& param_ids) { +// CHECK_EQ(params.size(), params_ids.size()) +// << "Expect param_names and params_ids to have equal lengths, but params.size() == " +// << params.size() << " and params_ids.size() == " << params_ids.size(); + +// llvm::ArrayType* t_sid_ptr_ty = +// llvm::ArrayType::get(llvm::PointerType::getUnqual(llvm::getVoidTy())); +// std::vector sid_ptrs; +// for (auto kv : params) { + +// sid_ptrs.push_back( +// } + +// llvm::GlobalVaraible* sid_offset_symbol = new llvm::GlobalVariable( +// *module, llvm::Type::getInt64Ty(), true, llvm::GlobalVariable::InternalLinkage, +// llvm::ConstantInt::getSigned(ctx, sid_offset), ::tvm::runtime::symbol::tvm_sid_offset); +// llvm::GlobalVariable* sid_ptrs_symbol = new llvm::GlobalVariable( +// *module, t_sid_ptr_ty, true, llvm::GlobalValue::InternalLinkage, +// llvm::ConstantArray::get(t_sid_ptr_ty, sid_ptrs), ::tvm::runtime::symbol::tvm_param_array); +// } + +} // namespace codegen +} // namespace tvm + +#endif // TVM_LLVM_VERSION diff --git a/src/target/llvm/llvm_module.cc b/src/target/llvm/llvm_module.cc index 569082022852..98857f574bd9 100644 --- a/src/target/llvm/llvm_module.cc +++ b/src/target/llvm/llvm_module.cc @@ -209,7 +209,8 @@ class LLVMModuleNode final : public runtime::ModuleNode { } funcs.push_back(f); } - ICHECK_NE(funcs.size(), 0U); + bool is_link_params = target->GetAttr("link-params").value_or(Bool(false)); + ICHECK(funcs.size() > 0 || is_link_params); // TODO(tqchen): remove the entry function behavior as it does not // makes sense when we start to use multiple modules. cg->Init("TVMMod", tm_.get(), ctx_.get(), system_lib, system_lib, target_c_runtime); @@ -222,6 +223,10 @@ class LLVMModuleNode final : public runtime::ModuleNode { cg->AddMainFunction(entry_func); } + if (is_link_params) { + CHECK(found_linked_params) << "--link-params given, but no parameters given to codegen"; + cg->LinkParameters(linked_params); + } module_ = cg->Finish(); module_->addModuleFlag(llvm::Module::Warning, "tvm_target", llvm::MDString::get(*ctx_, LLVMTargetToString(target))); diff --git a/tests/python/unittest/test_target_codegen_llvm.py b/tests/python/unittest/test_target_codegen_llvm.py index 3599493a74cb..ea2a1f165b30 100644 --- a/tests/python/unittest/test_target_codegen_llvm.py +++ b/tests/python/unittest/test_target_codegen_llvm.py @@ -14,6 +14,9 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import collections +import ctypes +import json import tvm import tvm.testing from tvm import te From 8d62592dabbcedead74a4569c9db65f8142a15bf Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 9 Nov 2020 18:00:16 -0800 Subject: [PATCH 04/60] llvm and c backends work! 
--- include/tvm/runtime/module.h | 2 +- src/relay/backend/build_module.cc | 4 +- src/runtime/graph/graph_runtime.cc | 42 ++-- src/runtime/graph/graph_runtime.h | 8 +- src/target/llvm/codegen_llvm.cc | 4 +- src/target/llvm/codegen_params.cc | 224 +++++++++++++++--- src/target/llvm/codegen_params.h | 48 ++++ src/target/llvm/llvm_module.cc | 16 +- src/target/source/codegen_c_host.cc | 62 +++++ src/target/source/codegen_c_host.h | 3 + .../unittest/test_target_codegen_llvm.py | 2 +- 11 files changed, 350 insertions(+), 65 deletions(-) create mode 100644 src/target/llvm/codegen_params.h diff --git a/include/tvm/runtime/module.h b/include/tvm/runtime/module.h index 0e9266b17c74..04a5cf8bf25d 100644 --- a/include/tvm/runtime/module.h +++ b/include/tvm/runtime/module.h @@ -229,7 +229,7 @@ constexpr const char* tvm_module_main = "__tvm_main__"; /*! \brief Prefix for parameter symbols emitted into the main program. */ constexpr const char* tvm_param_prefix = "__tvm_param__"; /*! \brief A PackedFunc that looks up linked parameters by storage_id. */ -constexpr const char* tvm_lookup_linked_param = "__lookup_linked_param"; +constexpr const char* tvm_lookup_linked_param = "_lookup_linked_param"; } // namespace symbol // implementations of inline functions. diff --git a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc index 152bbde2ee46..cc304808b16f 100644 --- a/src/relay/backend/build_module.cc +++ b/src/relay/backend/build_module.cc @@ -461,7 +461,7 @@ class RelayBuildModule : public runtime::ModuleNode { Map dict; dict.Set(tvm::tir::attr::kLinkedParams, link_params); - dict.Set(tvm::attr::kGlobalSymbol, String(::tvm::target::packed_func::kLookupLinkedParam)); + dict.Set(tvm::attr::kGlobalSymbol, String(::tvm::runtime::symbol::tvm_lookup_linked_param)); DictAttrs attrs{dict}; auto prim = tir::PrimFunc( Array(), tir::SeqStmt(Array()), VoidType(), Map(), attrs); @@ -469,7 +469,7 @@ class RelayBuildModule : public runtime::ModuleNode { lowered_funcs.Set(target_host->str(), IRModule(Map({}))); } lowered_funcs[target_host->str()]->Add( - GlobalVar(::tvm::target::packed_func::kLookupLinkedParam), prim); + GlobalVar(::tvm::runtime::symbol::tvm_lookup_linked_param), prim); } // When there is no lowered_funcs due to reasons such as optimization. diff --git a/src/runtime/graph/graph_runtime.cc b/src/runtime/graph/graph_runtime.cc index fdf4ee8baf8a..c64f773f5157 100644 --- a/src/runtime/graph/graph_runtime.cc +++ b/src/runtime/graph/graph_runtime.cc @@ -72,7 +72,6 @@ void GraphRuntime::Init(const std::string& graph_json, tvm::runtime::Module modu this->Load(&reader); module_ = module; ctxs_ = ctxs; - this->SetupLinkedParams(); this->SetupStorage(); this->SetupOpExecs(); for (size_t i = 0; i < input_nodes_.size(); i++) { @@ -245,14 +244,16 @@ void GraphRuntime::ShareParams(const GraphRuntime& other, dmlc::Stream* strm) { this->SetupOpExecs(); } -void GraphRuntime::PreAllocatedDeleter(void* ctx) { - delete ctx; +void GraphRuntime::PreAllocatedDLTensorDeleter(DLManagedTensor* tensor) { + // ctx is the DLTensor which needs to get deleted. The data member points to global const memory. + delete reinterpret_cast(tensor); } void GraphRuntime::SetupStorage() { // Get pre-linked parameter lookup function, if it was generated. When pf == nullptr, no linked // params are present. 
- tvm::runtime::PackedFunc pf = module_.GetFunction(::tvm::runtime::module::kLookupLinkedParam, true); + tvm::runtime::PackedFunc pf = module_.GetFunction( + ::tvm::runtime::symbol::tvm_lookup_linked_param, true); // Grab saved optimization plan from graph. std::vector vtype; @@ -263,8 +264,6 @@ void GraphRuntime::SetupStorage() { // Size and device type of each storage pool entry. std::vector pool_entry; // Find the maximum space size. - int node_index = 0; - int node_output = 0; for (size_t i = 0; i < attrs_.shape.size(); ++i) { int storage_id = attrs_.storage_id[i]; // Use the fallback device if no device index is available. @@ -289,14 +288,14 @@ void GraphRuntime::SetupStorage() { ICHECK(pool_entry[sid].device_type == -1 || pool_entry[sid].device_type == device_type) << "The same pool entry cannot be assigned to multiple devices"; } - if (pf != nullptr && pool_entry[sid] == nullptr) { + if (pf != nullptr && pool_entry[sid].pre_linked_param == nullptr) { try { pool_entry[sid].pre_linked_param = pf(sid); - pool_entry[sid].param_data_entry = i; - } except (std::runtime_error& e) { + } catch (std::runtime_error& e) { // Indicates this storage_id is not pre-linked. } } + pool_entry[sid].param_data_entry = i; pool_entry[sid].size = std::max(pool_entry[sid].size, bytes); pool_entry[sid].device_type = device_type; } @@ -310,16 +309,20 @@ void GraphRuntime::SetupStorage() { }); TVMContext ctx = cit == ctxs_.end() ? ctxs_[0] : *cit; if (pit.pre_linked_param != nullptr) { - auto param_entry = data_entry_[pit.param_data_entry]; - DLTensor* param_tensor = new DLTensor{ - pit.preq_linked_param, ctx, vtype[pit.param_data_entry], - param_entry.size(), nullptr, 0}; - - storage_pool_.push_back( - NDArray::FromDLManagedTensor( - DLManagedTensor{param_tensor, param_tensor, PreAllocatedDeleter})); + LOG(INFO) << "param " << pit.param_data_entry << " pre-loaded!"; + auto param_shape = &attrs_.shape[pit.param_data_entry]; + DLManagedTensor* param_tensor = new DLManagedTensor{ + {pit.pre_linked_param, ctx, static_cast(param_shape->size()), + vtype[pit.param_data_entry], param_shape->data(), nullptr, 0}, + nullptr, + PreAllocatedDLTensorDeleter}; + + storage_pool_.push_back(NDArray::FromDLPack(param_tensor)); + LOG(INFO) << "Loaded data entry " << pit.param_data_entry + << " from pre-linked blob: " << param_tensor->dl_tensor.data; } else { + LOG(INFO) << "param " << pit.param_data_entry << " blank!"; std::vector shape; shape.push_back(static_cast(pit.size + 3) / 4); storage_pool_.push_back(NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx)); @@ -334,10 +337,7 @@ void GraphRuntime::SetupStorage() { for (size_t i = 0; i < data_entry_.size(); ++i) { int storage_id = attrs_.storage_id[i]; ICHECK_LT(static_cast(storage_id), storage_pool_.size()); - auto pool_entry = storage_pool_[storage_id].CreateView(attrs_.shape[i], vtype[i]); - if (pool_entry.get() != nullptr) { - data_entry_[i] = pool_entry.get(); - } + data_entry_[i] = storage_pool_[storage_id].CreateView(attrs_.shape[i], vtype[i]); const DLTensor* tmp = data_entry_[i].operator->(); data_alignment_[i] = details::GetDataAlignment(*tmp); diff --git a/src/runtime/graph/graph_runtime.h b/src/runtime/graph/graph_runtime.h index d687ab4b3615..9f0b0962333a 100644 --- a/src/runtime/graph/graph_runtime.h +++ b/src/runtime/graph/graph_runtime.h @@ -184,8 +184,8 @@ class TVM_DLL GraphRuntime : public ModuleNode { int device_type; void* pre_linked_param; int param_data_entry; - PoolEntry(int s, int dev_type, std::unique_ptr pre_linked_param) : - size(s), 
device_type(dev_type), pre_linked_param(std::move(pre_linked_param)) {} +// PoolEntry(int s, int dev_type, void* pre_linked_param) : +// size(s), device_type(dev_type), pre_linked_param(std::move(pre_linked_param)) {} }; // Node entry struct NodeEntry { @@ -366,8 +366,8 @@ class TVM_DLL GraphRuntime : public ModuleNode { } ICHECK_EQ(bitmask, 1 | 2 | 4 | 8 | 16) << "invalid format"; } - /*! \brief Setup pre-linked parameters. */ - void SetupLinkedParams(); + /*! \brief Delete pre-allocated DLTensor. */ + static void PreAllocatedDLTensorDeleter(DLManagedTensor* tensor); /*! \brief Setup the temporal storage */ void SetupStorage(); /*! \brief Setup the executors. */ diff --git a/src/target/llvm/codegen_llvm.cc b/src/target/llvm/codegen_llvm.cc index 2604f5c50ddd..20cbdf83b971 100644 --- a/src/target/llvm/codegen_llvm.cc +++ b/src/target/llvm/codegen_llvm.cc @@ -208,13 +208,13 @@ void CodeGenLLVM::LinkParameters(const Map params) { llvm::Function* function = llvm::Function::Create( ftype, llvm::Function::ExternalLinkage, - ::tvm::target::packed_func::kLookupLinkedParam, module_.get()); + ::tvm::runtime::symbol::tvm_lookup_linked_param, module_.get()); function->setCallingConv(llvm::CallingConv::C); function->setDLLStorageClass(llvm::GlobalValue::DLLStorageClassTypes::DLLExportStorageClass); llvm::BasicBlock* entry = llvm::BasicBlock::Create(*ctx_, "entry", function); builder_->SetInsertPoint(entry); - std::vector zero_index_list{{llvm::ConstantInt::get(t_int32_, 0)}}; + std::vector zero_index_list{llvm::ConstantInt::get(t_int32_, 0)}; auto args_array = builder_->CreateBitCast( &function->arg_begin()[0], llvm::ArrayType::get(t_void_->getPointerTo(GetGlobalAddressSpace()), 1)); llvm::Value* sid = diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index 7c160cf198e7..365ab04505c9 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -24,6 +24,8 @@ #include "codegen_params.h" +#include + namespace tvm { namespace codegen { @@ -63,11 +65,6 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: if (arr_type.bits() == 8) { int8_t* data_buf = static_cast(tensor->dl_tensor.data); for (int i = 0; i < num_elements; i++) { - std::cout << std::hex << +static_cast(data_buf[i]) << std::dec << " "; - if (((i + 1) % 16) == 0) { - std::cout << std::endl; - } - elements.emplace_back(llvm::ConstantInt::getSigned(element_type, data_buf[i])); } } else if (arr_type.bits() == 16) { @@ -152,31 +149,198 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: llvm::ArrayRef(elements))); } -// void LLVMCodeGenParams(llvm::LLVMContext* ctx, -// llvm::Module* module, -// int64_t storage_id_offset, -// const Map& params, -// const std::unordered_map& param_ids) { -// CHECK_EQ(params.size(), params_ids.size()) -// << "Expect param_names and params_ids to have equal lengths, but params.size() == " -// << params.size() << " and params_ids.size() == " << params_ids.size(); - -// llvm::ArrayType* t_sid_ptr_ty = -// llvm::ArrayType::get(llvm::PointerType::getUnqual(llvm::getVoidTy())); -// std::vector sid_ptrs; -// for (auto kv : params) { - -// sid_ptrs.push_back( -// } - -// llvm::GlobalVaraible* sid_offset_symbol = new llvm::GlobalVariable( -// *module, llvm::Type::getInt64Ty(), true, llvm::GlobalVariable::InternalLinkage, -// llvm::ConstantInt::getSigned(ctx, sid_offset), ::tvm::runtime::symbol::tvm_sid_offset); -// llvm::GlobalVariable* sid_ptrs_symbol = new llvm::GlobalVariable( -// *module, 
t_sid_ptr_ty, true, llvm::GlobalValue::InternalLinkage, -// llvm::ConstantArray::get(t_sid_ptr_ty, sid_ptrs), ::tvm::runtime::symbol::tvm_param_array); -// } + +static constexpr const char* kFloatCast = "(float)"; +static constexpr const char* kDoubleCast = "(double)"; + +static constexpr const int kMaxLineLength = 80; + + +void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& os) { + auto arr_type = arr.DataType(); + CHECK_EQ(arr_type.lanes(), 1) + << "CodegenParams: only support generating 1-lane parameters; saw " << arr_type.lanes(); + + int one_element_size_bytes = (arr_type.bits() / 4) + (2 /* "0x" */) + (2 /* ", " */); + if (arr_type.code() == runtime::DataType::TypeCode::kInt) { + one_element_size_bytes += 1; // sign bit + if (arr_type.bits() > 32) { + one_element_size_bytes += 2; // "UL" + } + } else if (arr_type.code() == runtime::DataType::TypeCode::kUInt) { + if (arr_type.bits() > 32) { + one_element_size_bytes += 1; // "L" + } + } else if (arr_type.code() == runtime::DataType::TypeCode::kFloat) { + // Floats and doubles are printed as hex but casted. + one_element_size_bytes += std::string{(arr_type.bits() == 32 ? kFloatCast : kDoubleCast)}.size(); + } + + int elements_per_row = 16; + while (elements_per_row > 1 && + (elements_per_row * one_element_size_bytes) > (kMaxLineLength - indent_chars)) { + elements_per_row /= 2; + } + + std::string indent_str(indent_chars, ' '); + os << indent_str; + + auto shape = arr.Shape(); + int num_elements = 1; + for (auto shape_elem : shape) { + num_elements *= shape_elem; + } + + std::unique_ptr tensor(arr.ToDLPack()); + auto old_fmtflags = os.flags(); + os.setf(std::ios::right | std::ios::hex, std::ios::adjustfield | std::ios::basefield); + os.fill('0'); + switch (arr_type.code()) { + case runtime::DataType::kInt: + CHECK(arr_type.bits() == 8 || + arr_type.bits() == 16 || + arr_type.bits() == 32 || + arr_type.bits() == 64) + << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " + << arr_type.bits() << "-bit array"; + + if (arr_type.bits() == 8) { + for (int i = 0; i < num_elements; i++) { + // NOTE: for special types int8_t and uint8_t, need to promote to int type to avoid printing + // as a char. 
+ int8_t elem = static_cast(tensor->dl_tensor.data)[i]; + uint8_t to_print; + if (elem < 0) { + os << "-"; + to_print = -elem; + } else { + os << "+"; + to_print = elem; + } + os << "0x" << std::setw(2) << +static_cast(to_print); + if (i < num_elements - 1) { os << ", "; } + if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } + } + } else if (arr_type.bits() == 16) { + for (int i = 0; i < num_elements; i++) { + int16_t elem = static_cast(tensor->dl_tensor.data)[i]; + uint16_t to_print; + if (elem < 0) { + os << "-"; + to_print = -elem; + } else { + os << "+"; + to_print = elem; + } + os << "0x" << std::setw(4) << to_print; + if (i < num_elements - 1) { os << ", "; } + if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } + } + } else if (arr_type.bits() == 32) { + for (int i = 0; i < num_elements; i++) { + int32_t elem = static_cast(tensor->dl_tensor.data)[i]; + uint32_t to_print ; + if (elem < 0) { + os << "-"; + to_print = -elem; + } else { + os << "+"; + to_print = elem; + } + os << "0x" << std::setw(8) << to_print; + if (i < num_elements - 1) { os << ", "; } + if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } + } + } else if (arr_type.bits() == 64) { + for (int i = 0; i < num_elements; i++) { + int64_t elem = static_cast(tensor->dl_tensor.data)[i]; + uint64_t to_print; + if (elem < 0) { + os << "-"; + to_print = -elem; + } else { + os << "+"; + to_print = elem; + } + os << "0x" << std::setw(16) << to_print; + if (i < num_elements - 1) { os << ", "; } + if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } + } + } else { + CHECK(false) << "should not get here"; + } + break; + + case runtime::DataType::TypeCode::kUInt: + CHECK(arr_type.bits() == 8 || + arr_type.bits() == 16 || + arr_type.bits() == 32 || + arr_type.bits() == 64) + << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " + << arr_type.bits() << "-bit array"; + + if (arr_type.bits() == 8) { + for (int i = 0; i < num_elements; i++) { + // NOTE: for special types int8_t and uint8_t, need to promote to int type to avoid printing + // as a char. 
+ os << "0x" << std::setw(2) + << +static_cast(static_cast(tensor->dl_tensor.data)[i]); + if (i < num_elements - 1) { os << ", "; } + if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } + } + } else if (arr_type.bits() == 16) { + for (int i = 0; i < num_elements; i++) { + os << "0x" << std::setw(4) << static_cast(tensor->dl_tensor.data)[i]; + if (i < num_elements - 1) { os << ", "; } + if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } + } + } else if (arr_type.bits() == 32) { + for (int i = 0; i < num_elements; i++) { + os << "0x" << std::setw(8) << static_cast(tensor->dl_tensor.data)[i]; + if (i < num_elements - 1) { os << ", "; } + if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } + } + } else if (arr_type.bits() == 64) { + for (int i = 0; i < num_elements; i++) { + os << "0x" << std::setw(16) << static_cast(tensor->dl_tensor.data)[i] << "UL"; + if (i < num_elements - 1) { os << ", "; } + if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } + } + } else { + CHECK(false) << "should not get here"; + } + break; + + case runtime::DataType::TypeCode::kFloat: + if (arr_type.bits() == 32) { + for (int i = 0; i < num_elements; i++) { + os << kFloatCast << "0x" << std::setw(8) + << static_cast(tensor->dl_tensor.data)[i] << "U"; + if (i < num_elements - 1) { os << ", "; } + if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } + } + } else if (arr_type.bits() == 64) { + for (int i = 0; i < num_elements; i++) { + os << kDoubleCast << "0x" << std::setw(16) + << static_cast(tensor->dl_tensor.data)[i] << "UL"; + if (i < num_elements - 1) { os << ", "; } + if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } + } + } else { + CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " + << arr_type.bits() << "-bit array"; + } + break; + + default: + CHECK(false) << "Data type not supported"; + } + + if (num_elements % elements_per_row != 0) { + os << "\n"; + } + os.flags(old_fmtflags); +} } // namespace codegen } // namespace tvm diff --git a/src/target/llvm/codegen_params.h b/src/target/llvm/codegen_params.h new file mode 100644 index 000000000000..8b8ba4f23cc6 --- /dev/null +++ b/src/target/llvm/codegen_params.h @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file codegen_params.h + */ + +#ifndef TVM_TARGET_LLVM_CODEGEN_PARAMS_H_ +#define TVM_TARGET_LLVM_CODEGEN_PARAMS_H_ + +#include "llvm_common.h" +#include +#include + +namespace tvm { +namespace codegen { + +llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime::NDArray arr); + +void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& os); + +void LLVMCodeGenParams(llvm::LLVMContext* ctx, + llvm::Module* module, + int64_t storage_id_offset, + ::tvm::runtime::Array param_names, + ::tvm::runtime::Array params_by_sid); + + +} // namespace codegen +} // namespace tvm + +#endif // TVM_TARGET_LLVM_CODEGEN_PARAMS_H_ diff --git a/src/target/llvm/llvm_module.cc b/src/target/llvm/llvm_module.cc index 98857f574bd9..89774ec61618 100644 --- a/src/target/llvm/llvm_module.cc +++ b/src/target/llvm/llvm_module.cc @@ -200,6 +200,16 @@ class LLVMModuleNode final : public runtime::ModuleNode { std::vector funcs; std::string entry_func; for (auto kv : mod->functions) { + if (could_have_linked_params && + kv.first->name_hint == ::tvm::runtime::symbol::tvm_lookup_linked_param) { + Map attrs_dict = Downcast>(kv.second->attrs->dict); + CHECK(attrs_dict.find(::tvm::tir::attr::kLinkedParams) != attrs_dict.end()) + << "no " << ::tvm::tir::attr::kLinkedParams << " attribute found!"; + linked_params = Downcast>( + attrs_dict[::tvm::tir::attr::kLinkedParams]); + found_linked_params = true; + continue; + } ICHECK(kv.second->IsInstance()) << "Can only lower IR Module with PrimFuncs"; auto f = Downcast(kv.second); if (f->HasNonzeroAttr(tir::attr::kIsEntryFunc)) { @@ -209,8 +219,7 @@ class LLVMModuleNode final : public runtime::ModuleNode { } funcs.push_back(f); } - bool is_link_params = target->GetAttr("link-params").value_or(Bool(false)); - ICHECK(funcs.size() > 0 || is_link_params); + ICHECK(funcs.size() > 0 || (could_have_linked_params && found_linked_params)); // TODO(tqchen): remove the entry function behavior as it does not // makes sense when we start to use multiple modules. 
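+  // NOTE: when --link-params is given, the special tvm_lookup_linked_param function is pulled
+  // out of the IRModule by the loop above and its parameter table is handed to the codegen via
+  // LinkParameters() below, rather than being lowered like an ordinary PrimFunc.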
cg->Init("TVMMod", tm_.get(), ctx_.get(), system_lib, system_lib, target_c_runtime); @@ -223,8 +232,7 @@ class LLVMModuleNode final : public runtime::ModuleNode { cg->AddMainFunction(entry_func); } - if (is_link_params) { - CHECK(found_linked_params) << "--link-params given, but no parameters given to codegen"; + if (found_linked_params) { cg->LinkParameters(linked_params); } module_ = cg->Finish(); diff --git a/src/target/source/codegen_c_host.cc b/src/target/source/codegen_c_host.cc index 6ae11f4f9af8..3896e37d5b5d 100644 --- a/src/target/source/codegen_c_host.cc +++ b/src/target/source/codegen_c_host.cc @@ -23,6 +23,8 @@ #include "codegen_c_host.h" #include +#include +#include #include #include @@ -31,6 +33,7 @@ #include "../../support/str_escape.h" #include "../build_common.h" #include "../func_registry_generator.h" +#include "../llvm/codegen_params.h" namespace tvm { namespace codegen { @@ -57,6 +60,46 @@ void CodeGenCHost::AddFunction(const PrimFunc& f) { CodeGenC::AddFunction(f); } +void CodeGenCHost::LinkParameters(Map params) { + PrintFuncPrefix(); + stream << " " << tvm::runtime::symbol::tvm_lookup_linked_param + << "(void* args, int* arg_type_ids, int num_args, void* out_ret_value, " + << "int* out_ret_tcode, void* resource_handle) {\n"; + ICHECK_EQ(GetUniqueName(tvm::runtime::symbol::tvm_lookup_linked_param), + tvm::runtime::symbol::tvm_lookup_linked_param) + << "builtin PackedFunc name already taken: " + << tvm::runtime::symbol::tvm_lookup_linked_param; + stream << " switch (((int64_t*) args)[0]) {\n" + << " default:\n" + << " return " << kTvmErrorGeneratedInvalidStorageId << ";\n"; + + function_names_.emplace_back(tvm::runtime::symbol::tvm_lookup_linked_param); + for (auto kv : params) { + decl_stream << "#ifdef __cplusplus\n" + << "extern \"C\" {\n" + << "#endif\n" + << "static const "; + int64_t num_elements = 1; + for (int64_t dim : kv.second->param.Shape()) { + num_elements *= dim; + } + PrintType(kv.second->param.DataType(), decl_stream); + decl_stream << " " << ::tvm::runtime::symbol::tvm_param_prefix + << kv.first << "[" << num_elements << "] = {\n"; + NDArrayDataToC(kv.second->param, 4, decl_stream); + decl_stream << "};\n" + << "#ifdef __cplusplus\n" + << "} // extern \"C\"\n" + << "#endif\n"; + stream << " case " << kv.second->id << ":\n" + << " ((int64_t*)out_ret_value)[0] = (int64_t) " << ::tvm::runtime::symbol::tvm_param_prefix << kv.first << ";\n" + << " out_ret_tcode[0] = " << kTVMOpaqueHandle << ";\n" + << " return 0;\n"; + } + stream << " }\n" + << "}\n"; +} + void CodeGenCHost::PrintFuncPrefix() { // NOLINT(*) stream << "#ifdef __cplusplus\n" << "extern \"C\"\n" @@ -307,12 +350,31 @@ runtime::Module BuildCHost(IRModule mod, Target target) { CodeGenCHost cg; cg.Init(output_ssa, emit_asserts, target->str()); + Map linked_params; + bool found_linked_params = false; + bool could_have_linked_params = target->GetAttr("link-params").value_or(Bool(false)); for (auto kv : mod->functions) { + if (could_have_linked_params && + kv.first->name_hint == ::tvm::runtime::symbol::tvm_lookup_linked_param) { + Map attrs_dict = Downcast>(kv.second->attrs->dict); + CHECK(attrs_dict.find(::tvm::tir::attr::kLinkedParams) != attrs_dict.end()) + << "no " << ::tvm::tir::attr::kLinkedParams << " attribute found!"; + linked_params = Downcast>( + attrs_dict[::tvm::tir::attr::kLinkedParams]); + found_linked_params = true; + continue; + } + ICHECK(kv.second->IsInstance()) << "CodegenCHost: Can only take PrimFunc"; auto f = Downcast(kv.second); cg.AddFunction(f); } + if 
(could_have_linked_params) {
+    ICHECK(found_linked_params) << "--link-params given but none found";
+    cg.LinkParameters(linked_params);
+  }
+
   if (target->GetAttr<Bool>("system-lib").value_or(Bool(false))) {
     ICHECK_EQ(target->GetAttr<String>("runtime").value_or(""), "c")
         << "c target only supports generating C runtime SystemLibs";
diff --git a/src/target/source/codegen_c_host.h b/src/target/source/codegen_c_host.h
index 1bf378be1422..b54b6fbfcfeb 100644
--- a/src/target/source/codegen_c_host.h
+++ b/src/target/source/codegen_c_host.h
@@ -42,6 +42,9 @@ class CodeGenCHost final : public CodeGenC {
 
   void AddFunction(const PrimFunc& f);
 
+  /*! \brief Add linked parameters, if they are present. */
+  void LinkParameters(Map params);
+
   void PrintType(DataType t, std::ostream& os) final;  // NOLINT(*)
   void PrintFuncPrefix() final;                        // NOLINT(*)
   void PrintFinalReturn() final;                       // NOLINT(*)
diff --git a/tests/python/unittest/test_target_codegen_llvm.py b/tests/python/unittest/test_target_codegen_llvm.py
index ea2a1f165b30..162481bfdb6e 100644
--- a/tests/python/unittest/test_target_codegen_llvm.py
+++ b/tests/python/unittest/test_target_codegen_llvm.py
@@ -21,7 +21,7 @@
 import tvm.testing
 from tvm import te
 from tvm import topi
-from tvm.contrib import utils, clang
+from tvm.contrib import utils
 import numpy as np
 import ctypes
 import math

From e0259b0762a444891089f96ed303bb29043e81d5 Mon Sep 17 00:00:00 2001
From: Andrew Reusch
Date: Mon, 9 Nov 2020 22:35:39 -0800
Subject: [PATCH 05/60] switch to floating point hex

---
 src/target/llvm/codegen_params.cc | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc
index 365ab04505c9..e4ecf30d382f 100644
--- a/src/target/llvm/codegen_params.cc
+++ b/src/target/llvm/codegen_params.cc
@@ -173,7 +173,7 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream&
     }
   } else if (arr_type.code() == runtime::DataType::TypeCode::kFloat) {
     // Floats and doubles are printed as hex but casted.
-    one_element_size_bytes += std::string{(arr_type.bits() == 32 ?
kFloatCast : kDoubleCast)}.size(); + one_element_size_bytes += 1 /* sign */ + 1 /* decimal point */ + 1 /* exponent sign */; } int elements_per_row = 16; @@ -193,7 +193,8 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& std::unique_ptr tensor(arr.ToDLPack()); auto old_fmtflags = os.flags(); - os.setf(std::ios::right | std::ios::hex, std::ios::adjustfield | std::ios::basefield); + os.setf(std::ios::right | std::ios::hex | std::ios::fixed | std::ios::scientific, + std::ios::adjustfield | std::ios::basefield | std::ios::floatfield); os.fill('0'); switch (arr_type.code()) { case runtime::DataType::kInt: @@ -314,15 +315,14 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& case runtime::DataType::TypeCode::kFloat: if (arr_type.bits() == 32) { for (int i = 0; i < num_elements; i++) { - os << kFloatCast << "0x" << std::setw(8) - << static_cast(tensor->dl_tensor.data)[i] << "U"; + os << static_cast(tensor->dl_tensor.data)[i]; if (i < num_elements - 1) { os << ", "; } if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } } + std::cout << "\n"; } else if (arr_type.bits() == 64) { for (int i = 0; i < num_elements; i++) { - os << kDoubleCast << "0x" << std::setw(16) - << static_cast(tensor->dl_tensor.data)[i] << "UL"; + os << static_cast(tensor->dl_tensor.data)[i]; if (i < num_elements - 1) { os << ", "; } if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } } From cb7c001d61be324ee9a6656a7b46edefdc640d5b Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 9 Nov 2020 23:38:46 -0800 Subject: [PATCH 06/60] c backend works works --- src/target/llvm/codegen_params.cc | 45 ++- tests/python/unittest/test_link_params.py | 337 ++++++++++++++++++++++ 2 files changed, 374 insertions(+), 8 deletions(-) create mode 100644 tests/python/unittest/test_link_params.py diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index e4ecf30d382f..20a1efbb575e 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -173,7 +173,8 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& } } else if (arr_type.code() == runtime::DataType::TypeCode::kFloat) { // Floats and doubles are printed as hex but casted. - one_element_size_bytes += 1 /* sign */ + 1 /* decimal point */ + 1 /* exponent sign */; + one_element_size_bytes += 1 /* sign */ + 1 /* decimal point */ + + 1 /* exponent sign */ + 1 /* extra decimal digit in exponent */; } int elements_per_row = 16; @@ -193,8 +194,8 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& std::unique_ptr tensor(arr.ToDLPack()); auto old_fmtflags = os.flags(); - os.setf(std::ios::right | std::ios::hex | std::ios::fixed | std::ios::scientific, - std::ios::adjustfield | std::ios::basefield | std::ios::floatfield); + os.setf(std::ios::internal | std::ios::hex, + std::ios::adjustfield | std::ios::basefield | std::ios::showbase); os.fill('0'); switch (arr_type.code()) { case runtime::DataType::kInt: @@ -210,7 +211,7 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& // NOTE: for special types int8_t and uint8_t, need to promote to int type to avoid printing // as a char. 
int8_t elem = static_cast(tensor->dl_tensor.data)[i]; - uint8_t to_print; + uint16_t to_print; if (elem < 0) { os << "-"; to_print = -elem; @@ -240,7 +241,7 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& } else if (arr_type.bits() == 32) { for (int i = 0; i < num_elements; i++) { int32_t elem = static_cast(tensor->dl_tensor.data)[i]; - uint32_t to_print ; + uint32_t to_print; if (elem < 0) { os << "-"; to_print = -elem; @@ -312,17 +313,44 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& } break; - case runtime::DataType::TypeCode::kFloat: + case runtime::DataType::TypeCode::kFloat: { + std::stringstream ss; + ss.setf(std::ios::hex | std::ios::showbase | std::ios::fixed | std::ios::scientific, + std::ios::basefield | std::ios::showbase | std::ios::floatfield); + os.fill(' '); + os.setf(std::ios::left, std::ios::adjustfield); if (arr_type.bits() == 32) { for (int i = 0; i < num_elements; i++) { - os << static_cast(tensor->dl_tensor.data)[i]; + float elem = static_cast(tensor->dl_tensor.data)[i]; + if (isinf(elem)) { + // C99 standard. + os << (elem < 0 ? "-" : " ") << std::setw(one_element_size_bytes - 1) << "INFINITY"; + } else if (isnan(elem)) { + // GNU extension, implemenatation-dependent. + os << std::setw(one_element_size_bytes) << "NAN"; + } else { + ss << elem; + os << std::setw(one_element_size_bytes) << ss.str(); + ss.str(""); + } if (i < num_elements - 1) { os << ", "; } if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } } std::cout << "\n"; } else if (arr_type.bits() == 64) { for (int i = 0; i < num_elements; i++) { - os << static_cast(tensor->dl_tensor.data)[i]; + double elem = static_cast(tensor->dl_tensor.data)[i]; + if (isinf(elem)) { + // C99 standard. + os << (elem < 0 ? "-" : " ") << std::setw(one_element_size_bytes - 1) << "INFINITY"; + } else if (isnan(elem)) { + // GNU extension, implemenatation-dependent. + os << std::setw(one_element_size_bytes) << "NAN"; + } else { + ss << elem; + os << std::setw(one_element_size_bytes) << ss.str(); + ss.str(""); + } if (i < num_elements - 1) { os << ", "; } if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } } @@ -331,6 +359,7 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& << arr_type.bits() << "-bit array"; } break; + } default: CHECK(false) << "Data type not supported"; diff --git a/tests/python/unittest/test_link_params.py b/tests/python/unittest/test_link_params.py new file mode 100644 index 000000000000..a3ce97e383d8 --- /dev/null +++ b/tests/python/unittest/test_link_params.py @@ -0,0 +1,337 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
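+"""Tests that model parameters can be linked directly into the generated library.
+
+Each linkable dtype is built once with --link-params and once without; both builds are run
+on the same random input and their outputs compared, and the linked parameter data is read
+back out of the built artifact and checked against the original arrays.
+"""
+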
+import collections +import ctypes +import json +import os +import re +import struct +import sys + +import numpy as np +import pytest + +import tvm +import tvm.relay +import tvm.testing +from tvm.contrib import utils + + +TEST_SHAPE = (3, 4, 5) + + +# The data types that are linkable. +LINKABLE_DTYPES = ( + [f'uint{b}' for b in (8, 16, 32, 64)] + + [f'int{b}' for b in (8, 16, 32, 64)] + + ['float32', 'float64']) + + + +def dtype_info(dtype): + """Lookup numpy type info for the given string dtype (of LINKABLE_DTYPES above).""" + if 'int' in dtype: + return np.iinfo(getattr(np, dtype)) + else: + return np.finfo(getattr(np, dtype)) + + +# Note: for debugging, set this to an integer (i.e. 1.0). Then all "random" tensors will become +# predictable +RANDOM_TENSOR_START = None + + +def _make_random_tensor(dtype): + """Create a random test tensor of shape TEST_SHAPE and the given dtype.""" + global RAND_SEED + if RANDOM_TENSOR_START is not None: + to_return = np.arange(RANDOM_TENSOR_START, + RANDOM_TENSOR_START + np.prod(TEST_SHAPE), + dtype=dtype).reshape(TEST_SHAPE) + RAND_SEED += np.prod(TEST_SHAPE) + return to_return + + dinfo = dtype_info(dtype) + if 'int' in dtype: + return np.random.randint(dinfo.min, dinfo.max, TEST_SHAPE, dtype=dtype) + else: + to_return = np.random.uniform(0, dinfo.max, TEST_SHAPE) +# to_return = dinfo.min + (np.random.random(TEST_SHAPE) * dinfo.max) + np.reshape(to_return, np.prod(TEST_SHAPE))[::2] *= -1 + return to_return + + +def _lookup_sid(graph, name): + """Lookup the storage id of a named parameter. + + Arguments + --------- + graph : dict + Parsed JSON graph. + + name : str + Name of the tensor parameter to lookup. + + Returns + ------- + int : + The storage_id of the parameter. + """ + num_outputs_seen = 0 + for i, n in enumerate(graph['nodes']): + if n['name'] == name: + return graph['attrs']['storage_id'][1][num_outputs_seen] + else: + if 'attrs' in n and 'num_outputs' in n['attrs']: + num_outputs_seen += n['attrs']['num_outputs'] + else: + num_outputs_seen += 1 + + raise KeyError(f'no such param: {name}') + + +def _get_ctypes_dtype(dt): + """Return a ctypes c_* datatype given a string data type.""" + if 'int' in dt: + return getattr(ctypes, f'c_{dt}') + elif dt == 'float32': + return ctypes.c_float + elif dt == 'float64': + return ctypes.c_double + else: + assert False, f'unknown dtype: {dt}' + + +def _verify_linked_param(dtype, lib, mod, graph, name): + """Directly read memory from the linked library to verify the linked parameter is correct.""" + sid = _lookup_sid(graph, name) + # NOTE: query_imports=True because when loading a module from disk (i.e. for C backend), + # a GraphRuntimeFactory module is created instead of the module itself. 
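+    # The generated _lookup_linked_param PackedFunc maps a storage_id to the raw address of
+    # the parameter baked into the library; that memory is reinterpreted through ctypes below
+    # and compared against the expected NDArray.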
+ param_ptr = mod.get_function("_lookup_linked_param", True)(sid) + print('verify', param_ptr) + arr_data = (_get_ctypes_dtype(dtype) * np.prod(TEST_SHAPE)).from_address(param_ptr.value) + gen_param = lib.params[name] + print('gen param dtype', gen_param.dtype) + arr = np.ndarray( + shape=gen_param.shape, dtype=gen_param.dtype, buffer=arr_data, order='C') + if 'int' in gen_param.dtype: + np.testing.assert_equal(gen_param.asnumpy(), arr) + else: + np.testing.assert_allclose(gen_param.asnumpy(), arr) + + +def _make_mod_and_params(dtype): + """Create a Relay module and parameters to test the given datatype.""" + param_decls = collections.OrderedDict() + param_init = {} + + def _add_decl(name, dtype): + param_decls[name] = f'%{name} : Tensor[{TEST_SHAPE}, {dtype}]' + param_init[name] = _make_random_tensor(dtype) + + _add_decl(f'{dtype}_a', dtype) + _add_decl(f'{dtype}_b', dtype) + + mod_lines = [ + '#[version = "0.0.5"]', + f"def @main(%rand_input : Tensor[{TEST_SHAPE}, {dtype}], { ', '.join(param_decls.values()) } ) {{", + ] + if 'int' in dtype: + mod_lines.append( +# f' %0 = bitwise_xor(%rand_input, bitwise_xor(%{dtype}_a, %{dtype}_b));') + f' %0 = add(%rand_input, %{dtype}_a);') + else: + mod_lines.append( + f' %0 = cast(add(%rand_input, cast(add(%{dtype}_a, %{dtype}_b), dtype="{dtype}")), dtype="{dtype}");') +# f' %0 = cast(add(%rand_input, %{dtype}_a), dtype="{dtype}");') + mod_lines.extend([ + ' %0', + '}' + ]) + + mod = tvm.parser.fromtext('\n'.join(mod_lines)) + return mod, param_init + + +@tvm.testing.requires_llvm +def test_llvm_link_params(): + for dtype in LINKABLE_DTYPES: + mod, param_init = _make_mod_and_params(dtype) + rand_input = _make_random_tensor(dtype) + main_func = mod['main'] + target = 'llvm --runtime=c --system-lib --link-params' + with tvm.transform.PassContext(opt_level=3): + lib = tvm.relay.build(mod, target, params=param_init) + assert set(lib.params.keys()) == {"p0"} # NOTE: op folded + + graph = json.loads(lib.graph_json) + for p in lib.params: + _verify_linked_param(dtype, lib, lib.lib, graph, p) + + # Wrap in function to explicitly deallocate the runtime. + def _run_linked(lib): + graph_json, mod, _ = lib + graph_rt = tvm.contrib.graph_runtime.create(graph_json, mod, tvm.cpu(0)) + graph_rt.set_input('rand_input', rand_input) # NOTE: params not required. 
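+            # The linked build carries its parameters inside the library itself, so only the
+            # non-parameter input needs to be provided before running.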
+ graph_rt.run() + return graph_rt.get_output(0) + + linked_output = _run_linked(lib) + + with tvm.transform.PassContext(opt_level=3): + lib = tvm.relay.build(mod, 'llvm --system-lib', params=param_init) + + def _run_unlinked(lib): + graph_json, mod, lowered_params = lib + graph_rt = tvm.contrib.graph_runtime.create(graph_json, mod, tvm.cpu(0)) + graph_rt.set_input('rand_input', rand_input, **lowered_params) + graph_rt.run() + return graph_rt.get_output(0) + + unlinked_output = _run_unlinked(lib) + + if 'int' in dtype: + np.testing.assert_equal(unlinked_output.asnumpy(), linked_output.asnumpy()) + else: + np.testing.assert_allclose(unlinked_output.asnumpy(), linked_output.asnumpy()) + + +def _get_c_datatype(dtype): + """Translate LINKABLE_DTYPES element to c datatype.""" + if 'int' in dtype: + return f'{dtype}_t' + elif dtype == 'float32': + return 'float' + elif dtype == 'float64': + return 'double' + else: + assert False, f'unknown dtype {dtype}' + + +def _format_c_value(dtype, width, x): + if 'int' in dtype: + hex_formatstr = f'{{:{"+" if dtype.startswith("int") else ""}#0{width}x}}' + return hex_formatstr.format(x) + elif 'float' in dtype: + to_ret = float(x).hex() + if 'inf' in to_ret: + return ('-' if x < 0 else '') + 'INFINITY' + elif 'nan' in to_ret: + return 'NAN' + + before, after = to_ret.split('p') + return f'{before.rstrip("0")}p{after}' + else: + assert False, f"don't know dtype {dtype}" + + +HEX_NUM_RE = re.compile(r'[+\-]?(?:(?:0x[0-9A-Fa-f.p+-]+)|(?:INFINITY)|(?:NAN))') + + +def test_c_link_params(): + temp_dir = utils.tempdir() + for dtype in LINKABLE_DTYPES: + print("test", dtype) + mod, param_init = _make_mod_and_params(dtype) + print('built mod', mod) + rand_input = _make_random_tensor(dtype) + main_func = mod['main'] + target = 'c --link-params' + with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): + lib = tvm.relay.build(mod, target, params=param_init) + assert set(lib.params.keys()) == {"p0"} # NOTE: op folded + + src = lib.lib.get_source() + lib.lib.save('test.c', 'cc') + c_dtype = _get_c_datatype(dtype) + src_lines = src.split('\n') + param = lib.params['p0'].asnumpy().reshape(np.prod(TEST_SHAPE)) + param_def = f'static const {c_dtype} __tvm_param__p0[{np.prod(param.shape)}] = {{' + for i, line in enumerate(src_lines): + if line == param_def: + i += 1 + break + else: + assert False, f'did not find parameter definition "{param_def}":\n{src}' + + cursor = 0 + width = dtype_info(dtype).bits // 4 + 2 + if dtype.startswith("int"): + width += 1 # Account for sign + + print('check printing of', param) + while '};' not in src_lines[i]: + for match in HEX_NUM_RE.finditer(src_lines[i]): + assert match.group() == _format_c_value(dtype, width, param[cursor]), ( + f'p0 byte {cursor}: want "{_format_c_value(dtype, width, param[cursor])}" got ' + f'"{match.group(0)}"; full p0 follows:\n{src}') + cursor += 1 + i += 1 + + assert cursor == np.prod(param.shape) + temp = utils.tempdir() + + # Need a unique name per library to avoid dlopen caching the lib load. + lib_path = temp_dir.relpath(f'test-{dtype}-linked.so') + lib['remove_params']().export_library(lib_path) + lib_mod = tvm.runtime.load_module(lib_path) + +# lib_mod = lib_factory['default']() + graph = json.loads(lib.graph_json) + for p in lib.params: + _verify_linked_param(dtype, lib, lib_mod, graph, p) + + # Wrap in function to explicitly deallocate the runtime. 
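+        # (graph_rt only lives inside the helper, so its reference to the loaded library is
+        # dropped as soon as the helper returns.)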
+ def _run_linked(lib_mod): + graph_rt = tvm.contrib.graph_runtime.GraphModule( + lib_mod['default'](tvm.cpu(0))) + graph_rt.set_input('rand_input', rand_input) # NOTE: params not required. + print('linked', graph_rt.get_input('p0')) + graph_rt.run() + + return graph_rt.get_output(0) + + linked_output = _run_linked(lib_mod) + + linked_params = lib.params + with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): + lib = tvm.relay.build(mod, 'c', params=param_init) + _, _, params = lib + # Need a unique name per library to avoid dlopen caching the lib load. + lib_path = temp_dir.relpath(f'test-{dtype}-unlinked.so') + lib.export_library(lib_path) + lib_mod = tvm.runtime.load_module(lib_path) + + print('unlinked', params) + def _run_unlinked(lib_mod): + graph_rt = tvm.contrib.graph_runtime.GraphModule(lib_mod['default'](tvm.cpu(0))) + graph_rt.set_input('rand_input', rand_input, **params) + graph_rt.run() + return graph_rt.get_output(0) + + unlinked_output = _run_unlinked(lib_mod) + + if 'int' in dtype: + np.testing.assert_equal(unlinked_output.asnumpy(), linked_output.asnumpy()) + else: + np.testing.assert_allclose(unlinked_output.asnumpy(), linked_output.asnumpy()) + + + + +if __name__ == '__main__': + sys.exit(pytest.main(sys.argv[1:])) From bbdfd3d71dbdf75f9bfff62982f576ceeb72dd68 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 10 Nov 2020 19:53:49 -0800 Subject: [PATCH 07/60] crt tests work --- cmake/modules/StandaloneCrt.cmake | 1 + include/tvm/runtime/crt/error_codes.h | 16 + include/tvm/runtime/crt/graph_runtime.h | 12 + .../tvm/runtime/crt/graph_runtime_module.h | 42 +++ include/tvm/runtime/crt/module.h | 8 + python/tvm/contrib/binutils.py | 277 +----------------- python/tvm/micro/build.py | 18 +- python/tvm/micro/debugger.py | 32 +- python/tvm/micro/transport/__init__.py | 1 + python/tvm/micro/transport/base.py | 2 +- python/tvm/micro/transport/file_descriptor.py | 9 + src/runtime/crt/Makefile | 2 +- src/runtime/crt/common/crt_runtime_api.c | 2 +- src/runtime/crt/graph_runtime/graph_runtime.c | 21 +- .../graph_runtime_module.c | 211 +++++++++++++ src/runtime/crt/host/main.cc | 9 + .../internal/graph_runtime/graph_runtime.h | 1 + src/runtime/micro/micro_session.cc | 2 + src/runtime/rpc/rpc_endpoint.cc | 1 + tests/python/unittest/test_link_params.py | 64 +++- 20 files changed, 443 insertions(+), 288 deletions(-) create mode 100644 include/tvm/runtime/crt/graph_runtime_module.h create mode 100644 src/runtime/crt/graph_runtime_module/graph_runtime_module.c diff --git a/cmake/modules/StandaloneCrt.cmake b/cmake/modules/StandaloneCrt.cmake index 73c85d13e2ef..256ce2a48a6c 100644 --- a/cmake/modules/StandaloneCrt.cmake +++ b/cmake/modules/StandaloneCrt.cmake @@ -44,6 +44,7 @@ if(USE_MICRO) "src/runtime/crt/include *.h -> include" "src/runtime/crt/common *.c -> src/runtime/crt/common" "src/runtime/crt/graph_runtime *.c -> src/runtime/crt/graph_runtime" + "src/runtime/crt/graph_runtime_module *.c -> src/runtime/crt/graph_runtime_module" "src/runtime/crt/host crt_config.h -> src/runtime/crt/host" "src/runtime/crt/utvm_rpc_common *.cc -> src/runtime/crt/utvm_rpc_common" "src/runtime/crt/utvm_rpc_server *.cc -> src/runtime/crt/utvm_rpc_server" diff --git a/include/tvm/runtime/crt/error_codes.h b/include/tvm/runtime/crt/error_codes.h index 16d0e793848b..93a332a5924f 100644 --- a/include/tvm/runtime/crt/error_codes.h +++ b/include/tvm/runtime/crt/error_codes.h @@ -41,6 +41,9 @@ typedef enum { kTvmErrorCategoryWriteStream = 3, kTvmErrorCategorySession = 4, 
kTvmErrorCategoryPlatform = 5, + kTvmErrorCategoryGenerated = 6, + kTvmErrorCategoryGraphRuntime = 7, + kTvmErrorCategoryFunctionCall = 8, } tvm_crt_error_category_t; typedef enum { @@ -74,6 +77,19 @@ typedef enum { kTvmErrorPlatformMemoryManagerInitialized = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryPlatform, 1), kTvmErrorPlatformShutdown = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryPlatform, 2), + // Common error codes returned from generated functions. + kTvmErrorGeneratedInvalidStorageId = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGenerated, 0), + + // Graph runtime + kTvmErrorGraphModuleAlreadyCreated = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphRuntime, 0), + kTvmErrorGraphModuleBadContext = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphRuntime, 1), + kTvmErrorGraphModuleNoSuchInput = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGraphRuntime, 2), + + // Function Calls - common problems encountered calling functions. + kTvmErrorFunctionCallNumArguments = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionCall, 0), + kTvmErrorFunctionCallWrongArgType = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionCall, 1), + kTvmErrorFunctionCallNotImplemented = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionCall, 2), + // System errors are always negative integers; this mask indicates presence of a system error. // Cast tvm_crt_error_t to a signed integer to interpret the negative error code. kTvmErrorSystemErrorMask = (1 << (sizeof(int) * 4 - 1)), diff --git a/include/tvm/runtime/crt/graph_runtime.h b/include/tvm/runtime/crt/graph_runtime.h index d2eb3b7785e9..dc17debbc69b 100644 --- a/include/tvm/runtime/crt/graph_runtime.h +++ b/include/tvm/runtime/crt/graph_runtime.h @@ -69,6 +69,12 @@ TVMGraphRuntime* TVMGraphRuntime_Create(const char* sym_json, const struct TVMMo int TVMGraphRuntime_GetInputIndex(TVMGraphRuntime* runtime, const char* name); +/*! + * \brief get number of input tensors allocated. + * \return integer number of tensors available to use. + */ +int TVMGraphRuntime_GetNumInputs(); + /*! * \brief set input to the graph based on name. * \param runtime The graph runtime. @@ -77,6 +83,12 @@ int TVMGraphRuntime_GetInputIndex(TVMGraphRuntime* runtime, const char* name); */ void TVMGraphRuntime_SetInput(TVMGraphRuntime* runtime, const char* name, DLTensor* data_in); +/*! + * \brief get number of output tensors allocated. + * \return integer number of output tensors allocated. + */ +int TVMGraphRuntime_GetNumOutputs(); + /*! * \brief Return NDArray for given output index. * \param runtime The graph runtime. diff --git a/include/tvm/runtime/crt/graph_runtime_module.h b/include/tvm/runtime/crt/graph_runtime_module.h new file mode 100644 index 000000000000..04e9184c8b8d --- /dev/null +++ b/include/tvm/runtime/crt/graph_runtime_module.h @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file graph_runtime.h + * \brief Tiny graph runtime that can run graph containing only tvm PackedFunc. + */ +#ifndef TVM_RUNTIME_CRT_GRAPH_RUNTIME_MODULE_H_ +#define TVM_RUNTIME_CRT_GRAPH_RUNTIME_MODULE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/*! + * \brief Register the "tvm.graph_runtime.create" constructor PackedFunc. + */ +tvm_crt_error_t TVMGraphRuntimeModule_Register(); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // TVM_RUNTIME_CRT_GRAPH_RUNTIME_MODULE_H_ diff --git a/include/tvm/runtime/crt/module.h b/include/tvm/runtime/crt/module.h index 2359025f6fe1..7b124c4faa3a 100644 --- a/include/tvm/runtime/crt/module.h +++ b/include/tvm/runtime/crt/module.h @@ -39,6 +39,14 @@ typedef struct TVMModule { const TVMFuncRegistry* registry; } TVMModule; +/*! + * \brief Create a new module handle from the given TVMModule instance. + * \param mod The module instance to register. + * \param out_handle Pointer to recieve the newly-minted handle for this module. + * \return 0 on success, non-zero on error. + */ +int TVMModCreateFromCModule(const TVMModule* mod, TVMModuleHandle* out_handle); + /*! \brief Entry point for the system lib module. */ const TVMModule* TVMSystemLibEntryPoint(void); diff --git a/python/tvm/contrib/binutils.py b/python/tvm/contrib/binutils.py index 646362a5587f..146944970827 100644 --- a/python/tvm/contrib/binutils.py +++ b/python/tvm/contrib/binutils.py @@ -16,61 +16,13 @@ # under the License. """Utilities for binary file manipulation""" +import logging import os import subprocess import tvm._ffi from . import utils -# TODO does this file still belong in `contrib`. is it too µTVM-specific? - -# TODO shouldn't need so many `ALIGN` directives -RELOCATION_LD_SCRIPT_TEMPLATE = """ -/* linker symbol for use in UTVMInit */ -_utvm_stack_pointer_init = 0x{stack_pointer_init:x}; - -SECTIONS -{{ - . = 0x{text_start:x}; - . = ALIGN({word_size}); - .text : - {{ - . = ALIGN({word_size}); - KEEP(*(.text)) - KEEP(*(.text*)) - . = ALIGN({word_size}); - }} - - . = 0x{rodata_start:x}; - . = ALIGN({word_size}); - .rodata : - {{ - . = ALIGN({word_size}); - KEEP(*(.rodata)) - KEEP(*(.rodata*)) - . = ALIGN({word_size}); - }} - - . = 0x{data_start:x}; - . = ALIGN({word_size}); - .data : - {{ - . = ALIGN({word_size}); - KEEP(*(.data)) - KEEP(*(.data*)) - . = ALIGN({word_size}); - }} - - . = 0x{bss_start:x}; - . = ALIGN({word_size}); - .bss : - {{ - . = ALIGN({word_size}); - KEEP(*(.bss)) - KEEP(*(.bss*)) - . = ALIGN({word_size}); - }} -}} -""" +_LOG = logging.getLogger(__name__) def run_cmd(cmd): @@ -86,6 +38,7 @@ def run_cmd(cmd): output : str resulting stdout capture from the subprocess """ + _LOG.debug('execute: %s', ' '.join(cmd)) proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) (output, _) = proc.communicate() output = output.decode("utf-8") @@ -94,227 +47,3 @@ def run_cmd(cmd): msg = f'error while running command "{cmd_str}":\n{output}' raise RuntimeError(msg) return output - - -@tvm._ffi.register_func("tvm_callback_get_section_size") -def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix): - """Finds size of the section in the binary. 
- Assumes `size` shell command exists (typically works only on Linux machines) - - Parameters - ---------- - binary_path : str - path of the binary file - - section_name : str - name of section - - toolchain_prefix : str - prefix for binary names in target compiler toolchain - - Returns - ------- - size : integer - size of the section in bytes - """ - if not os.path.isfile(binary_path): - raise RuntimeError('no such file "{}"'.format(binary_path)) - # We use the "-A" flag here to get the ".rodata" section's size, which is - # not included by default. - size_output = run_cmd(["{}size".format(toolchain_prefix), "-A", binary_path]) - - # TODO(weberlo): Refactor this method and `*relocate_binary` so they are - # both aware of [".bss", ".sbss", ".sdata"] being relocated to ".bss". - section_mapping = { - ".text": [".text"], - ".rodata": [".rodata"], - ".data": [".data", ".sdata"], - ".bss": [".bss", ".sbss"], - } - sections_to_sum = section_mapping["." + section_name] - section_size = 0 - # Skip the first two header lines in the `size` output. - for line in size_output.split("\n")[2:]: - tokens = list(filter(lambda s: len(s) != 0, line.split(" "))) - if len(tokens) != 3: - continue - entry_name = tokens[0] - entry_size = int(tokens[1]) - for section in sections_to_sum: - if entry_name.startswith(section): - section_size += entry_size - break - - # NOTE: in the past, section_size has been wrong on x86. it may be - # inconsistent. TODO: maybe stop relying on `*size` to give us the size and - # instead read the section with `*objcopy` and count the bytes. - # NOTE(areusch): I think the problem is due to alignment ops in the linker. - # Since this is going away in the impending switch to on-device runtime, - # add a constant to hopefully absorb these relocations. - if section_size > 0: - section_size += 64 - - return section_size - - -@tvm._ffi.register_func("tvm_callback_relocate_binary") -def tvm_callback_relocate_binary( - binary_path, - word_size, - text_start, - rodata_start, - data_start, - bss_start, - stack_end, - toolchain_prefix, -): - """Relocates sections in the binary to new addresses - - Parameters - ---------- - binary_path : str - path of the binary file - - word_size : int - word size on the target machine - - text_start : int - text section address - - rodata_start : int - rodata section address - - data_start : int - data section address - - bss_start : int - bss section address - - stack_end : int - stack section end address - - toolchain_prefix : str - prefix for binary names in target compiler toolchain - - Returns - ------- - rel_bin : bytearray - the relocated binary - """ - assert text_start < rodata_start < data_start < bss_start < stack_end - stack_pointer_init = stack_end - word_size - ld_script_contents = "" - # TODO(weberlo): There should be a better way to configure this for different archs. - # TODO is this line even necessary? 
- if "riscv" in toolchain_prefix: - ld_script_contents += 'OUTPUT_ARCH( "riscv" )\n\n' - ld_script_contents += RELOCATION_LD_SCRIPT_TEMPLATE.format( - word_size=word_size, - text_start=text_start, - rodata_start=rodata_start, - data_start=data_start, - bss_start=bss_start, - stack_pointer_init=stack_pointer_init, - ) - - tmp_dir = utils.tempdir() - rel_obj_path = tmp_dir.relpath("relocated.obj") - rel_ld_script_path = tmp_dir.relpath("relocate.lds") - with open(rel_ld_script_path, "w") as f: - f.write(ld_script_contents) - run_cmd( - ["{}ld".format(toolchain_prefix), binary_path, "-T", rel_ld_script_path, "-o", rel_obj_path] - ) - - with open(rel_obj_path, "rb") as f: - rel_bin = bytearray(f.read()) - - gdb_init_dir = os.environ.get("MICRO_GDB_INIT_DIR") - if gdb_init_dir is not None: - gdb_init_path = f"{gdb_init_dir}/.gdbinit" - with open(gdb_init_path, "r") as f: - gdbinit_contents = f.read().split("\n") - new_contents = [] - for line in gdbinit_contents: - new_contents.append(line) - if line.startswith("target"): - new_contents.append(f"add-symbol-file {rel_obj_path}") - with open(gdb_init_path, "w") as f: - f.write("\n".join(new_contents)) - - return rel_bin - - -@tvm._ffi.register_func("tvm_callback_read_binary_section") -def tvm_callback_read_binary_section(binary, section, toolchain_prefix): - """Returns the contents of the specified section in the binary byte array - - Parameters - ---------- - binary : bytearray - contents of the binary - - section : str - type of section - - toolchain_prefix : str - prefix for binary names in target compiler toolchain - - Returns - ------- - section_bin : bytearray - contents of the read section - """ - tmp_dir = utils.tempdir() - tmp_bin = tmp_dir.relpath("temp.bin") - tmp_section = tmp_dir.relpath("tmp_section.bin") - with open(tmp_bin, "wb") as out_file: - out_file.write(bytes(binary)) - run_cmd( - [ - "{}objcopy".format(toolchain_prefix), - "--dump-section", - ".{}={}".format(section, tmp_section), - tmp_bin, - ] - ) - if os.path.isfile(tmp_section): - # Get section content if it exists. - with open(tmp_section, "rb") as f: - section_bin = bytearray(f.read()) - else: - # Return empty bytearray if the section does not exist. 
- section_bin = bytearray("", "utf-8") - return section_bin - - -@tvm._ffi.register_func("tvm_callback_get_symbol_map") -def tvm_callback_get_symbol_map(binary, toolchain_prefix): - """Obtains a map of symbols to addresses in the passed binary - - Parameters - ---------- - binary : bytearray - contents of the binary - - toolchain_prefix : str - prefix for binary names in target compiler toolchain - - Returns - ------- - map_str : str - map of defined symbols to addresses, encoded as a series of - alternating newline-separated keys and values - """ - tmp_dir = utils.tempdir() - tmp_obj = tmp_dir.relpath("tmp_obj.bin") - with open(tmp_obj, "wb") as out_file: - out_file.write(bytes(binary)) - nm_output = run_cmd(["{}nm".format(toolchain_prefix), "-C", "--defined-only", tmp_obj]) - nm_output = nm_output.splitlines() - map_str = "" - for line in nm_output: - line = line.split() - map_str += line[2] + "\n" - map_str += line[0] + "\n" - return map_str diff --git a/python/tvm/micro/build.py b/python/tvm/micro/build.py index d1a3c4163755..bed5bde6f916 100644 --- a/python/tvm/micro/build.py +++ b/python/tvm/micro/build.py @@ -23,6 +23,8 @@ import re from tvm.contrib import utils +from .micro_library import MicroLibrary + _LOG = logging.getLogger(__name__) @@ -109,7 +111,8 @@ def default_options(target_include_dir): def build_static_runtime( - workspace, compiler, module, lib_opts=None, bin_opts=None, generated_lib_opts=None + workspace, compiler, module, lib_opts=None, bin_opts=None, generated_lib_opts=None, + extra_libs=None ): """Build the on-device runtime, statically linking the given modules. @@ -131,6 +134,12 @@ def build_static_runtime( The `options` parameter passed to compiler.library() when compiling the generated TVM C source module. + extra_libs : Optional[List[MicroLibrary|str]] + If specified, extra libraries to be compiled into the binary. If a MicroLibrary, it is + included into the binary directly. If a string, the path to a directory; all direct children + of this directory matching RUNTIME_SRC_REGEX are built into a library. These libraries are + placed before any common CRT libraries in the link order. + Returns ------- MicroBinary : @@ -150,7 +159,12 @@ def build_static_runtime( module.save(mod_src_path, "cc") libs = [] - for lib_src_dir in RUNTIME_LIB_SRC_DIRS: + for mod_or_src_dir in (extra_libs or []) + RUNTIME_LIB_SRC_DIRS: + if isinstance(mod_or_src_dir, MicroLibrary): + libs.append(mod_or_src_dir) + continue + + lib_src_dir = mod_or_src_dir lib_name = os.path.basename(lib_src_dir) lib_build_dir = workspace.relpath(f"build/{lib_name}") os.makedirs(lib_build_dir) diff --git a/python/tvm/micro/debugger.py b/python/tvm/micro/debugger.py index b76d46a04db6..18ed350305d8 100644 --- a/python/tvm/micro/debugger.py +++ b/python/tvm/micro/debugger.py @@ -19,6 +19,7 @@ import atexit import abc +import errno import logging import os import signal @@ -26,12 +27,14 @@ import sys import termios import threading +import time import psutil from .._ffi import register_func from . import class_factory from . 
import transport +from .transport.file_descriptor import FdTransport _LOG = logging.getLogger(__name__) @@ -195,7 +198,8 @@ def popen_kwargs(self): else: raise NotImplementedError(f"System {sysname} is not yet supported") - self.fd_transport = fd.FdTransport(stdout_read, stdin_write) + self.fd_transport = FdTransport( + stdout_read, stdin_write, timeouts=transport.debug_transport_timeouts()) self.fd_transport.open() return { @@ -227,13 +231,33 @@ def open(self): pass # Pipes opened by parent class. def write(self, data, timeout_sec): - return self.gdb_transport_debugger.fd_transport.write(data, timeout_sec) + end_time = time.monotonic() + timeout_sec + while timeout_sec == 0 or time.monotonic() < end_time: + try: + return self.gdb_transport_debugger.fd_transport.write(data, timeout_sec) + except OSError as e: + if e.errno == errno.EAGAIN: + time.sleep(0.1) + continue + raise e + + raise base.IoTimeoutError() def read(self, n, timeout_sec): - return self.gdb_transport_debugger.fd_transport.read(n, timeout_sec) + end_time = time.monotonic() + timeout_sec + while timeout_sec == 0 or time.monotonic() < end_time: + try: + return self.gdb_transport_debugger.fd_transport.read(n, timeout_sec) + except OSError as e: + if e.errno == errno.EAGAIN: + time.sleep(0.1) + continue + raise e + + raise base.IoTimeoutError() def close(self): - pass # Pipes closed by parent class. + pass # Pipes closed by parent class (DebugWrapperTransport calls stop() next). def transport(self): return self._Transport(self) diff --git a/python/tvm/micro/transport/__init__.py b/python/tvm/micro/transport/__init__.py index 1e1709707568..dffe9ae32792 100644 --- a/python/tvm/micro/transport/__init__.py +++ b/python/tvm/micro/transport/__init__.py @@ -22,5 +22,6 @@ from .base import TransportClosedError from .base import TransportLogger from .base import TransportTimeouts +from .base import debug_transport_timeouts from .debug import DebugWrapperTransport from .subprocess import SubprocessTransport diff --git a/python/tvm/micro/transport/base.py b/python/tvm/micro/transport/base.py index f8951f6226a5..07332e8a745d 100644 --- a/python/tvm/micro/transport/base.py +++ b/python/tvm/micro/transport/base.py @@ -64,7 +64,7 @@ class IoTimeoutError(Exception): ) -def debug_transport_timeouts(session_start_retry_timeout_sec=0.0): +def debug_transport_timeouts(session_start_retry_timeout_sec=0): return TransportTimeouts( session_start_retry_timeout_sec=session_start_retry_timeout_sec, session_start_timeout_sec=0, diff --git a/python/tvm/micro/transport/file_descriptor.py b/python/tvm/micro/transport/file_descriptor.py index 3f69c4c26751..6df6cd425eff 100644 --- a/python/tvm/micro/transport/file_descriptor.py +++ b/python/tvm/micro/transport/file_descriptor.py @@ -62,8 +62,11 @@ def open(self): def close(self): if self.read_fd is not None: os.close(self.read_fd) + self.read_fd = None + if self.write_fd is not None: os.close(self.write_fd) + self.write_fd = None def _await_ready(self, rlist, wlist, timeout_sec=None, end_time=None): if end_time is None: @@ -78,6 +81,9 @@ def _await_ready(self, rlist, wlist, timeout_sec=None, end_time=None): return True def read(self, n, timeout_sec): + if self.read_fd is None: + raise base.TransportClosedError() + end_time = None if timeout_sec is None else time.monotonic() + timeout_sec self._await_ready([self.read_fd], [], end_time=end_time) @@ -90,6 +96,9 @@ def read(self, n, timeout_sec): return to_return def write(self, data, timeout_sec): + if self.write_fd is None: + raise base.TransportClosedError() + 
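+        # As in read() above, write_fd is cleared by close(), so this guards against use of
+        # the transport after it has been closed.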
end_time = None if timeout_sec is None else time.monotonic() + timeout_sec data_len = len(data) diff --git a/src/runtime/crt/Makefile b/src/runtime/crt/Makefile index 8a24db4e8b2b..6e462431173f 100644 --- a/src/runtime/crt/Makefile +++ b/src/runtime/crt/Makefile @@ -65,7 +65,7 @@ $(notdir $(1)): $${BUILD_DIR}/lib$(notdir $(1)).a endef -LIBS = src/runtime/crt/common src/runtime/crt/graph_runtime src/runtime/crt/utvm_rpc_common src/runtime/crt/utvm_rpc_server +LIBS = src/runtime/crt/common src/runtime/crt/graph_runtime src/runtime/crt/graph_runtime_module src/runtime/crt/utvm_rpc_common src/runtime/crt/utvm_rpc_server $(foreach lib,$(LIBS),$(eval $(call LIB_template,$(lib)))) diff --git a/src/runtime/crt/common/crt_runtime_api.c b/src/runtime/crt/common/crt_runtime_api.c index d6f78d9e3a03..9a0663fc704d 100644 --- a/src/runtime/crt/common/crt_runtime_api.c +++ b/src/runtime/crt/common/crt_runtime_api.c @@ -127,7 +127,7 @@ static TVMModuleHandle EncodeModuleHandle(tvm_module_index_t module_index) { return (TVMModuleHandle)((uintptr_t)(module_index | 0x8000)); } -static int TVMModCreateFromCModule(const TVMModule* mod, TVMModuleHandle* out_handle) { +int TVMModCreateFromCModule(const TVMModule* mod, TVMModuleHandle* out_handle) { tvm_module_index_t idx; for (idx = 0; idx < TVM_CRT_MAX_REGISTERED_MODULES; idx++) { diff --git a/src/runtime/crt/graph_runtime/graph_runtime.c b/src/runtime/crt/graph_runtime/graph_runtime.c index a6cd77ad6a22..68213b7dd3c3 100644 --- a/src/runtime/crt/graph_runtime/graph_runtime.c +++ b/src/runtime/crt/graph_runtime/graph_runtime.c @@ -539,6 +539,15 @@ uint32_t TVMGraphRuntime_GetEntryId(TVMGraphRuntime* runtime, uint32_t nid, uint return runtime->node_row_ptr[nid] + index; } +/*! + * \brief Get the number of input tensors allocated. + * \param runtime The graph runtime. + * \return the number of input tensors allocated. + */ +int TVMGraphRuntime_GetNumInputs(TVMGraphRuntime* runtime) { + return runtime->input_nodes_count; +} + /*! * \brief Get the input index given the name of input. * \param runtime The graph runtime. @@ -675,6 +684,15 @@ void TVMGraphRuntime_Run(TVMGraphRuntime* runtime) { } } +/*! + * \brief Get the number of output tensors allocated. + * \param runtime The graph runtime. + * \return the number of output tensors allocated. 
+ */ +int TVMGraphRuntime_GetNumOutputs(TVMGraphRuntime* runtime) { + return runtime->outputs_count; +} + int TVMGraphRuntime_GetOutput(TVMGraphRuntime* runtime, const int32_t idx, DLTensor* out) { int status = 0; uint32_t nid = runtime->outputs[idx].node_id; @@ -875,7 +893,6 @@ void TVMGraphRuntime_Init(TVMGraphRuntime* runtime, const char* graph_json, cons TVMGraphRuntime* TVMGraphRuntime_Create(const char* sym_json, const TVMModule* m, const TVMContext* ctxs) { - CHECK_EQ(vleak_size, 1, "memory leak checking won't work with concurrent CRT use"); TVMGraphRuntime* runtime = (TVMGraphRuntime*)vmalloc(sizeof(TVMGraphRuntime)); // NOLINT(*) memset(runtime, 0, sizeof(TVMGraphRuntime)); // init @@ -909,6 +926,4 @@ void TVMGraphRuntime_Release(TVMGraphRuntime** pptr) { vfree(g_fexecs); g_fexecs = 0; } - - CHECK_EQ(vleak_size, 1, "found memory leak, leak size=%d", vleak_size - 1); } diff --git a/src/runtime/crt/graph_runtime_module/graph_runtime_module.c b/src/runtime/crt/graph_runtime_module/graph_runtime_module.c new file mode 100644 index 000000000000..a8de71e33f9d --- /dev/null +++ b/src/runtime/crt/graph_runtime_module/graph_runtime_module.c @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +// LINT_C_FILE + +/*! + * \file graph_runtime_module.c + * \brief wrap graph_runtime into a TVMModule for use with RPC. 
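+ *
+ * A single static GraphRuntimeModule instance backs this module: its constructor is
+ * registered as the global PackedFunc "tvm.graph_runtime.create" by
+ * TVMGraphRuntimeModule_Register(), and attempting to create a second runtime returns
+ * kTvmErrorGraphModuleAlreadyCreated.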
+ */ + +#include +#include +#include +#include + +#include "tvm/runtime/crt/internal/graph_runtime/graph_runtime.h" + +typedef struct { + TVMModule mod; + TVMGraphRuntime* runtime; +} GraphRuntimeModule; + +static GraphRuntimeModule graph_runtime; + +int32_t TVMGraphRuntimeModule_Create(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { + if (graph_runtime.runtime != NULL) { + return kTvmErrorGraphModuleAlreadyCreated; + } + + if (nargs != 4) { + return kTvmErrorFunctionCallNumArguments; + } + + if (tcodes[0] != kTVMStr || tcodes[1] != kTVMModuleHandle || tcodes[2] != kTVMArgInt || tcodes[3] != kTVMArgInt) { + return kTvmErrorFunctionCallWrongArgType; + } + + if (args[2].v_int64 != kDLCPU || args[3].v_int64 != 0) { + return kTvmErrorGraphModuleBadContext; + } + + TVMContext ctx = {(DLDeviceType) args[2].v_int64, (int) args[3].v_int64}; + graph_runtime.runtime = TVMGraphRuntime_Create(args[0].v_str, args[1].v_handle, &ctx); + + TVMModuleHandle out; + int ret_value = TVMModCreateFromCModule(&graph_runtime.mod, &out); + if (ret_value != 0) { + ret_tcodes[0] = kTVMNullptr; + TVMGraphRuntime_Release(&graph_runtime.runtime); + return ret_value; + } + + ret_values[0].v_handle = out; + ret_tcodes[0] = kTVMModuleHandle; + return kTvmErrorNoError; +} + +int32_t TVMGraphRuntimeModule_GetInput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { + if (nargs != 1) { + return kTvmErrorFunctionCallNumArguments; + } + + if (tcodes[0] != kTVMStr) { + return kTvmErrorFunctionCallWrongArgType; + } + + int index = TVMGraphRuntime_GetInputIndex(graph_runtime.runtime, args[0].v_str); + if (index < 0) { + return kTvmErrorGraphModuleNoSuchInput; + } + + uint32_t eid = TVMGraphRuntime_GetEntryId( + graph_runtime.runtime, graph_runtime.runtime->input_nodes[index], 0); + ret_values[0].v_handle = (void*) &graph_runtime.runtime->data_entry[eid].dl_tensor; + ret_tcodes[0] = kTVMNDArrayHandle; + return 0; +} + +int32_t TVMGraphRuntimeModule_GetNumInputs(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { + if (nargs != 0) { + return kTvmErrorFunctionCallNumArguments; + } + + ret_values[0].v_int64 = TVMGraphRuntime_GetNumInputs(); + ret_tcodes[0] = kTVMArgInt; + return 0; +} + +int32_t TVMGraphRuntimeModule_GetNumOutputs(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { + if (nargs != 0) { + return kTvmErrorFunctionCallNumArguments; + } + + ret_values[0].v_int64 = TVMGraphRuntime_GetNumOutputs(); + ret_tcodes[0] = kTVMArgInt; + return 0; +} + +int32_t TVMGraphRuntimeModule_GetOutput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { + if (nargs != 1) { + return kTvmErrorFunctionCallNumArguments; + } + + if (tcodes[0] != kTVMArgInt) { + return kTvmErrorFunctionCallWrongArgType; + } + + int output_index = args[0].v_int64; + if (output_index < 0 || output_index > TVMGraphRuntime_GetNumOutputs()) { + return kTvmErrorGraphModuleNoSuchInput; + } + + uint32_t nid = graph_runtime.runtime->outputs[output_index].node_id; + uint32_t index = graph_runtime.runtime->outputs[output_index].index; + uint32_t eid = TVMGraphRuntime_GetEntryId(graph_runtime.runtime, nid, index); + + ret_values[0].v_handle = (void*) &(graph_runtime.runtime->data_entry[eid].dl_tensor); + ret_tcodes[0] = kTVMNDArrayHandle; + return 0; +} + +int32_t TVMGraphRuntimeModule_LoadParams(TVMValue* args, int* 
tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { + if (nargs != 1) { + return kTvmErrorFunctionCallNumArguments; + } + + if (tcodes[0] != kTVMBytes) { + return kTvmErrorFunctionCallWrongArgType; + } + + ret_tcodes[0] = kTVMNullptr; + + TVMByteArray* arr = (TVMByteArray*) args[0].v_handle; + return TVMGraphRuntime_LoadParams(graph_runtime.runtime, arr->data, arr->size); +} + +int32_t TVMGraphRuntimeModule_Run(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { + if (nargs != 0) { + return kTvmErrorFunctionCallNumArguments; + } + + TVMGraphRuntime_Run(graph_runtime.runtime); + + ret_tcodes[0] = kTVMNullptr; + return 0; +} + +int32_t TVMGraphRuntimeModule_SetInput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { + if (nargs != 2) { + return kTvmErrorFunctionCallNumArguments; + } + + if (tcodes[0] != kTVMStr || tcodes[1] != kTVMDLTensorHandle) { + return kTvmErrorFunctionCallWrongArgType; + } + + TVMGraphRuntime_SetInput(graph_runtime.runtime, args[0].v_str, (DLTensor*) args[1].v_handle); + + ret_tcodes[0] = kTVMNullptr; + return 0; +} + +int32_t TVMGraphRuntimeModule_NotImplemented(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { + return kTvmErrorFunctionCallNotImplemented; +} + +static const TVMBackendPackedCFunc graph_runtime_registry_funcs[] = { + &TVMGraphRuntimeModule_GetInput, + &TVMGraphRuntimeModule_GetNumInputs, + &TVMGraphRuntimeModule_GetNumOutputs, + &TVMGraphRuntimeModule_GetOutput, + &TVMGraphRuntimeModule_LoadParams, + &TVMGraphRuntimeModule_Run, + &TVMGraphRuntimeModule_SetInput, + &TVMGraphRuntimeModule_NotImplemented, +}; + + +static const TVMFuncRegistry graph_runtime_registry = { + "\x08get_input\0" + "get_num_inputs\0" + "get_num_outputs\0" + "get_output\0" + "load_params\0" + "run\0" + "set_input\0" + "share_params\0", + graph_runtime_registry_funcs}; + +tvm_crt_error_t TVMGraphRuntimeModule_Register() { + graph_runtime.mod.registry = &graph_runtime_registry; + graph_runtime.runtime = NULL; + + return TVMFuncRegisterGlobal("tvm.graph_runtime.create", &TVMGraphRuntimeModule_Create, 0); +} diff --git a/src/runtime/crt/host/main.cc b/src/runtime/crt/host/main.cc index 60797c39b41d..8705ca899103 100644 --- a/src/runtime/crt/host/main.cc +++ b/src/runtime/crt/host/main.cc @@ -32,6 +32,10 @@ #include "crt_config.h" +#ifdef TVM_HOST_USE_GRAPH_RUNTIME_MODULE +#include +#endif + using namespace std::chrono; extern "C" { @@ -95,6 +99,11 @@ int main(int argc, char** argv) { utvm_rpc_server_t rpc_server = UTvmRpcServerInit(memory, sizeof(memory), 8, &UTvmWriteFunc, nullptr); +#ifdef TVM_HOST_USE_GRAPH_RUNTIME_MODULE + CHECK_EQ(TVMGraphRuntimeModule_Register(), kTvmErrorNoError, + "failed to register GraphRuntime TVMModule"); +#endif + if (TVMFuncRegisterGlobal("tvm.testing.reset_server", (TVMFunctionHandle)&testonly_reset_server, 0)) { fprintf(stderr, "utvm runtime: internal error registering global packedfunc; exiting\n"); diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h index 7ea7a4f035c8..ee095325deef 100644 --- a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h +++ b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h @@ -100,6 +100,7 @@ typedef struct TVMGraphRuntime { typedef DLTensor* DLTensorPtr; // 
private functions +uint32_t TVMGraphRuntime_GetEntryId(TVMGraphRuntime* runtime, uint32_t nid, uint32_t index); void TVMGraphRuntime_SetInput(TVMGraphRuntime* runtime, const char* name, DLTensor* data_in); int TVMGraphRuntime_LoadParams(TVMGraphRuntime* runtime, const char* param_blob, const uint32_t param_size); diff --git a/src/runtime/micro/micro_session.cc b/src/runtime/micro/micro_session.cc index 662597086d8a..38252bc27745 100644 --- a/src/runtime/micro/micro_session.cc +++ b/src/runtime/micro/micro_session.cc @@ -121,6 +121,7 @@ class MicroTransportChannel : public RPCChannel { ::std::string chunk; if (timeout != ::std::chrono::microseconds::zero()) { + LOG(INFO) << "ReceiveUntil no-timeout " << timeout.count() << " us"; ::std::chrono::microseconds iter_timeout{ ::std::max(::std::chrono::microseconds{0}, ::std::chrono::duration_cast<::std::chrono::microseconds>( @@ -288,6 +289,7 @@ class MicroTransportChannel : public RPCChannel { }; TVM_REGISTER_GLOBAL("micro._rpc_connect").set_body([](TVMArgs args, TVMRetValue* rv) { + LOG(INFO) << "MICRO RPC CONNECT " << uint64_t(args[3]) << ", " << uint64_t(args[4]) << ", " << uint64_t(args[5]); MicroTransportChannel* micro_channel = new MicroTransportChannel(args[1], args[2], ::std::chrono::microseconds(uint64_t(args[3])), ::std::chrono::microseconds(uint64_t(args[4])), diff --git a/src/runtime/rpc/rpc_endpoint.cc b/src/runtime/rpc/rpc_endpoint.cc index fbdd93fb4f62..ef7b34079606 100644 --- a/src/runtime/rpc/rpc_endpoint.cc +++ b/src/runtime/rpc/rpc_endpoint.cc @@ -370,6 +370,7 @@ class RPCEndpoint::EventHandler : public dmlc::Stream { */ void HandleReturn(RPCCode code, RPCSession::FEncodeReturn setreturn) { TVMArgs args = RecvPackedSeq(); + LOG(INFO) << "Receive PackedSeq " << args.size(); if (code == RPCCode::kException) { // switch to the state before sending exception. diff --git a/tests/python/unittest/test_link_params.py b/tests/python/unittest/test_link_params.py index a3ce97e383d8..34e5a4a9dd8e 100644 --- a/tests/python/unittest/test_link_params.py +++ b/tests/python/unittest/test_link_params.py @@ -245,9 +245,7 @@ def _format_c_value(dtype, width, x): def test_c_link_params(): temp_dir = utils.tempdir() for dtype in LINKABLE_DTYPES: - print("test", dtype) mod, param_init = _make_mod_and_params(dtype) - print('built mod', mod) rand_input = _make_random_tensor(dtype) main_func = mod['main'] target = 'c --link-params' @@ -331,6 +329,68 @@ def _run_unlinked(lib_mod): np.testing.assert_allclose(unlinked_output.asnumpy(), linked_output.asnumpy()) +@tvm.testing.requires_micro +def test_crt_link_params(): + import tvm.micro + + + for dtype in LINKABLE_DTYPES: + mod, param_init = _make_mod_and_params(dtype) + rand_input = _make_random_tensor(dtype) + main_func = mod['main'] + target = 'c -mcpu=native --system-lib --runtime=c --link-params' + with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): + graph_json, lib, params = tvm.relay.build(mod, target, params=param_init) + assert set(params.keys()) == {"p0"} # NOTE: op folded + + workspace = tvm.micro.Workspace() + compiler = tvm.micro.DefaultCompiler(target=target) + opts = tvm.micro.default_options(os.path.join(tvm.micro.CRT_ROOT_DIR, "host")) + opts['bin_opts']['ldflags'].append('-DTVM_HOST_USE_GRAPH_RUNTIME_MODULE') + + micro_binary = tvm.micro.build_static_runtime( + # the x86 compiler *expects* you to give the exact same dictionary for both + # lib_opts and bin_opts. 
so the library compiler is mutating lib_opts and + # the binary compiler is expecting those mutations to be in bin_opts. + # TODO(weberlo) fix this very bizarre behavior + workspace, + compiler, + lib, + lib_opts=opts["bin_opts"], + bin_opts=opts["bin_opts"], + extra_libs=[os.path.join(tvm.micro.CRT_ROOT_DIR, m) + for m in ('graph_runtime', 'graph_runtime_module')], + ) + + flasher_kw = { + "debug": False, + } + flasher = compiler.flasher(**flasher_kw) + with tvm.micro.Session(binary=micro_binary, flasher=flasher) as sess: + rpc_lib = sess.get_system_lib() + graph_rt = tvm.contrib.graph_runtime.create( + graph_json, rpc_lib, sess.context) + + graph_rt.set_input('rand_input', rand_input, **params) + graph_rt.run() + linked_output = graph_rt.get_output(0).asnumpy() + + with tvm.transform.PassContext(opt_level=3): + lib = tvm.relay.build(mod, 'llvm --system-lib', params=param_init) + + def _run_unlinked(lib): + graph_json, mod, lowered_params = lib + graph_rt = tvm.contrib.graph_runtime.create(graph_json, mod, tvm.cpu(0)) + graph_rt.set_input('rand_input', rand_input, **lowered_params) + graph_rt.run() + return graph_rt.get_output(0) + + unlinked_output = _run_unlinked(lib).asnumpy() + + if 'int' in dtype: + np.testing.assert_equal(unlinked_output, linked_output) + else: + np.testing.assert_allclose(unlinked_output, linked_output) if __name__ == '__main__': From 1afa10e9fddb7a5d6c9a8a4088fd8d0b3f7c3d6c Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 10 Nov 2020 21:47:31 -0800 Subject: [PATCH 08/60] CRT works! --- include/tvm/runtime/crt/graph_runtime.h | 4 +- src/runtime/crt/graph_runtime/graph_runtime.c | 66 ++++++++++++++----- .../internal/graph_runtime/graph_runtime.h | 9 ++- tests/python/unittest/test_link_params.py | 3 +- 4 files changed, 62 insertions(+), 20 deletions(-) diff --git a/include/tvm/runtime/crt/graph_runtime.h b/include/tvm/runtime/crt/graph_runtime.h index dc17debbc69b..e8413aa1723d 100644 --- a/include/tvm/runtime/crt/graph_runtime.h +++ b/include/tvm/runtime/crt/graph_runtime.h @@ -61,10 +61,10 @@ typedef struct TVMGraphRuntime TVMGraphRuntime; * \brief Allocate a new GraphRuntime with vmalloc and initialize it. * * \param sym_json JSON-encoded graph. - * \param m TVM Module that exposes the functions to call. + * \param module_handle TVM Module that exposes the functions to call. * \param ctxs runtime execution context. */ -TVMGraphRuntime* TVMGraphRuntime_Create(const char* sym_json, const struct TVMModule* m, +TVMGraphRuntime* TVMGraphRuntime_Create(const char* sym_json, TVMModuleHandle module_handle, const TVMContext* ctxs); int TVMGraphRuntime_GetInputIndex(TVMGraphRuntime* runtime, const char* name); diff --git a/src/runtime/crt/graph_runtime/graph_runtime.c b/src/runtime/crt/graph_runtime/graph_runtime.c index 68213b7dd3c3..03d81aa184f8 100644 --- a/src/runtime/crt/graph_runtime/graph_runtime.c +++ b/src/runtime/crt/graph_runtime/graph_runtime.c @@ -711,8 +711,19 @@ int TVMGraphRuntime_GetOutput(TVMGraphRuntime* runtime, const int32_t idx, DLTen } void TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { + TVMPackedFunc lookup_linked_param; + int lookup_linked_param_valid; uint32_t idx; + { + TVMArgs temp_args; + temp_args.values[0].v_int64 = 0; + temp_args.tcodes[0] = kTVMArgInt; + temp_args.values_count = 1; + lookup_linked_param_valid = + (TVMPackedFunc_InitModuleFunc(&lookup_linked_param, runtime->module_handle, "_lookup_linked_param", &temp_args) == 0); + } + // Grab saved optimization plan from graph. 
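+  // NOTE: the probe above packs a single int64 argument (later overwritten with each
+  // storage_id) and asks the module for "_lookup_linked_param". When the function is absent,
+  // lookup_linked_param_valid stays false and every pool entry below falls back to a freshly
+  // allocated TVMNDArray; when it is present, entries whose lookup succeeds reuse the returned
+  // handle as their tensor data and are marked is_linked_param so Release() never frees them.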
TVMGraphRuntimeGraphAttr* attrs = &(runtime->attrs); DLDataType* vtype = vmalloc(sizeof(DLDataType) * attrs->dltype_count); @@ -739,6 +750,7 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { if (sid >= pool_entry_count) { pool_entry_count = sid + 1; } + pool_entry[sid].entry_id = idx; pool_entry[sid].size = MAX(pool_entry[sid].size, bytes); pool_entry[sid].device_type = device_type; } @@ -746,17 +758,36 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { // Allocate the space. for (idx = 0; idx < pool_entry_count; idx++) { runtime->storage_pool = - vrealloc(runtime->storage_pool, sizeof(TVMNDArray) * (runtime->storage_pool_count + 1)); + vrealloc(runtime->storage_pool, + sizeof(TVMGraphRuntimeStorageEntry) * (runtime->storage_pool_count + 1)); TVMGraphRuntimePoolEntry pit = pool_entry[idx]; - int64_t shape[TVM_CRT_MAX_NDIM] = { - 0, - }; TVMContext ctx = runtime->ctxs[0]; - DLDataType dtype = {kDLFloat, 32, 1}; - shape[0] = (pit.size + 3) / 4; - runtime->storage_pool[runtime->storage_pool_count] = TVMNDArray_Empty(1, shape, dtype, ctx); - CHECK_NE(runtime->storage_pool[runtime->storage_pool_count].dl_tensor.data, 0, - "fail to create storage_pool with idx=%d\n", idx); + uint8_t did_find_linked_param = 0; + if (lookup_linked_param_valid) { + lookup_linked_param.args.values[0].v_int64 = idx; + if (lookup_linked_param.Call(&lookup_linked_param) == 0) { + runtime->storage_pool[runtime->storage_pool_count].is_linked_param = 1; + DLTensor* tensor = &runtime->storage_pool[runtime->storage_pool_count].array.dl_tensor; + tensor->data = lookup_linked_param.ret_value.values[0].v_handle; + tensor->ctx = ctx; + tensor->ndim = attrs->ndim[pit.entry_id]; + tensor->shape = attrs->shape + idx * TVM_CRT_MAX_NDIM; + tensor->strides = NULL; + tensor->byte_offset = 0; + did_find_linked_param = 1; + } + } + if (did_find_linked_param == 0) { + int64_t shape[TVM_CRT_MAX_NDIM] = { + 0, + }; + DLDataType dtype = {kDLFloat, 32, 1}; + shape[0] = (pit.size + 3) / 4; + runtime->storage_pool[runtime->storage_pool_count].is_linked_param = 0; + runtime->storage_pool[runtime->storage_pool_count].array = TVMNDArray_Empty(1, shape, dtype, ctx); + CHECK_NE(runtime->storage_pool[runtime->storage_pool_count].array.dl_tensor.data, 0, + "fail to create storage_pool with idx=%d\n", idx); + } runtime->storage_pool_count++; } @@ -769,7 +800,7 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { uint32_t storage_id = attrs->storage_id[idx]; CHECK(storage_id < runtime->storage_pool_count); runtime->data_entry[idx] = - TVMNDArray_CreateView(&(runtime->storage_pool[storage_id]), + TVMNDArray_CreateView(&(runtime->storage_pool[storage_id].array), attrs->shape + idx * TVM_CRT_MAX_NDIM, attrs->ndim[idx], vtype[idx]); CHECK_NE(runtime->data_entry[idx].dl_tensor.data, 0, "fail to create for node with idx=%d, storage_id=%u\n", idx, storage_id); @@ -876,27 +907,28 @@ int32_t TVMGraphRuntime_CreateTVMOp(TVMGraphRuntime* runtime, const TVMOpParam* /*! * \brief Initialize the graph executor with graph and context. * \param graph_json The execution graph. - * \param module The module containing the compiled functions for the host + * \param module_handle The module containing the compiled functions for the host * processor. * \param ctxs The context of the host and devices where graph nodes will be * executed on. 
*/ -void TVMGraphRuntime_Init(TVMGraphRuntime* runtime, const char* graph_json, const TVMModule* module, - const TVMContext* ctxs) { +void TVMGraphRuntime_Init(TVMGraphRuntime* runtime, const char* graph_json, + TVMModuleHandle module_handle, const TVMContext* ctxs) { JSONReader reader = JSONReader_Create(graph_json); TVMGraphRuntime_Load(runtime, &reader); JSONReader_Release(&reader); + runtime->module_handle = module_handle; runtime->ctxs[0] = ctxs[0]; TVMGraphRuntime_SetupStorage(runtime); TVMGraphRuntime_SetupOpExecs(runtime); } -TVMGraphRuntime* TVMGraphRuntime_Create(const char* sym_json, const TVMModule* m, +TVMGraphRuntime* TVMGraphRuntime_Create(const char* sym_json, TVMModuleHandle module_handle, const TVMContext* ctxs) { TVMGraphRuntime* runtime = (TVMGraphRuntime*)vmalloc(sizeof(TVMGraphRuntime)); // NOLINT(*) memset(runtime, 0, sizeof(TVMGraphRuntime)); // init - TVMGraphRuntime_Init(runtime, sym_json, m, ctxs); + TVMGraphRuntime_Init(runtime, sym_json, module_handle, ctxs); return runtime; } @@ -909,7 +941,9 @@ void TVMGraphRuntime_Release(TVMGraphRuntime** pptr) { vfree(runtime->nodes); TVMGraphRuntimeGraphAttr_Release(&(runtime->attrs)); for (idx = 0; idx < runtime->storage_pool_count; ++idx) { - TVMNDArray_Release(&(runtime->storage_pool[idx])); + if (runtime->storage_pool[idx].is_linked_param == 0) { + TVMNDArray_Release(&(runtime->storage_pool[idx].array)); + } } for (idx = 0; idx < runtime->data_entry_count; ++idx) { vfree(runtime->data_entry[idx].dl_tensor.shape); diff --git a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h index ee095325deef..8e0faaa4f199 100644 --- a/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h +++ b/src/runtime/crt/include/tvm/runtime/crt/internal/graph_runtime/graph_runtime.h @@ -33,6 +33,7 @@ typedef struct TVMGraphRuntimePoolEntry { size_t size; int device_type; + int entry_id; } TVMGraphRuntimePoolEntry; // Node entry @@ -44,6 +45,12 @@ typedef struct TVMGraphRuntimeNodeEntry { void (*Load)(JSONReader* reader); } TVMGraphRuntimeNodeEntry; +// Storage entry. +typedef struct TVMGraphRuntimeStorageEntry { + uint8_t is_linked_param; + TVMNDArray array; +} TVMGraphRuntimeStorageEntry; + // Node typedef struct TVMGraphRuntimeNode { // operator type in string @@ -87,7 +94,7 @@ typedef struct TVMGraphRuntime { TVMContext ctxs[1]; uint32_t ctxs_count; /*! \brief Common storage pool for all devices. */ - TVMNDArray* storage_pool; + TVMGraphRuntimeStorageEntry* storage_pool; uint32_t storage_pool_count; /*! \brief Data entry of each node. */ TVMNDArray* data_entry; diff --git a/tests/python/unittest/test_link_params.py b/tests/python/unittest/test_link_params.py index 34e5a4a9dd8e..f134db37c36d 100644 --- a/tests/python/unittest/test_link_params.py +++ b/tests/python/unittest/test_link_params.py @@ -371,7 +371,8 @@ def test_crt_link_params(): graph_rt = tvm.contrib.graph_runtime.create( graph_json, rpc_lib, sess.context) - graph_rt.set_input('rand_input', rand_input, **params) + # NOTE: not setting params here. 
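+        # With --link-params in the target string the parameter tensors are compiled into the
+        # module and resolved at runtime through its "_lookup_linked_param" function (wired up
+        # by lookup_remote_linked_param in tvm.micro.session), so only the non-parameter input
+        # needs to be supplied. An unlinked build would instead need something like:
+        #   graph_rt.set_input('rand_input', rand_input, **params)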
+ graph_rt.set_input('rand_input', rand_input) graph_rt.run() linked_output = graph_rt.get_output(0).asnumpy() From b85d90f9519155ba654a45d3a5a548611bf6764d Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 10 Nov 2020 23:12:57 -0800 Subject: [PATCH 09/60] make stm repo work (half done) --- python/tvm/target/target.py | 3 ++- src/runtime/micro/micro_session.cc | 2 -- src/runtime/rpc/rpc_endpoint.cc | 1 - src/target/llvm/codegen_params.cc | 8 ++++---- src/target/source/codegen_c_host.cc | 5 +++-- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/python/tvm/target/target.py b/python/tvm/target/target.py index ba4a1a2f744e..6ef41748ca5b 100644 --- a/python/tvm/target/target.py +++ b/python/tvm/target/target.py @@ -236,7 +236,8 @@ def micro(model="unknown", options=None): "stm32f746xx": ["-mcpu=cortex-m7"], } opts = _merge_opts( - trans_table[model] + ["-runtime=c", "--system-lib", f"-model={model}"], options + trans_table[model] + ["-runtime=c", "--system-lib", "--link-params", f"-model={model}"], + options ) # NOTE: in the future, the default micro target will be LLVM except when diff --git a/src/runtime/micro/micro_session.cc b/src/runtime/micro/micro_session.cc index 38252bc27745..662597086d8a 100644 --- a/src/runtime/micro/micro_session.cc +++ b/src/runtime/micro/micro_session.cc @@ -121,7 +121,6 @@ class MicroTransportChannel : public RPCChannel { ::std::string chunk; if (timeout != ::std::chrono::microseconds::zero()) { - LOG(INFO) << "ReceiveUntil no-timeout " << timeout.count() << " us"; ::std::chrono::microseconds iter_timeout{ ::std::max(::std::chrono::microseconds{0}, ::std::chrono::duration_cast<::std::chrono::microseconds>( @@ -289,7 +288,6 @@ class MicroTransportChannel : public RPCChannel { }; TVM_REGISTER_GLOBAL("micro._rpc_connect").set_body([](TVMArgs args, TVMRetValue* rv) { - LOG(INFO) << "MICRO RPC CONNECT " << uint64_t(args[3]) << ", " << uint64_t(args[4]) << ", " << uint64_t(args[5]); MicroTransportChannel* micro_channel = new MicroTransportChannel(args[1], args[2], ::std::chrono::microseconds(uint64_t(args[3])), ::std::chrono::microseconds(uint64_t(args[4])), diff --git a/src/runtime/rpc/rpc_endpoint.cc b/src/runtime/rpc/rpc_endpoint.cc index ef7b34079606..fbdd93fb4f62 100644 --- a/src/runtime/rpc/rpc_endpoint.cc +++ b/src/runtime/rpc/rpc_endpoint.cc @@ -370,7 +370,6 @@ class RPCEndpoint::EventHandler : public dmlc::Stream { */ void HandleReturn(RPCCode code, RPCSession::FEncodeReturn setreturn) { TVMArgs args = RecvPackedSeq(); - LOG(INFO) << "Receive PackedSeq " << args.size(); if (code == RPCCode::kException) { // switch to the state before sending exception. diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index 20a1efbb575e..9c0b979044f4 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -322,10 +322,10 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& if (arr_type.bits() == 32) { for (int i = 0; i < num_elements; i++) { float elem = static_cast(tensor->dl_tensor.data)[i]; - if (isinf(elem)) { + if (std::isinf(elem)) { // C99 standard. os << (elem < 0 ? "-" : " ") << std::setw(one_element_size_bytes - 1) << "INFINITY"; - } else if (isnan(elem)) { + } else if (std::isnan(elem)) { // GNU extension, implemenatation-dependent. 
os << std::setw(one_element_size_bytes) << "NAN"; } else { @@ -340,10 +340,10 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& } else if (arr_type.bits() == 64) { for (int i = 0; i < num_elements; i++) { double elem = static_cast(tensor->dl_tensor.data)[i]; - if (isinf(elem)) { + if (std::isinf(elem)) { // C99 standard. os << (elem < 0 ? "-" : " ") << std::setw(one_element_size_bytes - 1) << "INFINITY"; - } else if (isnan(elem)) { + } else if (std::isnan(elem)) { // GNU extension, implemenatation-dependent. os << std::setw(one_element_size_bytes) << "NAN"; } else { diff --git a/src/target/source/codegen_c_host.cc b/src/target/source/codegen_c_host.cc index 3896e37d5b5d..9a524b0428cc 100644 --- a/src/target/source/codegen_c_host.cc +++ b/src/target/source/codegen_c_host.cc @@ -75,7 +75,8 @@ void CodeGenCHost::LinkParameters(Map params) { function_names_.emplace_back(tvm::runtime::symbol::tvm_lookup_linked_param); for (auto kv : params) { - decl_stream << "#ifdef __cplusplus\n" + decl_stream << "\n" + << "#ifdef __cplusplus\n" << "extern \"C\" {\n" << "#endif\n" << "static const "; @@ -92,7 +93,7 @@ void CodeGenCHost::LinkParameters(Map params) { << "} // extern \"C\"\n" << "#endif\n"; stream << " case " << kv.second->id << ":\n" - << " ((int64_t*)out_ret_value)[0] = (int64_t) " << ::tvm::runtime::symbol::tvm_param_prefix << kv.first << ";\n" + << " ((uint64_t*)out_ret_value)[0] = (uint64_t) (uintptr_t) " << ::tvm::runtime::symbol::tvm_param_prefix << kv.first << ";\n" << " out_ret_tcode[0] = " << kTVMOpaqueHandle << ";\n" << " return 0;\n"; } From bbb6e806832da56c9b2d3e780bf6a247805e4f50 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Thu, 12 Nov 2020 22:41:32 -0800 Subject: [PATCH 10/60] works-ish on micro --- python/tvm/micro/session.py | 46 +++++++- .../graph_runtime_module.c | 6 +- .../graph/debug/graph_runtime_debug.cc | 15 ++- src/runtime/graph/graph_runtime.cc | 102 +++++++++++++----- src/runtime/graph/graph_runtime.h | 14 ++- src/runtime/graph/graph_runtime_factory.cc | 2 +- src/runtime/rpc/rpc_module.cc | 56 +++++----- src/target/llvm/codegen_llvm.cc | 10 +- src/target/source/codegen_c_host.cc | 3 +- 9 files changed, 185 insertions(+), 69 deletions(-) diff --git a/python/tvm/micro/session.py b/python/tvm/micro/session.py index 3f84f3beab5b..5be7d59a143f 100644 --- a/python/tvm/micro/session.py +++ b/python/tvm/micro/session.py @@ -154,6 +154,20 @@ def __exit__(self, exc_type, exc_value, exc_traceback): self.transport.__exit__(exc_type, exc_value, exc_traceback) +def lookup_remote_linked_param(mod, storage_id, template_tensor, ctx): + try: + lookup_linked_param = mod.get_function('_lookup_linked_param') + except KeyError: + return None + + remote_data = lookup_linked_param(storage_id) + if remote_data is None: + return None + + return get_global_func('tvm.rpc.NDArrayFromRemoteOpaqueHandle')( + mod, remote_data, template_tensor, ctx, lambda: None) + + def create_local_graph_runtime(graph_json_str, mod, ctx): """Create a local graph runtime driving execution on the remote CPU context given. 
@@ -175,4 +189,34 @@ def create_local_graph_runtime(graph_json_str, mod, ctx): """ device_type_id = [ctx.device_type, ctx.device_id] fcreate = get_global_func("tvm.graph_runtime.create") - return graph_runtime.GraphModule(fcreate(graph_json_str, mod, *device_type_id)) + return graph_runtime.GraphModule(fcreate(graph_json_str, mod, lookup_remote_linked_param, + *device_type_id)) + + +def create_local_debug_runtime(graph_json_str, mod, ctx, dump_root=None): + """Create a local debug runtime driving execution on the remote CPU context given. + + Parameters + ---------- + graph_json_str : str + A string containing the graph representation. + + mod : tvm.runtime.Module + The remote module containing functions in graph_json_str. + + ctx : tvm.Context + The remote CPU execution context. + + dump_root : Optional[str] + If given, passed as dump_root= to GraphModuleDebug. + + Returns + ------- + tvm.contrib.GraphRuntime : + A local graph runtime instance that executes on the remote device. + """ + device_type_id = [ctx.device_type, ctx.device_id] + fcreate = get_global_func("tvm.graph_runtime_debug.create") + return debug_runtime.GraphModuleDebug( + fcreate(graph_json_str, mod, lookup_remote_linked_param, *device_type_id), + [ctx], graph_json_str, dump_root=dump_root) diff --git a/src/runtime/crt/graph_runtime_module/graph_runtime_module.c b/src/runtime/crt/graph_runtime_module/graph_runtime_module.c index a8de71e33f9d..98e4693a4fb6 100644 --- a/src/runtime/crt/graph_runtime_module/graph_runtime_module.c +++ b/src/runtime/crt/graph_runtime_module/graph_runtime_module.c @@ -88,7 +88,8 @@ int32_t TVMGraphRuntimeModule_GetInput(TVMValue* args, int* tcodes, int nargs, T uint32_t eid = TVMGraphRuntime_GetEntryId( graph_runtime.runtime, graph_runtime.runtime->input_nodes[index], 0); ret_values[0].v_handle = (void*) &graph_runtime.runtime->data_entry[eid].dl_tensor; - ret_tcodes[0] = kTVMNDArrayHandle; + ret_tcodes[0] = kTVMOpaqueHandle; + //ret_tcodes[0] = kTVMNDArrayHandle; return 0; } @@ -131,7 +132,8 @@ int32_t TVMGraphRuntimeModule_GetOutput(TVMValue* args, int* tcodes, int nargs, uint32_t eid = TVMGraphRuntime_GetEntryId(graph_runtime.runtime, nid, index); ret_values[0].v_handle = (void*) &(graph_runtime.runtime->data_entry[eid].dl_tensor); - ret_tcodes[0] = kTVMNDArrayHandle; +// ret_tcodes[0] = kTVMNDArrayHandle; + ret_tcodes[0] = kTVMOpaqueHandle; return 0; } diff --git a/src/runtime/graph/debug/graph_runtime_debug.cc b/src/runtime/graph/debug/graph_runtime_debug.cc index 3e9ff4f279e7..d02a6d9a0d64 100644 --- a/src/runtime/graph/debug/graph_runtime_debug.cc +++ b/src/runtime/graph/debug/graph_runtime_debug.cc @@ -202,9 +202,10 @@ PackedFunc GraphRuntimeDebug::GetFunction(const std::string& name, * \param ctxs All devices contexts. 
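+ * \param lookup_linked_param_func If given, a PackedFunc invoked to lookup linked parameters
+ * by storage_id; forwarded unchanged to the debug runtime's Init().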
*/ Module GraphRuntimeDebugCreate(const std::string& sym_json, const tvm::runtime::Module& m, - const std::vector& ctxs) { + const std::vector& ctxs, + PackedFunc lookup_linked_param_func) { auto exec = make_object(); - exec->Init(sym_json, m, ctxs); + exec->Init(sym_json, m, ctxs, lookup_linked_param_func); return Module(exec); } @@ -212,7 +213,15 @@ TVM_REGISTER_GLOBAL("tvm.graph_runtime_debug.create").set_body([](TVMArgs args, ICHECK_GE(args.num_args, 4) << "The expected number of arguments for graph_runtime.create is " "at least 4, but it has " << args.num_args; - *rv = GraphRuntimeDebugCreate(args[0], args[1], GetAllContext(args)); + PackedFunc lookup_linked_param_func; + int ctx_start_arg = 2; + if (args[2].type_code() == kTVMPackedFuncHandle) { + lookup_linked_param_func = args[2]; + ctx_start_arg++; + } + + *rv = GraphRuntimeDebugCreate(args[0], args[1], GetAllContext(args, ctx_start_arg), + lookup_linked_param_func); }); } // namespace runtime } // namespace tvm diff --git a/src/runtime/graph/graph_runtime.cc b/src/runtime/graph/graph_runtime.cc index c64f773f5157..7c34d9626181 100644 --- a/src/runtime/graph/graph_runtime.cc +++ b/src/runtime/graph/graph_runtime.cc @@ -64,14 +64,19 @@ void GraphRuntime::Run() { * processor. * \param ctxs The context of the host and devices where graph nodes will be * executed on. + * \param lookup_linked_param_func Linked parameter lookup function. */ void GraphRuntime::Init(const std::string& graph_json, tvm::runtime::Module module, - const std::vector& ctxs) { + const std::vector& ctxs, PackedFunc lookup_linked_param_func) { std::istringstream is(graph_json); dmlc::JSONReader reader(&is); this->Load(&reader); module_ = module; ctxs_ = ctxs; + lookup_linked_param_ = lookup_linked_param_func; + if (lookup_linked_param_ == nullptr) { + lookup_linked_param_ = PackedFunc(&GraphRuntime::DefaultLookupLinkedParam); + } this->SetupStorage(); this->SetupOpExecs(); for (size_t i = 0; i < input_nodes_.size(); i++) { @@ -249,12 +254,47 @@ void GraphRuntime::PreAllocatedDLTensorDeleter(DLManagedTensor* tensor) { delete reinterpret_cast(tensor); } -void GraphRuntime::SetupStorage() { +void GraphRuntime::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { + Module mod = args[0]; + int64_t storage_id = args[1]; + NDArray template_tensor = args[2]; + TVMContext ctx = args[3]; // Get pre-linked parameter lookup function, if it was generated. When pf == nullptr, no linked // params are present. - tvm::runtime::PackedFunc pf = module_.GetFunction( + tvm::runtime::PackedFunc pf = mod.GetFunction( ::tvm::runtime::symbol::tvm_lookup_linked_param, true); + if (pf == nullptr) { + *rv = nullptr; + return; + } + + TVMRetValue opaque_handle = pf(storage_id); + if (opaque_handle.type_code() == kTVMNullptr) { + *rv = nullptr; + return; + } + + std::unique_ptr container{new NDArray::Container( + static_cast(opaque_handle), template_tensor.Shape(), template_tensor.DataType(), ctx)}; + *rv = NDArray(GetObjectPtr(container.release())); +} + +std::string List2String(std::vector shape) { + if (shape.size() == 0) { + return "[]"; + } + std::stringstream ss; + ss << "[" << shape[0]; + for (int i = 1; i < shape.size(); i++) { + ss << ", " << shape[i]; + } + ss << "]"; + return ss.str(); +} + + +void GraphRuntime::SetupStorage() { // Grab saved optimization plan from graph. 
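+  // For each storage id the loop below first consults lookup_linked_param_ with
+  // (module_, storage_id, template_tensor, ctx); a defined NDArray return is used as the pool
+  // entry directly, otherwise a flat float32 NDArray of the required size is allocated as before.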
std::vector vtype; for (const std::string& s_type : attrs_.dltype) { @@ -288,12 +328,16 @@ void GraphRuntime::SetupStorage() { ICHECK(pool_entry[sid].device_type == -1 || pool_entry[sid].device_type == device_type) << "The same pool entry cannot be assigned to multiple devices"; } - if (pf != nullptr && pool_entry[sid].pre_linked_param == nullptr) { - try { - pool_entry[sid].pre_linked_param = pf(sid); - } catch (std::runtime_error& e) { - // Indicates this storage_id is not pre-linked. - } + TVMRetValue lookup_rv; + { + std::vector shape_vec{attrs_.shape[i].begin(), attrs_.shape[i].end()}; + DLTensor template_tensor{ + nullptr, TVMContext{kDLCPU, 0}, static_cast(shape_vec.size()), vtype[i], shape_vec.data(), nullptr, 0}; + lookup_rv = lookup_linked_param_( + module_, sid, &template_tensor, ctxs_[0]); + } + if (lookup_rv.type_code() != kTVMNullptr) { + pool_entry[sid].linked_param = lookup_rv; } pool_entry[sid].param_data_entry = i; pool_entry[sid].size = std::max(pool_entry[sid].size, bytes); @@ -308,21 +352,11 @@ void GraphRuntime::SetupStorage() { return pit.device_type == static_cast(c.device_type); }); TVMContext ctx = cit == ctxs_.end() ? ctxs_[0] : *cit; - if (pit.pre_linked_param != nullptr) { - LOG(INFO) << "param " << pit.param_data_entry << " pre-loaded!"; - auto param_shape = &attrs_.shape[pit.param_data_entry]; - DLManagedTensor* param_tensor = new DLManagedTensor{ - {pit.pre_linked_param, ctx, static_cast(param_shape->size()), - vtype[pit.param_data_entry], param_shape->data(), nullptr, 0}, - nullptr, - PreAllocatedDLTensorDeleter}; - - storage_pool_.push_back(NDArray::FromDLPack(param_tensor)); - LOG(INFO) << "Loaded data entry " << pit.param_data_entry - << " from pre-linked blob: " << param_tensor->dl_tensor.data; - + if (pit.linked_param.defined()) { + LOG(INFO) << "param " << storage_pool_.size() << " pre-loaded!"; + storage_pool_.push_back(pit.linked_param); } else { - LOG(INFO) << "param " << pit.param_data_entry << " blank!"; + LOG(INFO) << "param " << storage_pool_.size() << " blank!"; std::vector shape; shape.push_back(static_cast(pit.size + 3) / 4); storage_pool_.push_back(NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx)); @@ -337,6 +371,9 @@ void GraphRuntime::SetupStorage() { for (size_t i = 0; i < data_entry_.size(); ++i) { int storage_id = attrs_.storage_id[i]; ICHECK_LT(static_cast(storage_id), storage_pool_.size()); + LOG(INFO) << "sid " << i << ": (" << List2String(storage_pool_[storage_id].Shape()) + << ", dtype=" << storage_pool_[storage_id].DataType() << ")" + << ": setup view: " << List2String(attrs_.shape[i]); data_entry_[i] = storage_pool_[storage_id].CreateView(attrs_.shape[i], vtype[i]); const DLTensor* tmp = data_entry_[i].operator->(); @@ -497,18 +534,19 @@ PackedFunc GraphRuntime::GetFunction(const std::string& name, } Module GraphRuntimeCreate(const std::string& sym_json, const tvm::runtime::Module& m, - const std::vector& ctxs) { + const std::vector& ctxs, + const PackedFunc lookup_linked_param_func) { auto exec = make_object(); - exec->Init(sym_json, m, ctxs); + exec->Init(sym_json, m, ctxs, lookup_linked_param_func); return Module(exec); } // Get all context for the host and other runtime devices. -std::vector GetAllContext(const TVMArgs& args) { +std::vector GetAllContext(const TVMArgs& args, int ctx_start_arg) { // Reserve the first item as the fallback device. 
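+  // ctx_start_arg exists because tvm.graph_runtime.create may now receive an optional
+  // lookup-linked-param PackedFunc at args[2]; when it does, the (device_type, device_id)
+  // pairs begin one slot later, so callers pass the first context index explicitly.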
std::vector ret; TVMContext ctx; - for (int i = 2; i < args.num_args; i += 2) { + for (int i = ctx_start_arg; i < args.num_args; i += 2) { int dev_type = args[i]; ctx.device_type = static_cast(dev_type); ctx.device_id = args[i + 1]; @@ -526,8 +564,14 @@ TVM_REGISTER_GLOBAL("tvm.graph_runtime.create").set_body([](TVMArgs args, TVMRet ICHECK_GE(args.num_args, 4) << "The expected number of arguments for graph_runtime.create is " "at least 4, but it has " << args.num_args; - const auto& contexts = GetAllContext(args); - *rv = GraphRuntimeCreate(args[0], args[1], contexts); + PackedFunc lookup_linked_param_func; + int ctx_start_arg = 2; + if (args[2].type_code() == kTVMPackedFuncHandle) { + lookup_linked_param_func = args[2]; + ctx_start_arg++; + } + const auto& contexts = GetAllContext(args, ctx_start_arg); + *rv = GraphRuntimeCreate(args[0], args[1], contexts, lookup_linked_param_func); }); } // namespace runtime } // namespace tvm diff --git a/src/runtime/graph/graph_runtime.h b/src/runtime/graph/graph_runtime.h index 9f0b0962333a..9e95dfc9bf96 100644 --- a/src/runtime/graph/graph_runtime.h +++ b/src/runtime/graph/graph_runtime.h @@ -94,10 +94,13 @@ class TVM_DLL GraphRuntime : public ModuleNode { * processor. * \param ctxs The context of the host and devices where graph nodes will be * executed on. + * \param lookup_linked_param_func If given, a PackedFunc invoked to lookup linked parameters + * by storage_id. If not given, linked parameters are looked-up using an internal implementation, + * which is not compatible with RPCModules. */ void Init(const std::string& graph_json, tvm::runtime::Module module, - const std::vector& ctxs); + const std::vector& ctxs, const PackedFunc lookup_linked_param_func); /*! * \brief Get the input index given the name of input. @@ -182,8 +185,8 @@ class TVM_DLL GraphRuntime : public ModuleNode { struct PoolEntry { size_t size; int device_type; - void* pre_linked_param; int param_data_entry; + NDArray linked_param; // PoolEntry(int s, int dev_type, void* pre_linked_param) : // size(s), device_type(dev_type), pre_linked_param(std::move(pre_linked_param)) {} }; @@ -366,6 +369,8 @@ class TVM_DLL GraphRuntime : public ModuleNode { } ICHECK_EQ(bitmask, 1 | 2 | 4 | 8 | 16) << "invalid format"; } + /*! \brief PackedFunc to lookup a linked paramter from a local Module. */ + static void DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv); /*! \brief Delete pre-allocated DLTensor. */ static void PreAllocatedDLTensorDeleter(DLManagedTensor* tensor); /*! \brief Setup the temporal storage */ @@ -413,9 +418,12 @@ class TVM_DLL GraphRuntime : public ModuleNode { std::vector data_alignment_; /*! \brief Operator on each node. */ std::vector> op_execs_; + /*! \brief Linked parameter lookup function. 
*/ + PackedFunc lookup_linked_param_; + }; -std::vector GetAllContext(const TVMArgs& args); +std::vector GetAllContext(const TVMArgs& args, int ctx_start_arg); } // namespace runtime } // namespace tvm diff --git a/src/runtime/graph/graph_runtime_factory.cc b/src/runtime/graph/graph_runtime_factory.cc index 632a25c987bc..2c055e16cc9f 100644 --- a/src/runtime/graph/graph_runtime_factory.cc +++ b/src/runtime/graph/graph_runtime_factory.cc @@ -97,7 +97,7 @@ void GraphRuntimeFactory::SaveToBinary(dmlc::Stream* stream) { Module GraphRuntimeFactory::RuntimeCreate(const std::vector& ctxs) { auto exec = make_object(); - exec->Init(this->graph_json_, this->imports_[0], ctxs); + exec->Init(this->graph_json_, this->imports_[0], ctxs, PackedFunc()); // set params SetParams(exec.get(), this->params_); return Module(exec); diff --git a/src/runtime/rpc/rpc_module.cc b/src/runtime/rpc/rpc_module.cc index 7f810a229887..0ddd13572949 100644 --- a/src/runtime/rpc/rpc_module.cc +++ b/src/runtime/rpc/rpc_module.cc @@ -22,6 +22,7 @@ * \brief RPC runtime module. */ #include +#include #include #include @@ -48,37 +49,29 @@ static void RemoteNDArrayDeleter(Object* obj) { /*! * \brief Build a local NDArray with remote backing storage. + * \param sess the RPCSession which owns the given handle. * \param handle A pointer valid on the remote end which should form the `data` field of the * underlying DLTensor. - * \param shape The shape field of this DLTensor. - * \param ndim The rank of this DLTensor. + * \param template_tensor An empty DLTensor whose shape and dtype fields are used to fill the newly + * created array. Needed because it's difficult to pass a shape vector as a PackedFunc arg. * \param ctx Remote context used with this tensor. Must have non-zero RPCSessMask. * \param deleter A function invoked when the local NDArray object is no longer used. If `handle` * needs to be explicitly deleted after the NDArray is freed, this function should do that. * \param deleter_ctx An opaque pointer passed to deleter to identify the tensor being deleted. */ -NDArray NDArrayFromRemoteOpaqueHandle(void* handle, int64_t* shape, int64_t ndim, DLContext* ctx, FDeleter deleter, void* deleter_ctx) { - NDArray::Container* data = new NDArray::Container(); +NDArray NDArrayFromRemoteOpaqueHandle(std::shared_ptr sess, void* handle, DLTensor* template_tensor, TVMContext ctx, ADTObj::FDeleter deleter, void* deleter_ctx) { + ICHECK_EQ(sess->table_index(), GetRPCSessionIndex(ctx)) + << "The TVMContext given does not belong to the given session"; + RemoteSpace* space = new RemoteSpace(); + space->sess = sess; + space->data = handle; + std::vector shape_vec{template_tensor->shape, + template_tensor->shape + template_tensor->ndim}; + NDArray::Container* data = new NDArray::Container( + static_cast(space), std::move(shape_vec), template_tensor->dtype, ctx); data->manager_ctx = deleter_ctx; data->SetDeleter(deleter); - RemoteSpace* space = new RemoteSpace(); - space->sess = sess_; - space->data = tensor->data; - data->dl_tensor.data = space; - NDArray ret(GetObjectPtr(data)); - // RAII now in effect - data->shape_ = std::vector(tensor->shape, tensor->shape + tensor->ndim); - data->dl_tensor.shape = dmlc::BeginPtr(data->shape_); - data->dl_tensor.ndim = static_cast(data->shape_.size()); - // setup dtype - data->dl_tensor.dtype = tensor->dtype; - // setup ctx - data->dl_tensor.ctx = ctx; - // check strides. 
- ICHECK(tensor->strides == nullptr); - // setup byteoffset - data->dl_tensor.byte_offset = tensor->byte_offset; - return ret; + return NDArray(GetObjectPtr(data)); } @@ -291,7 +284,7 @@ void RPCWrappedFunc::WrapRemoteReturnToValue(TVMArgs args, TVMRetValue* rv) cons ICHECK_EQ(args.size(), 3); DLTensor* tensor = args[1]; void* nd_handle = args[2]; - *rv = NDArrayFromRemoteOpaqueHandle(tensor->data, tensor->shape, tensor->ndim, AddRPCSessionMask(ctx, sess_->table_index()), RemoteNDArrayDeleter, nd_handle); + *rv = NDArrayFromRemoteOpaqueHandle(sess_, tensor->data, tensor, AddRPCSessionMask(tensor->ctx, sess_->table_index()), RemoteNDArrayDeleter, nd_handle); } else { ICHECK_EQ(args.size(), 2); *rv = args[1]; @@ -477,11 +470,20 @@ TVM_REGISTER_GLOBAL("rpc.SessTableIndex").set_body([](TVMArgs args, TVMRetValue* *rv = static_cast(m.operator->())->sess()->table_index(); }); -TVM_REGISTER_GLOBAL("tvm.rpc.wrap_remote_ndarray").set_body_typed([](void* remote_array, PackedFunc deleter) { - *rv = WrapRemoteNDArray(remote_array, [pf](Object* ctx) { - pf(); +TVM_REGISTER_GLOBAL("tvm.rpc.NDArrayFromRemoteOpaqueHandle").set_body_typed( + [](Module mod, void* remote_array, DLTensor* template_tensor, TVMContext ctx, PackedFunc deleter) -> NDArray { +// auto func = new std::function([deleter]() -> void { +// deleter(); +// }); + return NDArrayFromRemoteOpaqueHandle( + RPCModuleGetSession(mod), remote_array, template_tensor, ctx, + [](Object* context) { +// auto container = static_cast(context); +// auto cb_func = reinterpret_cast*>(container->manager_ctx); +// (*cb_func)(); +// delete cb_func; + }, nullptr);//(void*) func); }); -}); } // namespace runtime } // namespace tvm diff --git a/src/target/llvm/codegen_llvm.cc b/src/target/llvm/codegen_llvm.cc index 20cbdf83b971..1b25a691ee4a 100644 --- a/src/target/llvm/codegen_llvm.cc +++ b/src/target/llvm/codegen_llvm.cc @@ -228,7 +228,14 @@ void CodeGenLLVM::LinkParameters(const Map params) { llvm::SwitchInst* switch_inst = builder_->CreateSwitch(sid, default_block, params.size() + 1); builder_->SetInsertPoint(default_block); - builder_->CreateRet(ConstInt32(kTvmErrorGeneratedInvalidStorageId)); + { + auto ret_types_array = builder_->CreateBitCast( + &function->arg_begin()[4], llvm::ArrayType::get(t_int_, 1)); + builder_->CreateStore( + llvm::ConstantInt::get(t_int_, kTVMNullptr), + builder_->CreateGEP(ret_types_array, zero_index_list)); + builder_->CreateRet(ConstInt32(kTvmErrorNoError)); + } llvm::raw_os_ostream os{std::cout}; @@ -249,7 +256,6 @@ void CodeGenLLVM::LinkParameters(const Map params) { auto retval_array = builder_->CreateBitCast( &function->arg_begin()[3], llvm::ArrayType::get(t_void_->getPointerTo(GetGlobalAddressSpace()), 1)); builder_->CreateStore( -// param_symbol, builder_->CreatePointerCast(param_symbol, t_void_->getPointerTo(GetGlobalAddressSpace())), builder_->CreateGEP(retval_array, zero_index_list)); auto ret_types_array = builder_->CreateBitCast( diff --git a/src/target/source/codegen_c_host.cc b/src/target/source/codegen_c_host.cc index 9a524b0428cc..915d43cffb13 100644 --- a/src/target/source/codegen_c_host.cc +++ b/src/target/source/codegen_c_host.cc @@ -71,7 +71,8 @@ void CodeGenCHost::LinkParameters(Map params) { << tvm::runtime::symbol::tvm_lookup_linked_param; stream << " switch (((int64_t*) args)[0]) {\n" << " default:\n" - << " return " << kTvmErrorGeneratedInvalidStorageId << ";\n"; + << " out_ret_tcode[0] = " << kTVMNullptr << ";\n" + << " return 0;\n"; 
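+  // Pieced together with the per-parameter cases added in the loop below, the emitted lookup
+  // function body looks roughly like this (sketch only: the PackedFunc prologue and the exact
+  // per-parameter symbol prefix come from elsewhere in the codegen, and the enum constants are
+  // written out as their numeric values):
+  //
+  //   switch (((int64_t*) args)[0]) {
+  //     default:
+  //       out_ret_tcode[0] = kTVMNullptr;
+  //       return 0;
+  //     case <storage_id>:
+  //       ((uint64_t*)out_ret_value)[0] = (uint64_t) (uintptr_t) <param_prefix><param_name>;
+  //       out_ret_tcode[0] = kTVMOpaqueHandle;
+  //       return 0;
+  //   }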
function_names_.emplace_back(tvm::runtime::symbol::tvm_lookup_linked_param); for (auto kv : params) { From 6e19b2593a332bc9112e2bb49ecca35104c64261 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 13 Nov 2020 09:34:56 -0800 Subject: [PATCH 11/60] final changes for link-params --- src/runtime/crt/common/memory.c | 13 ++++++------- src/runtime/crt/graph_runtime/graph_runtime.c | 7 +++++-- .../crt/graph_runtime_module/graph_runtime_module.c | 6 ++---- src/runtime/graph/graph_runtime.cc | 8 ++++++-- tests/python/unittest/test_link_params.py | 3 +-- 5 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/runtime/crt/common/memory.c b/src/runtime/crt/common/memory.c index 68cad3645146..646ba46feecb 100644 --- a/src/runtime/crt/common/memory.c +++ b/src/runtime/crt/common/memory.c @@ -151,8 +151,8 @@ void* MemoryManager_Alloc(MemoryManager* mgr, tvm_index_t size) { } vleak_size++; #if TVM_CRT_DEBUG > 1 - printf("allocate: addr=%p, start=%" PRId64 "/%zu, npage=%" PRId64 ", vleak=%d\n", data, start, - ptable->max_pages, npage, vleak_size); + TVMLogf("allocate: addr=%p, start=%" PRId64 "/%zu, npage=%" PRId64 ", vleak=%d\n", data, start, + ptable->max_pages, npage, vleak_size); #endif // TVM_CRT_DEBUG return data; } @@ -229,9 +229,8 @@ void* MemoryManager_Realloc(MemoryManager* mgr, void* ptr, tvm_index_t size) { vleak_size++; } #if TVM_CRT_DEBUG > 1 - printf("reallocate: addr=%p, start=%" PRId64 "/%zu, npage=%" PRId64 ", vleak=%d, size=%" PRId64 - "\n", - data, start, mgr->ptable.max_pages, npage, vleak_size, size); + TVMLogf("reallocate: addr=%p, start=%" PRId64 "/%zu, npage=%" PRId64 ", vleak=%d, size=%zu", + data, start, mgr->ptable.max_pages, npage, vleak_size, size); #endif // TVM_CRT_DEBUG return data; } @@ -251,8 +250,8 @@ void MemoryManager_Free(MemoryManager* mgr, void* ptr) { free_map->insert(free_map, p->num_pages, p); vleak_size--; #if TVM_CRT_DEBUG > 1 - printf("release: addr=%p, start=%" PRId64 "/%zu, npage=%" PRId64 ", vleak=%d\n", ptr, - entry->page.ptable_begin, mgr->ptable.max_pages, entry->page.num_pages, vleak_size); + TVMLogf("release: addr=%p, start=%" PRId64 "/%zu, npage=%zu, vleak=%d", ptr, + entry->page.ptable_begin, mgr->ptable.max_pages, entry->page.num_pages, vleak_size); #endif // TVM_CRT_DEBUG } diff --git a/src/runtime/crt/graph_runtime/graph_runtime.c b/src/runtime/crt/graph_runtime/graph_runtime.c index 03d81aa184f8..c5dc792cf315 100644 --- a/src/runtime/crt/graph_runtime/graph_runtime.c +++ b/src/runtime/crt/graph_runtime/graph_runtime.c @@ -765,10 +765,13 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { uint8_t did_find_linked_param = 0; if (lookup_linked_param_valid) { lookup_linked_param.args.values[0].v_int64 = idx; - if (lookup_linked_param.Call(&lookup_linked_param) == 0) { + CHECK_EQ(lookup_linked_param.Call(&lookup_linked_param), 0, "lookup_linked_param"); + + void* linked_param_data = lookup_linked_param.ret_value.values[0].v_handle; + if (linked_param_data != NULL) { runtime->storage_pool[runtime->storage_pool_count].is_linked_param = 1; DLTensor* tensor = &runtime->storage_pool[runtime->storage_pool_count].array.dl_tensor; - tensor->data = lookup_linked_param.ret_value.values[0].v_handle; + tensor->data = linked_param_data; tensor->ctx = ctx; tensor->ndim = attrs->ndim[pit.entry_id]; tensor->shape = attrs->shape + idx * TVM_CRT_MAX_NDIM; diff --git a/src/runtime/crt/graph_runtime_module/graph_runtime_module.c b/src/runtime/crt/graph_runtime_module/graph_runtime_module.c index 98e4693a4fb6..a8de71e33f9d 100644 --- 
a/src/runtime/crt/graph_runtime_module/graph_runtime_module.c +++ b/src/runtime/crt/graph_runtime_module/graph_runtime_module.c @@ -88,8 +88,7 @@ int32_t TVMGraphRuntimeModule_GetInput(TVMValue* args, int* tcodes, int nargs, T uint32_t eid = TVMGraphRuntime_GetEntryId( graph_runtime.runtime, graph_runtime.runtime->input_nodes[index], 0); ret_values[0].v_handle = (void*) &graph_runtime.runtime->data_entry[eid].dl_tensor; - ret_tcodes[0] = kTVMOpaqueHandle; - //ret_tcodes[0] = kTVMNDArrayHandle; + ret_tcodes[0] = kTVMNDArrayHandle; return 0; } @@ -132,8 +131,7 @@ int32_t TVMGraphRuntimeModule_GetOutput(TVMValue* args, int* tcodes, int nargs, uint32_t eid = TVMGraphRuntime_GetEntryId(graph_runtime.runtime, nid, index); ret_values[0].v_handle = (void*) &(graph_runtime.runtime->data_entry[eid].dl_tensor); -// ret_tcodes[0] = kTVMNDArrayHandle; - ret_tcodes[0] = kTVMOpaqueHandle; + ret_tcodes[0] = kTVMNDArrayHandle; return 0; } diff --git a/src/runtime/graph/graph_runtime.cc b/src/runtime/graph/graph_runtime.cc index 7c34d9626181..423a2d62ea93 100644 --- a/src/runtime/graph/graph_runtime.cc +++ b/src/runtime/graph/graph_runtime.cc @@ -257,7 +257,7 @@ void GraphRuntime::PreAllocatedDLTensorDeleter(DLManagedTensor* tensor) { void GraphRuntime::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { Module mod = args[0]; int64_t storage_id = args[1]; - NDArray template_tensor = args[2]; + DLTensor* template_tensor = args[2]; TVMContext ctx = args[3]; // Get pre-linked parameter lookup function, if it was generated. When pf == nullptr, no linked // params are present. @@ -274,8 +274,12 @@ void GraphRuntime::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { return; } + std::vector shape_vec{ + template_tensor->shape, + template_tensor->shape + template_tensor->ndim}; + std::unique_ptr container{new NDArray::Container( - static_cast(opaque_handle), template_tensor.Shape(), template_tensor.DataType(), ctx)}; + static_cast(opaque_handle), shape_vec, template_tensor->dtype, ctx)}; *rv = NDArray(GetObjectPtr(container.release())); } diff --git a/tests/python/unittest/test_link_params.py b/tests/python/unittest/test_link_params.py index f134db37c36d..630966892fd4 100644 --- a/tests/python/unittest/test_link_params.py +++ b/tests/python/unittest/test_link_params.py @@ -333,7 +333,6 @@ def _run_unlinked(lib_mod): def test_crt_link_params(): import tvm.micro - for dtype in LINKABLE_DTYPES: mod, param_init = _make_mod_and_params(dtype) rand_input = _make_random_tensor(dtype) @@ -395,4 +394,4 @@ def _run_unlinked(lib): if __name__ == '__main__': - sys.exit(pytest.main(sys.argv[1:])) + sys.exit(pytest.main([__file__] + sys.argv[1:])) From 22a587c1fdb07248e6b9fd87aed2ca4498336326 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 13 Nov 2020 10:55:43 -0800 Subject: [PATCH 12/60] missed stuff --- include/tvm/tir/function.h | 38 ++++++++++++++++++++++ src/relay/backend/build_module.cc | 21 +++++++----- src/relay/backend/graph_runtime_codegen.cc | 31 ++++++++++++++---- src/target/llvm/codegen_llvm.cc | 4 +++ src/target/llvm/codegen_llvm.h | 12 +++++++ src/target/llvm/llvm_module.cc | 4 +++ src/target/target_kind.cc | 2 ++ src/tir/ir/function.cc | 7 ++++ 8 files changed, 105 insertions(+), 14 deletions(-) diff --git a/include/tvm/tir/function.h b/include/tvm/tir/function.h index 64dbb5cf8ec3..ecc0e672749a 100644 --- a/include/tvm/tir/function.h +++ b/include/tvm/tir/function.h @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -150,6 +151,32 @@ class PrimFunc : public BaseFunc { 
TVM_DEFINE_OBJECT_REF_COW_METHOD(PrimFuncNode); }; +class LinkedParamNode : public Object { + public: + /*! \brief Unique numeric identifier used by runtimes to lookup this parameter. */ + int64_t id; + + /*! \brief Parameter data which should get linked into the final module. */ + ::tvm::runtime::NDArray param; + + void VisitAttrs(tvm::AttrVisitor* v) { + v->Visit("id", &id); + v->Visit("param", ¶m); + } + + static constexpr const char* _type_key = "tir.LinkedParam"; + TVM_DECLARE_FINAL_OBJECT_INFO(LinkedParamNode, Object); +}; + +class LinkedParam : public ObjectRef { + public: + LinkedParam(int64_t id, ::tvm::runtime::NDArray param); + + TVM_DEFINE_OBJECT_REF_METHODS(LinkedParam, ObjectRef, LinkedParamNode); + TVM_DEFINE_OBJECT_REF_COW_METHOD(LinkedParamNode); +}; + + /*! * \brief PrimFunc specific attribute names. * @@ -192,6 +219,17 @@ constexpr const char* kNoAlias = "tir.noalias"; * \note There can only be one entry function per module. */ constexpr const char* kIsEntryFunc = "tir.is_entry_func"; + +/*! + * \brief Parameters used in the module that should be linked by the codegen. + * + * Type: Map + * + * \note This should be present only on a function named + * tvm::target::packed_func::kLookupLinkedParam. + */ +constexpr const char* kLinkedParams = "tir.linked_params"; + } // namespace attr } // namespace tir } // namespace tvm diff --git a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc index cc304808b16f..762d29b90933 100644 --- a/src/relay/backend/build_module.cc +++ b/src/relay/backend/build_module.cc @@ -22,6 +22,7 @@ * \brief Code generation for TVM's graph runtime. */ #include +#include #include #include #include @@ -30,6 +31,7 @@ #include +#include "../../target/func_registry_generator.h" #include "../../target/source/codegen_source_base.h" #include "compile_engine.h" #include "utils.h" @@ -88,6 +90,17 @@ struct GraphCodegen { return ret; } + std::unordered_map GetParamIds() { + std::unordered_map ret; + auto names = CallFunc>("list_params_name", nullptr); + for (const auto& expr : names) { + // Implicit cast from runtime::String to std::string + std::string key = expr; + ret[key] = CallFunc("get_param_id", key); + } + return ret; + } + protected: tvm::runtime::Module mod; template @@ -474,14 +487,6 @@ class RelayBuildModule : public runtime::ModuleNode { // When there is no lowered_funcs due to reasons such as optimization. if (lowered_funcs.size() == 0) { - Target target_host = GetTargetHost(); - - // If no target_host has been set, we choose a default one, which is - // llvm if "codegen.LLVMModuleCreate" is accessible. - const runtime::PackedFunc* pf = runtime::Registry::Get("codegen.LLVMModuleCreate"); - if (!target_host.defined()) - target_host = (pf != nullptr) ? Target("llvm") : Target("stackvm"); - if (target_host.defined() && target_host->kind->name == "llvm") { // If we can decide the target is LLVM, we then create an empty LLVM module. ret_.mod = (*pf)(target_host->str(), "empty_module"); diff --git a/src/relay/backend/graph_runtime_codegen.cc b/src/relay/backend/graph_runtime_codegen.cc index e24d18de931c..609327ca1071 100644 --- a/src/relay/backend/graph_runtime_codegen.cc +++ b/src/relay/backend/graph_runtime_codegen.cc @@ -56,7 +56,7 @@ struct LoweredOutput { std::string graph_json; Map lowered_funcs; Array external_mods; - std::unordered_map params; + std::unordered_map> params; }; /*! 
\brief Node types */ @@ -203,7 +203,11 @@ class GraphRuntimeCodegen : public backend::MemoizedExprTranslator>(); + for (auto param : params_) { + ret.params.emplace( + std::make_pair(param.first, std::make_pair(int(param_storage_ids_[param.first]), param.second))); + } for (auto& kv : lowered_funcs_) { if (ret.lowered_funcs.count(kv.first) == 0) { @@ -312,9 +316,12 @@ class GraphRuntimeCodegen : public backend::MemoizedExprTranslator(op); size_t index = params_.size(); std::string name = "p" + std::to_string(index); - params_[name] = op->data; auto node = GraphInputNode::make_node_ptr(name, GraphAttrs()); - return AddNode(node, expr); + auto to_return = AddNode(node, expr); + CHECK_EQ(to_return.size(), 1) << "Expected exactly 1 parameter node created"; + param_storage_ids_[name] = nodes_.size() - 1; + params_[name] = op->data; + return to_return; } std::vector VisitExpr_(const TupleNode* op) override { @@ -531,8 +538,14 @@ class GraphRuntimeCodegen : public backend::MemoizedExprTranslator> var_map_; /*! \brief target device */ TargetsMap targets_; - /*! \brief params */ + /*! + * \brief parameters (i.e. ConstantNodes found in the graph). + * These are take as inputs to the GraphRuntime. + * Maps param name to a pair of storage_id and NDArray. At runtime, the storage_id can be + * used to lookup the parameter. + */ std::unordered_map params_; + std::unordered_map param_storage_ids_; /*! \brief plan memory of device result */ Map> storage_device_map_; /*! \brief lowered funcs */ @@ -582,7 +595,13 @@ class GraphRuntimeCodegenModule : public runtime::ModuleNode { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { String key = args[0]; ICHECK_GT(this->output_.params.count(key), 0); - *rv = this->output_.params[key]; + *rv = this->output_.params[key].second; + }); + } else if (name == "get_param_id") { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + String key = args[0]; + ICHECK_GT(this->output_.params.count(key), 0); + *rv = this->output_.params[key].first; }); } else if (name == "get_irmodule") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { diff --git a/src/target/llvm/codegen_llvm.cc b/src/target/llvm/codegen_llvm.cc index 1b25a691ee4a..87a49cc5bd06 100644 --- a/src/target/llvm/codegen_llvm.cc +++ b/src/target/llvm/codegen_llvm.cc @@ -26,13 +26,17 @@ #include #include +#include #include #include +#include "llvm/Support/raw_os_ostream.h" #include "../../arith/pattern_match.h" #include "../build_common.h" +#include "../func_registry_generator.h" #include "codegen_cpu.h" +#include "codegen_params.h" namespace tvm { namespace codegen { diff --git a/src/target/llvm/codegen_llvm.h b/src/target/llvm/codegen_llvm.h index 78eb5e2dcac7..71583708da2c 100644 --- a/src/target/llvm/codegen_llvm.h +++ b/src/target/llvm/codegen_llvm.h @@ -98,6 +98,18 @@ class CodeGenLLVM : public ExprFunctor, * \param mod The module to be linked. */ void AddLinkModule(std::unique_ptr&& mod); + /*! + * \brief Link parameters into the module so they don't need to be supplied at runtime. + * Parameters can be linked into the module so that the generated code is easier to use, or so + * that RAM space doesn't need to be allocated for them. This function adds the given parameters + * to the generated LLVM module. + * \param storage_id_offset Offset added to the index of each entry in params_by_sid to form the + * storage_id of that parameter. Storage ids for parameters are expected to be contiguous. + * \param params_by_sid Array of NDArray. 
Each entry is a parameter. The index of the array (added + * to sid_offset) is the storage_id of the param. + * \param param_names Array containing the name for each param in params_by_sid. + */ + void LinkParameters(const Map params); /*! * \brief Create Value for expression e * \param e The expression to be created value for. diff --git a/src/target/llvm/llvm_module.cc b/src/target/llvm/llvm_module.cc index 89774ec61618..ceb609c1e666 100644 --- a/src/target/llvm/llvm_module.cc +++ b/src/target/llvm/llvm_module.cc @@ -32,6 +32,7 @@ #include "../../runtime/file_utils.h" #include "../../runtime/library_module.h" +#include "../func_registry_generator.h" #include "codegen_blob.h" #include "codegen_llvm.h" #include "llvm_common.h" @@ -199,6 +200,9 @@ class LLVMModuleNode final : public runtime::ModuleNode { std::vector funcs; std::string entry_func; + Map linked_params; + bool found_linked_params = false; + bool could_have_linked_params = target->GetAttr("link-params").value_or(Bool(false)); for (auto kv : mod->functions) { if (could_have_linked_params && kv.first->name_hint == ::tvm::runtime::symbol::tvm_lookup_linked_param) { diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc index 017ba396f861..f249ef8f529d 100644 --- a/src/target/target_kind.cc +++ b/src/target/target_kind.cc @@ -213,10 +213,12 @@ TVM_REGISTER_TARGET_KIND("llvm", kDLCPU) .add_attr_option("mfloat-abi") .add_attr_option("system-lib") .add_attr_option("runtime") + .add_attr_option("link-params") .set_default_keys({"cpu"}); TVM_REGISTER_TARGET_KIND("c", kDLCPU) .add_attr_option("system-lib") + .add_attr_option("link-params") .add_attr_option("runtime") .add_attr_option("mcpu") .set_default_keys({"cpu"}); diff --git a/src/tir/ir/function.cc b/src/tir/ir/function.cc index ef7f4f8e16dd..101d80a52ea1 100644 --- a/src/tir/ir/function.cc +++ b/src/tir/ir/function.cc @@ -28,6 +28,13 @@ namespace tvm { namespace tir { +LinkedParam::LinkedParam(int64_t id, ::tvm::runtime::NDArray param) { + auto n = make_object(); + n->id = id; + n->param = param; + data_ = std::move(n); +} + // Get the function type of a PrimFunc PrimFunc::PrimFunc(Array params, Stmt body, Type ret_type, Map buffer_map, DictAttrs attrs, Span span) { From f7b15b70dbf7d5242f1833336763d308556705f6 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 13 Nov 2020 13:51:45 -0800 Subject: [PATCH 13/60] git-clang-format --- include/tvm/tir/function.h | 3 +-- src/relay/backend/graph_runtime_codegen.cc | 4 ++-- src/target/llvm/codegen_llvm.cc | 4 ++-- src/target/llvm/llvm_module.cc | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/tvm/tir/function.h b/include/tvm/tir/function.h index ecc0e672749a..a22552ea190c 100644 --- a/include/tvm/tir/function.h +++ b/include/tvm/tir/function.h @@ -25,10 +25,10 @@ #define TVM_TIR_FUNCTION_H_ #include +#include #include #include #include -#include #include @@ -176,7 +176,6 @@ class LinkedParam : public ObjectRef { TVM_DEFINE_OBJECT_REF_COW_METHOD(LinkedParamNode); }; - /*! * \brief PrimFunc specific attribute names. 
* diff --git a/src/relay/backend/graph_runtime_codegen.cc b/src/relay/backend/graph_runtime_codegen.cc index 609327ca1071..a5073326c13c 100644 --- a/src/relay/backend/graph_runtime_codegen.cc +++ b/src/relay/backend/graph_runtime_codegen.cc @@ -205,8 +205,8 @@ class GraphRuntimeCodegen : public backend::MemoizedExprTranslator>(); for (auto param : params_) { - ret.params.emplace( - std::make_pair(param.first, std::make_pair(int(param_storage_ids_[param.first]), param.second))); + ret.params.emplace(std::make_pair( + param.first, std::make_pair(int(param_storage_ids_[param.first]), param.second))); } for (auto& kv : lowered_funcs_) { diff --git a/src/target/llvm/codegen_llvm.cc b/src/target/llvm/codegen_llvm.cc index 87a49cc5bd06..39ea82065377 100644 --- a/src/target/llvm/codegen_llvm.cc +++ b/src/target/llvm/codegen_llvm.cc @@ -25,18 +25,18 @@ #include "codegen_llvm.h" #include -#include #include +#include #include #include -#include "llvm/Support/raw_os_ostream.h" #include "../../arith/pattern_match.h" #include "../build_common.h" #include "../func_registry_generator.h" #include "codegen_cpu.h" #include "codegen_params.h" +#include "llvm/Support/raw_os_ostream.h" namespace tvm { namespace codegen { diff --git a/src/target/llvm/llvm_module.cc b/src/target/llvm/llvm_module.cc index ceb609c1e666..ab2fcee00b9e 100644 --- a/src/target/llvm/llvm_module.cc +++ b/src/target/llvm/llvm_module.cc @@ -200,7 +200,7 @@ class LLVMModuleNode final : public runtime::ModuleNode { std::vector funcs; std::string entry_func; - Map linked_params; + Map linked_params; bool found_linked_params = false; bool could_have_linked_params = target->GetAttr("link-params").value_or(Bool(false)); for (auto kv : mod->functions) { From ef6e14f1668c3716c02c24f3d8c3338e6a6c26b9 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 13 Nov 2020 13:52:40 -0800 Subject: [PATCH 14/60] black format --- python/tvm/contrib/binutils.py | 2 +- python/tvm/micro/build.py | 9 +- python/tvm/micro/debugger.py | 3 +- python/tvm/micro/session.py | 17 +- python/tvm/relay/param_dict.py | 6 +- python/tvm/target/target.py | 2 +- tests/python/unittest/test_link_params.py | 236 +++++++++++----------- 7 files changed, 143 insertions(+), 132 deletions(-) diff --git a/python/tvm/contrib/binutils.py b/python/tvm/contrib/binutils.py index 146944970827..53f92b9855fe 100644 --- a/python/tvm/contrib/binutils.py +++ b/python/tvm/contrib/binutils.py @@ -38,7 +38,7 @@ def run_cmd(cmd): output : str resulting stdout capture from the subprocess """ - _LOG.debug('execute: %s', ' '.join(cmd)) + _LOG.debug("execute: %s", " ".join(cmd)) proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) (output, _) = proc.communicate() output = output.decode("utf-8") diff --git a/python/tvm/micro/build.py b/python/tvm/micro/build.py index bed5bde6f916..4aec9ea5ecbb 100644 --- a/python/tvm/micro/build.py +++ b/python/tvm/micro/build.py @@ -111,8 +111,13 @@ def default_options(target_include_dir): def build_static_runtime( - workspace, compiler, module, lib_opts=None, bin_opts=None, generated_lib_opts=None, - extra_libs=None + workspace, + compiler, + module, + lib_opts=None, + bin_opts=None, + generated_lib_opts=None, + extra_libs=None, ): """Build the on-device runtime, statically linking the given modules. 
diff --git a/python/tvm/micro/debugger.py b/python/tvm/micro/debugger.py index 18ed350305d8..9dd496a950e5 100644 --- a/python/tvm/micro/debugger.py +++ b/python/tvm/micro/debugger.py @@ -199,7 +199,8 @@ def popen_kwargs(self): raise NotImplementedError(f"System {sysname} is not yet supported") self.fd_transport = FdTransport( - stdout_read, stdin_write, timeouts=transport.debug_transport_timeouts()) + stdout_read, stdin_write, timeouts=transport.debug_transport_timeouts() + ) self.fd_transport.open() return { diff --git a/python/tvm/micro/session.py b/python/tvm/micro/session.py index 5be7d59a143f..adbad20cda06 100644 --- a/python/tvm/micro/session.py +++ b/python/tvm/micro/session.py @@ -156,7 +156,7 @@ def __exit__(self, exc_type, exc_value, exc_traceback): def lookup_remote_linked_param(mod, storage_id, template_tensor, ctx): try: - lookup_linked_param = mod.get_function('_lookup_linked_param') + lookup_linked_param = mod.get_function("_lookup_linked_param") except KeyError: return None @@ -164,8 +164,9 @@ def lookup_remote_linked_param(mod, storage_id, template_tensor, ctx): if remote_data is None: return None - return get_global_func('tvm.rpc.NDArrayFromRemoteOpaqueHandle')( - mod, remote_data, template_tensor, ctx, lambda: None) + return get_global_func("tvm.rpc.NDArrayFromRemoteOpaqueHandle")( + mod, remote_data, template_tensor, ctx, lambda: None + ) def create_local_graph_runtime(graph_json_str, mod, ctx): @@ -189,8 +190,9 @@ def create_local_graph_runtime(graph_json_str, mod, ctx): """ device_type_id = [ctx.device_type, ctx.device_id] fcreate = get_global_func("tvm.graph_runtime.create") - return graph_runtime.GraphModule(fcreate(graph_json_str, mod, lookup_remote_linked_param, - *device_type_id)) + return graph_runtime.GraphModule( + fcreate(graph_json_str, mod, lookup_remote_linked_param, *device_type_id) + ) def create_local_debug_runtime(graph_json_str, mod, ctx, dump_root=None): @@ -219,4 +221,7 @@ def create_local_debug_runtime(graph_json_str, mod, ctx, dump_root=None): fcreate = get_global_func("tvm.graph_runtime_debug.create") return debug_runtime.GraphModuleDebug( fcreate(graph_json_str, mod, lookup_remote_linked_param, *device_type_id), - [ctx], graph_json_str, dump_root=dump_root) + [ctx], + graph_json_str, + dump_root=dump_root, + ) diff --git a/python/tvm/relay/param_dict.py b/python/tvm/relay/param_dict.py index 463eae51d7b8..37b4f1c72c4a 100644 --- a/python/tvm/relay/param_dict.py +++ b/python/tvm/relay/param_dict.py @@ -83,9 +83,9 @@ def linkable_param_dict(graph_json, params, target): graph = json.loads(graph_json) data_by_sid = [None] * len(params) for param_name, param in params.items(): - for node in graph['nodes']: - if node['name'] == param_name: - sid = node['storage_id'] + for node in graph["nodes"]: + if node["name"] == param_name: + sid = node["storage_id"] data_by_sid[sid] = param # GraphRuntimeCodegen is expected to allocated the first len(params) storage_ids to contain diff --git a/python/tvm/target/target.py b/python/tvm/target/target.py index 6ef41748ca5b..cd874b8bffe4 100644 --- a/python/tvm/target/target.py +++ b/python/tvm/target/target.py @@ -237,7 +237,7 @@ def micro(model="unknown", options=None): } opts = _merge_opts( trans_table[model] + ["-runtime=c", "--system-lib", "--link-params", f"-model={model}"], - options + options, ) # NOTE: in the future, the default micro target will be LLVM except when diff --git a/tests/python/unittest/test_link_params.py b/tests/python/unittest/test_link_params.py index 630966892fd4..3dc3122af81c 100644 --- 
a/tests/python/unittest/test_link_params.py +++ b/tests/python/unittest/test_link_params.py @@ -36,15 +36,15 @@ # The data types that are linkable. LINKABLE_DTYPES = ( - [f'uint{b}' for b in (8, 16, 32, 64)] + - [f'int{b}' for b in (8, 16, 32, 64)] + - ['float32', 'float64']) - + [f"uint{b}" for b in (8, 16, 32, 64)] + + [f"int{b}" for b in (8, 16, 32, 64)] + + ["float32", "float64"] +) def dtype_info(dtype): """Lookup numpy type info for the given string dtype (of LINKABLE_DTYPES above).""" - if 'int' in dtype: + if "int" in dtype: return np.iinfo(getattr(np, dtype)) else: return np.finfo(getattr(np, dtype)) @@ -59,18 +59,18 @@ def _make_random_tensor(dtype): """Create a random test tensor of shape TEST_SHAPE and the given dtype.""" global RAND_SEED if RANDOM_TENSOR_START is not None: - to_return = np.arange(RANDOM_TENSOR_START, - RANDOM_TENSOR_START + np.prod(TEST_SHAPE), - dtype=dtype).reshape(TEST_SHAPE) - RAND_SEED += np.prod(TEST_SHAPE) - return to_return + to_return = np.arange( + RANDOM_TENSOR_START, RANDOM_TENSOR_START + np.prod(TEST_SHAPE), dtype=dtype + ).reshape(TEST_SHAPE) + RAND_SEED += np.prod(TEST_SHAPE) + return to_return dinfo = dtype_info(dtype) - if 'int' in dtype: + if "int" in dtype: return np.random.randint(dinfo.min, dinfo.max, TEST_SHAPE, dtype=dtype) else: to_return = np.random.uniform(0, dinfo.max, TEST_SHAPE) -# to_return = dinfo.min + (np.random.random(TEST_SHAPE) * dinfo.max) + # to_return = dinfo.min + (np.random.random(TEST_SHAPE) * dinfo.max) np.reshape(to_return, np.prod(TEST_SHAPE))[::2] *= -1 return to_return @@ -92,28 +92,28 @@ def _lookup_sid(graph, name): The storage_id of the parameter. """ num_outputs_seen = 0 - for i, n in enumerate(graph['nodes']): - if n['name'] == name: - return graph['attrs']['storage_id'][1][num_outputs_seen] + for i, n in enumerate(graph["nodes"]): + if n["name"] == name: + return graph["attrs"]["storage_id"][1][num_outputs_seen] else: - if 'attrs' in n and 'num_outputs' in n['attrs']: - num_outputs_seen += n['attrs']['num_outputs'] + if "attrs" in n and "num_outputs" in n["attrs"]: + num_outputs_seen += n["attrs"]["num_outputs"] else: num_outputs_seen += 1 - raise KeyError(f'no such param: {name}') + raise KeyError(f"no such param: {name}") def _get_ctypes_dtype(dt): """Return a ctypes c_* datatype given a string data type.""" - if 'int' in dt: - return getattr(ctypes, f'c_{dt}') - elif dt == 'float32': + if "int" in dt: + return getattr(ctypes, f"c_{dt}") + elif dt == "float32": return ctypes.c_float - elif dt == 'float64': + elif dt == "float64": return ctypes.c_double else: - assert False, f'unknown dtype: {dt}' + assert False, f"unknown dtype: {dt}" def _verify_linked_param(dtype, lib, mod, graph, name): @@ -122,13 +122,12 @@ def _verify_linked_param(dtype, lib, mod, graph, name): # NOTE: query_imports=True because when loading a module from disk (i.e. for C backend), # a GraphRuntimeFactory module is created instead of the module itself. 
param_ptr = mod.get_function("_lookup_linked_param", True)(sid) - print('verify', param_ptr) + print("verify", param_ptr) arr_data = (_get_ctypes_dtype(dtype) * np.prod(TEST_SHAPE)).from_address(param_ptr.value) gen_param = lib.params[name] - print('gen param dtype', gen_param.dtype) - arr = np.ndarray( - shape=gen_param.shape, dtype=gen_param.dtype, buffer=arr_data, order='C') - if 'int' in gen_param.dtype: + print("gen param dtype", gen_param.dtype) + arr = np.ndarray(shape=gen_param.shape, dtype=gen_param.dtype, buffer=arr_data, order="C") + if "int" in gen_param.dtype: np.testing.assert_equal(gen_param.asnumpy(), arr) else: np.testing.assert_allclose(gen_param.asnumpy(), arr) @@ -140,30 +139,29 @@ def _make_mod_and_params(dtype): param_init = {} def _add_decl(name, dtype): - param_decls[name] = f'%{name} : Tensor[{TEST_SHAPE}, {dtype}]' + param_decls[name] = f"%{name} : Tensor[{TEST_SHAPE}, {dtype}]" param_init[name] = _make_random_tensor(dtype) - _add_decl(f'{dtype}_a', dtype) - _add_decl(f'{dtype}_b', dtype) + _add_decl(f"{dtype}_a", dtype) + _add_decl(f"{dtype}_b", dtype) mod_lines = [ '#[version = "0.0.5"]', f"def @main(%rand_input : Tensor[{TEST_SHAPE}, {dtype}], { ', '.join(param_decls.values()) } ) {{", ] - if 'int' in dtype: + if "int" in dtype: mod_lines.append( -# f' %0 = bitwise_xor(%rand_input, bitwise_xor(%{dtype}_a, %{dtype}_b));') - f' %0 = add(%rand_input, %{dtype}_a);') + # f' %0 = bitwise_xor(%rand_input, bitwise_xor(%{dtype}_a, %{dtype}_b));') + f" %0 = add(%rand_input, %{dtype}_a);" + ) else: mod_lines.append( - f' %0 = cast(add(%rand_input, cast(add(%{dtype}_a, %{dtype}_b), dtype="{dtype}")), dtype="{dtype}");') -# f' %0 = cast(add(%rand_input, %{dtype}_a), dtype="{dtype}");') - mod_lines.extend([ - ' %0', - '}' - ]) - - mod = tvm.parser.fromtext('\n'.join(mod_lines)) + f' %0 = cast(add(%rand_input, cast(add(%{dtype}_a, %{dtype}_b), dtype="{dtype}")), dtype="{dtype}");' + ) + # f' %0 = cast(add(%rand_input, %{dtype}_a), dtype="{dtype}");') + mod_lines.extend([" %0", "}"]) + + mod = tvm.parser.fromtext("\n".join(mod_lines)) return mod, param_init @@ -172,8 +170,8 @@ def test_llvm_link_params(): for dtype in LINKABLE_DTYPES: mod, param_init = _make_mod_and_params(dtype) rand_input = _make_random_tensor(dtype) - main_func = mod['main'] - target = 'llvm --runtime=c --system-lib --link-params' + main_func = mod["main"] + target = "llvm --runtime=c --system-lib --link-params" with tvm.transform.PassContext(opt_level=3): lib = tvm.relay.build(mod, target, params=param_init) assert set(lib.params.keys()) == {"p0"} # NOTE: op folded @@ -186,60 +184,60 @@ def test_llvm_link_params(): def _run_linked(lib): graph_json, mod, _ = lib graph_rt = tvm.contrib.graph_runtime.create(graph_json, mod, tvm.cpu(0)) - graph_rt.set_input('rand_input', rand_input) # NOTE: params not required. + graph_rt.set_input("rand_input", rand_input) # NOTE: params not required. 
graph_rt.run() return graph_rt.get_output(0) linked_output = _run_linked(lib) with tvm.transform.PassContext(opt_level=3): - lib = tvm.relay.build(mod, 'llvm --system-lib', params=param_init) + lib = tvm.relay.build(mod, "llvm --system-lib", params=param_init) def _run_unlinked(lib): graph_json, mod, lowered_params = lib graph_rt = tvm.contrib.graph_runtime.create(graph_json, mod, tvm.cpu(0)) - graph_rt.set_input('rand_input', rand_input, **lowered_params) + graph_rt.set_input("rand_input", rand_input, **lowered_params) graph_rt.run() return graph_rt.get_output(0) unlinked_output = _run_unlinked(lib) - if 'int' in dtype: + if "int" in dtype: np.testing.assert_equal(unlinked_output.asnumpy(), linked_output.asnumpy()) else: np.testing.assert_allclose(unlinked_output.asnumpy(), linked_output.asnumpy()) def _get_c_datatype(dtype): - """Translate LINKABLE_DTYPES element to c datatype.""" - if 'int' in dtype: - return f'{dtype}_t' - elif dtype == 'float32': - return 'float' - elif dtype == 'float64': - return 'double' - else: - assert False, f'unknown dtype {dtype}' + """Translate LINKABLE_DTYPES element to c datatype.""" + if "int" in dtype: + return f"{dtype}_t" + elif dtype == "float32": + return "float" + elif dtype == "float64": + return "double" + else: + assert False, f"unknown dtype {dtype}" def _format_c_value(dtype, width, x): - if 'int' in dtype: - hex_formatstr = f'{{:{"+" if dtype.startswith("int") else ""}#0{width}x}}' - return hex_formatstr.format(x) - elif 'float' in dtype: - to_ret = float(x).hex() - if 'inf' in to_ret: - return ('-' if x < 0 else '') + 'INFINITY' - elif 'nan' in to_ret: - return 'NAN' - - before, after = to_ret.split('p') - return f'{before.rstrip("0")}p{after}' - else: - assert False, f"don't know dtype {dtype}" + if "int" in dtype: + hex_formatstr = f'{{:{"+" if dtype.startswith("int") else ""}#0{width}x}}' + return hex_formatstr.format(x) + elif "float" in dtype: + to_ret = float(x).hex() + if "inf" in to_ret: + return ("-" if x < 0 else "") + "INFINITY" + elif "nan" in to_ret: + return "NAN" + + before, after = to_ret.split("p") + return f'{before.rstrip("0")}p{after}' + else: + assert False, f"don't know dtype {dtype}" -HEX_NUM_RE = re.compile(r'[+\-]?(?:(?:0x[0-9A-Fa-f.p+-]+)|(?:INFINITY)|(?:NAN))') +HEX_NUM_RE = re.compile(r"[+\-]?(?:(?:0x[0-9A-Fa-f.p+-]+)|(?:INFINITY)|(?:NAN))") def test_c_link_params(): @@ -247,58 +245,58 @@ def test_c_link_params(): for dtype in LINKABLE_DTYPES: mod, param_init = _make_mod_and_params(dtype) rand_input = _make_random_tensor(dtype) - main_func = mod['main'] - target = 'c --link-params' + main_func = mod["main"] + target = "c --link-params" with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): lib = tvm.relay.build(mod, target, params=param_init) assert set(lib.params.keys()) == {"p0"} # NOTE: op folded src = lib.lib.get_source() - lib.lib.save('test.c', 'cc') + lib.lib.save("test.c", "cc") c_dtype = _get_c_datatype(dtype) - src_lines = src.split('\n') - param = lib.params['p0'].asnumpy().reshape(np.prod(TEST_SHAPE)) - param_def = f'static const {c_dtype} __tvm_param__p0[{np.prod(param.shape)}] = {{' + src_lines = src.split("\n") + param = lib.params["p0"].asnumpy().reshape(np.prod(TEST_SHAPE)) + param_def = f"static const {c_dtype} __tvm_param__p0[{np.prod(param.shape)}] = {{" for i, line in enumerate(src_lines): - if line == param_def: - i += 1 - break + if line == param_def: + i += 1 + break else: - assert False, f'did not find parameter definition "{param_def}":\n{src}' + assert False, 
f'did not find parameter definition "{param_def}":\n{src}' cursor = 0 width = dtype_info(dtype).bits // 4 + 2 if dtype.startswith("int"): - width += 1 # Account for sign - - print('check printing of', param) - while '};' not in src_lines[i]: - for match in HEX_NUM_RE.finditer(src_lines[i]): - assert match.group() == _format_c_value(dtype, width, param[cursor]), ( - f'p0 byte {cursor}: want "{_format_c_value(dtype, width, param[cursor])}" got ' - f'"{match.group(0)}"; full p0 follows:\n{src}') - cursor += 1 - i += 1 + width += 1 # Account for sign + + print("check printing of", param) + while "};" not in src_lines[i]: + for match in HEX_NUM_RE.finditer(src_lines[i]): + assert match.group() == _format_c_value(dtype, width, param[cursor]), ( + f'p0 byte {cursor}: want "{_format_c_value(dtype, width, param[cursor])}" got ' + f'"{match.group(0)}"; full p0 follows:\n{src}' + ) + cursor += 1 + i += 1 assert cursor == np.prod(param.shape) temp = utils.tempdir() # Need a unique name per library to avoid dlopen caching the lib load. - lib_path = temp_dir.relpath(f'test-{dtype}-linked.so') - lib['remove_params']().export_library(lib_path) + lib_path = temp_dir.relpath(f"test-{dtype}-linked.so") + lib["remove_params"]().export_library(lib_path) lib_mod = tvm.runtime.load_module(lib_path) -# lib_mod = lib_factory['default']() + # lib_mod = lib_factory['default']() graph = json.loads(lib.graph_json) for p in lib.params: _verify_linked_param(dtype, lib, lib_mod, graph, p) # Wrap in function to explicitly deallocate the runtime. def _run_linked(lib_mod): - graph_rt = tvm.contrib.graph_runtime.GraphModule( - lib_mod['default'](tvm.cpu(0))) - graph_rt.set_input('rand_input', rand_input) # NOTE: params not required. - print('linked', graph_rt.get_input('p0')) + graph_rt = tvm.contrib.graph_runtime.GraphModule(lib_mod["default"](tvm.cpu(0))) + graph_rt.set_input("rand_input", rand_input) # NOTE: params not required. + print("linked", graph_rt.get_input("p0")) graph_rt.run() return graph_rt.get_output(0) @@ -307,23 +305,24 @@ def _run_linked(lib_mod): linked_params = lib.params with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): - lib = tvm.relay.build(mod, 'c', params=param_init) + lib = tvm.relay.build(mod, "c", params=param_init) _, _, params = lib # Need a unique name per library to avoid dlopen caching the lib load. 
- lib_path = temp_dir.relpath(f'test-{dtype}-unlinked.so') + lib_path = temp_dir.relpath(f"test-{dtype}-unlinked.so") lib.export_library(lib_path) lib_mod = tvm.runtime.load_module(lib_path) - print('unlinked', params) + print("unlinked", params) + def _run_unlinked(lib_mod): - graph_rt = tvm.contrib.graph_runtime.GraphModule(lib_mod['default'](tvm.cpu(0))) - graph_rt.set_input('rand_input', rand_input, **params) + graph_rt = tvm.contrib.graph_runtime.GraphModule(lib_mod["default"](tvm.cpu(0))) + graph_rt.set_input("rand_input", rand_input, **params) graph_rt.run() return graph_rt.get_output(0) unlinked_output = _run_unlinked(lib_mod) - if 'int' in dtype: + if "int" in dtype: np.testing.assert_equal(unlinked_output.asnumpy(), linked_output.asnumpy()) else: np.testing.assert_allclose(unlinked_output.asnumpy(), linked_output.asnumpy()) @@ -336,8 +335,8 @@ def test_crt_link_params(): for dtype in LINKABLE_DTYPES: mod, param_init = _make_mod_and_params(dtype) rand_input = _make_random_tensor(dtype) - main_func = mod['main'] - target = 'c -mcpu=native --system-lib --runtime=c --link-params' + main_func = mod["main"] + target = "c -mcpu=native --system-lib --runtime=c --link-params" with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): graph_json, lib, params = tvm.relay.build(mod, target, params=param_init) assert set(params.keys()) == {"p0"} # NOTE: op folded @@ -345,7 +344,7 @@ def test_crt_link_params(): workspace = tvm.micro.Workspace() compiler = tvm.micro.DefaultCompiler(target=target) opts = tvm.micro.default_options(os.path.join(tvm.micro.CRT_ROOT_DIR, "host")) - opts['bin_opts']['ldflags'].append('-DTVM_HOST_USE_GRAPH_RUNTIME_MODULE') + opts["bin_opts"]["ldflags"].append("-DTVM_HOST_USE_GRAPH_RUNTIME_MODULE") micro_binary = tvm.micro.build_static_runtime( # the x86 compiler *expects* you to give the exact same dictionary for both @@ -357,8 +356,10 @@ def test_crt_link_params(): lib, lib_opts=opts["bin_opts"], bin_opts=opts["bin_opts"], - extra_libs=[os.path.join(tvm.micro.CRT_ROOT_DIR, m) - for m in ('graph_runtime', 'graph_runtime_module')], + extra_libs=[ + os.path.join(tvm.micro.CRT_ROOT_DIR, m) + for m in ("graph_runtime", "graph_runtime_module") + ], ) flasher_kw = { @@ -367,31 +368,30 @@ def test_crt_link_params(): flasher = compiler.flasher(**flasher_kw) with tvm.micro.Session(binary=micro_binary, flasher=flasher) as sess: rpc_lib = sess.get_system_lib() - graph_rt = tvm.contrib.graph_runtime.create( - graph_json, rpc_lib, sess.context) + graph_rt = tvm.contrib.graph_runtime.create(graph_json, rpc_lib, sess.context) # NOTE: not setting params here. 
- graph_rt.set_input('rand_input', rand_input) + graph_rt.set_input("rand_input", rand_input) graph_rt.run() linked_output = graph_rt.get_output(0).asnumpy() with tvm.transform.PassContext(opt_level=3): - lib = tvm.relay.build(mod, 'llvm --system-lib', params=param_init) + lib = tvm.relay.build(mod, "llvm --system-lib", params=param_init) def _run_unlinked(lib): graph_json, mod, lowered_params = lib graph_rt = tvm.contrib.graph_runtime.create(graph_json, mod, tvm.cpu(0)) - graph_rt.set_input('rand_input', rand_input, **lowered_params) + graph_rt.set_input("rand_input", rand_input, **lowered_params) graph_rt.run() return graph_rt.get_output(0) unlinked_output = _run_unlinked(lib).asnumpy() - if 'int' in dtype: + if "int" in dtype: np.testing.assert_equal(unlinked_output, linked_output) else: np.testing.assert_allclose(unlinked_output, linked_output) -if __name__ == '__main__': - sys.exit(pytest.main([__file__] + sys.argv[1:])) +if __name__ == "__main__": + sys.exit(pytest.main([__file__] + sys.argv[1:])) From 6d6aa6687e25ce7f1dab66549bd67a4984cadc97 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 13 Nov 2020 14:09:36 -0800 Subject: [PATCH 15/60] git-clang-format again --- include/tvm/runtime/crt/platform.h | 4 +- include/tvm/runtime/device_api.h | 10 +- src/relay/backend/build_module.cc | 12 +- src/runtime/crt/common/memory.c | 4 +- src/runtime/crt/graph_runtime/graph_runtime.c | 19 +- .../graph_runtime_module.c | 70 ++- src/runtime/crt/host/main.cc | 4 +- src/runtime/crt/utvm_rpc_server/rpc_server.cc | 3 +- src/runtime/graph/graph_runtime.cc | 18 +- src/runtime/graph/graph_runtime.h | 5 +- src/runtime/rpc/rpc_module.cc | 47 +- src/target/llvm/codegen_llvm.cc | 50 +- src/target/llvm/codegen_params.cc | 537 +++++++++--------- src/target/llvm/codegen_params.h | 10 +- src/target/llvm/llvm_module.cc | 9 +- src/target/source/codegen_c_host.cc | 22 +- tests/micro/qemu/zephyr-runtime/src/main.c | 4 +- 17 files changed, 428 insertions(+), 400 deletions(-) diff --git a/include/tvm/runtime/crt/platform.h b/include/tvm/runtime/crt/platform.h index 3eac45f64e4e..0f8c6ba7baf2 100644 --- a/include/tvm/runtime/crt/platform.h +++ b/include/tvm/runtime/crt/platform.h @@ -53,8 +53,8 @@ void __attribute__((noreturn)) TVMPlatformAbort(tvm_crt_error_t code); * \param args extra arguments to be formatted. * \return number of bytes written. */ -size_t TVMPlatformFormatMessage(char* out_buf, size_t out_buf_size_bytes, - const char* fmt, va_list args); +size_t TVMPlatformFormatMessage(char* out_buf, size_t out_buf_size_bytes, const char* fmt, + va_list args); #ifdef __cplusplus } // extern "C" diff --git a/include/tvm/runtime/device_api.h b/include/tvm/runtime/device_api.h index 40989e4057e0..a6f5624de084 100644 --- a/include/tvm/runtime/device_api.h +++ b/include/tvm/runtime/device_api.h @@ -243,9 +243,7 @@ inline const char* DeviceName(int type) { /*! * \brief Return true if a TVMContext is owned by an RPC session. */ -inline bool IsRPCSessionContext(TVMContext ctx) { - return (ctx.device_type / kRPCSessMask) > 0; -} +inline bool IsRPCSessionContext(TVMContext ctx) { return (ctx.device_type / kRPCSessMask) > 0; } /*! * \brief Return the RPCSessTable index of the RPC Session that owns this context. 
@@ -279,9 +277,9 @@ inline std::ostream& operator<<(std::ostream& os, DLContext ctx); */ inline TVMContext AddRPCSessionMask(TVMContext ctx, int session_table_index) { CHECK(!IsRPCSessionContext(ctx)) - << "AddRPCSessionMask: ctx already non-zero RPCSessionIndex: " << ctx; - ctx.device_type = static_cast( - ctx.device_type | (kRPCSessMask * (session_table_index + 1))); + << "AddRPCSessionMask: ctx already non-zero RPCSessionIndex: " << ctx; + ctx.device_type = + static_cast(ctx.device_type | (kRPCSessMask * (session_table_index + 1))); return ctx; } diff --git a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc index 762d29b90933..189227bb15a1 100644 --- a/src/relay/backend/build_module.cc +++ b/src/relay/backend/build_module.cc @@ -460,29 +460,27 @@ class RelayBuildModule : public runtime::ModuleNode { // If no target_host has been set, we choose a default one, which is // llvm if "codegen.LLVMModuleCreate" is accessible. const runtime::PackedFunc* pf = runtime::Registry::Get("codegen.LLVMModuleCreate"); - if (!target_host.defined()) - target_host = (pf != nullptr) ? Target("llvm") : Target("stackvm"); + if (!target_host.defined()) target_host = (pf != nullptr) ? Target("llvm") : Target("stackvm"); if (target_host->GetAttr("link-params").value_or(Bool(false))) { CHECK(pf != nullptr) << "Unable to link-params with no target_host and no llvm codegen."; auto param_ids = graph_codegen_->GetParamIds(); auto link_params = Map(); for (auto param : ret_.params) { - link_params.Set( - param.first, tir::LinkedParam(param_ids[param.first], param.second)); + link_params.Set(param.first, tir::LinkedParam(param_ids[param.first], param.second)); } Map dict; dict.Set(tvm::tir::attr::kLinkedParams, link_params); dict.Set(tvm::attr::kGlobalSymbol, String(::tvm::runtime::symbol::tvm_lookup_linked_param)); DictAttrs attrs{dict}; - auto prim = tir::PrimFunc( - Array(), tir::SeqStmt(Array()), VoidType(), Map(), attrs); + auto prim = tir::PrimFunc(Array(), tir::SeqStmt(Array()), VoidType(), + Map(), attrs); if (lowered_funcs.find(target_host->str()) == lowered_funcs.end()) { lowered_funcs.Set(target_host->str(), IRModule(Map({}))); } lowered_funcs[target_host->str()]->Add( - GlobalVar(::tvm::runtime::symbol::tvm_lookup_linked_param), prim); + GlobalVar(::tvm::runtime::symbol::tvm_lookup_linked_param), prim); } // When there is no lowered_funcs due to reasons such as optimization. diff --git a/src/runtime/crt/common/memory.c b/src/runtime/crt/common/memory.c index 646ba46feecb..876c10efe3ea 100644 --- a/src/runtime/crt/common/memory.c +++ b/src/runtime/crt/common/memory.c @@ -229,8 +229,8 @@ void* MemoryManager_Realloc(MemoryManager* mgr, void* ptr, tvm_index_t size) { vleak_size++; } #if TVM_CRT_DEBUG > 1 - TVMLogf("reallocate: addr=%p, start=%" PRId64 "/%zu, npage=%" PRId64 ", vleak=%d, size=%zu", - data, start, mgr->ptable.max_pages, npage, vleak_size, size); + TVMLogf("reallocate: addr=%p, start=%" PRId64 "/%zu, npage=%" PRId64 ", vleak=%d, size=%zu", data, + start, mgr->ptable.max_pages, npage, vleak_size, size); #endif // TVM_CRT_DEBUG return data; } diff --git a/src/runtime/crt/graph_runtime/graph_runtime.c b/src/runtime/crt/graph_runtime/graph_runtime.c index c5dc792cf315..450272d8722b 100644 --- a/src/runtime/crt/graph_runtime/graph_runtime.c +++ b/src/runtime/crt/graph_runtime/graph_runtime.c @@ -544,9 +544,7 @@ uint32_t TVMGraphRuntime_GetEntryId(TVMGraphRuntime* runtime, uint32_t nid, uint * \param runtime The graph runtime. * \return the number of input tensors allocated. 
*/ -int TVMGraphRuntime_GetNumInputs(TVMGraphRuntime* runtime) { - return runtime->input_nodes_count; -} +int TVMGraphRuntime_GetNumInputs(TVMGraphRuntime* runtime) { return runtime->input_nodes_count; } /*! * \brief Get the input index given the name of input. @@ -689,9 +687,7 @@ void TVMGraphRuntime_Run(TVMGraphRuntime* runtime) { * \param runtime The graph runtime. * \return the number of output tensors allocated. */ -int TVMGraphRuntime_GetNumOutputs(TVMGraphRuntime* runtime) { - return runtime->outputs_count; -} +int TVMGraphRuntime_GetNumOutputs(TVMGraphRuntime* runtime) { return runtime->outputs_count; } int TVMGraphRuntime_GetOutput(TVMGraphRuntime* runtime, const int32_t idx, DLTensor* out) { int status = 0; @@ -721,7 +717,8 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { temp_args.tcodes[0] = kTVMArgInt; temp_args.values_count = 1; lookup_linked_param_valid = - (TVMPackedFunc_InitModuleFunc(&lookup_linked_param, runtime->module_handle, "_lookup_linked_param", &temp_args) == 0); + (TVMPackedFunc_InitModuleFunc(&lookup_linked_param, runtime->module_handle, + "_lookup_linked_param", &temp_args) == 0); } // Grab saved optimization plan from graph. @@ -757,9 +754,8 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { // Allocate the space. for (idx = 0; idx < pool_entry_count; idx++) { - runtime->storage_pool = - vrealloc(runtime->storage_pool, - sizeof(TVMGraphRuntimeStorageEntry) * (runtime->storage_pool_count + 1)); + runtime->storage_pool = vrealloc(runtime->storage_pool, sizeof(TVMGraphRuntimeStorageEntry) * + (runtime->storage_pool_count + 1)); TVMGraphRuntimePoolEntry pit = pool_entry[idx]; TVMContext ctx = runtime->ctxs[0]; uint8_t did_find_linked_param = 0; @@ -787,7 +783,8 @@ void TVMGraphRuntime_SetupStorage(TVMGraphRuntime* runtime) { DLDataType dtype = {kDLFloat, 32, 1}; shape[0] = (pit.size + 3) / 4; runtime->storage_pool[runtime->storage_pool_count].is_linked_param = 0; - runtime->storage_pool[runtime->storage_pool_count].array = TVMNDArray_Empty(1, shape, dtype, ctx); + runtime->storage_pool[runtime->storage_pool_count].array = + TVMNDArray_Empty(1, shape, dtype, ctx); CHECK_NE(runtime->storage_pool[runtime->storage_pool_count].array.dl_tensor.data, 0, "fail to create storage_pool with idx=%d\n", idx); } diff --git a/src/runtime/crt/graph_runtime_module/graph_runtime_module.c b/src/runtime/crt/graph_runtime_module/graph_runtime_module.c index a8de71e33f9d..3e73efcc62ab 100644 --- a/src/runtime/crt/graph_runtime_module/graph_runtime_module.c +++ b/src/runtime/crt/graph_runtime_module/graph_runtime_module.c @@ -24,9 +24,9 @@ * \brief wrap graph_runtime into a TVMModule for use with RPC. 
*/ -#include #include #include +#include #include #include "tvm/runtime/crt/internal/graph_runtime/graph_runtime.h" @@ -38,7 +38,8 @@ typedef struct { static GraphRuntimeModule graph_runtime; -int32_t TVMGraphRuntimeModule_Create(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { +int32_t TVMGraphRuntimeModule_Create(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, + int* ret_tcodes, void* resource_handle) { if (graph_runtime.runtime != NULL) { return kTvmErrorGraphModuleAlreadyCreated; } @@ -47,7 +48,8 @@ int32_t TVMGraphRuntimeModule_Create(TVMValue* args, int* tcodes, int nargs, TVM return kTvmErrorFunctionCallNumArguments; } - if (tcodes[0] != kTVMStr || tcodes[1] != kTVMModuleHandle || tcodes[2] != kTVMArgInt || tcodes[3] != kTVMArgInt) { + if (tcodes[0] != kTVMStr || tcodes[1] != kTVMModuleHandle || tcodes[2] != kTVMArgInt || + tcodes[3] != kTVMArgInt) { return kTvmErrorFunctionCallWrongArgType; } @@ -55,7 +57,7 @@ int32_t TVMGraphRuntimeModule_Create(TVMValue* args, int* tcodes, int nargs, TVM return kTvmErrorGraphModuleBadContext; } - TVMContext ctx = {(DLDeviceType) args[2].v_int64, (int) args[3].v_int64}; + TVMContext ctx = {(DLDeviceType)args[2].v_int64, (int)args[3].v_int64}; graph_runtime.runtime = TVMGraphRuntime_Create(args[0].v_str, args[1].v_handle, &ctx); TVMModuleHandle out; @@ -71,7 +73,8 @@ int32_t TVMGraphRuntimeModule_Create(TVMValue* args, int* tcodes, int nargs, TVM return kTvmErrorNoError; } -int32_t TVMGraphRuntimeModule_GetInput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { +int32_t TVMGraphRuntimeModule_GetInput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, + int* ret_tcodes, void* resource_handle) { if (nargs != 1) { return kTvmErrorFunctionCallNumArguments; } @@ -85,14 +88,16 @@ int32_t TVMGraphRuntimeModule_GetInput(TVMValue* args, int* tcodes, int nargs, T return kTvmErrorGraphModuleNoSuchInput; } - uint32_t eid = TVMGraphRuntime_GetEntryId( - graph_runtime.runtime, graph_runtime.runtime->input_nodes[index], 0); - ret_values[0].v_handle = (void*) &graph_runtime.runtime->data_entry[eid].dl_tensor; + uint32_t eid = TVMGraphRuntime_GetEntryId(graph_runtime.runtime, + graph_runtime.runtime->input_nodes[index], 0); + ret_values[0].v_handle = (void*)&graph_runtime.runtime->data_entry[eid].dl_tensor; ret_tcodes[0] = kTVMNDArrayHandle; return 0; } -int32_t TVMGraphRuntimeModule_GetNumInputs(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { +int32_t TVMGraphRuntimeModule_GetNumInputs(TVMValue* args, int* tcodes, int nargs, + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { if (nargs != 0) { return kTvmErrorFunctionCallNumArguments; } @@ -102,7 +107,9 @@ int32_t TVMGraphRuntimeModule_GetNumInputs(TVMValue* args, int* tcodes, int narg return 0; } -int32_t TVMGraphRuntimeModule_GetNumOutputs(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { +int32_t TVMGraphRuntimeModule_GetNumOutputs(TVMValue* args, int* tcodes, int nargs, + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { if (nargs != 0) { return kTvmErrorFunctionCallNumArguments; } @@ -112,7 +119,9 @@ int32_t TVMGraphRuntimeModule_GetNumOutputs(TVMValue* args, int* tcodes, int nar return 0; } -int32_t TVMGraphRuntimeModule_GetOutput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { 
+int32_t TVMGraphRuntimeModule_GetOutput(TVMValue* args, int* tcodes, int nargs, + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { if (nargs != 1) { return kTvmErrorFunctionCallNumArguments; } @@ -130,12 +139,14 @@ int32_t TVMGraphRuntimeModule_GetOutput(TVMValue* args, int* tcodes, int nargs, uint32_t index = graph_runtime.runtime->outputs[output_index].index; uint32_t eid = TVMGraphRuntime_GetEntryId(graph_runtime.runtime, nid, index); - ret_values[0].v_handle = (void*) &(graph_runtime.runtime->data_entry[eid].dl_tensor); + ret_values[0].v_handle = (void*)&(graph_runtime.runtime->data_entry[eid].dl_tensor); ret_tcodes[0] = kTVMNDArrayHandle; return 0; } -int32_t TVMGraphRuntimeModule_LoadParams(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { +int32_t TVMGraphRuntimeModule_LoadParams(TVMValue* args, int* tcodes, int nargs, + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { if (nargs != 1) { return kTvmErrorFunctionCallNumArguments; } @@ -146,11 +157,12 @@ int32_t TVMGraphRuntimeModule_LoadParams(TVMValue* args, int* tcodes, int nargs, ret_tcodes[0] = kTVMNullptr; - TVMByteArray* arr = (TVMByteArray*) args[0].v_handle; + TVMByteArray* arr = (TVMByteArray*)args[0].v_handle; return TVMGraphRuntime_LoadParams(graph_runtime.runtime, arr->data, arr->size); } -int32_t TVMGraphRuntimeModule_Run(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { +int32_t TVMGraphRuntimeModule_Run(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, + int* ret_tcodes, void* resource_handle) { if (nargs != 0) { return kTvmErrorFunctionCallNumArguments; } @@ -161,7 +173,8 @@ int32_t TVMGraphRuntimeModule_Run(TVMValue* args, int* tcodes, int nargs, TVMVal return 0; } -int32_t TVMGraphRuntimeModule_SetInput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { +int32_t TVMGraphRuntimeModule_SetInput(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, + int* ret_tcodes, void* resource_handle) { if (nargs != 2) { return kTvmErrorFunctionCallNumArguments; } @@ -170,28 +183,25 @@ int32_t TVMGraphRuntimeModule_SetInput(TVMValue* args, int* tcodes, int nargs, T return kTvmErrorFunctionCallWrongArgType; } - TVMGraphRuntime_SetInput(graph_runtime.runtime, args[0].v_str, (DLTensor*) args[1].v_handle); + TVMGraphRuntime_SetInput(graph_runtime.runtime, args[0].v_str, (DLTensor*)args[1].v_handle); ret_tcodes[0] = kTVMNullptr; return 0; } -int32_t TVMGraphRuntimeModule_NotImplemented(TVMValue* args, int* tcodes, int nargs, TVMValue* ret_values, int* ret_tcodes, void* resource_handle) { +int32_t TVMGraphRuntimeModule_NotImplemented(TVMValue* args, int* tcodes, int nargs, + TVMValue* ret_values, int* ret_tcodes, + void* resource_handle) { return kTvmErrorFunctionCallNotImplemented; } static const TVMBackendPackedCFunc graph_runtime_registry_funcs[] = { - &TVMGraphRuntimeModule_GetInput, - &TVMGraphRuntimeModule_GetNumInputs, - &TVMGraphRuntimeModule_GetNumOutputs, - &TVMGraphRuntimeModule_GetOutput, - &TVMGraphRuntimeModule_LoadParams, - &TVMGraphRuntimeModule_Run, - &TVMGraphRuntimeModule_SetInput, - &TVMGraphRuntimeModule_NotImplemented, + &TVMGraphRuntimeModule_GetInput, &TVMGraphRuntimeModule_GetNumInputs, + &TVMGraphRuntimeModule_GetNumOutputs, &TVMGraphRuntimeModule_GetOutput, + &TVMGraphRuntimeModule_LoadParams, &TVMGraphRuntimeModule_Run, + &TVMGraphRuntimeModule_SetInput, &TVMGraphRuntimeModule_NotImplemented, }; - 
static const TVMFuncRegistry graph_runtime_registry = { "\x08get_input\0" "get_num_inputs\0" @@ -204,8 +214,8 @@ static const TVMFuncRegistry graph_runtime_registry = { graph_runtime_registry_funcs}; tvm_crt_error_t TVMGraphRuntimeModule_Register() { - graph_runtime.mod.registry = &graph_runtime_registry; - graph_runtime.runtime = NULL; + graph_runtime.mod.registry = &graph_runtime_registry; + graph_runtime.runtime = NULL; - return TVMFuncRegisterGlobal("tvm.graph_runtime.create", &TVMGraphRuntimeModule_Create, 0); + return TVMFuncRegisterGlobal("tvm.graph_runtime.create", &TVMGraphRuntimeModule_Create, 0); } diff --git a/src/runtime/crt/host/main.cc b/src/runtime/crt/host/main.cc index 8705ca899103..41f2dc3b0a1b 100644 --- a/src/runtime/crt/host/main.cc +++ b/src/runtime/crt/host/main.cc @@ -47,8 +47,8 @@ ssize_t UTvmWriteFunc(void* context, const uint8_t* data, size_t num_bytes) { return to_return; } -size_t TVMPlatformFormatMessage(char* out_buf, size_t out_buf_size_bytes, - const char* fmt, va_list args) { +size_t TVMPlatformFormatMessage(char* out_buf, size_t out_buf_size_bytes, const char* fmt, + va_list args) { return vsnprintf(out_buf, out_buf_size_bytes, fmt, args); } diff --git a/src/runtime/crt/utvm_rpc_server/rpc_server.cc b/src/runtime/crt/utvm_rpc_server/rpc_server.cc index 84930866367e..6674d5993cc6 100644 --- a/src/runtime/crt/utvm_rpc_server/rpc_server.cc +++ b/src/runtime/crt/utvm_rpc_server/rpc_server.cc @@ -219,8 +219,7 @@ void TVMLogf(const char* format, ...) { va_list args; char log_buffer[256]; va_start(args, format); - size_t num_bytes_logged = TVMPlatformFormatMessage( - log_buffer, sizeof(log_buffer), format, args); + size_t num_bytes_logged = TVMPlatformFormatMessage(log_buffer, sizeof(log_buffer), format, args); va_end(args); // Most header-based logging frameworks tend to insert '\n' at the end of the log message. diff --git a/src/runtime/graph/graph_runtime.cc b/src/runtime/graph/graph_runtime.cc index 423a2d62ea93..e0401134cccc 100644 --- a/src/runtime/graph/graph_runtime.cc +++ b/src/runtime/graph/graph_runtime.cc @@ -261,8 +261,8 @@ void GraphRuntime::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { TVMContext ctx = args[3]; // Get pre-linked parameter lookup function, if it was generated. When pf == nullptr, no linked // params are present. - tvm::runtime::PackedFunc pf = mod.GetFunction( - ::tvm::runtime::symbol::tvm_lookup_linked_param, true); + tvm::runtime::PackedFunc pf = + mod.GetFunction(::tvm::runtime::symbol::tvm_lookup_linked_param, true); if (pf == nullptr) { *rv = nullptr; return; @@ -274,9 +274,8 @@ void GraphRuntime::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { return; } - std::vector shape_vec{ - template_tensor->shape, - template_tensor->shape + template_tensor->ndim}; + std::vector shape_vec{template_tensor->shape, + template_tensor->shape + template_tensor->ndim}; std::unique_ptr container{new NDArray::Container( static_cast(opaque_handle), shape_vec, template_tensor->dtype, ctx)}; @@ -297,7 +296,6 @@ std::string List2String(std::vector shape) { return ss.str(); } - void GraphRuntime::SetupStorage() { // Grab saved optimization plan from graph. 
std::vector vtype; @@ -335,10 +333,10 @@ void GraphRuntime::SetupStorage() { TVMRetValue lookup_rv; { std::vector shape_vec{attrs_.shape[i].begin(), attrs_.shape[i].end()}; - DLTensor template_tensor{ - nullptr, TVMContext{kDLCPU, 0}, static_cast(shape_vec.size()), vtype[i], shape_vec.data(), nullptr, 0}; - lookup_rv = lookup_linked_param_( - module_, sid, &template_tensor, ctxs_[0]); + DLTensor template_tensor{nullptr, TVMContext{kDLCPU, 0}, static_cast(shape_vec.size()), + vtype[i], shape_vec.data(), nullptr, + 0}; + lookup_rv = lookup_linked_param_(module_, sid, &template_tensor, ctxs_[0]); } if (lookup_rv.type_code() != kTVMNullptr) { pool_entry[sid].linked_param = lookup_rv; diff --git a/src/runtime/graph/graph_runtime.h b/src/runtime/graph/graph_runtime.h index 9e95dfc9bf96..f1894c4830d0 100644 --- a/src/runtime/graph/graph_runtime.h +++ b/src/runtime/graph/graph_runtime.h @@ -187,8 +187,8 @@ class TVM_DLL GraphRuntime : public ModuleNode { int device_type; int param_data_entry; NDArray linked_param; -// PoolEntry(int s, int dev_type, void* pre_linked_param) : -// size(s), device_type(dev_type), pre_linked_param(std::move(pre_linked_param)) {} + // PoolEntry(int s, int dev_type, void* pre_linked_param) : + // size(s), device_type(dev_type), pre_linked_param(std::move(pre_linked_param)) {} }; // Node entry struct NodeEntry { @@ -420,7 +420,6 @@ class TVM_DLL GraphRuntime : public ModuleNode { std::vector> op_execs_; /*! \brief Linked parameter lookup function. */ PackedFunc lookup_linked_param_; - }; std::vector GetAllContext(const TVMArgs& args, int ctx_start_arg); diff --git a/src/runtime/rpc/rpc_module.cc b/src/runtime/rpc/rpc_module.cc index 0ddd13572949..c34ec26fb6c4 100644 --- a/src/runtime/rpc/rpc_module.cc +++ b/src/runtime/rpc/rpc_module.cc @@ -37,7 +37,6 @@ namespace tvm { namespace runtime { - // deleter of RPC remote array static void RemoteNDArrayDeleter(Object* obj) { auto* ptr = static_cast(obj); @@ -59,22 +58,23 @@ static void RemoteNDArrayDeleter(Object* obj) { * needs to be explicitly deleted after the NDArray is freed, this function should do that. * \param deleter_ctx An opaque pointer passed to deleter to identify the tensor being deleted. */ -NDArray NDArrayFromRemoteOpaqueHandle(std::shared_ptr sess, void* handle, DLTensor* template_tensor, TVMContext ctx, ADTObj::FDeleter deleter, void* deleter_ctx) { +NDArray NDArrayFromRemoteOpaqueHandle(std::shared_ptr sess, void* handle, + DLTensor* template_tensor, TVMContext ctx, + ADTObj::FDeleter deleter, void* deleter_ctx) { ICHECK_EQ(sess->table_index(), GetRPCSessionIndex(ctx)) - << "The TVMContext given does not belong to the given session"; + << "The TVMContext given does not belong to the given session"; RemoteSpace* space = new RemoteSpace(); space->sess = sess; space->data = handle; std::vector shape_vec{template_tensor->shape, template_tensor->shape + template_tensor->ndim}; - NDArray::Container* data = new NDArray::Container( - static_cast(space), std::move(shape_vec), template_tensor->dtype, ctx); + NDArray::Container* data = new NDArray::Container(static_cast(space), std::move(shape_vec), + template_tensor->dtype, ctx); data->manager_ctx = deleter_ctx; data->SetDeleter(deleter); return NDArray(GetObjectPtr(data)); } - /*! * \brief A wrapped remote function as a PackedFunc. 
*/ @@ -284,7 +284,9 @@ void RPCWrappedFunc::WrapRemoteReturnToValue(TVMArgs args, TVMRetValue* rv) cons ICHECK_EQ(args.size(), 3); DLTensor* tensor = args[1]; void* nd_handle = args[2]; - *rv = NDArrayFromRemoteOpaqueHandle(sess_, tensor->data, tensor, AddRPCSessionMask(tensor->ctx, sess_->table_index()), RemoteNDArrayDeleter, nd_handle); + *rv = NDArrayFromRemoteOpaqueHandle(sess_, tensor->data, tensor, + AddRPCSessionMask(tensor->ctx, sess_->table_index()), + RemoteNDArrayDeleter, nd_handle); } else { ICHECK_EQ(args.size(), 2); *rv = args[1]; @@ -470,20 +472,23 @@ TVM_REGISTER_GLOBAL("rpc.SessTableIndex").set_body([](TVMArgs args, TVMRetValue* *rv = static_cast(m.operator->())->sess()->table_index(); }); -TVM_REGISTER_GLOBAL("tvm.rpc.NDArrayFromRemoteOpaqueHandle").set_body_typed( - [](Module mod, void* remote_array, DLTensor* template_tensor, TVMContext ctx, PackedFunc deleter) -> NDArray { -// auto func = new std::function([deleter]() -> void { -// deleter(); -// }); - return NDArrayFromRemoteOpaqueHandle( - RPCModuleGetSession(mod), remote_array, template_tensor, ctx, - [](Object* context) { -// auto container = static_cast(context); -// auto cb_func = reinterpret_cast*>(container->manager_ctx); -// (*cb_func)(); -// delete cb_func; - }, nullptr);//(void*) func); - }); +TVM_REGISTER_GLOBAL("tvm.rpc.NDArrayFromRemoteOpaqueHandle") + .set_body_typed([](Module mod, void* remote_array, DLTensor* template_tensor, TVMContext ctx, + PackedFunc deleter) -> NDArray { + // auto func = new std::function([deleter]() -> void { + // deleter(); + // }); + return NDArrayFromRemoteOpaqueHandle( + RPCModuleGetSession(mod), remote_array, template_tensor, ctx, + [](Object* context) { + // auto container = static_cast(context); + // auto cb_func = + // reinterpret_cast*>(container->manager_ctx); + // (*cb_func)(); + // delete cb_func; + }, + nullptr); //(void*) func); + }); } // namespace runtime } // namespace tvm diff --git a/src/target/llvm/codegen_llvm.cc b/src/target/llvm/codegen_llvm.cc index 39ea82065377..611ce47c6126 100644 --- a/src/target/llvm/codegen_llvm.cc +++ b/src/target/llvm/codegen_llvm.cc @@ -210,9 +210,9 @@ void CodeGenLLVM::LinkParameters(const Map params) { // Once we allow more flexibility in the PrimFunc. 
llvm::FunctionType* ftype = llvm::FunctionType::get(t_int_, param_types, false); - llvm::Function* function = llvm::Function::Create( - ftype, llvm::Function::ExternalLinkage, - ::tvm::runtime::symbol::tvm_lookup_linked_param, module_.get()); + llvm::Function* function = + llvm::Function::Create(ftype, llvm::Function::ExternalLinkage, + ::tvm::runtime::symbol::tvm_lookup_linked_param, module_.get()); function->setCallingConv(llvm::CallingConv::C); function->setDLLStorageClass(llvm::GlobalValue::DLLStorageClassTypes::DLLExportStorageClass); @@ -220,24 +220,24 @@ void CodeGenLLVM::LinkParameters(const Map params) { builder_->SetInsertPoint(entry); std::vector zero_index_list{llvm::ConstantInt::get(t_int32_, 0)}; auto args_array = builder_->CreateBitCast( - &function->arg_begin()[0], llvm::ArrayType::get(t_void_->getPointerTo(GetGlobalAddressSpace()), 1)); - llvm::Value* sid = - builder_->CreateBitCast( + &function->arg_begin()[0], + llvm::ArrayType::get(t_void_->getPointerTo(GetGlobalAddressSpace()), 1)); + llvm::Value* sid = builder_->CreateBitCast( builder_->CreateLoad(t_void_->getPointerTo(GetGlobalAddressSpace()), - builder_->CreateInBoundsGEP(args_array, zero_index_list)), t_int64_); - // -// builder_->CreateGEP(&function->arg_begin()[0], zero_index_list), t_int64_); + builder_->CreateInBoundsGEP(args_array, zero_index_list)), + t_int64_); + // + // builder_->CreateGEP(&function->arg_begin()[0], zero_index_list), t_int64_); llvm::BasicBlock* default_block = llvm::BasicBlock::Create(*ctx_, "default_block", function); llvm::SwitchInst* switch_inst = builder_->CreateSwitch(sid, default_block, params.size() + 1); builder_->SetInsertPoint(default_block); { - auto ret_types_array = builder_->CreateBitCast( - &function->arg_begin()[4], llvm::ArrayType::get(t_int_, 1)); - builder_->CreateStore( - llvm::ConstantInt::get(t_int_, kTVMNullptr), - builder_->CreateGEP(ret_types_array, zero_index_list)); + auto ret_types_array = + builder_->CreateBitCast(&function->arg_begin()[4], llvm::ArrayType::get(t_int_, 1)); + builder_->CreateStore(llvm::ConstantInt::get(t_int_, kTVMNullptr), + builder_->CreateGEP(ret_types_array, zero_index_list)); builder_->CreateRet(ConstInt32(kTvmErrorNoError)); } @@ -249,24 +249,22 @@ void CodeGenLLVM::LinkParameters(const Map params) { array->print(os); std::string symbol_name = std::string{::tvm::runtime::symbol::tvm_param_prefix} + kv.first; llvm::GlobalVariable* param_symbol = new llvm::GlobalVariable( - *module_, array->getType(), true, llvm::GlobalValue::InternalLinkage, - array, symbol_name); + *module_, array->getType(), true, llvm::GlobalValue::InternalLinkage, array, symbol_name); llvm::BasicBlock* case_block = llvm::BasicBlock::Create(*ctx_, "case_" + symbol_name, function); switch_inst->addCase( - llvm::cast(llvm::ConstantInt::get(t_int64_, kv.second->id)), - case_block); + llvm::cast(llvm::ConstantInt::get(t_int64_, kv.second->id)), case_block); builder_->SetInsertPoint(case_block); auto retval_array = builder_->CreateBitCast( - &function->arg_begin()[3], llvm::ArrayType::get(t_void_->getPointerTo(GetGlobalAddressSpace()), 1)); - builder_->CreateStore( - builder_->CreatePointerCast(param_symbol, t_void_->getPointerTo(GetGlobalAddressSpace())), - builder_->CreateGEP(retval_array, zero_index_list)); - auto ret_types_array = builder_->CreateBitCast( - &function->arg_begin()[4], llvm::ArrayType::get(t_int_, 1)); + &function->arg_begin()[3], + llvm::ArrayType::get(t_void_->getPointerTo(GetGlobalAddressSpace()), 1)); builder_->CreateStore( - 
llvm::ConstantInt::get(t_int_, kTVMOpaqueHandle), - builder_->CreateGEP(ret_types_array, zero_index_list)); + builder_->CreatePointerCast(param_symbol, t_void_->getPointerTo(GetGlobalAddressSpace())), + builder_->CreateGEP(retval_array, zero_index_list)); + auto ret_types_array = + builder_->CreateBitCast(&function->arg_begin()[4], llvm::ArrayType::get(t_int_, 1)); + builder_->CreateStore(llvm::ConstantInt::get(t_int_, kTVMOpaqueHandle), + builder_->CreateGEP(ret_types_array, zero_index_list)); builder_->CreateRet(ConstInt32(0)); } diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index 9c0b979044f4..688daf6a7191 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -31,17 +31,15 @@ namespace codegen { class DLManagedTensorDeleter { public: - void operator()(DLManagedTensor* ptr) { - ptr->deleter(ptr); - } + void operator()(DLManagedTensor* ptr) { ptr->deleter(ptr); } }; llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime::NDArray arr) { llvm::Type* element_type = nullptr; auto arr_type = arr.DataType(); - CHECK_EQ(arr_type.lanes(), 1) - << "CodegenParams: only support generating 1-lane parameters; saw " << arr_type.lanes(); + CHECK_EQ(arr_type.lanes(), 1) << "CodegenParams: only support generating 1-lane parameters; saw " + << arr_type.lanes(); auto shape = arr.Shape(); int num_elements = 1; @@ -53,128 +51,121 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: std::vector elements; switch (arr_type.code()) { - case runtime::DataType::kInt: - CHECK(arr_type.bits() == 8 || - arr_type.bits() == 16 || - arr_type.bits() == 32 || - arr_type.bits() == 64) - << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " - << arr_type.bits() << "-bit array"; - element_type = llvm::Type::getIntNTy(*ctx, arr_type.bits()); - - if (arr_type.bits() == 8) { - int8_t* data_buf = static_cast(tensor->dl_tensor.data); - for (int i = 0; i < num_elements; i++) { - elements.emplace_back(llvm::ConstantInt::getSigned(element_type, data_buf[i])); - } - } else if (arr_type.bits() == 16) { - for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantInt::getSigned(element_type, ((int16_t*) tensor->dl_tensor.data)[i])); - } - } else if (arr_type.bits() == 32) { - for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantInt::getSigned(element_type, ((int32_t*) tensor->dl_tensor.data)[i])); - } - } else if (arr_type.bits() == 64) { - for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantInt::getSigned(element_type, ((int64_t*) tensor->dl_tensor.data)[i])); - } - } else { - CHECK(false) << "should not get here"; - } - break; - - case runtime::DataType::TypeCode::kUInt: - CHECK(arr_type.bits() == 8 || - arr_type.bits() == 16 || - arr_type.bits() == 32 || - arr_type.bits() == 64) - << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " - << arr_type.bits() << "-bit array"; - element_type = llvm::Type::getIntNTy(*ctx, arr_type.bits()); - - if (arr_type.bits() == 8) { - for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantInt::get(element_type, ((int8_t*) tensor->dl_tensor.data)[i])); - } - } else if (arr_type.bits() == 16) { - for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantInt::get(element_type, ((int16_t*) tensor->dl_tensor.data)[i])); - } - } else if (arr_type.bits() == 32) { - for (int i = 0; i < 
num_elements; i++) { - elements.emplace_back( - llvm::ConstantInt::get(element_type, ((int32_t*) tensor->dl_tensor.data)[i])); - } - } else if (arr_type.bits() == 64) { - for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantInt::get(element_type, ((int64_t*) tensor->dl_tensor.data)[i])); + case runtime::DataType::kInt: + CHECK(arr_type.bits() == 8 || arr_type.bits() == 16 || arr_type.bits() == 32 || + arr_type.bits() == 64) + << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " + << arr_type.bits() << "-bit array"; + element_type = llvm::Type::getIntNTy(*ctx, arr_type.bits()); + + if (arr_type.bits() == 8) { + int8_t* data_buf = static_cast(tensor->dl_tensor.data); + for (int i = 0; i < num_elements; i++) { + elements.emplace_back(llvm::ConstantInt::getSigned(element_type, data_buf[i])); + } + } else if (arr_type.bits() == 16) { + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantInt::getSigned(element_type, ((int16_t*)tensor->dl_tensor.data)[i])); + } + } else if (arr_type.bits() == 32) { + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantInt::getSigned(element_type, ((int32_t*)tensor->dl_tensor.data)[i])); + } + } else if (arr_type.bits() == 64) { + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantInt::getSigned(element_type, ((int64_t*)tensor->dl_tensor.data)[i])); + } + } else { + CHECK(false) << "should not get here"; } - } else { - CHECK(false) << "should not get here"; - } - break; - - case runtime::DataType::TypeCode::kFloat: - if (arr_type.bits() == 32) { - element_type = llvm::Type::getFloatTy(*ctx); - for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantFP::get(element_type, ((float*) tensor->dl_tensor.data)[i])); + break; + + case runtime::DataType::TypeCode::kUInt: + CHECK(arr_type.bits() == 8 || arr_type.bits() == 16 || arr_type.bits() == 32 || + arr_type.bits() == 64) + << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " + << arr_type.bits() << "-bit array"; + element_type = llvm::Type::getIntNTy(*ctx, arr_type.bits()); + + if (arr_type.bits() == 8) { + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantInt::get(element_type, ((int8_t*)tensor->dl_tensor.data)[i])); + } + } else if (arr_type.bits() == 16) { + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantInt::get(element_type, ((int16_t*)tensor->dl_tensor.data)[i])); + } + } else if (arr_type.bits() == 32) { + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantInt::get(element_type, ((int32_t*)tensor->dl_tensor.data)[i])); + } + } else if (arr_type.bits() == 64) { + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantInt::get(element_type, ((int64_t*)tensor->dl_tensor.data)[i])); + } + } else { + CHECK(false) << "should not get here"; } - } else if (arr_type.bits() == 64) { - element_type = llvm::Type::getDoubleTy(*ctx); - for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantFP::get(element_type, ((double*) tensor->dl_tensor.data)[i])); + break; + + case runtime::DataType::TypeCode::kFloat: + if (arr_type.bits() == 32) { + element_type = llvm::Type::getFloatTy(*ctx); + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantFP::get(element_type, ((float*)tensor->dl_tensor.data)[i])); + } + } else if (arr_type.bits() == 
64) { + element_type = llvm::Type::getDoubleTy(*ctx); + for (int i = 0; i < num_elements; i++) { + elements.emplace_back( + llvm::ConstantFP::get(element_type, ((double*)tensor->dl_tensor.data)[i])); + } + } else { + CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " + << arr_type.bits() << "-bit array"; } - } else { - CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " - << arr_type.bits() << "-bit array"; - } - break; + break; - default: - CHECK(false) << "Data type not supported"; + default: + CHECK(false) << "Data type not supported"; } - return llvm::cast( - llvm::ConstantArray::get(llvm::ArrayType::get(element_type, num_elements), - llvm::ArrayRef(elements))); + return llvm::cast(llvm::ConstantArray::get( + llvm::ArrayType::get(element_type, num_elements), llvm::ArrayRef(elements))); } - static constexpr const char* kFloatCast = "(float)"; static constexpr const char* kDoubleCast = "(double)"; static constexpr const int kMaxLineLength = 80; - void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& os) { auto arr_type = arr.DataType(); - CHECK_EQ(arr_type.lanes(), 1) - << "CodegenParams: only support generating 1-lane parameters; saw " << arr_type.lanes(); + CHECK_EQ(arr_type.lanes(), 1) << "CodegenParams: only support generating 1-lane parameters; saw " + << arr_type.lanes(); int one_element_size_bytes = (arr_type.bits() / 4) + (2 /* "0x" */) + (2 /* ", " */); if (arr_type.code() == runtime::DataType::TypeCode::kInt) { - one_element_size_bytes += 1; // sign bit + one_element_size_bytes += 1; // sign bit if (arr_type.bits() > 32) { one_element_size_bytes += 2; // "UL" } } else if (arr_type.code() == runtime::DataType::TypeCode::kUInt) { if (arr_type.bits() > 32) { - one_element_size_bytes += 1; // "L" + one_element_size_bytes += 1; // "L" } } else if (arr_type.code() == runtime::DataType::TypeCode::kFloat) { // Floats and doubles are printed as hex but casted. - one_element_size_bytes += 1 /* sign */ + 1 /* decimal point */ + - 1 /* exponent sign */ + 1 /* extra decimal digit in exponent */; + one_element_size_bytes += 1 /* sign */ + 1 /* decimal point */ + 1 /* exponent sign */ + + 1 /* extra decimal digit in exponent */; } int elements_per_row = 16; @@ -198,171 +189,207 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& std::ios::adjustfield | std::ios::basefield | std::ios::showbase); os.fill('0'); switch (arr_type.code()) { - case runtime::DataType::kInt: - CHECK(arr_type.bits() == 8 || - arr_type.bits() == 16 || - arr_type.bits() == 32 || - arr_type.bits() == 64) - << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " - << arr_type.bits() << "-bit array"; - - if (arr_type.bits() == 8) { - for (int i = 0; i < num_elements; i++) { - // NOTE: for special types int8_t and uint8_t, need to promote to int type to avoid printing - // as a char. 
- int8_t elem = static_cast(tensor->dl_tensor.data)[i]; - uint16_t to_print; - if (elem < 0) { - os << "-"; - to_print = -elem; - } else { - os << "+"; - to_print = elem; + case runtime::DataType::kInt: + CHECK(arr_type.bits() == 8 || arr_type.bits() == 16 || arr_type.bits() == 32 || + arr_type.bits() == 64) + << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " + << arr_type.bits() << "-bit array"; + + if (arr_type.bits() == 8) { + for (int i = 0; i < num_elements; i++) { + // NOTE: for special types int8_t and uint8_t, need to promote to int type to avoid + // printing as a char. + int8_t elem = static_cast(tensor->dl_tensor.data)[i]; + uint16_t to_print; + if (elem < 0) { + os << "-"; + to_print = -elem; + } else { + os << "+"; + to_print = elem; + } + os << "0x" << std::setw(2) << +static_cast(to_print); + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } } - os << "0x" << std::setw(2) << +static_cast(to_print); - if (i < num_elements - 1) { os << ", "; } - if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } - } - } else if (arr_type.bits() == 16) { - for (int i = 0; i < num_elements; i++) { - int16_t elem = static_cast(tensor->dl_tensor.data)[i]; - uint16_t to_print; - if (elem < 0) { - os << "-"; - to_print = -elem; - } else { - os << "+"; - to_print = elem; + } else if (arr_type.bits() == 16) { + for (int i = 0; i < num_elements; i++) { + int16_t elem = static_cast(tensor->dl_tensor.data)[i]; + uint16_t to_print; + if (elem < 0) { + os << "-"; + to_print = -elem; + } else { + os << "+"; + to_print = elem; + } + os << "0x" << std::setw(4) << to_print; + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } } - os << "0x" << std::setw(4) << to_print; - if (i < num_elements - 1) { os << ", "; } - if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } - } - } else if (arr_type.bits() == 32) { - for (int i = 0; i < num_elements; i++) { - int32_t elem = static_cast(tensor->dl_tensor.data)[i]; - uint32_t to_print; - if (elem < 0) { - os << "-"; - to_print = -elem; - } else { - os << "+"; - to_print = elem; + } else if (arr_type.bits() == 32) { + for (int i = 0; i < num_elements; i++) { + int32_t elem = static_cast(tensor->dl_tensor.data)[i]; + uint32_t to_print; + if (elem < 0) { + os << "-"; + to_print = -elem; + } else { + os << "+"; + to_print = elem; + } + os << "0x" << std::setw(8) << to_print; + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } } - os << "0x" << std::setw(8) << to_print; - if (i < num_elements - 1) { os << ", "; } - if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } - } - } else if (arr_type.bits() == 64) { - for (int i = 0; i < num_elements; i++) { - int64_t elem = static_cast(tensor->dl_tensor.data)[i]; - uint64_t to_print; - if (elem < 0) { - os << "-"; - to_print = -elem; - } else { - os << "+"; - to_print = elem; + } else if (arr_type.bits() == 64) { + for (int i = 0; i < num_elements; i++) { + int64_t elem = static_cast(tensor->dl_tensor.data)[i]; + uint64_t to_print; + if (elem < 0) { + os << "-"; + to_print = -elem; + } else { + os << "+"; + to_print = elem; + } + os << "0x" << std::setw(16) << to_print; + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } } - os << "0x" << std::setw(16) << to_print; - 
if (i < num_elements - 1) { os << ", "; } - if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } - } - } else { - CHECK(false) << "should not get here"; - } - break; - - case runtime::DataType::TypeCode::kUInt: - CHECK(arr_type.bits() == 8 || - arr_type.bits() == 16 || - arr_type.bits() == 32 || - arr_type.bits() == 64) - << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " - << arr_type.bits() << "-bit array"; - - if (arr_type.bits() == 8) { - for (int i = 0; i < num_elements; i++) { - // NOTE: for special types int8_t and uint8_t, need to promote to int type to avoid printing - // as a char. - os << "0x" << std::setw(2) - << +static_cast(static_cast(tensor->dl_tensor.data)[i]); - if (i < num_elements - 1) { os << ", "; } - if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } + } else { + CHECK(false) << "should not get here"; } - } else if (arr_type.bits() == 16) { - for (int i = 0; i < num_elements; i++) { - os << "0x" << std::setw(4) << static_cast(tensor->dl_tensor.data)[i]; - if (i < num_elements - 1) { os << ", "; } - if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } - } - } else if (arr_type.bits() == 32) { - for (int i = 0; i < num_elements; i++) { - os << "0x" << std::setw(8) << static_cast(tensor->dl_tensor.data)[i]; - if (i < num_elements - 1) { os << ", "; } - if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } - } - } else if (arr_type.bits() == 64) { - for (int i = 0; i < num_elements; i++) { - os << "0x" << std::setw(16) << static_cast(tensor->dl_tensor.data)[i] << "UL"; - if (i < num_elements - 1) { os << ", "; } - if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } - } - } else { - CHECK(false) << "should not get here"; - } - break; - - case runtime::DataType::TypeCode::kFloat: { - std::stringstream ss; - ss.setf(std::ios::hex | std::ios::showbase | std::ios::fixed | std::ios::scientific, - std::ios::basefield | std::ios::showbase | std::ios::floatfield); - os.fill(' '); - os.setf(std::ios::left, std::ios::adjustfield); - if (arr_type.bits() == 32) { - for (int i = 0; i < num_elements; i++) { - float elem = static_cast(tensor->dl_tensor.data)[i]; - if (std::isinf(elem)) { - // C99 standard. - os << (elem < 0 ? "-" : " ") << std::setw(one_element_size_bytes - 1) << "INFINITY"; - } else if (std::isnan(elem)) { - // GNU extension, implemenatation-dependent. - os << std::setw(one_element_size_bytes) << "NAN"; - } else { - ss << elem; - os << std::setw(one_element_size_bytes) << ss.str(); - ss.str(""); + break; + + case runtime::DataType::TypeCode::kUInt: + CHECK(arr_type.bits() == 8 || arr_type.bits() == 16 || arr_type.bits() == 32 || + arr_type.bits() == 64) + << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " + << arr_type.bits() << "-bit array"; + + if (arr_type.bits() == 8) { + for (int i = 0; i < num_elements; i++) { + // NOTE: for special types int8_t and uint8_t, need to promote to int type to avoid + // printing as a char. 
+ os << "0x" << std::setw(2) + << +static_cast(static_cast(tensor->dl_tensor.data)[i]); + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } + } + } else if (arr_type.bits() == 16) { + for (int i = 0; i < num_elements; i++) { + os << "0x" << std::setw(4) << static_cast(tensor->dl_tensor.data)[i]; + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } } - if (i < num_elements - 1) { os << ", "; } - if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } + } else if (arr_type.bits() == 32) { + for (int i = 0; i < num_elements; i++) { + os << "0x" << std::setw(8) << static_cast(tensor->dl_tensor.data)[i]; + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } + } + } else if (arr_type.bits() == 64) { + for (int i = 0; i < num_elements; i++) { + os << "0x" << std::setw(16) << static_cast(tensor->dl_tensor.data)[i] << "UL"; + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } + } + } else { + CHECK(false) << "should not get here"; } - std::cout << "\n"; - } else if (arr_type.bits() == 64) { - for (int i = 0; i < num_elements; i++) { - double elem = static_cast(tensor->dl_tensor.data)[i]; - if (std::isinf(elem)) { - // C99 standard. - os << (elem < 0 ? "-" : " ") << std::setw(one_element_size_bytes - 1) << "INFINITY"; - } else if (std::isnan(elem)) { - // GNU extension, implemenatation-dependent. - os << std::setw(one_element_size_bytes) << "NAN"; - } else { - ss << elem; - os << std::setw(one_element_size_bytes) << ss.str(); - ss.str(""); + break; + + case runtime::DataType::TypeCode::kFloat: { + std::stringstream ss; + ss.setf(std::ios::hex | std::ios::showbase | std::ios::fixed | std::ios::scientific, + std::ios::basefield | std::ios::showbase | std::ios::floatfield); + os.fill(' '); + os.setf(std::ios::left, std::ios::adjustfield); + if (arr_type.bits() == 32) { + for (int i = 0; i < num_elements; i++) { + float elem = static_cast(tensor->dl_tensor.data)[i]; + if (std::isinf(elem)) { + // C99 standard. + os << (elem < 0 ? "-" : " ") << std::setw(one_element_size_bytes - 1) << "INFINITY"; + } else if (std::isnan(elem)) { + // GNU extension, implemenatation-dependent. + os << std::setw(one_element_size_bytes) << "NAN"; + } else { + ss << elem; + os << std::setw(one_element_size_bytes) << ss.str(); + ss.str(""); + } + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } } - if (i < num_elements - 1) { os << ", "; } - if (((i + 1) % elements_per_row) == 0) { os << "\n" << indent_str; } + std::cout << "\n"; + } else if (arr_type.bits() == 64) { + for (int i = 0; i < num_elements; i++) { + double elem = static_cast(tensor->dl_tensor.data)[i]; + if (std::isinf(elem)) { + // C99 standard. + os << (elem < 0 ? "-" : " ") << std::setw(one_element_size_bytes - 1) << "INFINITY"; + } else if (std::isnan(elem)) { + // GNU extension, implemenatation-dependent. 
+ os << std::setw(one_element_size_bytes) << "NAN"; + } else { + ss << elem; + os << std::setw(one_element_size_bytes) << ss.str(); + ss.str(""); + } + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } + } + } else { + CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " + << arr_type.bits() << "-bit array"; } - } else { - CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " - << arr_type.bits() << "-bit array"; + break; } - break; - } - default: - CHECK(false) << "Data type not supported"; + default: + CHECK(false) << "Data type not supported"; } if (num_elements % elements_per_row != 0) { diff --git a/src/target/llvm/codegen_params.h b/src/target/llvm/codegen_params.h index 8b8ba4f23cc6..6e3c3e8eafd6 100644 --- a/src/target/llvm/codegen_params.h +++ b/src/target/llvm/codegen_params.h @@ -24,10 +24,11 @@ #ifndef TVM_TARGET_LLVM_CODEGEN_PARAMS_H_ #define TVM_TARGET_LLVM_CODEGEN_PARAMS_H_ -#include "llvm_common.h" #include #include +#include "llvm_common.h" + namespace tvm { namespace codegen { @@ -35,14 +36,11 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& os); -void LLVMCodeGenParams(llvm::LLVMContext* ctx, - llvm::Module* module, - int64_t storage_id_offset, +void LLVMCodeGenParams(llvm::LLVMContext* ctx, llvm::Module* module, int64_t storage_id_offset, ::tvm::runtime::Array param_names, ::tvm::runtime::Array params_by_sid); - } // namespace codegen } // namespace tvm -#endif // TVM_TARGET_LLVM_CODEGEN_PARAMS_H_ +#endif // TVM_TARGET_LLVM_CODEGEN_PARAMS_H_ diff --git a/src/target/llvm/llvm_module.cc b/src/target/llvm/llvm_module.cc index ab2fcee00b9e..73a3594427d3 100644 --- a/src/target/llvm/llvm_module.cc +++ b/src/target/llvm/llvm_module.cc @@ -206,11 +206,12 @@ class LLVMModuleNode final : public runtime::ModuleNode { for (auto kv : mod->functions) { if (could_have_linked_params && kv.first->name_hint == ::tvm::runtime::symbol::tvm_lookup_linked_param) { - Map attrs_dict = Downcast>(kv.second->attrs->dict); + Map attrs_dict = + Downcast>(kv.second->attrs->dict); CHECK(attrs_dict.find(::tvm::tir::attr::kLinkedParams) != attrs_dict.end()) - << "no " << ::tvm::tir::attr::kLinkedParams << " attribute found!"; - linked_params = Downcast>( - attrs_dict[::tvm::tir::attr::kLinkedParams]); + << "no " << ::tvm::tir::attr::kLinkedParams << " attribute found!"; + linked_params = + Downcast>(attrs_dict[::tvm::tir::attr::kLinkedParams]); found_linked_params = true; continue; } diff --git a/src/target/source/codegen_c_host.cc b/src/target/source/codegen_c_host.cc index 915d43cffb13..f47e07e94bd1 100644 --- a/src/target/source/codegen_c_host.cc +++ b/src/target/source/codegen_c_host.cc @@ -23,8 +23,8 @@ #include "codegen_c_host.h" #include -#include #include +#include #include #include @@ -67,8 +67,7 @@ void CodeGenCHost::LinkParameters(Map params) { << "int* out_ret_tcode, void* resource_handle) {\n"; ICHECK_EQ(GetUniqueName(tvm::runtime::symbol::tvm_lookup_linked_param), tvm::runtime::symbol::tvm_lookup_linked_param) - << "builtin PackedFunc name already taken: " - << tvm::runtime::symbol::tvm_lookup_linked_param; + << "builtin PackedFunc name already taken: " << tvm::runtime::symbol::tvm_lookup_linked_param; stream << " switch (((int64_t*) args)[0]) {\n" << " default:\n" << " out_ret_tcode[0] = " << kTVMNullptr << ";\n" @@ -86,15 +85,16 @@ void 
CodeGenCHost::LinkParameters(Map params) { num_elements *= dim; } PrintType(kv.second->param.DataType(), decl_stream); - decl_stream << " " << ::tvm::runtime::symbol::tvm_param_prefix - << kv.first << "[" << num_elements << "] = {\n"; + decl_stream << " " << ::tvm::runtime::symbol::tvm_param_prefix << kv.first << "[" + << num_elements << "] = {\n"; NDArrayDataToC(kv.second->param, 4, decl_stream); decl_stream << "};\n" << "#ifdef __cplusplus\n" << "} // extern \"C\"\n" << "#endif\n"; stream << " case " << kv.second->id << ":\n" - << " ((uint64_t*)out_ret_value)[0] = (uint64_t) (uintptr_t) " << ::tvm::runtime::symbol::tvm_param_prefix << kv.first << ";\n" + << " ((uint64_t*)out_ret_value)[0] = (uint64_t) (uintptr_t) " + << ::tvm::runtime::symbol::tvm_param_prefix << kv.first << ";\n" << " out_ret_tcode[0] = " << kTVMOpaqueHandle << ";\n" << " return 0;\n"; } @@ -352,17 +352,17 @@ runtime::Module BuildCHost(IRModule mod, Target target) { CodeGenCHost cg; cg.Init(output_ssa, emit_asserts, target->str()); - Map linked_params; + Map linked_params; bool found_linked_params = false; bool could_have_linked_params = target->GetAttr("link-params").value_or(Bool(false)); for (auto kv : mod->functions) { if (could_have_linked_params && kv.first->name_hint == ::tvm::runtime::symbol::tvm_lookup_linked_param) { - Map attrs_dict = Downcast>(kv.second->attrs->dict); + Map attrs_dict = Downcast>(kv.second->attrs->dict); CHECK(attrs_dict.find(::tvm::tir::attr::kLinkedParams) != attrs_dict.end()) - << "no " << ::tvm::tir::attr::kLinkedParams << " attribute found!"; - linked_params = Downcast>( - attrs_dict[::tvm::tir::attr::kLinkedParams]); + << "no " << ::tvm::tir::attr::kLinkedParams << " attribute found!"; + linked_params = + Downcast>(attrs_dict[::tvm::tir::attr::kLinkedParams]); found_linked_params = true; continue; } diff --git a/tests/micro/qemu/zephyr-runtime/src/main.c b/tests/micro/qemu/zephyr-runtime/src/main.c index 91b13de7d04d..1fa32e384c0b 100644 --- a/tests/micro/qemu/zephyr-runtime/src/main.c +++ b/tests/micro/qemu/zephyr-runtime/src/main.c @@ -57,8 +57,8 @@ ssize_t write_serial(void* unused_context, const uint8_t* data, size_t size) { return size; } -size_t TVMPlatformFormatMessage(char* out_buf, size_t out_buf_size_bytes, - const char* fmt, va_list args) { +size_t TVMPlatformFormatMessage(char* out_buf, size_t out_buf_size_bytes, const char* fmt, + va_list args) { return vsnprintk(out_buf, out_buf_size_bytes, fmt, args); } From 261eda746032407416bd6a745821e3a92791eb1d Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 13 Nov 2020 15:13:29 -0800 Subject: [PATCH 16/60] address c++ lint --- src/relay/backend/graph_runtime_codegen.cc | 2 +- src/target/llvm/codegen_llvm.cc | 2 +- src/target/llvm/codegen_params.cc | 21 ++++++++++++--------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/relay/backend/graph_runtime_codegen.cc b/src/relay/backend/graph_runtime_codegen.cc index a5073326c13c..b0b9d206a1a2 100644 --- a/src/relay/backend/graph_runtime_codegen.cc +++ b/src/relay/backend/graph_runtime_codegen.cc @@ -206,7 +206,7 @@ class GraphRuntimeCodegen : public backend::MemoizedExprTranslator>(); for (auto param : params_) { ret.params.emplace(std::make_pair( - param.first, std::make_pair(int(param_storage_ids_[param.first]), param.second))); + param.first, std::make_pair(static_cast(param_storage_ids_[param.first]), param.second))); } for (auto& kv : lowered_funcs_) { diff --git a/src/target/llvm/codegen_llvm.cc b/src/target/llvm/codegen_llvm.cc index 
611ce47c6126..dbc41a6e1a54 100644 --- a/src/target/llvm/codegen_llvm.cc +++ b/src/target/llvm/codegen_llvm.cc @@ -247,7 +247,7 @@ void CodeGenLLVM::LinkParameters(const Map params) { auto array = NDArrayToLLVMArray(ctx_, kv.second->param); std::cout << "param " << kv.first << ": "; array->print(os); - std::string symbol_name = std::string{::tvm::runtime::symbol::tvm_param_prefix} + kv.first; + std::string symbol_name = std::string(::tvm::runtime::symbol::tvm_param_prefix) + kv.first; llvm::GlobalVariable* param_symbol = new llvm::GlobalVariable( *module_, array->getType(), true, llvm::GlobalValue::InternalLinkage, array, symbol_name); diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index 688daf6a7191..958d4db0ac29 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -25,6 +25,9 @@ #include "codegen_params.h" #include +#include +#include +#include namespace tvm { namespace codegen { @@ -66,17 +69,17 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: } else if (arr_type.bits() == 16) { for (int i = 0; i < num_elements; i++) { elements.emplace_back( - llvm::ConstantInt::getSigned(element_type, ((int16_t*)tensor->dl_tensor.data)[i])); + llvm::ConstantInt::getSigned(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else if (arr_type.bits() == 32) { for (int i = 0; i < num_elements; i++) { elements.emplace_back( - llvm::ConstantInt::getSigned(element_type, ((int32_t*)tensor->dl_tensor.data)[i])); + llvm::ConstantInt::getSigned(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else if (arr_type.bits() == 64) { for (int i = 0; i < num_elements; i++) { elements.emplace_back( - llvm::ConstantInt::getSigned(element_type, ((int64_t*)tensor->dl_tensor.data)[i])); + llvm::ConstantInt::getSigned(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else { CHECK(false) << "should not get here"; @@ -93,22 +96,22 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: if (arr_type.bits() == 8) { for (int i = 0; i < num_elements; i++) { elements.emplace_back( - llvm::ConstantInt::get(element_type, ((int8_t*)tensor->dl_tensor.data)[i])); + llvm::ConstantInt::get(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else if (arr_type.bits() == 16) { for (int i = 0; i < num_elements; i++) { elements.emplace_back( - llvm::ConstantInt::get(element_type, ((int16_t*)tensor->dl_tensor.data)[i])); + llvm::ConstantInt::get(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else if (arr_type.bits() == 32) { for (int i = 0; i < num_elements; i++) { elements.emplace_back( - llvm::ConstantInt::get(element_type, ((int32_t*)tensor->dl_tensor.data)[i])); + llvm::ConstantInt::get(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else if (arr_type.bits() == 64) { for (int i = 0; i < num_elements; i++) { elements.emplace_back( - llvm::ConstantInt::get(element_type, ((int64_t*)tensor->dl_tensor.data)[i])); + llvm::ConstantInt::get(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else { CHECK(false) << "should not get here"; @@ -120,13 +123,13 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: element_type = llvm::Type::getFloatTy(*ctx); for (int i = 0; i < num_elements; i++) { elements.emplace_back( - llvm::ConstantFP::get(element_type, ((float*)tensor->dl_tensor.data)[i])); + llvm::ConstantFP::get(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else if 
(arr_type.bits() == 64) { element_type = llvm::Type::getDoubleTy(*ctx); for (int i = 0; i < num_elements; i++) { elements.emplace_back( - llvm::ConstantFP::get(element_type, ((double*)tensor->dl_tensor.data)[i])); + llvm::ConstantFP::get(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else { CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " From c0d2c0d3d572d7ee6d754c8c432f07ce4d15d78b Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 13 Nov 2020 15:13:43 -0800 Subject: [PATCH 17/60] git-clang-format --- src/relay/backend/graph_runtime_codegen.cc | 3 +- src/target/llvm/codegen_params.cc | 36 +++++++++++----------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/relay/backend/graph_runtime_codegen.cc b/src/relay/backend/graph_runtime_codegen.cc index b0b9d206a1a2..f35f144181c6 100644 --- a/src/relay/backend/graph_runtime_codegen.cc +++ b/src/relay/backend/graph_runtime_codegen.cc @@ -206,7 +206,8 @@ class GraphRuntimeCodegen : public backend::MemoizedExprTranslator>(); for (auto param : params_) { ret.params.emplace(std::make_pair( - param.first, std::make_pair(static_cast(param_storage_ids_[param.first]), param.second))); + param.first, + std::make_pair(static_cast(param_storage_ids_[param.first]), param.second))); } for (auto& kv : lowered_funcs_) { diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index 958d4db0ac29..a7d79c466391 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -68,18 +68,18 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: } } else if (arr_type.bits() == 16) { for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantInt::getSigned(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); + elements.emplace_back(llvm::ConstantInt::getSigned( + element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else if (arr_type.bits() == 32) { for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantInt::getSigned(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); + elements.emplace_back(llvm::ConstantInt::getSigned( + element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else if (arr_type.bits() == 64) { for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantInt::getSigned(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); + elements.emplace_back(llvm::ConstantInt::getSigned( + element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else { CHECK(false) << "should not get here"; @@ -95,23 +95,23 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: if (arr_type.bits() == 8) { for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantInt::get(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); + elements.emplace_back(llvm::ConstantInt::get( + element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else if (arr_type.bits() == 16) { for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantInt::get(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); + elements.emplace_back(llvm::ConstantInt::get( + element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else if (arr_type.bits() == 32) { for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantInt::get(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); + 
elements.emplace_back(llvm::ConstantInt::get( + element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else if (arr_type.bits() == 64) { for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantInt::get(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); + elements.emplace_back(llvm::ConstantInt::get( + element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else { CHECK(false) << "should not get here"; @@ -122,14 +122,14 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: if (arr_type.bits() == 32) { element_type = llvm::Type::getFloatTy(*ctx); for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantFP::get(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); + elements.emplace_back(llvm::ConstantFP::get( + element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else if (arr_type.bits() == 64) { element_type = llvm::Type::getDoubleTy(*ctx); for (int i = 0; i < num_elements; i++) { - elements.emplace_back( - llvm::ConstantFP::get(element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); + elements.emplace_back(llvm::ConstantFP::get( + element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); } } else { CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " From 601616a0d69ac4ec5d23ce2111ddb1c3438f5c68 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 13 Nov 2020 15:19:44 -0800 Subject: [PATCH 18/60] rm extra comments --- src/runtime/rpc/rpc_module.cc | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/runtime/rpc/rpc_module.cc b/src/runtime/rpc/rpc_module.cc index c34ec26fb6c4..f8f95c5b7e28 100644 --- a/src/runtime/rpc/rpc_module.cc +++ b/src/runtime/rpc/rpc_module.cc @@ -475,19 +475,9 @@ TVM_REGISTER_GLOBAL("rpc.SessTableIndex").set_body([](TVMArgs args, TVMRetValue* TVM_REGISTER_GLOBAL("tvm.rpc.NDArrayFromRemoteOpaqueHandle") .set_body_typed([](Module mod, void* remote_array, DLTensor* template_tensor, TVMContext ctx, PackedFunc deleter) -> NDArray { - // auto func = new std::function([deleter]() -> void { - // deleter(); - // }); return NDArrayFromRemoteOpaqueHandle( RPCModuleGetSession(mod), remote_array, template_tensor, ctx, - [](Object* context) { - // auto container = static_cast(context); - // auto cb_func = - // reinterpret_cast*>(container->manager_ctx); - // (*cb_func)(); - // delete cb_func; - }, - nullptr); //(void*) func); + [](Object* context) {}, nullptr); }); } // namespace runtime From cf22894043f180584beb0cc35482a13f9bc43de2 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 13 Nov 2020 15:20:03 -0800 Subject: [PATCH 19/60] git-clang-format --- src/runtime/rpc/rpc_module.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/rpc/rpc_module.cc b/src/runtime/rpc/rpc_module.cc index f8f95c5b7e28..12510e0fac3a 100644 --- a/src/runtime/rpc/rpc_module.cc +++ b/src/runtime/rpc/rpc_module.cc @@ -476,8 +476,8 @@ TVM_REGISTER_GLOBAL("tvm.rpc.NDArrayFromRemoteOpaqueHandle") .set_body_typed([](Module mod, void* remote_array, DLTensor* template_tensor, TVMContext ctx, PackedFunc deleter) -> NDArray { return NDArrayFromRemoteOpaqueHandle( - RPCModuleGetSession(mod), remote_array, template_tensor, ctx, - [](Object* context) {}, nullptr); + RPCModuleGetSession(mod), remote_array, template_tensor, ctx, [](Object* context) {}, + nullptr); }); } // namespace runtime From ad5837e91249972b2fbe2998150c639b6e098f88 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: 
Fri, 13 Nov 2020 16:52:43 -0800 Subject: [PATCH 20/60] pylint --- python/tvm/contrib/binutils.py | 3 --- python/tvm/micro/debugger.py | 12 ++++++------ python/tvm/micro/session.py | 22 ++++++++++++++++++++++ python/tvm/relay/param_dict.py | 16 ---------------- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/python/tvm/contrib/binutils.py b/python/tvm/contrib/binutils.py index 53f92b9855fe..ea6b6320fbbd 100644 --- a/python/tvm/contrib/binutils.py +++ b/python/tvm/contrib/binutils.py @@ -17,10 +17,7 @@ """Utilities for binary file manipulation""" import logging -import os import subprocess -import tvm._ffi -from . import utils _LOG = logging.getLogger(__name__) diff --git a/python/tvm/micro/debugger.py b/python/tvm/micro/debugger.py index 9dd496a950e5..138b43d7cdc1 100644 --- a/python/tvm/micro/debugger.py +++ b/python/tvm/micro/debugger.py @@ -236,11 +236,11 @@ def write(self, data, timeout_sec): while timeout_sec == 0 or time.monotonic() < end_time: try: return self.gdb_transport_debugger.fd_transport.write(data, timeout_sec) - except OSError as e: - if e.errno == errno.EAGAIN: + except OSError as exc: + if exc.errno == errno.EAGAIN: time.sleep(0.1) continue - raise e + raise exc raise base.IoTimeoutError() @@ -249,11 +249,11 @@ def read(self, n, timeout_sec): while timeout_sec == 0 or time.monotonic() < end_time: try: return self.gdb_transport_debugger.fd_transport.read(n, timeout_sec) - except OSError as e: - if e.errno == errno.EAGAIN: + except OSError as exc: + if exc.errno == errno.EAGAIN: time.sleep(0.1) continue - raise e + raise exc raise base.IoTimeoutError() diff --git a/python/tvm/micro/session.py b/python/tvm/micro/session.py index adbad20cda06..5bc01186096f 100644 --- a/python/tvm/micro/session.py +++ b/python/tvm/micro/session.py @@ -155,6 +155,28 @@ def __exit__(self, exc_type, exc_value, exc_traceback): def lookup_remote_linked_param(mod, storage_id, template_tensor, ctx): + """Lookup a parameter that has been pre-linked into a remote (i.e. over RPC) Module. + + This function signature matches the signature built by + + Parameters + ---------- + mod : tvm.runtime.Module + The remote Module containing the pre-linked parameters. + storage_id : int + An integer identifying the pre-linked paramter to find + template_tensor : DLTensor + A DLTensor containing metadata that should be filled-in to the returned NDArray. This + function should mostly not inspect this, and just pass it along to + NDArrayFromRemoteOpaqueHandle. + ctx : TVMContext + The remote CPU context to be used with the returned NDArray. + + Returns + ------- + tvm.nd.NDArray : + NDArray containing the pre-linked parameter. + """ try: lookup_linked_param = mod.get_function("_lookup_linked_param") except KeyError: diff --git a/python/tvm/relay/param_dict.py b/python/tvm/relay/param_dict.py index 37b4f1c72c4a..503a43ce3d4b 100644 --- a/python/tvm/relay/param_dict.py +++ b/python/tvm/relay/param_dict.py @@ -77,19 +77,3 @@ def load_param_dict(param_bytes): param_bytes = bytearray(param_bytes) load_arr = _load_param_dict(param_bytes) return {v.name: v.array for v in load_arr} - - -def linkable_param_dict(graph_json, params, target): - graph = json.loads(graph_json) - data_by_sid = [None] * len(params) - for param_name, param in params.items(): - for node in graph["nodes"]: - if node["name"] == param_name: - sid = node["storage_id"] - data_by_sid[sid] = param - - # GraphRuntimeCodegen is expected to allocated the first len(params) storage_ids to contain - # parameters. 
- assert all(d is not None for d in data_by_sid) - - data_ From 154bf5f871ee9428ba86f9d2b85cc48ec39baa9e Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 13 Nov 2020 16:59:54 -0800 Subject: [PATCH 21/60] pylint again --- python/tvm/relay/param_dict.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tvm/relay/param_dict.py b/python/tvm/relay/param_dict.py index 503a43ce3d4b..2d0398e20486 100644 --- a/python/tvm/relay/param_dict.py +++ b/python/tvm/relay/param_dict.py @@ -16,7 +16,6 @@ # under the License. # pylint: disable=invalid-name """Helper utility to save parameter dicts.""" -import json import tvm import tvm._ffi From 4d9fc2ed22338b78b889bbe606c5424f4cc1b6c8 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Fri, 13 Nov 2020 17:16:50 -0800 Subject: [PATCH 22/60] rm debugging breaking build --- src/runtime/graph/graph_runtime.cc | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/runtime/graph/graph_runtime.cc b/src/runtime/graph/graph_runtime.cc index e0401134cccc..293de2276621 100644 --- a/src/runtime/graph/graph_runtime.cc +++ b/src/runtime/graph/graph_runtime.cc @@ -282,20 +282,6 @@ void GraphRuntime::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { *rv = NDArray(GetObjectPtr(container.release())); } -std::string List2String(std::vector shape) { - if (shape.size() == 0) { - return "[]"; - } - - std::stringstream ss; - ss << "[" << shape[0]; - for (int i = 1; i < shape.size(); i++) { - ss << ", " << shape[i]; - } - ss << "]"; - return ss.str(); -} - void GraphRuntime::SetupStorage() { // Grab saved optimization plan from graph. std::vector vtype; @@ -373,9 +359,6 @@ void GraphRuntime::SetupStorage() { for (size_t i = 0; i < data_entry_.size(); ++i) { int storage_id = attrs_.storage_id[i]; ICHECK_LT(static_cast(storage_id), storage_pool_.size()); - LOG(INFO) << "sid " << i << ": (" << List2String(storage_pool_[storage_id].Shape()) - << ", dtype=" << storage_pool_[storage_id].DataType() << ")" - << ": setup view: " << List2String(attrs_.shape[i]); data_entry_[i] = storage_pool_[storage_id].CreateView(attrs_.shape[i], vtype[i]); const DLTensor* tmp = data_entry_[i].operator->(); From 891ccf52e7f83036f2ad8af22120435efdf0eab6 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 16 Nov 2020 19:37:04 -0800 Subject: [PATCH 23/60] fix incorrect parameter passing in GraphRuntimeModule --- src/runtime/crt/graph_runtime_module/graph_runtime_module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/crt/graph_runtime_module/graph_runtime_module.c b/src/runtime/crt/graph_runtime_module/graph_runtime_module.c index 3e73efcc62ab..2a32a0251507 100644 --- a/src/runtime/crt/graph_runtime_module/graph_runtime_module.c +++ b/src/runtime/crt/graph_runtime_module/graph_runtime_module.c @@ -114,7 +114,7 @@ int32_t TVMGraphRuntimeModule_GetNumOutputs(TVMValue* args, int* tcodes, int nar return kTvmErrorFunctionCallNumArguments; } - ret_values[0].v_int64 = TVMGraphRuntime_GetNumOutputs(); + ret_values[0].v_int64 = TVMGraphRuntime_GetNumOutputs(graph_runtime.runtime); ret_tcodes[0] = kTVMArgInt; return 0; } @@ -131,7 +131,7 @@ int32_t TVMGraphRuntimeModule_GetOutput(TVMValue* args, int* tcodes, int nargs, } int output_index = args[0].v_int64; - if (output_index < 0 || output_index > TVMGraphRuntime_GetNumOutputs()) { + if (output_index < 0 || output_index > TVMGraphRuntime_GetNumOutputs(graph_runtime.runtime)) { return kTvmErrorGraphModuleNoSuchInput; } From df132faf28ee9627220ea6f5bef3041a8997aaa4 Mon Sep 17 00:00:00 2001 From: 
Andrew Reusch Date: Mon, 16 Nov 2020 19:37:30 -0800 Subject: [PATCH 24/60] fixes for LLVM 4.0 and i386 --- src/target/llvm/codegen_llvm.cc | 68 +++++++++++++++------------------ 1 file changed, 31 insertions(+), 37 deletions(-) diff --git a/src/target/llvm/codegen_llvm.cc b/src/target/llvm/codegen_llvm.cc index dbc41a6e1a54..1d63d35055dd 100644 --- a/src/target/llvm/codegen_llvm.cc +++ b/src/target/llvm/codegen_llvm.cc @@ -197,7 +197,7 @@ void CodeGenLLVM::LinkParameters(const Map params) { // tcodes param_types.push_back(t_int_->getPointerTo(GetGlobalAddressSpace())); // num_args - param_types.push_back(t_int64_); + param_types.push_back(t_int_); // ret_args param_types.push_back(t_void_->getPointerTo(GetGlobalAddressSpace())); // ret_tcodes @@ -219,15 +219,19 @@ void CodeGenLLVM::LinkParameters(const Map params) { llvm::BasicBlock* entry = llvm::BasicBlock::Create(*ctx_, "entry", function); builder_->SetInsertPoint(entry); std::vector zero_index_list{llvm::ConstantInt::get(t_int32_, 0)}; + std::vector zero_array_index_list{llvm::ConstantInt::get(t_int32_, 0), + llvm::ConstantInt::get(t_int32_, 0)}; auto args_array = builder_->CreateBitCast( +#if TVM_LLVM_VERSION >= 50 &function->arg_begin()[0], +#else + &(*(function->arg_begin())), +#endif llvm::ArrayType::get(t_void_->getPointerTo(GetGlobalAddressSpace()), 1)); llvm::Value* sid = builder_->CreateBitCast( builder_->CreateLoad(t_void_->getPointerTo(GetGlobalAddressSpace()), builder_->CreateInBoundsGEP(args_array, zero_index_list)), t_int64_); - // - // builder_->CreateGEP(&function->arg_begin()[0], zero_index_list), t_int64_); llvm::BasicBlock* default_block = llvm::BasicBlock::Create(*ctx_, "default_block", function); llvm::SwitchInst* switch_inst = builder_->CreateSwitch(sid, default_block, params.size() + 1); @@ -235,9 +239,16 @@ void CodeGenLLVM::LinkParameters(const Map params) { builder_->SetInsertPoint(default_block); { auto ret_types_array = - builder_->CreateBitCast(&function->arg_begin()[4], llvm::ArrayType::get(t_int_, 1)); + builder_->CreateBitCast( +#if TVM_LLVM_VERSION >= 50 + &function->arg_begin()[4], +#else + &(*(std::next(function->arg_begin(), 4))), +#endif + llvm::ArrayType::get(t_int_, 1)->getPointerTo()); + builder_->CreateStore(llvm::ConstantInt::get(t_int_, kTVMNullptr), - builder_->CreateGEP(ret_types_array, zero_index_list)); + builder_->CreateGEP(ret_types_array, zero_array_index_list)); builder_->CreateRet(ConstInt32(kTvmErrorNoError)); } @@ -245,8 +256,6 @@ void CodeGenLLVM::LinkParameters(const Map params) { for (auto kv : params) { auto array = NDArrayToLLVMArray(ctx_, kv.second->param); - std::cout << "param " << kv.first << ": "; - array->print(os); std::string symbol_name = std::string(::tvm::runtime::symbol::tvm_param_prefix) + kv.first; llvm::GlobalVariable* param_symbol = new llvm::GlobalVariable( *module_, array->getType(), true, llvm::GlobalValue::InternalLinkage, array, symbol_name); @@ -256,42 +265,27 @@ void CodeGenLLVM::LinkParameters(const Map params) { llvm::cast(llvm::ConstantInt::get(t_int64_, kv.second->id)), case_block); builder_->SetInsertPoint(case_block); auto retval_array = builder_->CreateBitCast( +#if TVM_LLVM_VERSION >= 50 &function->arg_begin()[3], - llvm::ArrayType::get(t_void_->getPointerTo(GetGlobalAddressSpace()), 1)); +#else + &(*std::next(function->arg_begin(), 3)), +#endif + llvm::ArrayType::get(t_void_->getPointerTo(GetGlobalAddressSpace()), 1)->getPointerTo()); builder_->CreateStore( builder_->CreatePointerCast(param_symbol, t_void_->getPointerTo(GetGlobalAddressSpace())), 
- builder_->CreateGEP(retval_array, zero_index_list)); + builder_->CreateGEP(retval_array, zero_array_index_list)); auto ret_types_array = - builder_->CreateBitCast(&function->arg_begin()[4], llvm::ArrayType::get(t_int_, 1)); - builder_->CreateStore(llvm::ConstantInt::get(t_int_, kTVMOpaqueHandle), - builder_->CreateGEP(ret_types_array, zero_index_list)); + builder_->CreateBitCast( +#if TVM_LLVM_VERSION >= 50 + &function->arg_begin()[4], +#else + &(*std::next(function->arg_begin(), 4)), +#endif + llvm::ArrayType::get(t_int_, 1)->getPointerTo()); + builder_->CreateStore(llvm::ConstantInt::get(t_int_, kTVMOpaqueHandle), + builder_->CreateGEP(ret_types_array, zero_array_index_list)); builder_->CreateRet(ConstInt32(0)); } - - std::cout << "generated function: " << std::endl; - function->print(os); - - // llvm::Value* sid_start = module_->getGlobalVariable(module::tvm_param_array_sid_start); - // llvm::Value* cond = builder_->CreateAnd( - // builder_->CreateICmpSGE(sid, sid_start), - // builder_->CreateICmpSLT(sid, - // module_->getGlobalVariable(module::tvm_param_array_sid_end))); - - // BasicBlock* then_block = BasicBlock::Create(*ctx_, "if_then", function_); - // builder_->CreateCondBr(cond, then_block, else_block); - - // // SID valid block (fetch sid data pointer and write to ret_values). - // builder_->SetInsertPoint(then_block); - // std::vector sid_index_list{builder_->CreateISub(sid, sid_start)}; - // builder_->CreateStore( - // builder_->CreateGEP(module_->getGlobalVariable(module::tvm_param_array), sid_index_list), - // builder_->CreateBitCast( - // builder_->CreateGEP(function->getArg(3), zero_index_list), t_int64_ty_)); - // NOTE: set ret_tcode[0] to kTVMOpaqueHandle because the 'data' pointer of a DLTensor is returned - // here, *not* a proper DLTensor. It is up to the caller to create a DLTensor that correctly - // describes the returned data pointer. - - // SID invalid block (return invalid SID error). 
 }
 
 std::unique_ptr<llvm::Module> CodeGenLLVM::Finish() {

From 8bb51e426bd87ed60267f6208ca4e3063e929d76 Mon Sep 17 00:00:00 2001
From: Andrew Reusch 
Date: Mon, 16 Nov 2020 19:37:45 -0800
Subject: [PATCH 25/60] set default for --link-params

---
 src/target/target_kind.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc
index f249ef8f529d..3294a7870d66 100644
--- a/src/target/target_kind.cc
+++ b/src/target/target_kind.cc
@@ -213,12 +213,12 @@ TVM_REGISTER_TARGET_KIND("llvm", kDLCPU)
     .add_attr_option<String>("mfloat-abi")
     .add_attr_option<Bool>("system-lib")
     .add_attr_option<String>("runtime")
-    .add_attr_option<Bool>("link-params")
+    .add_attr_option<Bool>("link-params", Bool(false))
     .set_default_keys({"cpu"});
 
 TVM_REGISTER_TARGET_KIND("c", kDLCPU)
     .add_attr_option<Bool>("system-lib")
-    .add_attr_option<Bool>("link-params")
+    .add_attr_option<Bool>("link-params", Bool(false))
     .add_attr_option<String>("runtime")
     .add_attr_option<String>("mcpu")
     .set_default_keys({"cpu"});

From 03432d2c2b566fcce2d6e35ed9d2348db9c4bc8a Mon Sep 17 00:00:00 2001
From: Andrew Reusch 
Date: Mon, 16 Nov 2020 19:39:09 -0800
Subject: [PATCH 26/60] switch link order for proper library symbol resolution

---
 tests/python/unittest/test_link_params.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python/unittest/test_link_params.py b/tests/python/unittest/test_link_params.py
index 3dc3122af81c..4b6692d79d10 100644
--- a/tests/python/unittest/test_link_params.py
+++ b/tests/python/unittest/test_link_params.py
@@ -358,7 +358,7 @@ def test_crt_link_params():
         bin_opts=opts["bin_opts"],
         extra_libs=[
             os.path.join(tvm.micro.CRT_ROOT_DIR, m)
-            for m in ("graph_runtime", "graph_runtime_module")
+            for m in ("graph_runtime_module", "graph_runtime")
         ],
     )

From b13472a9732c5947675367243818a017aa14f271 Mon Sep 17 00:00:00 2001
From: Andrew Reusch 
Date: Mon, 16 Nov 2020 20:23:34 -0800
Subject: [PATCH 27/60] git-clang-format

---
 src/target/llvm/codegen_llvm.cc | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/target/llvm/codegen_llvm.cc b/src/target/llvm/codegen_llvm.cc
index 1d63d35055dd..11da661cceac 100644
--- a/src/target/llvm/codegen_llvm.cc
+++ b/src/target/llvm/codegen_llvm.cc
@@ -238,14 +238,13 @@ void CodeGenLLVM::LinkParameters(const Map<String, LinkedParam> params) {
 
   builder_->SetInsertPoint(default_block);
   {
-    auto ret_types_array =
-        builder_->CreateBitCast(
+    auto ret_types_array = builder_->CreateBitCast(
 #if TVM_LLVM_VERSION >= 50
-            &function->arg_begin()[4],
+        &function->arg_begin()[4],
 #else
-            &(*(std::next(function->arg_begin(), 4))),
+        &(*(std::next(function->arg_begin(), 4))),
 #endif
-            llvm::ArrayType::get(t_int_, 1)->getPointerTo());
+        llvm::ArrayType::get(t_int_, 1)->getPointerTo());
 
     builder_->CreateStore(llvm::ConstantInt::get(t_int_, kTVMNullptr),
                           builder_->CreateGEP(ret_types_array, zero_array_index_list));
@@ -274,16 +273,15 @@ void CodeGenLLVM::LinkParameters(const Map<String, LinkedParam> params) {
     builder_->CreateStore(
         builder_->CreatePointerCast(param_symbol, t_void_->getPointerTo(GetGlobalAddressSpace())),
         builder_->CreateGEP(retval_array, zero_array_index_list));
-    auto ret_types_array =
-        builder_->CreateBitCast(
+    auto ret_types_array = builder_->CreateBitCast(
 #if TVM_LLVM_VERSION >= 50
-            &function->arg_begin()[4],
+        &function->arg_begin()[4],
 #else
             &(*std::next(function->arg_begin(), 4)),
 #endif
-            llvm::ArrayType::get(t_int_, 1)->getPointerTo());
-    builder_->CreateStore(llvm::ConstantInt::get(t_int_, kTVMOpaqueHandle),
-                          builder_->CreateGEP(ret_types_array, zero_array_index_list));
+
llvm::ArrayType::get(t_int_, 1)->getPointerTo()); + builder_->CreateStore(llvm::ConstantInt::get(t_int_, kTVMOpaqueHandle), + builder_->CreateGEP(ret_types_array, zero_array_index_list)); builder_->CreateRet(ConstInt32(0)); } } From 02d9744f63793e82ccb9338d37d6676852818bb9 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 16 Nov 2020 20:35:42 -0800 Subject: [PATCH 28/60] black format + pylint --- python/tvm/micro/transport/file_descriptor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tvm/micro/transport/file_descriptor.py b/python/tvm/micro/transport/file_descriptor.py index 6df6cd425eff..22377f0eea2e 100644 --- a/python/tvm/micro/transport/file_descriptor.py +++ b/python/tvm/micro/transport/file_descriptor.py @@ -20,7 +20,6 @@ import fcntl import os import select -import time from . import base From d9c7b9cc6bfc8dd6d79c188df16465b8ca8d04b9 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 16 Nov 2020 20:38:20 -0800 Subject: [PATCH 29/60] pylint again --- python/tvm/micro/transport/file_descriptor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/tvm/micro/transport/file_descriptor.py b/python/tvm/micro/transport/file_descriptor.py index 22377f0eea2e..6df6cd425eff 100644 --- a/python/tvm/micro/transport/file_descriptor.py +++ b/python/tvm/micro/transport/file_descriptor.py @@ -20,6 +20,7 @@ import fcntl import os import select +import time from . import base From 3ddadf337b171eb31c9e1b1519ee7664bd558122 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 16 Nov 2020 20:58:12 -0800 Subject: [PATCH 30/60] fix target_test to recognize --link-params --- tests/cpp/target_test.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/cpp/target_test.cc b/tests/cpp/target_test.cc index 3d528f821059..a422f12b04d7 100644 --- a/tests/cpp/target_test.cc +++ b/tests/cpp/target_test.cc @@ -147,8 +147,9 @@ TEST(TargetCreation, DeduplicateKeys) { ICHECK_EQ(target->keys.size(), 2U); ICHECK_EQ(target->keys[0], "cpu"); ICHECK_EQ(target->keys[1], "arm_cpu"); - ICHECK_EQ(target->attrs.size(), 1U); + ICHECK_EQ(target->attrs.size(), 2U); ICHECK_EQ(target->GetAttr("device"), "arm_cpu"); + ICHECK_EQ(target->GetAttr("link-params"), false); } int main(int argc, char** argv) { From ef740219d0ce61aac67fefbc74de0a06002689a1 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 17 Nov 2020 09:15:48 -0800 Subject: [PATCH 31/60] remove --link-params from default micro() target. * can be set with options=. 
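
Parameter linking can still be enabled per-target by passing the flag through options=. A
minimal sketch, assuming micro() stays importable from python/tvm/target/target.py and keeps
its options= keyword; the board name is just an illustrative entry from the trans_table touched
below:

    from tvm.target.target import micro

    # Opt back in to parameter linking for a single micro target.
    target = micro("stm32f746xx", options=["--link-params"])
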
--- python/tvm/target/target.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/target/target.py b/python/tvm/target/target.py index cd874b8bffe4..a8934539020a 100644 --- a/python/tvm/target/target.py +++ b/python/tvm/target/target.py @@ -236,7 +236,7 @@ def micro(model="unknown", options=None): "stm32f746xx": ["-mcpu=cortex-m7"], } opts = _merge_opts( - trans_table[model] + ["-runtime=c", "--system-lib", "--link-params", f"-model={model}"], + trans_table[model] + ["-runtime=c", "--system-lib", f"-model={model}"], options, ) From 51776082a48d1fb38d2a3a6eb92d36f924004e98 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 17 Nov 2020 09:16:21 -0800 Subject: [PATCH 32/60] import testing, somehow not needed before --- tests/python/unittest/test_crt.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/python/unittest/test_crt.py b/tests/python/unittest/test_crt.py index 3b5471d0bb8b..3d6923342652 100644 --- a/tests/python/unittest/test_crt.py +++ b/tests/python/unittest/test_crt.py @@ -28,6 +28,7 @@ import tvm import tvm.relay +import tvm.testing from tvm.topi.utils import get_const_tuple from tvm.topi.testing import conv2d_nchw_python From 1f471b1d60de219908c56fc1a028bd33f7acfb08 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 17 Nov 2020 10:20:41 -0800 Subject: [PATCH 33/60] catch correct error from remote module lookup --- python/tvm/micro/session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/micro/session.py b/python/tvm/micro/session.py index 5bc01186096f..8aa4ad44f607 100644 --- a/python/tvm/micro/session.py +++ b/python/tvm/micro/session.py @@ -179,7 +179,7 @@ def lookup_remote_linked_param(mod, storage_id, template_tensor, ctx): """ try: lookup_linked_param = mod.get_function("_lookup_linked_param") - except KeyError: + except AttributeError: return None remote_data = lookup_linked_param(storage_id) From 3e1eb4a06d47e43fb9f236bcf040041c302a91ec Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 17 Nov 2020 10:27:53 -0800 Subject: [PATCH 34/60] CRT RPC-level ModuleGetFunction behaves like C++ on error * returns no error/kNullptr on function name not found --- src/runtime/crt/common/crt_runtime_api.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/runtime/crt/common/crt_runtime_api.c b/src/runtime/crt/common/crt_runtime_api.c index 9a0663fc704d..f2d67ccfbeab 100644 --- a/src/runtime/crt/common/crt_runtime_api.c +++ b/src/runtime/crt/common/crt_runtime_api.c @@ -229,17 +229,17 @@ int TVMFuncCall(TVMFunctionHandle func_handle, TVMValue* arg_values, int* type_c return func(arg_values, type_codes, num_args, ret_val, ret_type_code, resource_handle); } -static int FindFunctionOrSetAPIError(tvm_module_index_t module_index, - const TVMFuncRegistry* registry, const char* name, - TVMFunctionHandle* out) { +static tvm_crt_error_t FindFunctionOrSetAPIError(tvm_module_index_t module_index, + const TVMFuncRegistry* registry, const char* name, + TVMFunctionHandle* out) { tvm_function_index_t function_index; - if (TVMFuncRegistry_Lookup(registry, name, &function_index) != 0) { - TVMAPIErrorf("failed to get function: mod_index=%04" PRIx16 ", name=%s", module_index, name); - return -1; + tvm_crt_error_t err = TVMFuncRegistry_Lookup(registry, name, &function_index); + if (err != kTvmErrorNoError) { + return err; } *out = EncodeFunctionHandle(module_index, function_index); - return 0; + return kTvmErrorNoError; } int TVMFuncGetGlobal(const char* name, TVMFunctionHandle* out) { 
@@ -279,6 +279,14 @@ int ModuleGetFunction(TVMValue* args, int* type_codes, int num_args, TVMValue* r if (to_return == 0) { ret_type_codes[0] = kTVMPackedFuncHandle; + } else { + ret_value->v_handle = NULL; + } + + // NOTE: For compatibility with C++ runtime API, return no error (but NULL function) when the + // function lookup failed. + if (to_return == kTvmErrorFunctionNameNotFound) { + to_return = kTvmErrorNoError; } return to_return; From 289b1b795c229057cea3f65dae90cfd7df971d01 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 17 Nov 2020 10:29:22 -0800 Subject: [PATCH 35/60] black format --- python/tvm/target/target.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/tvm/target/target.py b/python/tvm/target/target.py index a8934539020a..9a609e5a334b 100644 --- a/python/tvm/target/target.py +++ b/python/tvm/target/target.py @@ -236,8 +236,7 @@ def micro(model="unknown", options=None): "stm32f746xx": ["-mcpu=cortex-m7"], } opts = _merge_opts( - trans_table[model] + ["-runtime=c", "--system-lib", f"-model={model}"], - options, + trans_table[model] + ["-runtime=c", "--system-lib", f"-model={model}"], options, ) # NOTE: in the future, the default micro target will be LLVM except when From ea0290baf41b79e7c01902cb26adc50a95100cec Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 17 Nov 2020 11:38:10 -0800 Subject: [PATCH 36/60] black format again --- python/tvm/target/target.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/tvm/target/target.py b/python/tvm/target/target.py index 9a609e5a334b..a8934539020a 100644 --- a/python/tvm/target/target.py +++ b/python/tvm/target/target.py @@ -236,7 +236,8 @@ def micro(model="unknown", options=None): "stm32f746xx": ["-mcpu=cortex-m7"], } opts = _merge_opts( - trans_table[model] + ["-runtime=c", "--system-lib", f"-model={model}"], options, + trans_table[model] + ["-runtime=c", "--system-lib", f"-model={model}"], + options, ) # NOTE: in the future, the default micro target will be LLVM except when From 652571ddd3824d99e0a8b9beba499439208cd4b6 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 17 Nov 2020 13:08:18 -0800 Subject: [PATCH 37/60] rm test_binutils, code is gone --- tests/python/contrib/test_binutils.py | 167 -------------------------- 1 file changed, 167 deletions(-) delete mode 100644 tests/python/contrib/test_binutils.py diff --git a/tests/python/contrib/test_binutils.py b/tests/python/contrib/test_binutils.py deleted file mode 100644 index f0aa2d157aed..000000000000 --- a/tests/python/contrib/test_binutils.py +++ /dev/null @@ -1,167 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Test various utilities for interaction with compiled binaries. 
- -Specifically, we test the following capabilities: - - querying the size of a binary section - - relocating sections within a binary to new addresses - - reading the contents of a binary section - - querying the address of a symbol in the binary -""" - -import tvm -from tvm import te -import subprocess -from tvm.contrib import utils -from tvm.contrib import cc -from tvm.contrib.binutils import * - -TOOLCHAIN_PREFIX = "" - - -def make_binary(): - prog = "int a = 7; \ - int main() { \ - int b = 5; \ - return 0; \ - }" - tmp_dir = utils.tempdir() - tmp_source = tmp_dir.relpath("source.c") - tmp_obj = tmp_dir.relpath("obj.obj") - with open(tmp_source, "w") as f: - f.write(prog) - cc.create_executable(tmp_obj, tmp_source, [], cc="{}gcc".format(TOOLCHAIN_PREFIX)) - prog_bin = bytearray(open(tmp_obj, "rb").read()) - return prog_bin - - -def test_tvm_callback_get_section_size(binary=None): - if binary is None: - binary = make_binary() - tmp_dir = utils.tempdir() - tmp_bin = tmp_dir.relpath("obj.bin") - with open(tmp_bin, "wb") as f: - f.write(binary) - - def verify(): - print( - "Text section size: %d" - % tvm_callback_get_section_size(tmp_bin, "text", TOOLCHAIN_PREFIX) - ) - print( - "Data section size: %d" - % tvm_callback_get_section_size(tmp_bin, "data", TOOLCHAIN_PREFIX) - ) - print( - "Bss section size: %d" % tvm_callback_get_section_size(tmp_bin, "bss", TOOLCHAIN_PREFIX) - ) - print() - - verify() - - -def test_tvm_callback_relocate_binary(): - binary = make_binary() - tmp_dir = utils.tempdir() - tmp_bin = tmp_dir.relpath("obj.bin") - with open(tmp_bin, "wb") as f: - f.write(binary) - - def verify(): - word_size = 8 - text_loc = 0x0 - rodata_loc = 0x10000 - data_loc = 0x20000 - bss_loc = 0x30000 - stack_end = 0x50000 - rel_bin = tvm_callback_relocate_binary( - tmp_bin, word_size, text_loc, rodata_loc, data_loc, bss_loc, stack_end, TOOLCHAIN_PREFIX - ) - print("Relocated binary section sizes") - test_tvm_callback_get_section_size(binary=rel_bin) - relf = tmp_dir.relpath("rel.bin") - with open(relf, "wb") as f: - f.write(rel_bin) - nm_proc = subprocess.Popen( - ["nm", "-C", "--defined-only", relf], stdout=subprocess.PIPE, stderr=subprocess.STDOUT - ) - (out, _) = nm_proc.communicate() - symbol_entries = out.decode("utf-8").split("\n") - for entry in symbol_entries: - if len(entry) == 0: - continue - sym_loc, section, sym_name = entry.split(" ") - sym_loc = int(sym_loc, 16) - if section == "T": # text - assert sym_loc >= text_loc and sym_loc < data_loc - elif section == "D": # data - assert sym_loc >= data_loc and sym_loc < bss_loc - elif section == "B": # bss - assert sym_loc >= bss_loc - - verify() - - -def test_tvm_callback_read_binary_section(): - binary = make_binary() - - def verify(): - text_bin = tvm_callback_read_binary_section(binary, "text", TOOLCHAIN_PREFIX) - data_bin = tvm_callback_read_binary_section(binary, "data", TOOLCHAIN_PREFIX) - bss_bin = tvm_callback_read_binary_section(binary, "bss", TOOLCHAIN_PREFIX) - print("Read text section part of binary? %r" % (text_bin in binary)) - print("Read data section part of binary? %r" % (data_bin in binary)) - print("Read bss section part of binary? 
%r" % (bss_bin in binary)) - print() - - verify() - - -def test_tvm_callback_get_symbol_map(): - binary = make_binary() - tmp_dir = utils.tempdir() - tmp_bin = tmp_dir.relpath("obj.bin") - with open(tmp_bin, "wb") as f: - f.write(binary) - - def verify(): - word_size = 8 - text_loc = 0x0 - rodata_loc = 0x10000 - data_loc = 0x20000 - bss_loc = 0x30000 - stack_end = 0x50000 - rel_bin = tvm_callback_relocate_binary( - tmp_bin, word_size, text_loc, rodata_loc, data_loc, bss_loc, stack_end, TOOLCHAIN_PREFIX - ) - symbol_map = tvm_callback_get_symbol_map(rel_bin, TOOLCHAIN_PREFIX) - symbols = set() - for i, line in enumerate(symbol_map.split("\n")): - # Every other line is the value the symbol maps to. - if i % 2 == 0: - symbols.add(line) - assert "a" in symbols - assert "main" in symbols - - verify() - - -if __name__ == "__main__": - test_tvm_callback_get_section_size() - test_tvm_callback_relocate_binary() - test_tvm_callback_read_binary_section() - test_tvm_callback_get_symbol_map() From 4976e50d3a562e667483873bc550795f6cf5a911 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 17 Nov 2020 13:18:42 -0800 Subject: [PATCH 38/60] split codegen_params into two files, C and LLVM --- src/target/llvm/codegen_params.cc | 263 +------------------------ src/target/llvm/codegen_params.h | 2 - src/target/source/codegen_c_host.cc | 2 +- src/target/source/codegen_params.cc | 295 ++++++++++++++++++++++++++++ src/target/source/codegen_params.h | 38 ++++ 5 files changed, 337 insertions(+), 263 deletions(-) create mode 100644 src/target/source/codegen_params.cc create mode 100644 src/target/source/codegen_params.h diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index a7d79c466391..243de66381e4 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -18,24 +18,23 @@ */ /*! - * \file codegen_blob.cc + * \file codegen_params.cc */ #ifdef TVM_LLVM_VERSION #include "codegen_params.h" -#include -#include -#include #include namespace tvm { namespace codegen { +namespace { class DLManagedTensorDeleter { public: void operator()(DLManagedTensor* ptr) { ptr->deleter(ptr); } }; +} llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime::NDArray arr) { llvm::Type* element_type = nullptr; @@ -145,262 +144,6 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: llvm::ArrayType::get(element_type, num_elements), llvm::ArrayRef(elements))); } -static constexpr const char* kFloatCast = "(float)"; -static constexpr const char* kDoubleCast = "(double)"; - -static constexpr const int kMaxLineLength = 80; - -void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& os) { - auto arr_type = arr.DataType(); - CHECK_EQ(arr_type.lanes(), 1) << "CodegenParams: only support generating 1-lane parameters; saw " - << arr_type.lanes(); - - int one_element_size_bytes = (arr_type.bits() / 4) + (2 /* "0x" */) + (2 /* ", " */); - if (arr_type.code() == runtime::DataType::TypeCode::kInt) { - one_element_size_bytes += 1; // sign bit - if (arr_type.bits() > 32) { - one_element_size_bytes += 2; // "UL" - } - } else if (arr_type.code() == runtime::DataType::TypeCode::kUInt) { - if (arr_type.bits() > 32) { - one_element_size_bytes += 1; // "L" - } - } else if (arr_type.code() == runtime::DataType::TypeCode::kFloat) { - // Floats and doubles are printed as hex but casted. 
- one_element_size_bytes += 1 /* sign */ + 1 /* decimal point */ + 1 /* exponent sign */ + - 1 /* extra decimal digit in exponent */; - } - - int elements_per_row = 16; - while (elements_per_row > 1 && - (elements_per_row * one_element_size_bytes) > (kMaxLineLength - indent_chars)) { - elements_per_row /= 2; - } - - std::string indent_str(indent_chars, ' '); - os << indent_str; - - auto shape = arr.Shape(); - int num_elements = 1; - for (auto shape_elem : shape) { - num_elements *= shape_elem; - } - - std::unique_ptr tensor(arr.ToDLPack()); - auto old_fmtflags = os.flags(); - os.setf(std::ios::internal | std::ios::hex, - std::ios::adjustfield | std::ios::basefield | std::ios::showbase); - os.fill('0'); - switch (arr_type.code()) { - case runtime::DataType::kInt: - CHECK(arr_type.bits() == 8 || arr_type.bits() == 16 || arr_type.bits() == 32 || - arr_type.bits() == 64) - << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " - << arr_type.bits() << "-bit array"; - - if (arr_type.bits() == 8) { - for (int i = 0; i < num_elements; i++) { - // NOTE: for special types int8_t and uint8_t, need to promote to int type to avoid - // printing as a char. - int8_t elem = static_cast(tensor->dl_tensor.data)[i]; - uint16_t to_print; - if (elem < 0) { - os << "-"; - to_print = -elem; - } else { - os << "+"; - to_print = elem; - } - os << "0x" << std::setw(2) << +static_cast(to_print); - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } - } else if (arr_type.bits() == 16) { - for (int i = 0; i < num_elements; i++) { - int16_t elem = static_cast(tensor->dl_tensor.data)[i]; - uint16_t to_print; - if (elem < 0) { - os << "-"; - to_print = -elem; - } else { - os << "+"; - to_print = elem; - } - os << "0x" << std::setw(4) << to_print; - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } - } else if (arr_type.bits() == 32) { - for (int i = 0; i < num_elements; i++) { - int32_t elem = static_cast(tensor->dl_tensor.data)[i]; - uint32_t to_print; - if (elem < 0) { - os << "-"; - to_print = -elem; - } else { - os << "+"; - to_print = elem; - } - os << "0x" << std::setw(8) << to_print; - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } - } else if (arr_type.bits() == 64) { - for (int i = 0; i < num_elements; i++) { - int64_t elem = static_cast(tensor->dl_tensor.data)[i]; - uint64_t to_print; - if (elem < 0) { - os << "-"; - to_print = -elem; - } else { - os << "+"; - to_print = elem; - } - os << "0x" << std::setw(16) << to_print; - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } - } else { - CHECK(false) << "should not get here"; - } - break; - - case runtime::DataType::TypeCode::kUInt: - CHECK(arr_type.bits() == 8 || arr_type.bits() == 16 || arr_type.bits() == 32 || - arr_type.bits() == 64) - << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " - << arr_type.bits() << "-bit array"; - - if (arr_type.bits() == 8) { - for (int i = 0; i < num_elements; i++) { - // NOTE: for special types int8_t and uint8_t, need to promote to int type to avoid - // printing as a char. 
- os << "0x" << std::setw(2) - << +static_cast(static_cast(tensor->dl_tensor.data)[i]); - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } - } else if (arr_type.bits() == 16) { - for (int i = 0; i < num_elements; i++) { - os << "0x" << std::setw(4) << static_cast(tensor->dl_tensor.data)[i]; - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } - } else if (arr_type.bits() == 32) { - for (int i = 0; i < num_elements; i++) { - os << "0x" << std::setw(8) << static_cast(tensor->dl_tensor.data)[i]; - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } - } else if (arr_type.bits() == 64) { - for (int i = 0; i < num_elements; i++) { - os << "0x" << std::setw(16) << static_cast(tensor->dl_tensor.data)[i] << "UL"; - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } - } else { - CHECK(false) << "should not get here"; - } - break; - - case runtime::DataType::TypeCode::kFloat: { - std::stringstream ss; - ss.setf(std::ios::hex | std::ios::showbase | std::ios::fixed | std::ios::scientific, - std::ios::basefield | std::ios::showbase | std::ios::floatfield); - os.fill(' '); - os.setf(std::ios::left, std::ios::adjustfield); - if (arr_type.bits() == 32) { - for (int i = 0; i < num_elements; i++) { - float elem = static_cast(tensor->dl_tensor.data)[i]; - if (std::isinf(elem)) { - // C99 standard. - os << (elem < 0 ? "-" : " ") << std::setw(one_element_size_bytes - 1) << "INFINITY"; - } else if (std::isnan(elem)) { - // GNU extension, implemenatation-dependent. - os << std::setw(one_element_size_bytes) << "NAN"; - } else { - ss << elem; - os << std::setw(one_element_size_bytes) << ss.str(); - ss.str(""); - } - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } - std::cout << "\n"; - } else if (arr_type.bits() == 64) { - for (int i = 0; i < num_elements; i++) { - double elem = static_cast(tensor->dl_tensor.data)[i]; - if (std::isinf(elem)) { - // C99 standard. - os << (elem < 0 ? "-" : " ") << std::setw(one_element_size_bytes - 1) << "INFINITY"; - } else if (std::isnan(elem)) { - // GNU extension, implemenatation-dependent. 
- os << std::setw(one_element_size_bytes) << "NAN"; - } else { - ss << elem; - os << std::setw(one_element_size_bytes) << ss.str(); - ss.str(""); - } - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } - } else { - CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " - << arr_type.bits() << "-bit array"; - } - break; - } - - default: - CHECK(false) << "Data type not supported"; - } - - if (num_elements % elements_per_row != 0) { - os << "\n"; - } - os.flags(old_fmtflags); -} - } // namespace codegen } // namespace tvm diff --git a/src/target/llvm/codegen_params.h b/src/target/llvm/codegen_params.h index 6e3c3e8eafd6..c21820aa6c3f 100644 --- a/src/target/llvm/codegen_params.h +++ b/src/target/llvm/codegen_params.h @@ -34,8 +34,6 @@ namespace codegen { llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime::NDArray arr); -void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& os); - void LLVMCodeGenParams(llvm::LLVMContext* ctx, llvm::Module* module, int64_t storage_id_offset, ::tvm::runtime::Array param_names, ::tvm::runtime::Array params_by_sid); diff --git a/src/target/source/codegen_c_host.cc b/src/target/source/codegen_c_host.cc index f47e07e94bd1..0a19fc1399b7 100644 --- a/src/target/source/codegen_c_host.cc +++ b/src/target/source/codegen_c_host.cc @@ -33,7 +33,7 @@ #include "../../support/str_escape.h" #include "../build_common.h" #include "../func_registry_generator.h" -#include "../llvm/codegen_params.h" +#include "codegen_params.h" namespace tvm { namespace codegen { diff --git a/src/target/source/codegen_params.cc b/src/target/source/codegen_params.cc new file mode 100644 index 000000000000..ae02a957467e --- /dev/null +++ b/src/target/source/codegen_params.cc @@ -0,0 +1,295 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file codegen_params.cc + */ + +#include "codegen_params.h" + +#include +#include +#include +#include + +namespace tvm { +namespace codegen { + +namespace { +class DLManagedTensorDeleter { + public: + void operator()(DLManagedTensor* ptr) { ptr->deleter(ptr); } +}; +} + +static constexpr const int kMaxLineLength = 80; + +void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& os) { + auto arr_type = arr.DataType(); + CHECK_EQ(arr_type.lanes(), 1) << "CodegenParams: only support generating 1-lane parameters; saw " + << arr_type.lanes(); + + int one_element_size_bytes = (arr_type.bits() / 4) + (2 /* "0x" */) + (2 /* ", " */); + if (arr_type.code() == runtime::DataType::TypeCode::kInt) { + one_element_size_bytes += 1; // sign bit + if (arr_type.bits() > 32) { + one_element_size_bytes += 2; // "UL" + } + } else if (arr_type.code() == runtime::DataType::TypeCode::kUInt) { + if (arr_type.bits() > 32) { + one_element_size_bytes += 1; // "L" + } + } else if (arr_type.code() == runtime::DataType::TypeCode::kFloat) { + // Floats and doubles are printed as hex but casted. + one_element_size_bytes += 1 /* sign */ + 1 /* decimal point */ + 1 /* exponent sign */ + + 1 /* extra decimal digit in exponent */; + } + + int elements_per_row = 16; + while (elements_per_row > 1 && + (elements_per_row * one_element_size_bytes) > (kMaxLineLength - indent_chars)) { + elements_per_row /= 2; + } + + std::string indent_str(indent_chars, ' '); + os << indent_str; + + auto shape = arr.Shape(); + int num_elements = 1; + for (auto shape_elem : shape) { + num_elements *= shape_elem; + } + + std::unique_ptr tensor(arr.ToDLPack()); + auto old_fmtflags = os.flags(); + os.setf(std::ios::internal | std::ios::hex, + std::ios::adjustfield | std::ios::basefield | std::ios::showbase); + os.fill('0'); + switch (arr_type.code()) { + case runtime::DataType::kInt: + CHECK(arr_type.bits() == 8 || arr_type.bits() == 16 || arr_type.bits() == 32 || + arr_type.bits() == 64) + << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " + << arr_type.bits() << "-bit array"; + + if (arr_type.bits() == 8) { + for (int i = 0; i < num_elements; i++) { + // NOTE: for special types int8_t and uint8_t, need to promote to int type to avoid + // printing as a char. 
+ int8_t elem = static_cast(tensor->dl_tensor.data)[i]; + uint16_t to_print; + if (elem < 0) { + os << "-"; + to_print = -elem; + } else { + os << "+"; + to_print = elem; + } + os << "0x" << std::setw(2) << +static_cast(to_print); + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } + } + } else if (arr_type.bits() == 16) { + for (int i = 0; i < num_elements; i++) { + int16_t elem = static_cast(tensor->dl_tensor.data)[i]; + uint16_t to_print; + if (elem < 0) { + os << "-"; + to_print = -elem; + } else { + os << "+"; + to_print = elem; + } + os << "0x" << std::setw(4) << to_print; + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } + } + } else if (arr_type.bits() == 32) { + for (int i = 0; i < num_elements; i++) { + int32_t elem = static_cast(tensor->dl_tensor.data)[i]; + uint32_t to_print; + if (elem < 0) { + os << "-"; + to_print = -elem; + } else { + os << "+"; + to_print = elem; + } + os << "0x" << std::setw(8) << to_print; + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } + } + } else if (arr_type.bits() == 64) { + for (int i = 0; i < num_elements; i++) { + int64_t elem = static_cast(tensor->dl_tensor.data)[i]; + uint64_t to_print; + if (elem < 0) { + os << "-"; + to_print = -elem; + } else { + os << "+"; + to_print = elem; + } + os << "0x" << std::setw(16) << to_print; + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } + } + } else { + CHECK(false) << "should not get here"; + } + break; + + case runtime::DataType::TypeCode::kUInt: + CHECK(arr_type.bits() == 8 || arr_type.bits() == 16 || arr_type.bits() == 32 || + arr_type.bits() == 64) + << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " + << arr_type.bits() << "-bit array"; + + if (arr_type.bits() == 8) { + for (int i = 0; i < num_elements; i++) { + // NOTE: for special types int8_t and uint8_t, need to promote to int type to avoid + // printing as a char. 
+ os << "0x" << std::setw(2) + << +static_cast(static_cast(tensor->dl_tensor.data)[i]); + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } + } + } else if (arr_type.bits() == 16) { + for (int i = 0; i < num_elements; i++) { + os << "0x" << std::setw(4) << static_cast(tensor->dl_tensor.data)[i]; + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } + } + } else if (arr_type.bits() == 32) { + for (int i = 0; i < num_elements; i++) { + os << "0x" << std::setw(8) << static_cast(tensor->dl_tensor.data)[i]; + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } + } + } else if (arr_type.bits() == 64) { + for (int i = 0; i < num_elements; i++) { + os << "0x" << std::setw(16) << static_cast(tensor->dl_tensor.data)[i] << "UL"; + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } + } + } else { + CHECK(false) << "should not get here"; + } + break; + + case runtime::DataType::TypeCode::kFloat: { + std::stringstream ss; + ss.setf(std::ios::hex | std::ios::showbase | std::ios::fixed | std::ios::scientific, + std::ios::basefield | std::ios::showbase | std::ios::floatfield); + os.fill(' '); + os.setf(std::ios::left, std::ios::adjustfield); + if (arr_type.bits() == 32) { + for (int i = 0; i < num_elements; i++) { + float elem = static_cast(tensor->dl_tensor.data)[i]; + if (std::isinf(elem)) { + // C99 standard. + os << (elem < 0 ? "-" : " ") << std::setw(one_element_size_bytes - 1) << "INFINITY"; + } else if (std::isnan(elem)) { + // GNU extension, implemenatation-dependent. + os << std::setw(one_element_size_bytes) << "NAN"; + } else { + ss << elem; + os << std::setw(one_element_size_bytes) << ss.str(); + ss.str(""); + } + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } + } + std::cout << "\n"; + } else if (arr_type.bits() == 64) { + for (int i = 0; i < num_elements; i++) { + double elem = static_cast(tensor->dl_tensor.data)[i]; + if (std::isinf(elem)) { + // C99 standard. + os << (elem < 0 ? "-" : " ") << std::setw(one_element_size_bytes - 1) << "INFINITY"; + } else if (std::isnan(elem)) { + // GNU extension, implemenatation-dependent. + os << std::setw(one_element_size_bytes) << "NAN"; + } else { + ss << elem; + os << std::setw(one_element_size_bytes) << ss.str(); + ss.str(""); + } + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } + } + } else { + CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " + << arr_type.bits() << "-bit array"; + } + break; + } + + default: + CHECK(false) << "Data type not supported"; + } + + if (num_elements % elements_per_row != 0) { + os << "\n"; + } + os.flags(old_fmtflags); +} + +} // codegen codegen +} // codegen tvm diff --git a/src/target/source/codegen_params.h b/src/target/source/codegen_params.h new file mode 100644 index 000000000000..6ef3f4fbc63e --- /dev/null +++ b/src/target/source/codegen_params.h @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file codegen_params.h + */ + +#ifndef TVM_TARGET_SOURCE_CODEGEN_PARAMS_H_ +#define TVM_TARGET_SOURCE_CODEGEN_PARAMS_H_ + +#include +#include + +namespace tvm { +namespace codegen { + +void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& os); + +} // namespace codegen +} // namespace tvm + +#endif // TVM_TARGET_SOURCE_CODEGEN_PARAMS_H_ From 219e3769864d1698628f342abe53a6ba2e1f873c Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 17 Nov 2020 13:19:10 -0800 Subject: [PATCH 39/60] git-clang-format --- src/target/llvm/codegen_params.cc | 2 +- src/target/source/codegen_params.cc | 9 +++++---- src/target/source/codegen_params.h | 3 ++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index 243de66381e4..8b405d6e92f8 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -34,7 +34,7 @@ class DLManagedTensorDeleter { public: void operator()(DLManagedTensor* ptr) { ptr->deleter(ptr); } }; -} +} // namespace llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime::NDArray arr) { llvm::Type* element_type = nullptr; diff --git a/src/target/source/codegen_params.cc b/src/target/source/codegen_params.cc index ae02a957467e..f55071f16c19 100644 --- a/src/target/source/codegen_params.cc +++ b/src/target/source/codegen_params.cc @@ -23,10 +23,11 @@ #include "codegen_params.h" +#include + #include #include #include -#include namespace tvm { namespace codegen { @@ -36,7 +37,7 @@ class DLManagedTensorDeleter { public: void operator()(DLManagedTensor* ptr) { ptr->deleter(ptr); } }; -} +} // namespace static constexpr const int kMaxLineLength = 80; @@ -291,5 +292,5 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& os.flags(old_fmtflags); } -} // codegen codegen -} // codegen tvm +} // namespace codegen +} // namespace tvm diff --git a/src/target/source/codegen_params.h b/src/target/source/codegen_params.h index 6ef3f4fbc63e..a3d277eac590 100644 --- a/src/target/source/codegen_params.h +++ b/src/target/source/codegen_params.h @@ -24,9 +24,10 @@ #ifndef TVM_TARGET_SOURCE_CODEGEN_PARAMS_H_ #define TVM_TARGET_SOURCE_CODEGEN_PARAMS_H_ -#include #include +#include + namespace tvm { namespace codegen { From 762310b416d7bb097175239e13ff5917d0d338d0 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 17 Nov 2020 13:24:50 -0800 Subject: [PATCH 40/60] cpplint --- src/target/llvm/codegen_params.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index 8b405d6e92f8..8f92e4f19b3a 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -24,6 +24,7 @@ #include "codegen_params.h" +#include #include namespace tvm { From 7af6c9a76020a75fdff8396c9ef34b8d0f7d431d Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 17 Nov 
2020 13:44:28 -0800 Subject: [PATCH 41/60] add missing include for linux compilation --- src/target/source/codegen_params.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/target/source/codegen_params.cc b/src/target/source/codegen_params.cc index f55071f16c19..99f7e44ca8e7 100644 --- a/src/target/source/codegen_params.cc +++ b/src/target/source/codegen_params.cc @@ -23,6 +23,7 @@ #include "codegen_params.h" +#include #include #include From b76d2cea6419e87e584e1f865ae02ccdeaf5ce41 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 17 Nov 2020 14:43:07 -0800 Subject: [PATCH 42/60] cpplint --- src/target/source/codegen_params.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/target/source/codegen_params.cc b/src/target/source/codegen_params.cc index 99f7e44ca8e7..74524b3545d3 100644 --- a/src/target/source/codegen_params.cc +++ b/src/target/source/codegen_params.cc @@ -23,9 +23,9 @@ #include "codegen_params.h" -#include #include +#include #include #include #include From 78de39ee78d8ff72cb0e57d550cc89bb530faf27 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Sun, 22 Nov 2020 10:48:48 -0800 Subject: [PATCH 43/60] Keep binutils as it was, not part of this PR --- python/tvm/contrib/binutils.py | 280 +++++++++++++++++++++++++- tests/python/contrib/test_binutils.py | 167 +++++++++++++++ 2 files changed, 444 insertions(+), 3 deletions(-) create mode 100644 tests/python/contrib/test_binutils.py diff --git a/python/tvm/contrib/binutils.py b/python/tvm/contrib/binutils.py index ea6b6320fbbd..646362a5587f 100644 --- a/python/tvm/contrib/binutils.py +++ b/python/tvm/contrib/binutils.py @@ -16,10 +16,61 @@ # under the License. """Utilities for binary file manipulation""" -import logging +import os import subprocess +import tvm._ffi +from . import utils -_LOG = logging.getLogger(__name__) +# TODO does this file still belong in `contrib`. is it too µTVM-specific? + +# TODO shouldn't need so many `ALIGN` directives +RELOCATION_LD_SCRIPT_TEMPLATE = """ +/* linker symbol for use in UTVMInit */ +_utvm_stack_pointer_init = 0x{stack_pointer_init:x}; + +SECTIONS +{{ + . = 0x{text_start:x}; + . = ALIGN({word_size}); + .text : + {{ + . = ALIGN({word_size}); + KEEP(*(.text)) + KEEP(*(.text*)) + . = ALIGN({word_size}); + }} + + . = 0x{rodata_start:x}; + . = ALIGN({word_size}); + .rodata : + {{ + . = ALIGN({word_size}); + KEEP(*(.rodata)) + KEEP(*(.rodata*)) + . = ALIGN({word_size}); + }} + + . = 0x{data_start:x}; + . = ALIGN({word_size}); + .data : + {{ + . = ALIGN({word_size}); + KEEP(*(.data)) + KEEP(*(.data*)) + . = ALIGN({word_size}); + }} + + . = 0x{bss_start:x}; + . = ALIGN({word_size}); + .bss : + {{ + . = ALIGN({word_size}); + KEEP(*(.bss)) + KEEP(*(.bss*)) + . = ALIGN({word_size}); + }} +}} +""" def run_cmd(cmd): @@ -35,7 +86,6 @@ def run_cmd(cmd): output : str resulting stdout capture from the subprocess """ - _LOG.debug("execute: %s", " ".join(cmd)) proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) (output, _) = proc.communicate() output = output.decode("utf-8") @@ -44,3 +94,227 @@ def run_cmd(cmd): msg = f'error while running command "{cmd_str}":\n{output}' raise RuntimeError(msg) return output + + +@tvm._ffi.register_func("tvm_callback_get_section_size") +def tvm_callback_get_section_size(binary_path, section_name, toolchain_prefix): + """Finds size of the section in the binary. 
+ Assumes `size` shell command exists (typically works only on Linux machines) + + Parameters + ---------- + binary_path : str + path of the binary file + + section_name : str + name of section + + toolchain_prefix : str + prefix for binary names in target compiler toolchain + + Returns + ------- + size : integer + size of the section in bytes + """ + if not os.path.isfile(binary_path): + raise RuntimeError('no such file "{}"'.format(binary_path)) + # We use the "-A" flag here to get the ".rodata" section's size, which is + # not included by default. + size_output = run_cmd(["{}size".format(toolchain_prefix), "-A", binary_path]) + + # TODO(weberlo): Refactor this method and `*relocate_binary` so they are + # both aware of [".bss", ".sbss", ".sdata"] being relocated to ".bss". + section_mapping = { + ".text": [".text"], + ".rodata": [".rodata"], + ".data": [".data", ".sdata"], + ".bss": [".bss", ".sbss"], + } + sections_to_sum = section_mapping["." + section_name] + section_size = 0 + # Skip the first two header lines in the `size` output. + for line in size_output.split("\n")[2:]: + tokens = list(filter(lambda s: len(s) != 0, line.split(" "))) + if len(tokens) != 3: + continue + entry_name = tokens[0] + entry_size = int(tokens[1]) + for section in sections_to_sum: + if entry_name.startswith(section): + section_size += entry_size + break + + # NOTE: in the past, section_size has been wrong on x86. it may be + # inconsistent. TODO: maybe stop relying on `*size` to give us the size and + # instead read the section with `*objcopy` and count the bytes. + # NOTE(areusch): I think the problem is due to alignment ops in the linker. + # Since this is going away in the impending switch to on-device runtime, + # add a constant to hopefully absorb these relocations. + if section_size > 0: + section_size += 64 + + return section_size + + +@tvm._ffi.register_func("tvm_callback_relocate_binary") +def tvm_callback_relocate_binary( + binary_path, + word_size, + text_start, + rodata_start, + data_start, + bss_start, + stack_end, + toolchain_prefix, +): + """Relocates sections in the binary to new addresses + + Parameters + ---------- + binary_path : str + path of the binary file + + word_size : int + word size on the target machine + + text_start : int + text section address + + rodata_start : int + rodata section address + + data_start : int + data section address + + bss_start : int + bss section address + + stack_end : int + stack section end address + + toolchain_prefix : str + prefix for binary names in target compiler toolchain + + Returns + ------- + rel_bin : bytearray + the relocated binary + """ + assert text_start < rodata_start < data_start < bss_start < stack_end + stack_pointer_init = stack_end - word_size + ld_script_contents = "" + # TODO(weberlo): There should be a better way to configure this for different archs. + # TODO is this line even necessary? 
+ if "riscv" in toolchain_prefix: + ld_script_contents += 'OUTPUT_ARCH( "riscv" )\n\n' + ld_script_contents += RELOCATION_LD_SCRIPT_TEMPLATE.format( + word_size=word_size, + text_start=text_start, + rodata_start=rodata_start, + data_start=data_start, + bss_start=bss_start, + stack_pointer_init=stack_pointer_init, + ) + + tmp_dir = utils.tempdir() + rel_obj_path = tmp_dir.relpath("relocated.obj") + rel_ld_script_path = tmp_dir.relpath("relocate.lds") + with open(rel_ld_script_path, "w") as f: + f.write(ld_script_contents) + run_cmd( + ["{}ld".format(toolchain_prefix), binary_path, "-T", rel_ld_script_path, "-o", rel_obj_path] + ) + + with open(rel_obj_path, "rb") as f: + rel_bin = bytearray(f.read()) + + gdb_init_dir = os.environ.get("MICRO_GDB_INIT_DIR") + if gdb_init_dir is not None: + gdb_init_path = f"{gdb_init_dir}/.gdbinit" + with open(gdb_init_path, "r") as f: + gdbinit_contents = f.read().split("\n") + new_contents = [] + for line in gdbinit_contents: + new_contents.append(line) + if line.startswith("target"): + new_contents.append(f"add-symbol-file {rel_obj_path}") + with open(gdb_init_path, "w") as f: + f.write("\n".join(new_contents)) + + return rel_bin + + +@tvm._ffi.register_func("tvm_callback_read_binary_section") +def tvm_callback_read_binary_section(binary, section, toolchain_prefix): + """Returns the contents of the specified section in the binary byte array + + Parameters + ---------- + binary : bytearray + contents of the binary + + section : str + type of section + + toolchain_prefix : str + prefix for binary names in target compiler toolchain + + Returns + ------- + section_bin : bytearray + contents of the read section + """ + tmp_dir = utils.tempdir() + tmp_bin = tmp_dir.relpath("temp.bin") + tmp_section = tmp_dir.relpath("tmp_section.bin") + with open(tmp_bin, "wb") as out_file: + out_file.write(bytes(binary)) + run_cmd( + [ + "{}objcopy".format(toolchain_prefix), + "--dump-section", + ".{}={}".format(section, tmp_section), + tmp_bin, + ] + ) + if os.path.isfile(tmp_section): + # Get section content if it exists. + with open(tmp_section, "rb") as f: + section_bin = bytearray(f.read()) + else: + # Return empty bytearray if the section does not exist. + section_bin = bytearray("", "utf-8") + return section_bin + + +@tvm._ffi.register_func("tvm_callback_get_symbol_map") +def tvm_callback_get_symbol_map(binary, toolchain_prefix): + """Obtains a map of symbols to addresses in the passed binary + + Parameters + ---------- + binary : bytearray + contents of the binary + + toolchain_prefix : str + prefix for binary names in target compiler toolchain + + Returns + ------- + map_str : str + map of defined symbols to addresses, encoded as a series of + alternating newline-separated keys and values + """ + tmp_dir = utils.tempdir() + tmp_obj = tmp_dir.relpath("tmp_obj.bin") + with open(tmp_obj, "wb") as out_file: + out_file.write(bytes(binary)) + nm_output = run_cmd(["{}nm".format(toolchain_prefix), "-C", "--defined-only", tmp_obj]) + nm_output = nm_output.splitlines() + map_str = "" + for line in nm_output: + line = line.split() + map_str += line[2] + "\n" + map_str += line[0] + "\n" + return map_str diff --git a/tests/python/contrib/test_binutils.py b/tests/python/contrib/test_binutils.py new file mode 100644 index 000000000000..f0aa2d157aed --- /dev/null +++ b/tests/python/contrib/test_binutils.py @@ -0,0 +1,167 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Test various utilities for interaction with compiled binaries. + +Specifically, we test the following capabilities: + - querying the size of a binary section + - relocating sections within a binary to new addresses + - reading the contents of a binary section + - querying the address of a symbol in the binary +""" + +import tvm +from tvm import te +import subprocess +from tvm.contrib import utils +from tvm.contrib import cc +from tvm.contrib.binutils import * + +TOOLCHAIN_PREFIX = "" + + +def make_binary(): + prog = "int a = 7; \ + int main() { \ + int b = 5; \ + return 0; \ + }" + tmp_dir = utils.tempdir() + tmp_source = tmp_dir.relpath("source.c") + tmp_obj = tmp_dir.relpath("obj.obj") + with open(tmp_source, "w") as f: + f.write(prog) + cc.create_executable(tmp_obj, tmp_source, [], cc="{}gcc".format(TOOLCHAIN_PREFIX)) + prog_bin = bytearray(open(tmp_obj, "rb").read()) + return prog_bin + + +def test_tvm_callback_get_section_size(binary=None): + if binary is None: + binary = make_binary() + tmp_dir = utils.tempdir() + tmp_bin = tmp_dir.relpath("obj.bin") + with open(tmp_bin, "wb") as f: + f.write(binary) + + def verify(): + print( + "Text section size: %d" + % tvm_callback_get_section_size(tmp_bin, "text", TOOLCHAIN_PREFIX) + ) + print( + "Data section size: %d" + % tvm_callback_get_section_size(tmp_bin, "data", TOOLCHAIN_PREFIX) + ) + print( + "Bss section size: %d" % tvm_callback_get_section_size(tmp_bin, "bss", TOOLCHAIN_PREFIX) + ) + print() + + verify() + + +def test_tvm_callback_relocate_binary(): + binary = make_binary() + tmp_dir = utils.tempdir() + tmp_bin = tmp_dir.relpath("obj.bin") + with open(tmp_bin, "wb") as f: + f.write(binary) + + def verify(): + word_size = 8 + text_loc = 0x0 + rodata_loc = 0x10000 + data_loc = 0x20000 + bss_loc = 0x30000 + stack_end = 0x50000 + rel_bin = tvm_callback_relocate_binary( + tmp_bin, word_size, text_loc, rodata_loc, data_loc, bss_loc, stack_end, TOOLCHAIN_PREFIX + ) + print("Relocated binary section sizes") + test_tvm_callback_get_section_size(binary=rel_bin) + relf = tmp_dir.relpath("rel.bin") + with open(relf, "wb") as f: + f.write(rel_bin) + nm_proc = subprocess.Popen( + ["nm", "-C", "--defined-only", relf], stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ) + (out, _) = nm_proc.communicate() + symbol_entries = out.decode("utf-8").split("\n") + for entry in symbol_entries: + if len(entry) == 0: + continue + sym_loc, section, sym_name = entry.split(" ") + sym_loc = int(sym_loc, 16) + if section == "T": # text + assert sym_loc >= text_loc and sym_loc < data_loc + elif section == "D": # data + assert sym_loc >= data_loc and sym_loc < bss_loc + elif section == "B": # bss + assert sym_loc >= bss_loc + + verify() + + +def test_tvm_callback_read_binary_section(): + binary = make_binary() + + def verify(): + text_bin = tvm_callback_read_binary_section(binary, 
"text", TOOLCHAIN_PREFIX) + data_bin = tvm_callback_read_binary_section(binary, "data", TOOLCHAIN_PREFIX) + bss_bin = tvm_callback_read_binary_section(binary, "bss", TOOLCHAIN_PREFIX) + print("Read text section part of binary? %r" % (text_bin in binary)) + print("Read data section part of binary? %r" % (data_bin in binary)) + print("Read bss section part of binary? %r" % (bss_bin in binary)) + print() + + verify() + + +def test_tvm_callback_get_symbol_map(): + binary = make_binary() + tmp_dir = utils.tempdir() + tmp_bin = tmp_dir.relpath("obj.bin") + with open(tmp_bin, "wb") as f: + f.write(binary) + + def verify(): + word_size = 8 + text_loc = 0x0 + rodata_loc = 0x10000 + data_loc = 0x20000 + bss_loc = 0x30000 + stack_end = 0x50000 + rel_bin = tvm_callback_relocate_binary( + tmp_bin, word_size, text_loc, rodata_loc, data_loc, bss_loc, stack_end, TOOLCHAIN_PREFIX + ) + symbol_map = tvm_callback_get_symbol_map(rel_bin, TOOLCHAIN_PREFIX) + symbols = set() + for i, line in enumerate(symbol_map.split("\n")): + # Every other line is the value the symbol maps to. + if i % 2 == 0: + symbols.add(line) + assert "a" in symbols + assert "main" in symbols + + verify() + + +if __name__ == "__main__": + test_tvm_callback_get_section_size() + test_tvm_callback_relocate_binary() + test_tvm_callback_read_binary_section() + test_tvm_callback_get_symbol_map() From 148bdfc6f4426800ce9c3bb9e913be92502f7c11 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Sun, 22 Nov 2020 12:15:57 -0800 Subject: [PATCH 44/60] templatize LLVM param codegen --- src/target/llvm/codegen_params.cc | 130 +++++++++++++++++------------- 1 file changed, 75 insertions(+), 55 deletions(-) diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index 8f92e4f19b3a..c27a97d2f611 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -37,6 +37,40 @@ class DLManagedTensorDeleter { }; } // namespace +template +struct LLVMConstantGetter { + static llvm::Constant* getElement(llvm::Type* ty, T t); +}; + +template +struct LLVMConstantGetter::value && std::is_signed::value)>> { + static llvm::Constant* getElement(llvm::Type* ty, T t) { + return llvm::ConstantInt::getSigned(ty, t); + } +}; + +template +struct LLVMConstantGetter::value && !std::is_signed::value)>> { + static llvm::Constant* getElement(llvm::Type* ty, T t) { + return llvm::ConstantInt::get(ty, t); + } +}; + +template +struct LLVMConstantGetter::value>> { + static llvm::Constant* getElement(llvm::Type* ty, T t) { + return llvm::ConstantFP::get(ty, t); + } +}; + +template ::value>> +void BuildLLVMVector(llvm::Type* element_type, void* tensor_data, size_t num_elements, std::vector* elements) { + for (size_t i = 0; i < num_elements; i++) { + auto llvm_element = LLVMConstantGetter::getElement(element_type, static_cast(tensor_data)[i]); + elements->emplace_back(llvm_element); + } +} + llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime::NDArray arr) { llvm::Type* element_type = nullptr; @@ -61,28 +95,22 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: << arr_type.bits() << "-bit array"; element_type = llvm::Type::getIntNTy(*ctx, arr_type.bits()); - if (arr_type.bits() == 8) { - int8_t* data_buf = static_cast(tensor->dl_tensor.data); - for (int i = 0; i < num_elements; i++) { - elements.emplace_back(llvm::ConstantInt::getSigned(element_type, data_buf[i])); - } - } else if (arr_type.bits() == 16) { - for (int i = 0; i < num_elements; i++) { - 
elements.emplace_back(llvm::ConstantInt::getSigned( - element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); - } - } else if (arr_type.bits() == 32) { - for (int i = 0; i < num_elements; i++) { - elements.emplace_back(llvm::ConstantInt::getSigned( - element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); - } - } else if (arr_type.bits() == 64) { - for (int i = 0; i < num_elements; i++) { - elements.emplace_back(llvm::ConstantInt::getSigned( - element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); - } - } else { - CHECK(false) << "should not get here"; + switch (arr_type.bits()) { + case 8: + BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + break; + case 16: + BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + break; + case 32: + BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + break; + case 64: + BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + break; + default: + ICHECK(false) << "should not get here"; + break; } break; @@ -93,47 +121,39 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: << arr_type.bits() << "-bit array"; element_type = llvm::Type::getIntNTy(*ctx, arr_type.bits()); - if (arr_type.bits() == 8) { - for (int i = 0; i < num_elements; i++) { - elements.emplace_back(llvm::ConstantInt::get( - element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); - } - } else if (arr_type.bits() == 16) { - for (int i = 0; i < num_elements; i++) { - elements.emplace_back(llvm::ConstantInt::get( - element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); - } - } else if (arr_type.bits() == 32) { - for (int i = 0; i < num_elements; i++) { - elements.emplace_back(llvm::ConstantInt::get( - element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); - } - } else if (arr_type.bits() == 64) { - for (int i = 0; i < num_elements; i++) { - elements.emplace_back(llvm::ConstantInt::get( - element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); - } - } else { - CHECK(false) << "should not get here"; + switch (arr_type.bits()) { + case 8: + BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + break; + case 16: + BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + break; + case 32: + BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + break; + case 64: + BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + break; + default: + ICHECK(false) << "should not get here"; + break; } break; case runtime::DataType::TypeCode::kFloat: - if (arr_type.bits() == 32) { + switch (arr_type.bits()) { + case 32: element_type = llvm::Type::getFloatTy(*ctx); - for (int i = 0; i < num_elements; i++) { - elements.emplace_back(llvm::ConstantFP::get( - element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); - } - } else if (arr_type.bits() == 64) { + BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + break; + case 64: element_type = llvm::Type::getDoubleTy(*ctx); - for (int i = 0; i < num_elements; i++) { - elements.emplace_back(llvm::ConstantFP::get( - element_type, reinterpret_cast(tensor->dl_tensor.data)[i])); - } - } else { + BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + break; + default: CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " << arr_type.bits() << "-bit array"; + break; } break; From 
38a73ea165e100b7d4fb03a34fb9884b2ae70c10 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Sun, 22 Nov 2020 13:17:45 -0800 Subject: [PATCH 45/60] address tqchen comments --- include/tvm/tir/function.h | 13 ++- pyproject.toml | 92 ++++++++++++++++++++++ src/relay/backend/graph_runtime_codegen.cc | 10 ++- src/runtime/graph/graph_runtime.cc | 20 +++-- src/runtime/graph/graph_runtime.h | 13 ++- src/runtime/rpc/rpc_module.cc | 21 +++-- src/target/llvm/codegen_llvm.cc | 52 +++++------- src/target/llvm/codegen_params.cc | 23 +++--- 8 files changed, 174 insertions(+), 70 deletions(-) diff --git a/include/tvm/tir/function.h b/include/tvm/tir/function.h index a22552ea190c..97ee7f7211d4 100644 --- a/include/tvm/tir/function.h +++ b/include/tvm/tir/function.h @@ -151,6 +151,14 @@ class PrimFunc : public BaseFunc { TVM_DEFINE_OBJECT_REF_COW_METHOD(PrimFuncNode); }; +/*! + * \brief Describes one parameter that should be linked into the generated module. + * + * When parameters are to be linked in with generated code (i.e. on target_host-compatible + * backends), Relay attaches instances of this object to a global TIR function. Code-generators + * use the information contained in this node to include the parameter data in the generated + * module. + */ class LinkedParamNode : public Object { public: /*! \brief Unique numeric identifier used by runtimes to lookup this parameter. */ @@ -168,9 +176,12 @@ class LinkedParamNode : public Object { TVM_DECLARE_FINAL_OBJECT_INFO(LinkedParamNode, Object); }; +/*! + * \brief Managed reference to LinkedParamNode. + */ class LinkedParam : public ObjectRef { public: - LinkedParam(int64_t id, ::tvm::runtime::NDArray param); + TVM_DLL LinkedParam(int64_t id, ::tvm::runtime::NDArray param); TVM_DEFINE_OBJECT_REF_METHODS(LinkedParam, ObjectRef, LinkedParamNode); TVM_DEFINE_OBJECT_REF_COW_METHOD(LinkedParamNode); diff --git a/pyproject.toml b/pyproject.toml index 5cca711ddbe6..d273b25eb3cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,3 +46,95 @@ exclude = ''' )/ ) ''' +[tool.poetry] +name = "incubator-tvm" +version = "0.1.0" +description = "" +authors = ["Your Name "] +packages = [ + { include = "tvm", from = "../../../../python" }, +] + +[tool.poetry.dependencies] +attrs = "^19" +decorator = "^4.4" +numpy = "~1.19" +psutil = "^5" +scipy = "^1.4" +python = "^3.6" +tornado = "^6" +typed_ast = "^1.4" + +# AutoTVM +xgboost = {version = "^1.1", optional = true} + +############# +# Importers # +############# + +# NOTE: Caffe frontend dependency is from torch package. + +# CoreML +coremltools = {version = "^3.3", optional = true} + +# Darknet +opencv-python = {version = "^4.2", optional = true} +cffi = {version = "^1.14", optional = true} + +# NOTE: Keras provided by tensorflow package. +# If TF version conflict, maybe try: keras = "2.3.1" + +# MXNet frontend +mxnet = {version = "^1.6.0", optional = true} + +# ONNX frontend +onnx = {version = "1.6.0", optional = true} +onnxruntime = {version = "1.0.0", optional = true} + +# Pytorch (also used by ONNX) +torch = {version = "1.4.0", optional = true} +torchvision = {version = "0.5.0", optional = true} +# NOTE: torch depends on a number of other packages, but unhelpfully, does not expose that in the +# wheel!!! 
+future = {version = "*", optional = true} + +# Tensorflow frontend +tensorflow = {version = "^2.1", optional = true} +tensorflow-estimator = {version = "^2.1", optional = true} + +# TFLite frontend +tflite = {version = "2.1.0", optional = true} +wheel = "*" + + +[tool.poetry.extras] +xgboost = ["xgboost"] +importer-caffe2 = ["torch"] +importer-coreml = ["coremltools"] +importer-darknet = ["opencv-python"] +importer-keras = ["tensorflow", "tensorflow-estimator"] +importer-onnx = ["onnx", "onnxruntime", "torch", "torchvision", "future"] +importer-pytorch = ["torch", "torchvision", "future"] +importer-tensorflow = ["tensorflow", "tensorflow-estimator"] +importer-tflite = ["tlfite", "tensorflow", "tensorflow-estimator"] + +[tool.poetry.dev-dependencies] +autodocsumm = "^0.1" +black = "^19.10b0" +sphinx = "^3.0" +sphinx-gallery = "^0.4" +sphinx-rtd-theme = "^0.4" +matplotlib = "^3.2" +Image = "^1.5" +recommonmark = "^0.6" +pillow = "< 7" +pyformat = "^0.7" +pylint = "^2.4" +pytest = "^5.4" + +[build-system] +requires = ["poetry>=0.12"] +build-backend = "poetry.masonry.api" + +[tool.autopep8] +max_line_length = 100 diff --git a/src/relay/backend/graph_runtime_codegen.cc b/src/relay/backend/graph_runtime_codegen.cc index f35f144181c6..93439ba04f2d 100644 --- a/src/relay/backend/graph_runtime_codegen.cc +++ b/src/relay/backend/graph_runtime_codegen.cc @@ -595,14 +595,16 @@ class GraphRuntimeCodegenModule : public runtime::ModuleNode { } else if (name == "get_param_by_name") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { String key = args[0]; - ICHECK_GT(this->output_.params.count(key), 0); - *rv = this->output_.params[key].second; + auto it = this->output_.params.find(key); + CHECK(it != this->output_.params.end()) << "no such parameter " << key; + *rv = (*it).second.second; }); } else if (name == "get_param_id") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { String key = args[0]; - ICHECK_GT(this->output_.params.count(key), 0); - *rv = this->output_.params[key].first; + auto it = this->output_.params.find(key); + CHECK(it != this->output_.params.end()) << "no such parameter " << key; + *rv = (*it).second.first; }); } else if (name == "get_irmodule") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { diff --git a/src/runtime/graph/graph_runtime.cc b/src/runtime/graph/graph_runtime.cc index 293de2276621..6d08019a0275 100644 --- a/src/runtime/graph/graph_runtime.cc +++ b/src/runtime/graph/graph_runtime.cc @@ -75,7 +75,9 @@ void GraphRuntime::Init(const std::string& graph_json, tvm::runtime::Module modu ctxs_ = ctxs; lookup_linked_param_ = lookup_linked_param_func; if (lookup_linked_param_ == nullptr) { - lookup_linked_param_ = PackedFunc(&GraphRuntime::DefaultLookupLinkedParam); + lookup_linked_param_ = PackedFunc([this](TVMArgs args, TVMRetValue* rv) { + this->DefaultLookupLinkedParam(args, rv); + }); } this->SetupStorage(); this->SetupOpExecs(); @@ -249,9 +251,10 @@ void GraphRuntime::ShareParams(const GraphRuntime& other, dmlc::Stream* strm) { this->SetupOpExecs(); } -void GraphRuntime::PreAllocatedDLTensorDeleter(DLManagedTensor* tensor) { - // ctx is the DLTensor which needs to get deleted. The data member points to global const memory. - delete reinterpret_cast(tensor); +void GraphRuntime::LinkedNDArrayDeleter(Object* container) { + // container is the NDArray::Container which needs to get deleted. + // The data member points to global const memory, so it does not need deleting. 
+ delete reinterpret_cast(container); } void GraphRuntime::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { @@ -261,14 +264,16 @@ void GraphRuntime::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { TVMContext ctx = args[3]; // Get pre-linked parameter lookup function, if it was generated. When pf == nullptr, no linked // params are present. - tvm::runtime::PackedFunc pf = + if (!module_lookup_linked_param_valid_) { + module_lookup_linked_param_ = mod.GetFunction(::tvm::runtime::symbol::tvm_lookup_linked_param, true); - if (pf == nullptr) { + } + if (module_lookup_linked_param_ == nullptr) { *rv = nullptr; return; } - TVMRetValue opaque_handle = pf(storage_id); + TVMRetValue opaque_handle = module_lookup_linked_param_(storage_id); if (opaque_handle.type_code() == kTVMNullptr) { *rv = nullptr; return; @@ -279,6 +284,7 @@ void GraphRuntime::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { std::unique_ptr container{new NDArray::Container( static_cast(opaque_handle), shape_vec, template_tensor->dtype, ctx)}; + container->SetDeleter(GraphRuntime::LinkedNDArrayDeleter); *rv = NDArray(GetObjectPtr(container.release())); } diff --git a/src/runtime/graph/graph_runtime.h b/src/runtime/graph/graph_runtime.h index f1894c4830d0..627911883dfb 100644 --- a/src/runtime/graph/graph_runtime.h +++ b/src/runtime/graph/graph_runtime.h @@ -370,9 +370,9 @@ class TVM_DLL GraphRuntime : public ModuleNode { ICHECK_EQ(bitmask, 1 | 2 | 4 | 8 | 16) << "invalid format"; } /*! \brief PackedFunc to lookup a linked paramter from a local Module. */ - static void DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv); - /*! \brief Delete pre-allocated DLTensor. */ - static void PreAllocatedDLTensorDeleter(DLManagedTensor* tensor); + void DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv); + /*! \brief Delete NDArray::Container with linked (i.e. static) data. */ + static void LinkedNDArrayDeleter(Object* container); /*! \brief Setup the temporal storage */ void SetupStorage(); /*! \brief Setup the executors. */ @@ -420,6 +420,13 @@ class TVM_DLL GraphRuntime : public ModuleNode { std::vector> op_execs_; /*! \brief Linked parameter lookup function. */ PackedFunc lookup_linked_param_; + /*! \brief Module's _lookup_linked_param function, used by DefaultLookupLinkedParam. */ + PackedFunc module_lookup_linked_param_; + /*! + * \brief True when module_lookup_linked_param_ is valid. + * When the module does not include linked parmeters, module_lookup_linked_param_ will be nullptr. + */ + bool module_lookup_linked_param_valid_; }; std::vector GetAllContext(const TVMArgs& args, int ctx_start_arg); diff --git a/src/runtime/rpc/rpc_module.cc b/src/runtime/rpc/rpc_module.cc index 12510e0fac3a..cb115b4a1def 100644 --- a/src/runtime/rpc/rpc_module.cc +++ b/src/runtime/rpc/rpc_module.cc @@ -41,7 +41,9 @@ namespace runtime { static void RemoteNDArrayDeleter(Object* obj) { auto* ptr = static_cast(obj); RemoteSpace* space = static_cast(ptr->dl_tensor.data); - space->sess->FreeHandle(ptr->manager_ctx, kTVMNDArrayHandle); + if (ptr->manager_ctx != nullptr) { + space->sess->FreeHandle(ptr->manager_ctx, kTVMNDArrayHandle); + } delete space; delete ptr; } @@ -54,13 +56,11 @@ static void RemoteNDArrayDeleter(Object* obj) { * \param template_tensor An empty DLTensor whose shape and dtype fields are used to fill the newly * created array. Needed because it's difficult to pass a shape vector as a PackedFunc arg. * \param ctx Remote context used with this tensor. Must have non-zero RPCSessMask. 
- * \param deleter A function invoked when the local NDArray object is no longer used. If `handle` - * needs to be explicitly deleted after the NDArray is freed, this function should do that. - * \param deleter_ctx An opaque pointer passed to deleter to identify the tensor being deleted. + * \param remote_ndarray_handle The handle returned by RPC server to identify the NDArray. */ NDArray NDArrayFromRemoteOpaqueHandle(std::shared_ptr sess, void* handle, DLTensor* template_tensor, TVMContext ctx, - ADTObj::FDeleter deleter, void* deleter_ctx) { + void* remote_ndarray_handle) { ICHECK_EQ(sess->table_index(), GetRPCSessionIndex(ctx)) << "The TVMContext given does not belong to the given session"; RemoteSpace* space = new RemoteSpace(); @@ -70,8 +70,8 @@ NDArray NDArrayFromRemoteOpaqueHandle(std::shared_ptr sess, void* ha template_tensor->shape + template_tensor->ndim}; NDArray::Container* data = new NDArray::Container(static_cast(space), std::move(shape_vec), template_tensor->dtype, ctx); - data->manager_ctx = deleter_ctx; - data->SetDeleter(deleter); + data->manager_ctx = remote_ndarray_handle; + data->SetDeleter(RemoteNDArrayDeleter); return NDArray(GetObjectPtr(data)); } @@ -286,7 +286,7 @@ void RPCWrappedFunc::WrapRemoteReturnToValue(TVMArgs args, TVMRetValue* rv) cons void* nd_handle = args[2]; *rv = NDArrayFromRemoteOpaqueHandle(sess_, tensor->data, tensor, AddRPCSessionMask(tensor->ctx, sess_->table_index()), - RemoteNDArrayDeleter, nd_handle); + nd_handle); } else { ICHECK_EQ(args.size(), 2); *rv = args[1]; @@ -474,10 +474,9 @@ TVM_REGISTER_GLOBAL("rpc.SessTableIndex").set_body([](TVMArgs args, TVMRetValue* TVM_REGISTER_GLOBAL("tvm.rpc.NDArrayFromRemoteOpaqueHandle") .set_body_typed([](Module mod, void* remote_array, DLTensor* template_tensor, TVMContext ctx, - PackedFunc deleter) -> NDArray { + void* ndarray_handle) -> NDArray { return NDArrayFromRemoteOpaqueHandle( - RPCModuleGetSession(mod), remote_array, template_tensor, ctx, [](Object* context) {}, - nullptr); + RPCModuleGetSession(mod), remote_array, template_tensor, ctx, ndarray_handle); }); } // namespace runtime diff --git a/src/target/llvm/codegen_llvm.cc b/src/target/llvm/codegen_llvm.cc index 11da661cceac..0fc36d96747f 100644 --- a/src/target/llvm/codegen_llvm.cc +++ b/src/target/llvm/codegen_llvm.cc @@ -205,9 +205,6 @@ void CodeGenLLVM::LinkParameters(const Map params) { // resource_handle param_types.push_back(t_void_->getPointerTo(GetGlobalAddressSpace())); - // TODO(tvm-team): - // Update the function type to respect the ret_type field of f. - // Once we allow more flexibility in the PrimFunc. 
llvm::FunctionType* ftype = llvm::FunctionType::get(t_int_, param_types, false); llvm::Function* function = @@ -234,25 +231,28 @@ void CodeGenLLVM::LinkParameters(const Map params) { t_int64_); llvm::BasicBlock* default_block = llvm::BasicBlock::Create(*ctx_, "default_block", function); - llvm::SwitchInst* switch_inst = builder_->CreateSwitch(sid, default_block, params.size() + 1); - - builder_->SetInsertPoint(default_block); - { - auto ret_types_array = builder_->CreateBitCast( + auto ret_types_array = builder_->CreateBitCast( #if TVM_LLVM_VERSION >= 50 - &function->arg_begin()[4], + &function->arg_begin()[4], #else - &(*(std::next(function->arg_begin(), 4))), + &(*(std::next(function->arg_begin(), 4))), #endif - llvm::ArrayType::get(t_int_, 1)->getPointerTo()); - - builder_->CreateStore(llvm::ConstantInt::get(t_int_, kTVMNullptr), - builder_->CreateGEP(ret_types_array, zero_array_index_list)); - builder_->CreateRet(ConstInt32(kTvmErrorNoError)); - } + llvm::ArrayType::get(t_int_, 1)->getPointerTo()); + auto retval_array = builder_->CreateBitCast( +#if TVM_LLVM_VERSION >= 50 + &function->arg_begin()[3], +#else + &(*std::next(function->arg_begin(), 3)), +#endif + llvm::ArrayType::get(t_void_->getPointerTo(GetGlobalAddressSpace()), 1)->getPointerTo()); + llvm::SwitchInst* switch_inst = builder_->CreateSwitch(sid, default_block, params.size() + 1); - llvm::raw_os_ostream os{std::cout}; + builder_->SetInsertPoint(default_block); + builder_->CreateStore(llvm::ConstantInt::get(t_int_, kTVMNullptr), + builder_->CreateInBoundsGEP(ret_types_array, zero_array_index_list)); + builder_->CreateRet(ConstInt32(kTvmErrorNoError)); + // Add data to the global section. for (auto kv : params) { auto array = NDArrayToLLVMArray(ctx_, kv.second->param); std::string symbol_name = std::string(::tvm::runtime::symbol::tvm_param_prefix) + kv.first; @@ -263,25 +263,11 @@ void CodeGenLLVM::LinkParameters(const Map params) { switch_inst->addCase( llvm::cast(llvm::ConstantInt::get(t_int64_, kv.second->id)), case_block); builder_->SetInsertPoint(case_block); - auto retval_array = builder_->CreateBitCast( -#if TVM_LLVM_VERSION >= 50 - &function->arg_begin()[3], -#else - &(*std::next(function->arg_begin(), 3)), -#endif - llvm::ArrayType::get(t_void_->getPointerTo(GetGlobalAddressSpace()), 1)->getPointerTo()); builder_->CreateStore( builder_->CreatePointerCast(param_symbol, t_void_->getPointerTo(GetGlobalAddressSpace())), - builder_->CreateGEP(retval_array, zero_array_index_list)); - auto ret_types_array = builder_->CreateBitCast( -#if TVM_LLVM_VERSION >= 50 - &function->arg_begin()[4], -#else - &(*std::next(function->arg_begin(), 4)), -#endif - llvm::ArrayType::get(t_int_, 1)->getPointerTo()); + builder_->CreateInBoundsGEP(retval_array, zero_array_index_list)); builder_->CreateStore(llvm::ConstantInt::get(t_int_, kTVMOpaqueHandle), - builder_->CreateGEP(ret_types_array, zero_array_index_list)); + builder_->CreateInBoundsGEP(ret_types_array, zero_array_index_list)); builder_->CreateRet(ConstInt32(0)); } } diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index c27a97d2f611..8a675efeded1 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -75,6 +75,8 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: llvm::Type* element_type = nullptr; auto arr_type = arr.DataType(); + CHECK(arr.IsContiguous()) << "CodegenParams: only support contiguous arrays"; + CHECK_EQ(arr->ctx.device_type, kDLCPU) << "CodegenParams: only support 
contiguous arrays"; CHECK_EQ(arr_type.lanes(), 1) << "CodegenParams: only support generating 1-lane parameters; saw " << arr_type.lanes(); @@ -84,7 +86,6 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: num_elements *= shape_elem; } - std::unique_ptr tensor(arr.ToDLPack()); std::vector elements; switch (arr_type.code()) { @@ -97,16 +98,16 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: switch (arr_type.bits()) { case 8: - BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + BuildLLVMVector(element_type, arr->data, num_elements, &elements); break; case 16: - BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + BuildLLVMVector(element_type, arr->data, num_elements, &elements); break; case 32: - BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + BuildLLVMVector(element_type, arr->data, num_elements, &elements); break; case 64: - BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + BuildLLVMVector(element_type, arr->data, num_elements, &elements); break; default: ICHECK(false) << "should not get here"; @@ -123,16 +124,16 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: switch (arr_type.bits()) { case 8: - BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + BuildLLVMVector(element_type, arr->data, num_elements, &elements); break; case 16: - BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + BuildLLVMVector(element_type, arr->data, num_elements, &elements); break; case 32: - BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + BuildLLVMVector(element_type, arr->data, num_elements, &elements); break; case 64: - BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + BuildLLVMVector(element_type, arr->data, num_elements, &elements); break; default: ICHECK(false) << "should not get here"; @@ -144,11 +145,11 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: switch (arr_type.bits()) { case 32: element_type = llvm::Type::getFloatTy(*ctx); - BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + BuildLLVMVector(element_type, arr->data, num_elements, &elements); break; case 64: element_type = llvm::Type::getDoubleTy(*ctx); - BuildLLVMVector(element_type, tensor->dl_tensor.data, num_elements, &elements); + BuildLLVMVector(element_type, arr->data, num_elements, &elements); break; default: CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " From 0fc11f1a9c362006ba4f85a17a1fdcdd778346d0 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Sun, 22 Nov 2020 13:18:34 -0800 Subject: [PATCH 46/60] git-clang-format --- src/runtime/graph/graph_runtime.cc | 7 +- src/runtime/rpc/rpc_module.cc | 4 +- src/target/llvm/codegen_llvm.cc | 14 ++-- src/target/llvm/codegen_params.cc | 104 ++++++++++++++--------------- 4 files changed, 64 insertions(+), 65 deletions(-) diff --git a/src/runtime/graph/graph_runtime.cc b/src/runtime/graph/graph_runtime.cc index 6d08019a0275..38815396b5e5 100644 --- a/src/runtime/graph/graph_runtime.cc +++ b/src/runtime/graph/graph_runtime.cc @@ -75,9 +75,8 @@ void GraphRuntime::Init(const std::string& graph_json, tvm::runtime::Module modu ctxs_ = ctxs; lookup_linked_param_ = lookup_linked_param_func; if (lookup_linked_param_ == nullptr) { - lookup_linked_param_ = 
PackedFunc([this](TVMArgs args, TVMRetValue* rv) { - this->DefaultLookupLinkedParam(args, rv); - }); + lookup_linked_param_ = PackedFunc( + [this](TVMArgs args, TVMRetValue* rv) { this->DefaultLookupLinkedParam(args, rv); }); } this->SetupStorage(); this->SetupOpExecs(); @@ -266,7 +265,7 @@ void GraphRuntime::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) { // params are present. if (!module_lookup_linked_param_valid_) { module_lookup_linked_param_ = - mod.GetFunction(::tvm::runtime::symbol::tvm_lookup_linked_param, true); + mod.GetFunction(::tvm::runtime::symbol::tvm_lookup_linked_param, true); } if (module_lookup_linked_param_ == nullptr) { *rv = nullptr; diff --git a/src/runtime/rpc/rpc_module.cc b/src/runtime/rpc/rpc_module.cc index cb115b4a1def..4f721e122a4c 100644 --- a/src/runtime/rpc/rpc_module.cc +++ b/src/runtime/rpc/rpc_module.cc @@ -475,8 +475,8 @@ TVM_REGISTER_GLOBAL("rpc.SessTableIndex").set_body([](TVMArgs args, TVMRetValue* TVM_REGISTER_GLOBAL("tvm.rpc.NDArrayFromRemoteOpaqueHandle") .set_body_typed([](Module mod, void* remote_array, DLTensor* template_tensor, TVMContext ctx, void* ndarray_handle) -> NDArray { - return NDArrayFromRemoteOpaqueHandle( - RPCModuleGetSession(mod), remote_array, template_tensor, ctx, ndarray_handle); + return NDArrayFromRemoteOpaqueHandle(RPCModuleGetSession(mod), remote_array, template_tensor, + ctx, ndarray_handle); }); } // namespace runtime diff --git a/src/target/llvm/codegen_llvm.cc b/src/target/llvm/codegen_llvm.cc index 0fc36d96747f..d10ed311949c 100644 --- a/src/target/llvm/codegen_llvm.cc +++ b/src/target/llvm/codegen_llvm.cc @@ -233,18 +233,18 @@ void CodeGenLLVM::LinkParameters(const Map params) { llvm::BasicBlock* default_block = llvm::BasicBlock::Create(*ctx_, "default_block", function); auto ret_types_array = builder_->CreateBitCast( #if TVM_LLVM_VERSION >= 50 - &function->arg_begin()[4], + &function->arg_begin()[4], #else - &(*(std::next(function->arg_begin(), 4))), + &(*(std::next(function->arg_begin(), 4))), #endif - llvm::ArrayType::get(t_int_, 1)->getPointerTo()); - auto retval_array = builder_->CreateBitCast( + llvm::ArrayType::get(t_int_, 1)->getPointerTo()); + auto retval_array = builder_->CreateBitCast( #if TVM_LLVM_VERSION >= 50 - &function->arg_begin()[3], + &function->arg_begin()[3], #else - &(*std::next(function->arg_begin(), 3)), + &(*std::next(function->arg_begin(), 3)), #endif - llvm::ArrayType::get(t_void_->getPointerTo(GetGlobalAddressSpace()), 1)->getPointerTo()); + llvm::ArrayType::get(t_void_->getPointerTo(GetGlobalAddressSpace()), 1)->getPointerTo()); llvm::SwitchInst* switch_inst = builder_->CreateSwitch(sid, default_block, params.size() + 1); builder_->SetInsertPoint(default_block); diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index 8a675efeded1..9588f876abed 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -43,30 +43,30 @@ struct LLVMConstantGetter { }; template -struct LLVMConstantGetter::value && std::is_signed::value)>> { +struct LLVMConstantGetter< + T, std::enable_if_t<(std::is_integral::value && std::is_signed::value)>> { static llvm::Constant* getElement(llvm::Type* ty, T t) { return llvm::ConstantInt::getSigned(ty, t); } }; template -struct LLVMConstantGetter::value && !std::is_signed::value)>> { - static llvm::Constant* getElement(llvm::Type* ty, T t) { - return llvm::ConstantInt::get(ty, t); - } +struct LLVMConstantGetter< + T, std::enable_if_t<(std::is_integral::value && !std::is_signed::value)>> { + static 
llvm::Constant* getElement(llvm::Type* ty, T t) { return llvm::ConstantInt::get(ty, t); } }; template struct LLVMConstantGetter::value>> { - static llvm::Constant* getElement(llvm::Type* ty, T t) { - return llvm::ConstantFP::get(ty, t); - } + static llvm::Constant* getElement(llvm::Type* ty, T t) { return llvm::ConstantFP::get(ty, t); } }; template ::value>> -void BuildLLVMVector(llvm::Type* element_type, void* tensor_data, size_t num_elements, std::vector* elements) { +void BuildLLVMVector(llvm::Type* element_type, void* tensor_data, size_t num_elements, + std::vector* elements) { for (size_t i = 0; i < num_elements; i++) { - auto llvm_element = LLVMConstantGetter::getElement(element_type, static_cast(tensor_data)[i]); + auto llvm_element = + LLVMConstantGetter::getElement(element_type, static_cast(tensor_data)[i]); elements->emplace_back(llvm_element); } } @@ -97,21 +97,21 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: element_type = llvm::Type::getIntNTy(*ctx, arr_type.bits()); switch (arr_type.bits()) { - case 8: - BuildLLVMVector(element_type, arr->data, num_elements, &elements); - break; - case 16: - BuildLLVMVector(element_type, arr->data, num_elements, &elements); - break; - case 32: - BuildLLVMVector(element_type, arr->data, num_elements, &elements); - break; - case 64: - BuildLLVMVector(element_type, arr->data, num_elements, &elements); - break; - default: - ICHECK(false) << "should not get here"; - break; + case 8: + BuildLLVMVector(element_type, arr->data, num_elements, &elements); + break; + case 16: + BuildLLVMVector(element_type, arr->data, num_elements, &elements); + break; + case 32: + BuildLLVMVector(element_type, arr->data, num_elements, &elements); + break; + case 64: + BuildLLVMVector(element_type, arr->data, num_elements, &elements); + break; + default: + ICHECK(false) << "should not get here"; + break; } break; @@ -123,38 +123,38 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: element_type = llvm::Type::getIntNTy(*ctx, arr_type.bits()); switch (arr_type.bits()) { - case 8: - BuildLLVMVector(element_type, arr->data, num_elements, &elements); - break; - case 16: - BuildLLVMVector(element_type, arr->data, num_elements, &elements); - break; - case 32: - BuildLLVMVector(element_type, arr->data, num_elements, &elements); - break; - case 64: - BuildLLVMVector(element_type, arr->data, num_elements, &elements); - break; - default: - ICHECK(false) << "should not get here"; - break; + case 8: + BuildLLVMVector(element_type, arr->data, num_elements, &elements); + break; + case 16: + BuildLLVMVector(element_type, arr->data, num_elements, &elements); + break; + case 32: + BuildLLVMVector(element_type, arr->data, num_elements, &elements); + break; + case 64: + BuildLLVMVector(element_type, arr->data, num_elements, &elements); + break; + default: + ICHECK(false) << "should not get here"; + break; } break; case runtime::DataType::TypeCode::kFloat: switch (arr_type.bits()) { - case 32: - element_type = llvm::Type::getFloatTy(*ctx); - BuildLLVMVector(element_type, arr->data, num_elements, &elements); - break; - case 64: - element_type = llvm::Type::getDoubleTy(*ctx); - BuildLLVMVector(element_type, arr->data, num_elements, &elements); - break; - default: - CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " - << arr_type.bits() << "-bit array"; - break; + case 32: + element_type = llvm::Type::getFloatTy(*ctx); + BuildLLVMVector(element_type, arr->data, num_elements, &elements); + 
break; + case 64: + element_type = llvm::Type::getDoubleTy(*ctx); + BuildLLVMVector(element_type, arr->data, num_elements, &elements); + break; + default: + CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " + << arr_type.bits() << "-bit array"; + break; } break; From 5bf324695af48868f162d159853465f26b740419 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Sun, 22 Nov 2020 18:30:01 -0800 Subject: [PATCH 47/60] actually use storage_id, not graph node id, for param id --- src/relay/backend/graph_runtime_codegen.cc | 2 +- tests/python/unittest/test_link_params.py | 87 +++++++++++----------- 2 files changed, 46 insertions(+), 43 deletions(-) diff --git a/src/relay/backend/graph_runtime_codegen.cc b/src/relay/backend/graph_runtime_codegen.cc index 93439ba04f2d..7ed150495104 100644 --- a/src/relay/backend/graph_runtime_codegen.cc +++ b/src/relay/backend/graph_runtime_codegen.cc @@ -320,7 +320,7 @@ class GraphRuntimeCodegen : public backend::MemoizedExprTranslatorvalue; params_[name] = op->data; return to_return; } diff --git a/tests/python/unittest/test_link_params.py b/tests/python/unittest/test_link_params.py index 4b6692d79d10..65316aad866d 100644 --- a/tests/python/unittest/test_link_params.py +++ b/tests/python/unittest/test_link_params.py @@ -31,7 +31,10 @@ from tvm.contrib import utils -TEST_SHAPE = (3, 4, 5) +INPUT_SHAPE = (1, 3, 16, 16) + + +KERNEL_SHAPE = (3, 3, 3, 3) # The data types that are linkable. @@ -55,23 +58,22 @@ def dtype_info(dtype): RANDOM_TENSOR_START = None -def _make_random_tensor(dtype): - """Create a random test tensor of shape TEST_SHAPE and the given dtype.""" +def _make_random_tensor(dtype, shape): + """Create a random test tensor with given shape and dtype.""" global RAND_SEED if RANDOM_TENSOR_START is not None: to_return = np.arange( - RANDOM_TENSOR_START, RANDOM_TENSOR_START + np.prod(TEST_SHAPE), dtype=dtype - ).reshape(TEST_SHAPE) - RAND_SEED += np.prod(TEST_SHAPE) + RANDOM_TENSOR_START, RANDOM_TENSOR_START + np.prod(shape), dtype=dtype + ).reshape(shape) + RAND_SEED += np.prod(shape) return to_return dinfo = dtype_info(dtype) if "int" in dtype: - return np.random.randint(dinfo.min, dinfo.max, TEST_SHAPE, dtype=dtype) + return np.random.randint(dinfo.min, dinfo.max, shape, dtype=dtype) else: - to_return = np.random.uniform(0, dinfo.max, TEST_SHAPE) - # to_return = dinfo.min + (np.random.random(TEST_SHAPE) * dinfo.max) - np.reshape(to_return, np.prod(TEST_SHAPE))[::2] *= -1 + to_return = np.random.uniform(0, dinfo.max, shape).astype(dtype) + np.reshape(to_return, np.prod(shape))[::2] *= -1 return to_return @@ -94,10 +96,11 @@ def _lookup_sid(graph, name): num_outputs_seen = 0 for i, n in enumerate(graph["nodes"]): if n["name"] == name: + print('sid', name, graph["attrs"]["storage_id"][1], num_outputs_seen) return graph["attrs"]["storage_id"][1][num_outputs_seen] else: if "attrs" in n and "num_outputs" in n["attrs"]: - num_outputs_seen += n["attrs"]["num_outputs"] + num_outputs_seen += int(n["attrs"]["num_outputs"]) else: num_outputs_seen += 1 @@ -122,15 +125,14 @@ def _verify_linked_param(dtype, lib, mod, graph, name): # NOTE: query_imports=True because when loading a module from disk (i.e. for C backend), # a GraphRuntimeFactory module is created instead of the module itself. 
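Patch 47's fix turns on the distinction between a parameter's node index in the
graph JSON and its storage id. The sketch below mirrors the test's _lookup_sid
helper on a small hand-written graph dict; the node names and ids are invented
for illustration, not real tvm.relay.build output.

# Map a parameter name to its storage id by walking the graph nodes and
# counting outputs, then indexing the flattened storage_id list.
graph = {
    "nodes": [
        {"name": "rand_input", "attrs": {"num_outputs": "1"}},
        {"name": "p0", "attrs": {"num_outputs": "1"}},
        {"name": "fused_nn_conv2d", "attrs": {"num_outputs": "1"}},
    ],
    "attrs": {"storage_id": ["list_int", [0, 2, 1]]},
}

def lookup_sid(graph, name):
    outputs_seen = 0
    for node in graph["nodes"]:
        if node["name"] == name:
            return graph["attrs"]["storage_id"][1][outputs_seen]
        # num_outputs is serialized as a string in the graph JSON.
        outputs_seen += int(node.get("attrs", {}).get("num_outputs", 1))
    raise KeyError(name)

assert lookup_sid(graph, "p0") == 2  # the storage id, not the node index (1)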
param_ptr = mod.get_function("_lookup_linked_param", True)(sid) - print("verify", param_ptr) - arr_data = (_get_ctypes_dtype(dtype) * np.prod(TEST_SHAPE)).from_address(param_ptr.value) gen_param = lib.params[name] - print("gen param dtype", gen_param.dtype) + arr_data = (_get_ctypes_dtype(dtype) * np.prod(gen_param.shape)).from_address(param_ptr.value) arr = np.ndarray(shape=gen_param.shape, dtype=gen_param.dtype, buffer=arr_data, order="C") if "int" in gen_param.dtype: np.testing.assert_equal(gen_param.asnumpy(), arr) else: np.testing.assert_allclose(gen_param.asnumpy(), arr) + return dtype == gen_param.dtype def _make_mod_and_params(dtype): @@ -139,27 +141,31 @@ def _make_mod_and_params(dtype): param_init = {} def _add_decl(name, dtype): - param_decls[name] = f"%{name} : Tensor[{TEST_SHAPE}, {dtype}]" - param_init[name] = _make_random_tensor(dtype) + param_decls[name] = f"%{name} : Tensor[{KERNEL_SHAPE}, {dtype}]" + param_init[name] = _make_random_tensor(dtype, KERNEL_SHAPE) + # Add several parameters so that the number of parameters _add_decl(f"{dtype}_a", dtype) _add_decl(f"{dtype}_b", dtype) mod_lines = [ '#[version = "0.0.5"]', - f"def @main(%rand_input : Tensor[{TEST_SHAPE}, {dtype}], { ', '.join(param_decls.values()) } ) {{", + f"def @main(%rand_input : Tensor[{INPUT_SHAPE}, {dtype}], { ', '.join(param_decls.values()) } ) {{", + # This program ensures that GraphPlanMemory alternates between the same two storage IDs for a + # while. In doing this, it ensures that param %{dtype}_b will be placed into the graph at an + # index unequal to its storage_id. This ensures that GraphRuntimeCodegen encodes the storage_id + # and not the parameter index into the graph. + (f' %0 = nn.conv2d(%rand_input, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", ' + f'kernel_size=[3, 3], out_dtype="{dtype}");'), + (f' %1 = nn.conv2d(%0, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", ' + f'kernel_size=[3, 3], out_dtype="{dtype}");'), + (f' %2 = nn.conv2d(%1, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", ' + f'kernel_size=[3, 3], out_dtype="{dtype}");'), + (f' %3 = nn.conv2d(%2, %{dtype}_b, data_layout="NCHW", kernel_layout="OIHW", ' + f'kernel_size=[3, 3], out_dtype="{dtype}");'), + " %3", + "}", ] - if "int" in dtype: - mod_lines.append( - # f' %0 = bitwise_xor(%rand_input, bitwise_xor(%{dtype}_a, %{dtype}_b));') - f" %0 = add(%rand_input, %{dtype}_a);" - ) - else: - mod_lines.append( - f' %0 = cast(add(%rand_input, cast(add(%{dtype}_a, %{dtype}_b), dtype="{dtype}")), dtype="{dtype}");' - ) - # f' %0 = cast(add(%rand_input, %{dtype}_a), dtype="{dtype}");') - mod_lines.extend([" %0", "}"]) mod = tvm.parser.fromtext("\n".join(mod_lines)) return mod, param_init @@ -169,16 +175,17 @@ def _add_decl(name, dtype): def test_llvm_link_params(): for dtype in LINKABLE_DTYPES: mod, param_init = _make_mod_and_params(dtype) - rand_input = _make_random_tensor(dtype) + rand_input = _make_random_tensor(dtype, INPUT_SHAPE) main_func = mod["main"] target = "llvm --runtime=c --system-lib --link-params" with tvm.transform.PassContext(opt_level=3): lib = tvm.relay.build(mod, target, params=param_init) - assert set(lib.params.keys()) == {"p0"} # NOTE: op folded + assert set(lib.params.keys()) == {"p0", "p1"} # NOTE: op folded + print('graph', lib.graph_json) graph = json.loads(lib.graph_json) for p in lib.params: - _verify_linked_param(dtype, lib, lib.lib, graph, p) + _verify_linked_param(dtype, lib, lib.lib, graph, p) or found_one # Wrap in function to explicitly deallocate the runtime. 
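The verification helper above turns the raw address handed back by
_lookup_linked_param into a numpy view via ctypes. The snippet below shows the
same trick on an ordinary numpy buffer standing in for a linked parameter; the
shape and dtype are arbitrary.

import ctypes
import numpy as np

# Stand-in for a parameter constant living at a fixed address in the binary.
param = np.arange(27, dtype="int16").reshape(3, 3, 3)
param_ptr = param.ctypes.data  # bare integer address, like param_ptr.value

# Rebuild an array view from the address alone, as _verify_linked_param does.
buf = (ctypes.c_int16 * param.size).from_address(param_ptr)
view = np.ndarray(shape=param.shape, dtype=param.dtype, buffer=buf, order="C")

np.testing.assert_equal(param, view)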
def _run_linked(lib): @@ -244,18 +251,18 @@ def test_c_link_params(): temp_dir = utils.tempdir() for dtype in LINKABLE_DTYPES: mod, param_init = _make_mod_and_params(dtype) - rand_input = _make_random_tensor(dtype) + rand_input = _make_random_tensor(dtype, INPUT_SHAPE) main_func = mod["main"] target = "c --link-params" with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): lib = tvm.relay.build(mod, target, params=param_init) - assert set(lib.params.keys()) == {"p0"} # NOTE: op folded + assert set(lib.params.keys()) == {"p0", "p1"} # NOTE: op folded src = lib.lib.get_source() lib.lib.save("test.c", "cc") c_dtype = _get_c_datatype(dtype) src_lines = src.split("\n") - param = lib.params["p0"].asnumpy().reshape(np.prod(TEST_SHAPE)) + param = lib.params["p0"].asnumpy().reshape(np.prod(KERNEL_SHAPE)) param_def = f"static const {c_dtype} __tvm_param__p0[{np.prod(param.shape)}] = {{" for i, line in enumerate(src_lines): if line == param_def: @@ -269,7 +276,6 @@ def test_c_link_params(): if dtype.startswith("int"): width += 1 # Account for sign - print("check printing of", param) while "};" not in src_lines[i]: for match in HEX_NUM_RE.finditer(src_lines[i]): assert match.group() == _format_c_value(dtype, width, param[cursor]), ( @@ -296,7 +302,6 @@ def test_c_link_params(): def _run_linked(lib_mod): graph_rt = tvm.contrib.graph_runtime.GraphModule(lib_mod["default"](tvm.cpu(0))) graph_rt.set_input("rand_input", rand_input) # NOTE: params not required. - print("linked", graph_rt.get_input("p0")) graph_rt.run() return graph_rt.get_output(0) @@ -312,8 +317,6 @@ def _run_linked(lib_mod): lib.export_library(lib_path) lib_mod = tvm.runtime.load_module(lib_path) - print("unlinked", params) - def _run_unlinked(lib_mod): graph_rt = tvm.contrib.graph_runtime.GraphModule(lib_mod["default"](tvm.cpu(0))) graph_rt.set_input("rand_input", rand_input, **params) @@ -334,12 +337,12 @@ def test_crt_link_params(): for dtype in LINKABLE_DTYPES: mod, param_init = _make_mod_and_params(dtype) - rand_input = _make_random_tensor(dtype) + rand_input = _make_random_tensor(dtype, INPUT_SHAPE) main_func = mod["main"] target = "c -mcpu=native --system-lib --runtime=c --link-params" with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): graph_json, lib, params = tvm.relay.build(mod, target, params=param_init) - assert set(params.keys()) == {"p0"} # NOTE: op folded + assert set(params.keys()) == {"p0", "p1"} # NOTE: op folded workspace = tvm.micro.Workspace() compiler = tvm.micro.DefaultCompiler(target=target) @@ -383,9 +386,9 @@ def _run_unlinked(lib): graph_rt = tvm.contrib.graph_runtime.create(graph_json, mod, tvm.cpu(0)) graph_rt.set_input("rand_input", rand_input, **lowered_params) graph_rt.run() - return graph_rt.get_output(0) + return graph_rt.get_output(0).asnumpy() - unlinked_output = _run_unlinked(lib).asnumpy() + unlinked_output = _run_unlinked(lib) if "int" in dtype: np.testing.assert_equal(unlinked_output, linked_output) From 953fb98eba35bf8d109adc13278026299c80d8c0 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Sun, 22 Nov 2020 18:30:40 -0800 Subject: [PATCH 48/60] demote log level --- src/runtime/graph/graph_runtime.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/graph/graph_runtime.cc b/src/runtime/graph/graph_runtime.cc index 38815396b5e5..e51998574eb2 100644 --- a/src/runtime/graph/graph_runtime.cc +++ b/src/runtime/graph/graph_runtime.cc @@ -346,10 +346,10 @@ void GraphRuntime::SetupStorage() { }); TVMContext 
ctx = cit == ctxs_.end() ? ctxs_[0] : *cit; if (pit.linked_param.defined()) { - LOG(INFO) << "param " << storage_pool_.size() << " pre-loaded!"; + LOG(DEBUG) << "param " << storage_pool_.size() << " pre-loaded!"; storage_pool_.push_back(pit.linked_param); } else { - LOG(INFO) << "param " << storage_pool_.size() << " blank!"; + LOG(DEBUG) << "param " << storage_pool_.size() << " blank!"; std::vector shape; shape.push_back(static_cast(pit.size + 3) / 4); storage_pool_.push_back(NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx)); From 03b79e9c295c13069b33edb58438f879acdcdde8 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Sun, 22 Nov 2020 18:31:03 -0800 Subject: [PATCH 49/60] black format --- tests/python/unittest/test_link_params.py | 28 +++++++++++++++-------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/tests/python/unittest/test_link_params.py b/tests/python/unittest/test_link_params.py index 65316aad866d..7b6910b0ea57 100644 --- a/tests/python/unittest/test_link_params.py +++ b/tests/python/unittest/test_link_params.py @@ -96,7 +96,7 @@ def _lookup_sid(graph, name): num_outputs_seen = 0 for i, n in enumerate(graph["nodes"]): if n["name"] == name: - print('sid', name, graph["attrs"]["storage_id"][1], num_outputs_seen) + print("sid", name, graph["attrs"]["storage_id"][1], num_outputs_seen) return graph["attrs"]["storage_id"][1][num_outputs_seen] else: if "attrs" in n and "num_outputs" in n["attrs"]: @@ -155,14 +155,22 @@ def _add_decl(name, dtype): # while. In doing this, it ensures that param %{dtype}_b will be placed into the graph at an # index unequal to its storage_id. This ensures that GraphRuntimeCodegen encodes the storage_id # and not the parameter index into the graph. - (f' %0 = nn.conv2d(%rand_input, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", ' - f'kernel_size=[3, 3], out_dtype="{dtype}");'), - (f' %1 = nn.conv2d(%0, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", ' - f'kernel_size=[3, 3], out_dtype="{dtype}");'), - (f' %2 = nn.conv2d(%1, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", ' - f'kernel_size=[3, 3], out_dtype="{dtype}");'), - (f' %3 = nn.conv2d(%2, %{dtype}_b, data_layout="NCHW", kernel_layout="OIHW", ' - f'kernel_size=[3, 3], out_dtype="{dtype}");'), + ( + f' %0 = nn.conv2d(%rand_input, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", ' + f'kernel_size=[3, 3], out_dtype="{dtype}");' + ), + ( + f' %1 = nn.conv2d(%0, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", ' + f'kernel_size=[3, 3], out_dtype="{dtype}");' + ), + ( + f' %2 = nn.conv2d(%1, %{dtype}_a, data_layout="NCHW", kernel_layout="OIHW", ' + f'kernel_size=[3, 3], out_dtype="{dtype}");' + ), + ( + f' %3 = nn.conv2d(%2, %{dtype}_b, data_layout="NCHW", kernel_layout="OIHW", ' + f'kernel_size=[3, 3], out_dtype="{dtype}");' + ), " %3", "}", ] @@ -182,7 +190,7 @@ def test_llvm_link_params(): lib = tvm.relay.build(mod, target, params=param_init) assert set(lib.params.keys()) == {"p0", "p1"} # NOTE: op folded - print('graph', lib.graph_json) + print("graph", lib.graph_json) graph = json.loads(lib.graph_json) for p in lib.params: _verify_linked_param(dtype, lib, lib.lib, graph, p) or found_one From 7a3a9dd16953bf0a9eb4fedb9007a8b41a127d88 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Sun, 22 Nov 2020 19:05:16 -0800 Subject: [PATCH 50/60] rm debug logs --- src/runtime/graph/graph_runtime.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/runtime/graph/graph_runtime.cc b/src/runtime/graph/graph_runtime.cc index e51998574eb2..0033a1d5d8d2 100644 
--- a/src/runtime/graph/graph_runtime.cc +++ b/src/runtime/graph/graph_runtime.cc @@ -346,10 +346,8 @@ void GraphRuntime::SetupStorage() { }); TVMContext ctx = cit == ctxs_.end() ? ctxs_[0] : *cit; if (pit.linked_param.defined()) { - LOG(DEBUG) << "param " << storage_pool_.size() << " pre-loaded!"; storage_pool_.push_back(pit.linked_param); } else { - LOG(DEBUG) << "param " << storage_pool_.size() << " blank!"; std::vector shape; shape.push_back(static_cast(pit.size + 3) / 4); storage_pool_.push_back(NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, ctx)); From e4296eff29f929cc7f3343eff21bc1e8bc26eb9f Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 23 Nov 2020 15:10:43 -0800 Subject: [PATCH 51/60] address kparzysz comments --- src/target/llvm/codegen_params.cc | 8 +- src/target/source/codegen_params.cc | 230 +++++++++------------------- 2 files changed, 72 insertions(+), 166 deletions(-) diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index 9588f876abed..a5ddb02ac35b 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -64,11 +64,9 @@ struct LLVMConstantGetter::value>> template ::value>> void BuildLLVMVector(llvm::Type* element_type, void* tensor_data, size_t num_elements, std::vector* elements) { - for (size_t i = 0; i < num_elements; i++) { - auto llvm_element = - LLVMConstantGetter::getElement(element_type, static_cast(tensor_data)[i]); - elements->emplace_back(llvm_element); - } + elements->resize(num_elements, nullptr); + std::transform(static_cast(tensor_data), static_cast(tensor_data) + num_elements, + elements->begin(), [&](T t) { return LLVMConstantGetter::getElement(element_type, t); }); } llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime::NDArray arr) { diff --git a/src/target/source/codegen_params.cc b/src/target/source/codegen_params.cc index 74524b3545d3..80d131e7360d 100644 --- a/src/target/source/codegen_params.cc +++ b/src/target/source/codegen_params.cc @@ -42,6 +42,62 @@ class DLManagedTensorDeleter { static constexpr const int kMaxLineLength = 80; +template ::value>> +void PrintArray(void* data, size_t num_elements, int elements_per_row, std::string indent_str, + std::ostream& os) { + for (size_t i = 0; i < num_elements; i++) { + int64_t elem = static_cast(data)[i]; + if (std::is_signed::value) { + uint64_t to_print; + if (elem < 0) { + os << "-"; + to_print = -elem; + } else { + os << "+"; + to_print = elem; + } + os << "0x" << std::setw(sizeof(T) * 8 / 4) << static_cast(to_print); + } else { + os << "0x" << std::setw(sizeof(T) * 8 / 4) << static_cast(elem); + } + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } + } +} + +template ::value>> +void PrintArray(void* data, size_t num_elements, int one_element_size_bytes, int elements_per_row, + std::string indent_str, std::ostream& os) { + std::stringstream ss; + ss.setf(std::ios::hex | (std::is_signed::value ? std::ios::showbase : 0) | std::ios::fixed | std::ios::scientific, + std::ios::basefield | std::ios::showbase | std::ios::floatfield); + for (int i = 0; i < num_elements; i++) { + T elem = static_cast(data)[i]; + if (std::isinf(elem)) { + // C99 standard. + os << (elem < 0 ? "-" : " ") << std::setw(one_element_size_bytes - 1) << "INFINITY"; + } else if (std::isnan(elem)) { + // GNU extension, implemenatation-dependent. 
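Patch 51 folds the per-width integer printing loops into a single PrintArray
template: each element becomes an explicit sign plus a fixed-width hex
magnitude, and rows wrap so the generated C stays within the line-length
limit. A rough Python equivalent of that formatting (the values and row width
here are made up):

def format_int_elements(values, bits, per_row):
    pieces = []
    for i, v in enumerate(values):
        sign = "-" if v < 0 else "+"
        pieces.append("{}0x{:0{w}x}".format(sign, abs(v), w=bits // 4))
        if i < len(values) - 1:
            pieces.append(", ")
        if (i + 1) % per_row == 0:
            pieces.append("\n")
    return "".join(pieces)

print(format_int_elements([-3, -2, -1, 0, 1, 2, 3], bits=16, per_row=4))
# -0x0003, -0x0002, -0x0001, +0x0000,
# +0x0001, +0x0002, +0x0003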
+ os << std::setw(one_element_size_bytes) << "NAN"; + } else { + ss << elem; + os << std::setw(one_element_size_bytes) << ss.str(); + ss.str(""); + } + if (i < num_elements - 1) { + os << ", "; + } + if (((i + 1) % elements_per_row) == 0) { + os << "\n" << indent_str; + } + } +} + + void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& os) { auto arr_type = arr.DataType(); CHECK_EQ(arr_type.lanes(), 1) << "CodegenParams: only support generating 1-lane parameters; saw " @@ -49,13 +105,13 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& int one_element_size_bytes = (arr_type.bits() / 4) + (2 /* "0x" */) + (2 /* ", " */); if (arr_type.code() == runtime::DataType::TypeCode::kInt) { - one_element_size_bytes += 1; // sign bit + one_element_size_bytes += 1; // sign character if (arr_type.bits() > 32) { - one_element_size_bytes += 2; // "UL" + one_element_size_bytes += 2; // "LL" } } else if (arr_type.code() == runtime::DataType::TypeCode::kUInt) { if (arr_type.bits() > 32) { - one_element_size_bytes += 1; // "L" + one_element_size_bytes += 3; // "ULL" } } else if (arr_type.code() == runtime::DataType::TypeCode::kFloat) { // Floats and doubles are printed as hex but casted. @@ -89,85 +145,14 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& arr_type.bits() == 64) << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " << arr_type.bits() << "-bit array"; - if (arr_type.bits() == 8) { - for (int i = 0; i < num_elements; i++) { - // NOTE: for special types int8_t and uint8_t, need to promote to int type to avoid - // printing as a char. - int8_t elem = static_cast(tensor->dl_tensor.data)[i]; - uint16_t to_print; - if (elem < 0) { - os << "-"; - to_print = -elem; - } else { - os << "+"; - to_print = elem; - } - os << "0x" << std::setw(2) << +static_cast(to_print); - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } + PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); } else if (arr_type.bits() == 16) { - for (int i = 0; i < num_elements; i++) { - int16_t elem = static_cast(tensor->dl_tensor.data)[i]; - uint16_t to_print; - if (elem < 0) { - os << "-"; - to_print = -elem; - } else { - os << "+"; - to_print = elem; - } - os << "0x" << std::setw(4) << to_print; - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } + PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); } else if (arr_type.bits() == 32) { - for (int i = 0; i < num_elements; i++) { - int32_t elem = static_cast(tensor->dl_tensor.data)[i]; - uint32_t to_print; - if (elem < 0) { - os << "-"; - to_print = -elem; - } else { - os << "+"; - to_print = elem; - } - os << "0x" << std::setw(8) << to_print; - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } + PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); } else if (arr_type.bits() == 64) { - for (int i = 0; i < num_elements; i++) { - int64_t elem = static_cast(tensor->dl_tensor.data)[i]; - uint64_t to_print; - if (elem < 0) { - os << "-"; - to_print = -elem; - } else { - os << "+"; - to_print = elem; - } - os << "0x" << std::setw(16) << to_print; - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << 
indent_str; - } - } + PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); } else { CHECK(false) << "should not get here"; } @@ -180,102 +165,25 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& << arr_type.bits() << "-bit array"; if (arr_type.bits() == 8) { - for (int i = 0; i < num_elements; i++) { - // NOTE: for special types int8_t and uint8_t, need to promote to int type to avoid - // printing as a char. - os << "0x" << std::setw(2) - << +static_cast(static_cast(tensor->dl_tensor.data)[i]); - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } + PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); } else if (arr_type.bits() == 16) { - for (int i = 0; i < num_elements; i++) { - os << "0x" << std::setw(4) << static_cast(tensor->dl_tensor.data)[i]; - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } + PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); } else if (arr_type.bits() == 32) { - for (int i = 0; i < num_elements; i++) { - os << "0x" << std::setw(8) << static_cast(tensor->dl_tensor.data)[i]; - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } + PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); } else if (arr_type.bits() == 64) { - for (int i = 0; i < num_elements; i++) { - os << "0x" << std::setw(16) << static_cast(tensor->dl_tensor.data)[i] << "UL"; - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } + PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); } else { CHECK(false) << "should not get here"; } break; case runtime::DataType::TypeCode::kFloat: { - std::stringstream ss; - ss.setf(std::ios::hex | std::ios::showbase | std::ios::fixed | std::ios::scientific, - std::ios::basefield | std::ios::showbase | std::ios::floatfield); os.fill(' '); os.setf(std::ios::left, std::ios::adjustfield); if (arr_type.bits() == 32) { - for (int i = 0; i < num_elements; i++) { - float elem = static_cast(tensor->dl_tensor.data)[i]; - if (std::isinf(elem)) { - // C99 standard. - os << (elem < 0 ? "-" : " ") << std::setw(one_element_size_bytes - 1) << "INFINITY"; - } else if (std::isnan(elem)) { - // GNU extension, implemenatation-dependent. - os << std::setw(one_element_size_bytes) << "NAN"; - } else { - ss << elem; - os << std::setw(one_element_size_bytes) << ss.str(); - ss.str(""); - } - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } - std::cout << "\n"; + PrintArray(tensor->dl_tensor.data, num_elements, one_element_size_bytes, elements_per_row, indent_str, os); } else if (arr_type.bits() == 64) { - for (int i = 0; i < num_elements; i++) { - double elem = static_cast(tensor->dl_tensor.data)[i]; - if (std::isinf(elem)) { - // C99 standard. - os << (elem < 0 ? "-" : " ") << std::setw(one_element_size_bytes - 1) << "INFINITY"; - } else if (std::isnan(elem)) { - // GNU extension, implemenatation-dependent. 
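For floating point, the printer keeps values bit-exact by emitting C99
hexadecimal float literals, substituting the INFINITY macro (C99) and NAN (a
GNU extension) for non-finite values. A rough Python equivalent of the
per-element decision, using float.hex() in place of the C++ hexfloat stream
flags:

import math

def format_float_element(x):
    if math.isinf(x):
        return ("-" if x < 0 else "") + "INFINITY"
    if math.isnan(x):
        return "NAN"
    return float(x).hex()  # e.g. 1.5 -> 0x1.8000000000000p+0

for v in [1.5, -2.0, float("inf"), float("nan")]:
    print(format_float_element(v))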
- os << std::setw(one_element_size_bytes) << "NAN"; - } else { - ss << elem; - os << std::setw(one_element_size_bytes) << ss.str(); - ss.str(""); - } - if (i < num_elements - 1) { - os << ", "; - } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } - } + PrintArray(tensor->dl_tensor.data, num_elements, one_element_size_bytes, elements_per_row, indent_str, os); } else { CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " << arr_type.bits() << "-bit array"; From a05871fa904cc545d4827ce20a697ee8afe93550 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 23 Nov 2020 15:11:21 -0800 Subject: [PATCH 52/60] git-clang-format --- src/target/llvm/codegen_params.cc | 3 ++- src/target/source/codegen_params.cc | 19 ++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index a5ddb02ac35b..925754c3eef4 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -66,7 +66,8 @@ void BuildLLVMVector(llvm::Type* element_type, void* tensor_data, size_t num_ele std::vector* elements) { elements->resize(num_elements, nullptr); std::transform(static_cast(tensor_data), static_cast(tensor_data) + num_elements, - elements->begin(), [&](T t) { return LLVMConstantGetter::getElement(element_type, t); }); + elements->begin(), + [&](T t) { return LLVMConstantGetter::getElement(element_type, t); }); } llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime::NDArray arr) { diff --git a/src/target/source/codegen_params.cc b/src/target/source/codegen_params.cc index 80d131e7360d..223073717312 100644 --- a/src/target/source/codegen_params.cc +++ b/src/target/source/codegen_params.cc @@ -73,7 +73,8 @@ template ::value void PrintArray(void* data, size_t num_elements, int one_element_size_bytes, int elements_per_row, std::string indent_str, std::ostream& os) { std::stringstream ss; - ss.setf(std::ios::hex | (std::is_signed::value ? std::ios::showbase : 0) | std::ios::fixed | std::ios::scientific, + ss.setf(std::ios::hex | (std::is_signed::value ? 
std::ios::showbase : 0) | std::ios::fixed | + std::ios::scientific, std::ios::basefield | std::ios::showbase | std::ios::floatfield); for (int i = 0; i < num_elements; i++) { T elem = static_cast(data)[i]; @@ -97,7 +98,6 @@ void PrintArray(void* data, size_t num_elements, int one_element_size_bytes, int } } - void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& os) { auto arr_type = arr.DataType(); CHECK_EQ(arr_type.lanes(), 1) << "CodegenParams: only support generating 1-lane parameters; saw " @@ -167,11 +167,14 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& if (arr_type.bits() == 8) { PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); } else if (arr_type.bits() == 16) { - PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); + PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, + os); } else if (arr_type.bits() == 32) { - PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); + PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, + os); } else if (arr_type.bits() == 64) { - PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); + PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, + os); } else { CHECK(false) << "should not get here"; } @@ -181,9 +184,11 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& os.fill(' '); os.setf(std::ios::left, std::ios::adjustfield); if (arr_type.bits() == 32) { - PrintArray(tensor->dl_tensor.data, num_elements, one_element_size_bytes, elements_per_row, indent_str, os); + PrintArray(tensor->dl_tensor.data, num_elements, one_element_size_bytes, + elements_per_row, indent_str, os); } else if (arr_type.bits() == 64) { - PrintArray(tensor->dl_tensor.data, num_elements, one_element_size_bytes, elements_per_row, indent_str, os); + PrintArray(tensor->dl_tensor.data, num_elements, one_element_size_bytes, + elements_per_row, indent_str, os); } else { CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " << arr_type.bits() << "-bit array"; From dd862fc549a7111165064f162e7c76d472bbf3fc Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 23 Nov 2020 15:14:29 -0800 Subject: [PATCH 53/60] cpplint --- src/target/llvm/codegen_params.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index 925754c3eef4..fd44117ade95 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -24,6 +24,7 @@ #include "codegen_params.h" +#include #include #include From b08e24f982eacf24817ba9bfd722e34fb75ffb1f Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 23 Nov 2020 15:35:31 -0800 Subject: [PATCH 54/60] fix compile bugs on linux --- src/target/source/codegen_params.cc | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/target/source/codegen_params.cc b/src/target/source/codegen_params.cc index 223073717312..ea7f19418f20 100644 --- a/src/target/source/codegen_params.cc +++ b/src/target/source/codegen_params.cc @@ -73,10 +73,14 @@ template ::value void PrintArray(void* data, size_t num_elements, int one_element_size_bytes, int elements_per_row, std::string indent_str, std::ostream& os) { std::stringstream ss; - ss.setf(std::ios::hex | (std::is_signed::value ? 
std::ios::showbase : 0) | std::ios::fixed | - std::ios::scientific, - std::ios::basefield | std::ios::showbase | std::ios::floatfield); - for (int i = 0; i < num_elements; i++) { + if (std::is_signed::value) { + ss.setf(std::ios::hex | std::ios::showbase | std::ios::fixed | std::ios::scientific, + std::ios::basefield | std::ios::showbase | std::ios::floatfield); + } else { + ss.setf(std::ios::hex | std::ios::fixed | std::ios::scientific, + std::ios::basefield | std::ios::showbase | std::ios::floatfield); + } + for (size_t i = 0; i < num_elements; i++) { T elem = static_cast(data)[i]; if (std::isinf(elem)) { // C99 standard. From bcbeda4be7679c65b1b8b81b5e546100952ab447 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 23 Nov 2020 20:45:47 -0800 Subject: [PATCH 55/60] revert pyproject, address tqchen, kparzysz comments --- pyproject.toml | 92 ----------------------------- src/relay/backend/build_module.cc | 1 + src/target/source/codegen_params.cc | 8 ++- 3 files changed, 7 insertions(+), 94 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d273b25eb3cd..5cca711ddbe6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,95 +46,3 @@ exclude = ''' )/ ) ''' -[tool.poetry] -name = "incubator-tvm" -version = "0.1.0" -description = "" -authors = ["Your Name "] -packages = [ - { include = "tvm", from = "../../../../python" }, -] - -[tool.poetry.dependencies] -attrs = "^19" -decorator = "^4.4" -numpy = "~1.19" -psutil = "^5" -scipy = "^1.4" -python = "^3.6" -tornado = "^6" -typed_ast = "^1.4" - -# AutoTVM -xgboost = {version = "^1.1", optional = true} - -############# -# Importers # -############# - -# NOTE: Caffe frontend dependency is from torch package. - -# CoreML -coremltools = {version = "^3.3", optional = true} - -# Darknet -opencv-python = {version = "^4.2", optional = true} -cffi = {version = "^1.14", optional = true} - -# NOTE: Keras provided by tensorflow package. -# If TF version conflict, maybe try: keras = "2.3.1" - -# MXNet frontend -mxnet = {version = "^1.6.0", optional = true} - -# ONNX frontend -onnx = {version = "1.6.0", optional = true} -onnxruntime = {version = "1.0.0", optional = true} - -# Pytorch (also used by ONNX) -torch = {version = "1.4.0", optional = true} -torchvision = {version = "0.5.0", optional = true} -# NOTE: torch depends on a number of other packages, but unhelpfully, does not expose that in the -# wheel!!! 
-future = {version = "*", optional = true} - -# Tensorflow frontend -tensorflow = {version = "^2.1", optional = true} -tensorflow-estimator = {version = "^2.1", optional = true} - -# TFLite frontend -tflite = {version = "2.1.0", optional = true} -wheel = "*" - - -[tool.poetry.extras] -xgboost = ["xgboost"] -importer-caffe2 = ["torch"] -importer-coreml = ["coremltools"] -importer-darknet = ["opencv-python"] -importer-keras = ["tensorflow", "tensorflow-estimator"] -importer-onnx = ["onnx", "onnxruntime", "torch", "torchvision", "future"] -importer-pytorch = ["torch", "torchvision", "future"] -importer-tensorflow = ["tensorflow", "tensorflow-estimator"] -importer-tflite = ["tlfite", "tensorflow", "tensorflow-estimator"] - -[tool.poetry.dev-dependencies] -autodocsumm = "^0.1" -black = "^19.10b0" -sphinx = "^3.0" -sphinx-gallery = "^0.4" -sphinx-rtd-theme = "^0.4" -matplotlib = "^3.2" -Image = "^1.5" -recommonmark = "^0.6" -pillow = "< 7" -pyformat = "^0.7" -pylint = "^2.4" -pytest = "^5.4" - -[build-system] -requires = ["poetry>=0.12"] -build-backend = "poetry.masonry.api" - -[tool.autopep8] -max_line_length = 100 diff --git a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc index 189227bb15a1..82ac1c57018e 100644 --- a/src/relay/backend/build_module.cc +++ b/src/relay/backend/build_module.cc @@ -462,6 +462,7 @@ class RelayBuildModule : public runtime::ModuleNode { const runtime::PackedFunc* pf = runtime::Registry::Get("codegen.LLVMModuleCreate"); if (!target_host.defined()) target_host = (pf != nullptr) ? Target("llvm") : Target("stackvm"); + // Generate a placeholder function that attaches linked params as its arguments. if (target_host->GetAttr("link-params").value_or(Bool(false))) { CHECK(pf != nullptr) << "Unable to link-params with no target_host and no llvm codegen."; auto param_ids = graph_codegen_->GetParamIds(); diff --git a/src/target/source/codegen_params.cc b/src/target/source/codegen_params.cc index ea7f19418f20..b36bff019980 100644 --- a/src/target/source/codegen_params.cc +++ b/src/target/source/codegen_params.cc @@ -119,8 +119,12 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& } } else if (arr_type.code() == runtime::DataType::TypeCode::kFloat) { // Floats and doubles are printed as hex but casted. - one_element_size_bytes += 1 /* sign */ + 1 /* decimal point */ + 1 /* exponent sign */ + - 1 /* extra decimal digit in exponent */; + one_element_size_bytes += 1 /* sign */ + 1 /* decimal point */ + 1 /* exponent sign */; + if (arr_type.bits() == 64) { + one_element_size_bytes += 2; /* 4 decimal digits in exponent, relative to bits / 4 */ + } else if (arr_type.bits() == 32) { + one_element_size_bytes += 1; /* extra decimal digit in exponent, relative to bits / 4 */ + } } int elements_per_row = 16; From bf4207765dc6eab6f81061c2e968326f2020940f Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Mon, 23 Nov 2020 20:55:17 -0800 Subject: [PATCH 56/60] git-clang-format --- src/target/source/codegen_params.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/target/source/codegen_params.cc b/src/target/source/codegen_params.cc index b36bff019980..dde95d1964aa 100644 --- a/src/target/source/codegen_params.cc +++ b/src/target/source/codegen_params.cc @@ -121,7 +121,7 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& // Floats and doubles are printed as hex but casted. 
one_element_size_bytes += 1 /* sign */ + 1 /* decimal point */ + 1 /* exponent sign */; if (arr_type.bits() == 64) { - one_element_size_bytes += 2; /* 4 decimal digits in exponent, relative to bits / 4 */ + one_element_size_bytes += 2; /* 4 decimal digits in exponent, relative to bits / 4 */ } else if (arr_type.bits() == 32) { one_element_size_bytes += 1; /* extra decimal digit in exponent, relative to bits / 4 */ } From 883c878986d8de43d2dacfb0dfcd701b023c8b26 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 24 Nov 2020 13:48:38 -0800 Subject: [PATCH 57/60] address tqchen, others' comments --- src/target/llvm/codegen_params.cc | 18 ++-- src/target/llvm/codegen_params.h | 13 ++- src/target/source/codegen_params.cc | 162 ++++++++++++++++------------ src/target/source/codegen_params.h | 13 +++ 4 files changed, 127 insertions(+), 79 deletions(-) diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index fd44117ade95..254cfe8d1283 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -31,13 +31,6 @@ namespace tvm { namespace codegen { -namespace { -class DLManagedTensorDeleter { - public: - void operator()(DLManagedTensor* ptr) { ptr->deleter(ptr); } -}; -} // namespace - template struct LLVMConstantGetter { static llvm::Constant* getElement(llvm::Type* ty, T t); @@ -143,6 +136,11 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: case runtime::DataType::TypeCode::kFloat: switch (arr_type.bits()) { + case 16: + // NOTE: float16 is treated as uint16_t. + element_type = llvm::Type::getIntNTy(*ctx, arr_type.bits()); + BuildLLVMVector(element_type, arr->data, num_elements, &elements); + break; case 32: element_type = llvm::Type::getFloatTy(*ctx); BuildLLVMVector(element_type, arr->data, num_elements, &elements); @@ -158,6 +156,12 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: } break; + case runtime::DataType::TypeCode::kBFloat: + CHECK(arr_type.bits() == 16) << "CodegenParams: only support 16-bit bfloat; saw " + << arr_type.bits() << "-bit array"; + element_type = llvm::Type::getIntNTy(*ctx, arr_type.bits()); + BuildLLVMVector(element_type, arr->data, num_elements, &elements); + default: CHECK(false) << "Data type not supported"; } diff --git a/src/target/llvm/codegen_params.h b/src/target/llvm/codegen_params.h index c21820aa6c3f..771bc201f7aa 100644 --- a/src/target/llvm/codegen_params.h +++ b/src/target/llvm/codegen_params.h @@ -32,12 +32,17 @@ namespace tvm { namespace codegen { +/*! + * \brief Convert an NDArray to an LLVM array of constants. + * + * The supplied NDArray is flattened, and each element is converted to the appropriate LLVM type. + * + * \param ctx LLVM context used to create the various primitive datatypes. + * \param arr NDArray to convert. + * \return LLVM array containing the array data. 
+ */ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime::NDArray arr); -void LLVMCodeGenParams(llvm::LLVMContext* ctx, llvm::Module* module, int64_t storage_id_offset, - ::tvm::runtime::Array param_names, - ::tvm::runtime::Array params_by_sid); - } // namespace codegen } // namespace tvm diff --git a/src/target/source/codegen_params.cc b/src/target/source/codegen_params.cc index dde95d1964aa..c1cb59d6e870 100644 --- a/src/target/source/codegen_params.cc +++ b/src/target/source/codegen_params.cc @@ -29,23 +29,48 @@ #include #include #include +#include namespace tvm { namespace codegen { -namespace { -class DLManagedTensorDeleter { - public: - void operator()(DLManagedTensor* ptr) { ptr->deleter(ptr); } -}; -} // namespace - +/*! \brief maximum line length of generated parameters, including indent. */ static constexpr const int kMaxLineLength = 80; -template ::value>> -void PrintArray(void* data, size_t num_elements, int elements_per_row, std::string indent_str, - std::ostream& os) { +static int ComputeNumElementsPerRow(int one_element_size_bytes, int indent_chars) { + if (one_element_size_bytes > kMaxLineLength - indent_chars) { + return 1; + } + // When multiple elements fit per line, divide the available space by the size of one element, + // and return the largest power of 2 less than the result. Using power-of-2-sized elements allows + // for easily traversing the generated code. + return 1 << (fls((kMaxLineLength - indent_chars) / one_element_size_bytes) - 1); +} + +template ::value>> +void PrintIntegralArray(void* data, size_t num_elements, int indent_chars, std::ostream& os) { + int one_element_size_bytes = (sizeof(T) / 4) + (2 /* "0x" */) + (2 /* ", " */); + if (std::is_signed::value) { + one_element_size_bytes += 1; // sign character + if (sizeof(T) == 64 / 8) { + one_element_size_bytes += 2; // "LL" + } + } else { + if (sizeof(T) == 64 / 8) { + one_element_size_bytes += 3; // "ULL" + } + } + + int elements_per_row = ComputeNumElementsPerRow(one_element_size_bytes, indent_chars); + std::string indent_str(indent_chars, ' '); + for (size_t i = 0; i < num_elements; i++) { + if ((i % elements_per_row) == 0) { + if (i != 0) { + os << std::endl; + } + os << indent_str; + } int64_t elem = static_cast(data)[i]; if (std::is_signed::value) { uint64_t to_print; @@ -57,21 +82,40 @@ void PrintArray(void* data, size_t num_elements, int elements_per_row, std::stri to_print = elem; } os << "0x" << std::setw(sizeof(T) * 8 / 4) << static_cast(to_print); + if (sizeof(T) == 64 / 8) { + os << "LL"; + } } else { os << "0x" << std::setw(sizeof(T) * 8 / 4) << static_cast(elem); + if (sizeof(T) == 64 / 8) { + os << "ULL"; + } } if (i < num_elements - 1) { os << ", "; } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } + } + + if ((num_elements % elements_per_row) != 0) { + os << "\n"; } } -template ::value>> -void PrintArray(void* data, size_t num_elements, int one_element_size_bytes, int elements_per_row, - std::string indent_str, std::ostream& os) { +template ::value>> +void PrintFloatingPointArray(void* data, size_t num_elements, int indent_chars, std::ostream& os) { + // Floats and doubles are printed as hex but casted. 
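ComputeNumElementsPerRow rounds the number of elements that fit on a line down
to a power of two, so readers can index into the generated initializer by eye.
A small Python sketch of the same computation (the later patch in this series
replaces fls() with the equivalent clear-lowest-bit loop used here); the
element sizes in the asserts are invented:

MAX_LINE_LENGTH = 80

def elements_per_row(one_element_size_bytes, indent_chars):
    budget = MAX_LINE_LENGTH - indent_chars
    if one_element_size_bytes > budget:
        return 1
    n = budget // one_element_size_bytes
    while n & (n - 1):  # clear the lowest set bit until one bit remains
        n &= n - 1
    return n

assert elements_per_row(one_element_size_bytes=9, indent_chars=4) == 8   # 76 // 9 == 8
assert elements_per_row(one_element_size_bytes=12, indent_chars=4) == 4  # 76 // 12 == 6 -> 4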
+ int one_element_size_bytes = + (sizeof(T) / 4) + (2 /* "0x" */) + (2 /* ", " */) + + 1 /* sign */ + 1 /* decimal point */ + 1 /* exponent sign */; + if (sizeof(T) == 64 / 8) { + one_element_size_bytes += 2; /* 4 decimal digits in exponent, relative to bits / 4 */ + } else if (sizeof(T) == 32 / 8) { + one_element_size_bytes += 1; /* extra decimal digit in exponent, relative to bits / 4 */ + } + + int elements_per_row = ComputeNumElementsPerRow(one_element_size_bytes, indent_chars); + std::string indent_str(indent_chars, ' '); + std::stringstream ss; if (std::is_signed::value) { ss.setf(std::ios::hex | std::ios::showbase | std::ios::fixed | std::ios::scientific, @@ -81,6 +125,13 @@ void PrintArray(void* data, size_t num_elements, int one_element_size_bytes, int std::ios::basefield | std::ios::showbase | std::ios::floatfield); } for (size_t i = 0; i < num_elements; i++) { + if ((i % elements_per_row) == 0) { + if (i != 0) { + os << std::endl; + } + os << indent_str; + } + T elem = static_cast(data)[i]; if (std::isinf(elem)) { // C99 standard. @@ -96,9 +147,10 @@ void PrintArray(void* data, size_t num_elements, int one_element_size_bytes, int if (i < num_elements - 1) { os << ", "; } - if (((i + 1) % elements_per_row) == 0) { - os << "\n" << indent_str; - } + } + + if ((num_elements % elements_per_row) != 0) { + os << "\n"; } } @@ -107,42 +159,12 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& CHECK_EQ(arr_type.lanes(), 1) << "CodegenParams: only support generating 1-lane parameters; saw " << arr_type.lanes(); - int one_element_size_bytes = (arr_type.bits() / 4) + (2 /* "0x" */) + (2 /* ", " */); - if (arr_type.code() == runtime::DataType::TypeCode::kInt) { - one_element_size_bytes += 1; // sign character - if (arr_type.bits() > 32) { - one_element_size_bytes += 2; // "LL" - } - } else if (arr_type.code() == runtime::DataType::TypeCode::kUInt) { - if (arr_type.bits() > 32) { - one_element_size_bytes += 3; // "ULL" - } - } else if (arr_type.code() == runtime::DataType::TypeCode::kFloat) { - // Floats and doubles are printed as hex but casted. 
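float16 (and bfloat16, below) have no portable C literal syntax, so their
parameters are emitted as raw uint16_t bit patterns. A small numpy
illustration of the values that end up in the generated array:

import numpy as np

half = np.array([1.0, -2.0, 0.5], dtype="float16")
bits = half.view("uint16")
print([hex(int(b)) for b in bits])  # ['0x3c00', '0xc000', '0x3800']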
- one_element_size_bytes += 1 /* sign */ + 1 /* decimal point */ + 1 /* exponent sign */; - if (arr_type.bits() == 64) { - one_element_size_bytes += 2; /* 4 decimal digits in exponent, relative to bits / 4 */ - } else if (arr_type.bits() == 32) { - one_element_size_bytes += 1; /* extra decimal digit in exponent, relative to bits / 4 */ - } - } - - int elements_per_row = 16; - while (elements_per_row > 1 && - (elements_per_row * one_element_size_bytes) > (kMaxLineLength - indent_chars)) { - elements_per_row /= 2; - } - - std::string indent_str(indent_chars, ' '); - os << indent_str; - auto shape = arr.Shape(); int num_elements = 1; for (auto shape_elem : shape) { num_elements *= shape_elem; } - std::unique_ptr tensor(arr.ToDLPack()); auto old_fmtflags = os.flags(); os.setf(std::ios::internal | std::ios::hex, std::ios::adjustfield | std::ios::basefield | std::ios::showbase); @@ -154,13 +176,13 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& << "CodegenParams: only support generating 8-, 16-, 32-, or 64-bit integer params; saw " << arr_type.bits() << "-bit array"; if (arr_type.bits() == 8) { - PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); + PrintIntegralArray(arr->data, num_elements, indent_chars, os); } else if (arr_type.bits() == 16) { - PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); + PrintIntegralArray(arr->data, num_elements, indent_chars, os); } else if (arr_type.bits() == 32) { - PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); + PrintIntegralArray(arr->data, num_elements, indent_chars, os); } else if (arr_type.bits() == 64) { - PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); + PrintIntegralArray(arr->data, num_elements, indent_chars, os); } else { CHECK(false) << "should not get here"; } @@ -173,16 +195,13 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& << arr_type.bits() << "-bit array"; if (arr_type.bits() == 8) { - PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, os); + PrintIntegralArray(arr->data, num_elements, indent_chars, os); } else if (arr_type.bits() == 16) { - PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, - os); + PrintIntegralArray(arr->data, num_elements, indent_chars, os); } else if (arr_type.bits() == 32) { - PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, - os); + PrintIntegralArray(arr->data, num_elements, indent_chars, os); } else if (arr_type.bits() == 64) { - PrintArray(tensor->dl_tensor.data, num_elements, elements_per_row, indent_str, - os); + PrintIntegralArray(arr->data, num_elements, indent_chars, os); } else { CHECK(false) << "should not get here"; } @@ -191,12 +210,13 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& case runtime::DataType::TypeCode::kFloat: { os.fill(' '); os.setf(std::ios::left, std::ios::adjustfield); - if (arr_type.bits() == 32) { - PrintArray(tensor->dl_tensor.data, num_elements, one_element_size_bytes, - elements_per_row, indent_str, os); + if (arr_type.bits() == 16) { + // NOTE: print types not widely supported by C as uint16_t. 
+ PrintIntegralArray(arr->data, num_elements, indent_chars, os); + } else if (arr_type.bits() == 32) { + PrintFloatingPointArray(arr->data, num_elements, indent_chars, os); } else if (arr_type.bits() == 64) { - PrintArray(tensor->dl_tensor.data, num_elements, one_element_size_bytes, - elements_per_row, indent_str, os); + PrintFloatingPointArray(arr->data, num_elements, indent_chars, os); } else { CHECK(false) << "CodegenParams: only support 32- or 64-bit floating point; saw " << arr_type.bits() << "-bit array"; @@ -204,13 +224,19 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& break; } + case runtime::DataType::TypeCode::kBFloat: { + // NOTE: print types not widely supported by C as uint16_t. + CHECK(arr_type.bits() == 16) + << "CodegenParams: only support generating 16-bit bfloat params; saw " + << arr_type.bits() << "-bit array"; + PrintIntegralArray(arr->data, num_elements, indent_chars, os); + break; + } + default: CHECK(false) << "Data type not supported"; } - if (num_elements % elements_per_row != 0) { - os << "\n"; - } os.flags(old_fmtflags); } diff --git a/src/target/source/codegen_params.h b/src/target/source/codegen_params.h index a3d277eac590..cc126c767c58 100644 --- a/src/target/source/codegen_params.h +++ b/src/target/source/codegen_params.h @@ -31,6 +31,19 @@ namespace tvm { namespace codegen { +/*! + * \brief Write a C representation of arr to os. + * + * This function generates a comma-separated, indented list of C integer listeals suitable for use + * in an initializer. The NDArray is flattened and then the list is produced element by element. + * For the int16_t NDArray [-3, -2, -1, 0, 1, 2, 3, ...], and indent_chars = 4, the following output + * is produced: + * -0x0003, -0x0002, -0x0001, +0x0000, +0x0001, +0x0002, +0x0003 + * + * \param arr The array to generate + * \param indent_chars Number of chars to indent + * \param os Output stream where the array data should be written. 
+ */ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& os); } // namespace codegen From 4400a346c303746e9c9bf651a20a36c3c757576e Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 24 Nov 2020 13:49:05 -0800 Subject: [PATCH 58/60] git-clang-format --- src/target/llvm/codegen_params.cc | 4 ++-- src/target/source/codegen_params.cc | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc index 254cfe8d1283..694be5621606 100644 --- a/src/target/llvm/codegen_params.cc +++ b/src/target/llvm/codegen_params.cc @@ -157,8 +157,8 @@ llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime:: break; case runtime::DataType::TypeCode::kBFloat: - CHECK(arr_type.bits() == 16) << "CodegenParams: only support 16-bit bfloat; saw " - << arr_type.bits() << "-bit array"; + CHECK(arr_type.bits() == 16) + << "CodegenParams: only support 16-bit bfloat; saw " << arr_type.bits() << "-bit array"; element_type = llvm::Type::getIntNTy(*ctx, arr_type.bits()); BuildLLVMVector(element_type, arr->data, num_elements, &elements); diff --git a/src/target/source/codegen_params.cc b/src/target/source/codegen_params.cc index c1cb59d6e870..bd29722e312e 100644 --- a/src/target/source/codegen_params.cc +++ b/src/target/source/codegen_params.cc @@ -24,12 +24,12 @@ #include "codegen_params.h" #include +#include #include #include #include #include -#include namespace tvm { namespace codegen { @@ -104,9 +104,8 @@ void PrintIntegralArray(void* data, size_t num_elements, int indent_chars, std:: template ::value>> void PrintFloatingPointArray(void* data, size_t num_elements, int indent_chars, std::ostream& os) { // Floats and doubles are printed as hex but casted. - int one_element_size_bytes = - (sizeof(T) / 4) + (2 /* "0x" */) + (2 /* ", " */) - + 1 /* sign */ + 1 /* decimal point */ + 1 /* exponent sign */; + int one_element_size_bytes = (sizeof(T) / 4) + (2 /* "0x" */) + (2 /* ", " */) + 1 /* sign */ + + 1 /* decimal point */ + 1 /* exponent sign */; if (sizeof(T) == 64 / 8) { one_element_size_bytes += 2; /* 4 decimal digits in exponent, relative to bits / 4 */ } else if (sizeof(T) == 32 / 8) { @@ -227,8 +226,8 @@ void NDArrayDataToC(::tvm::runtime::NDArray arr, int indent_chars, std::ostream& case runtime::DataType::TypeCode::kBFloat: { // NOTE: print types not widely supported by C as uint16_t. 
CHECK(arr_type.bits() == 16) - << "CodegenParams: only support generating 16-bit bfloat params; saw " - << arr_type.bits() << "-bit array"; + << "CodegenParams: only support generating 16-bit bfloat params; saw " << arr_type.bits() + << "-bit array"; PrintIntegralArray(arr->data, num_elements, indent_chars, os); break; } From f53c2e2813b95ff1b418648191b2cf10fff17595 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Tue, 24 Nov 2020 14:24:46 -0800 Subject: [PATCH 59/60] remove fls, which isn't widely available --- src/target/source/codegen_params.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/target/source/codegen_params.cc b/src/target/source/codegen_params.cc index bd29722e312e..cc7695abfd25 100644 --- a/src/target/source/codegen_params.cc +++ b/src/target/source/codegen_params.cc @@ -24,7 +24,6 @@ #include "codegen_params.h" #include -#include #include #include @@ -44,7 +43,13 @@ static int ComputeNumElementsPerRow(int one_element_size_bytes, int indent_chars // When multiple elements fit per line, divide the available space by the size of one element, // and return the largest power of 2 less than the result. Using power-of-2-sized elements allows // for easily traversing the generated code. - return 1 << (fls((kMaxLineLength - indent_chars) / one_element_size_bytes) - 1); + int elements_per_row = (kMaxLineLength - indent_chars) / one_element_size_bytes; + + // Implementation of fls. Iteratively clear the LSB until one bit remains. + while ((elements_per_row & (elements_per_row - 1)) > 0) { + elements_per_row &= elements_per_row - 1; + } + return elements_per_row; } template ::value>> From 754cf350c660a767e3663ca2911129dfaba27759 Mon Sep 17 00:00:00 2001 From: Andrew Reusch Date: Wed, 25 Nov 2020 08:29:09 -0800 Subject: [PATCH 60/60] address tqchen comments --- src/runtime/graph/graph_runtime.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/graph/graph_runtime.cc b/src/runtime/graph/graph_runtime.cc index 0033a1d5d8d2..26b66be72bd4 100644 --- a/src/runtime/graph/graph_runtime.cc +++ b/src/runtime/graph/graph_runtime.cc @@ -253,7 +253,7 @@ void GraphRuntime::ShareParams(const GraphRuntime& other, dmlc::Stream* strm) { void GraphRuntime::LinkedNDArrayDeleter(Object* container) { // container is the NDArray::Container which needs to get deleted. // The data member points to global const memory, so it does not need deleting. - delete reinterpret_cast(container); + delete static_cast(container); } void GraphRuntime::DefaultLookupLinkedParam(TVMArgs args, TVMRetValue* rv) {
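Taken together, the series leaves a small contract between the code generators
and the graph runtime, which patch 60's cleanup in DefaultLookupLinkedParam
also touches: the lookup function maps a storage id either to an opaque handle
for the linked constant, or to null, in which case SetupStorage falls back to
allocating an empty buffer. A compact Python sketch of that contract; the
storage ids and symbols are hypothetical, and the numeric type codes stand in
for kTVMOpaqueHandle and kTVMNullptr from the C runtime API:

K_TVM_OPAQUE_HANDLE = 3
K_TVM_NULLPTR = 4

LINKED = {2: "__tvm_param__p0", 5: "__tvm_param__p1"}  # storage_id -> symbol

def lookup_linked_param(storage_id):
    if storage_id in LINKED:
        return LINKED[storage_id], K_TVM_OPAQUE_HANDLE
    return None, K_TVM_NULLPTR

for sid in (2, 3):
    handle, tcode = lookup_linked_param(sid)
    action = "use linked constant" if tcode == K_TVM_OPAQUE_HANDLE else "allocate empty NDArray"
    print(sid, handle, action)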