From 1dae13b0a0d9827d04610b226bfdeb1bf07d02e5 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Fri, 22 Apr 2022 12:18:02 +0300 Subject: [PATCH 01/21] high-level design for invoke_with_output method was implemented --- include/tvm/runtime/vm/vm.h | 10 ++++++++++ python/tvm/runtime/vm.py | 19 +++++++++++++++++++ src/runtime/vm/vm.cc | 8 ++++++++ 3 files changed, 37 insertions(+) diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h index 5a72a99fa635..216361cdd1a1 100644 --- a/include/tvm/runtime/vm/vm.h +++ b/include/tvm/runtime/vm/vm.h @@ -281,6 +281,13 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { */ void SetOneInput(std::string name, const TVMArgValue& tag, const TVMArgValue& tensor); + /*! + * \brief Set pre-allocated outputs to a function. + * \param name The function name + * \param args outputs to the function. + */ + void SetOutputs(std::string name, TVMArgs args); + /*! * \brief Internal hook for profiling the start of an op. * @@ -356,6 +363,9 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { ObjectPtr exec_; /*! \brief The function name to inputs mapping. */ std::unordered_map> inputs_; + bool set_outputs_enabled_ = false; + /*! \brief The function name to pre-allocated outputs mapping. */ + std::unordered_map> outputs_; /*! * \brief The "physical" devices the VM can execute primitives on. All "device indexes" * are w.r.t. this vector. 
Each entry in this vector must match the corresponding entry diff --git a/python/tvm/runtime/vm.py b/python/tvm/runtime/vm.py index 615f66fdcc1c..3415f277f786 100644 --- a/python/tvm/runtime/vm.py +++ b/python/tvm/runtime/vm.py @@ -399,6 +399,7 @@ def __init__(self, exe, device, memory_cfg=None): self._get_input_index = self.module["get_input_index"] self._set_input = self.module["set_input"] self._set_one_input = self.module["set_one_input"] + self._set_outputs = self.module["set_outputs"] self._setup_device(device, memory_cfg) def _setup_device(self, dev, memory_cfg): @@ -560,6 +561,24 @@ def invoke_stateful(self, func_name, *args, **kwargs): self.set_input(func_name, *args, **kwargs) self._invoke_stateful(func_name) + def invoke_with_outputs(self, func_name, *args): + """Invoke a function with pre-allocated outputs tensors. + It requires use set_input method before. + + This invoke method allows to avoid excess copying if memory for output tensors + was allocated before inference. + + Parameters + ---------- + func_name : str + The name of the function. + + args : list[tvm.runtime.NDArray] or list[DLTensor] + The output tensors of the function. + """ + self._set_outputs(func_name, *args) + self._invoke(func_name) + def get_outputs(self): """Get the outputs from a call to :py:func`invoke_stateful`. 
diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 6f52f4b83c81..56f24369225a 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -224,6 +224,9 @@ PackedFunc VirtualMachine::GetFunction(const std::string& name, << "(func_name, index or name, tensor)"; SetOneInput(args[0], args[1], args[2]); }); + } else if (name == "set_outputs") { + return PackedFunc( + [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { SetOutputs(args[0], args); }); } else if (name == "load_late_bound_consts") { return PackedFunc([this](TVMArgs args, TVMRetValue* rv) { CHECK_EQ(args.size(), 1); @@ -272,6 +275,10 @@ void VirtualMachine::SetOneInput(std::string func_name, const TVMArgValue& tag, SetInputTensorWithIndex(inputs_[func_name], tensor, inp_index, dev); } +void VirtualMachine::SetOutputs(std::string name, TVMArgs args) { + set_outputs_enabled_ = true; +} + int64_t VirtualMachine::GetInputIndexFromVMFunction(const std::string& func_name, const std::string& input_name) const { const auto& vm_func = CheckAndGetVMFunction(func_name); @@ -765,6 +772,7 @@ void VirtualMachine::RunLoop() { auto caller_return_register = frames_.back().caller_return_register; if (PopFrame() == frame_start) { + set_outputs_enabled_ = false; return; // Otherwise we are just returning from a local call. } else { From 3795e046f9cb0543f86e83463e696a2063f2d03d Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Fri, 22 Apr 2022 14:50:03 +0300 Subject: [PATCH 02/21] GetResultRegisterIndex was implemented --- include/tvm/runtime/vm/vm.h | 6 ++++++ src/runtime/vm/vm.cc | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h index 216361cdd1a1..30f4d8058bba 100644 --- a/include/tvm/runtime/vm/vm.h +++ b/include/tvm/runtime/vm/vm.h @@ -346,6 +346,12 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { void SetInputTensorWithIndex(std::vector& tensors, // NOLINT(*) const TVMArgValue& tensor, int index, Device dev); + /*! 
+ * \brief Get index of outputs in register_file from frame + * \return index + */ + Index GetResultRegisterIndex(); + protected: /*! \brief The virtual machine's packed function table. */ std::vector packed_funcs_; diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 56f24369225a..f048c0a497cb 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -525,6 +525,15 @@ int64_t VirtualMachine::LoadScalarInt(Index r) const { return result; } +Index VirtualMachine::GetResultRegisterIndex() { + Index op_index = 0; + while (code_[op_index].op != Opcode::Ret) { + ++op_index; + } + + return code_[op_index].result; +} + void VirtualMachine::RunLoop() { ICHECK(this->exec_); ICHECK(this->code_); From db4fe130b4481f0a882392f0d87b9a0197bab3fd Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Thu, 12 May 2022 19:01:38 +0300 Subject: [PATCH 03/21] SetOutputs method was implemented --- src/runtime/vm/vm.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index f048c0a497cb..7d27c9d95c28 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -277,6 +277,22 @@ void VirtualMachine::SetOneInput(std::string func_name, const TVMArgValue& tag, void VirtualMachine::SetOutputs(std::string name, TVMArgs args) { set_outputs_enabled_ = true; + std::vector external_output_arrays; + for (int i = 0; i < args.size(); ++i) { + TVMArgValue output_tensor = args[i]; + if (output_tensor.type_code() == kTVMDLTensorHandle) { + DLTensor* dl_tensor = output_tensor; + external_output_arrays.emplace_back(NDArray::FromExternalDLTensor(*dl_tensor)); + } else if (output_tensor.type_code() == kTVMNDArrayHandle) { + // TODO(vvchernov): emplace_back? + external_output_arrays.push_back(output_tensor.AsObjectRef()); + } else { + LOG(FATAL) << "Output tensors of not DLTensor or NDArray type are not supported now!"; + } + } + // TODO(vvchernov): I'm not sure we need any tag here. 
Nevertheless it is required + auto output_set = ADT(0, external_output_arrays); + WriteRegister(GetResultRegisterIndex(), output_set); } int64_t VirtualMachine::GetInputIndexFromVMFunction(const std::string& func_name, From c598286bb742796b36d6533afa9227e1e0731f77 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Mon, 16 May 2022 10:37:25 +0300 Subject: [PATCH 04/21] update writing to register for AllocTensor op --- include/tvm/runtime/vm/vm.h | 15 +++++++++++++ src/runtime/vm/vm.cc | 44 +++++++++++++++++++++++++------------ 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h index 30f4d8058bba..edd31b7a8372 100644 --- a/include/tvm/runtime/vm/vm.h +++ b/include/tvm/runtime/vm/vm.h @@ -352,6 +352,21 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { */ Index GetResultRegisterIndex(); + /*! + * \brief Write new allocated tensor to register_file of frame + * \param instr current instruction containing shape and storage info + */ + void WriteAllocatedTensor(const Instruction& instr); + + /*! + * \brief 'set_outputs_enabled' is assumed true for using this method. + * It is expected that result register has already contained tensor from outside, + * new tensor is not allocated and write, but expected shape is checked. + * For other register WriteAllocatedMethod is used. + * \param instr current instruction containing shape and storage info + */ + void WriteAllocatedTensorFromOutside(const Instruction& instr); + protected: /*! \brief The virtual machine's packed function table. 
*/ std::vector packed_funcs_; diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 7d27c9d95c28..469c88fa310a 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -698,21 +698,11 @@ void VirtualMachine::RunLoop() { } case Opcode::AllocTensor: { OpStartHook(instr); - auto shape = std::vector(instr.alloc_tensor.ndim); - - for (uint32_t i = 0; i < instr.alloc_tensor.ndim; ++i) { - shape[i] = instr.alloc_tensor.shape[i]; + if (set_outputs_enabled_) { + WriteAllocatedTensorFromOutside(instr); + } else { + WriteAllocatedTensor(instr); } - - auto storage_obj = ReadRegister(instr.alloc_tensor.storage); - auto offset = LoadScalarInt(instr.alloc_tensor.offset); - auto storage = Downcast(storage_obj); - auto obj = storage->AllocNDArray(offset, shape, instr.alloc_tensor.dtype); - VLOG(2) << "allocated " - << RuntimeObject2String(obj, GetDevice(exec_->host_device_index), - /*show_contents=*/false); - - WriteRegister(instr.dst, obj); OpStopHook(); pc_++; goto main_loop; @@ -858,6 +848,32 @@ void VirtualMachine::RunLoop() { } } +void VirtualMachine::WriteAllocatedTensor(const Instruction& instr) { + auto shape = std::vector(instr.alloc_tensor.ndim); + + for (uint32_t i = 0; i < instr.alloc_tensor.ndim; ++i) { + shape[i] = instr.alloc_tensor.shape[i]; + } + + auto storage_obj = ReadRegister(instr.alloc_tensor.storage); + auto offset = LoadScalarInt(instr.alloc_tensor.offset); + auto storage = Downcast(storage_obj); + auto obj = storage->AllocNDArray(offset, shape, instr.alloc_tensor.dtype); + VLOG(2) << "allocated " + << RuntimeObject2String(obj, GetDevice(exec_->host_device_index), + /*show_contents=*/false); + + WriteRegister(instr.dst, obj); +} + +void VirtualMachine::WriteAllocatedTensorFromOutside(const Instruction& instr) { + if (instr.dst == GetResultRegisterIndex()) { + // TODO(vvchernov): check shape + } else { + WriteAllocatedTensor(instr); + } +} + runtime::Module CreateVirtualMachine(Executable* exec) { auto vm = make_object(); 
vm->LoadExecutable(GetObjectPtr(exec)); From d9cbaf51ba4b9cedc6bbd52d91e832ee29553a0f Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Tue, 17 May 2022 19:27:11 +0300 Subject: [PATCH 05/21] update SetOutputs based on number of outputs. Take into account different funcs through func name --- include/tvm/runtime/vm/vm.h | 37 ++++++++++++--- src/runtime/vm/vm.cc | 89 ++++++++++++++++++++++++++++--------- 2 files changed, 98 insertions(+), 28 deletions(-) diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h index edd31b7a8372..31731c8b78b4 100644 --- a/include/tvm/runtime/vm/vm.h +++ b/include/tvm/runtime/vm/vm.h @@ -226,6 +226,17 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { */ ObjectRef Invoke(const std::string& name, const std::vector& args); + /*! + * \brief Invoke a VM function. + * \param func The function. + * \param input_args The input arguments to the function. + * \param output_args The pre-allocated output arguments of the function. + * \return The object(s) representing the result. + */ + ObjectRef Invoke(const VMFunction& func, + const std::vector& input_args, + const std::vector& output_args); + /*! * \brief Invoke a PackedFunction * @@ -249,7 +260,7 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { const std::vector& alloc_types); /*! \brief Run VM dispatch loop. */ - void RunLoop(); + void RunLoop(bool set_output_enabled = false); /*! \brief Get device from the device list based on a given device index. */ Device GetDevice(Index device_index) const; @@ -288,6 +299,12 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { */ void SetOutputs(std::string name, TVMArgs args); + /*! + * \brief Set pre-allocated outputs to register for specified function. + * \param outputs set of output tensors. + */ + void SetOutputTensorsToRegister(const std::vector& outputs); + /*! * \brief Internal hook for profiling the start of an op. 
* @@ -347,10 +364,18 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { const TVMArgValue& tensor, int index, Device dev); /*! - * \brief Get index of outputs in register_file from frame - * \return index + * \brief Convert tensor from TVMArgValue to ObjectRef. + * DLTensor and NDArray types are supported. + * \param tensor given arg value containing tensor. + * \return tensor in ObjectRef format + */ + ObjectRef TensorFromTVMArgValueToObjectRef(const TVMArgValue& tensor) const; + + /*! + * \brief Get index of outputs in register_file from func code + * \return result register index */ - Index GetResultRegisterIndex(); + Index GetResultRegisterIndex() const; /*! * \brief Write new allocated tensor to register_file of frame @@ -365,7 +390,7 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { * For other register WriteAllocatedMethod is used. * \param instr current instruction containing shape and storage info */ - void WriteAllocatedTensorFromOutside(const Instruction& instr); + void WriteAllocatedTensorFromOutside(const Instruction& instr, Index res_index); protected: /*! \brief The virtual machine's packed function table. */ @@ -384,7 +409,7 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { ObjectPtr exec_; /*! \brief The function name to inputs mapping. */ std::unordered_map> inputs_; - bool set_outputs_enabled_ = false; + std::unordered_map set_outputs_enabled_; /*! \brief The function name to pre-allocated outputs mapping. */ std::unordered_map> outputs_; /*! 
diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 469c88fa310a..228d7840bdfc 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -143,8 +143,14 @@ PackedFunc VirtualMachine::GetFunction(const std::string& name, } else { auto it = inputs_.find(func_name); ICHECK(it != inputs_.end()) << "Input has not been set for function " << func_name; - const std::vector& func_args = it->second; - *rv = Invoke(func, func_args); + const std::vector& input_args = it->second; + if (set_outputs_enabled_.count(func_name) && set_outputs_enabled_[func_name]) { + ICHECK(outputs_.count(func_name)) << "Outputs have not been set for function " << func_name; + *rv = Invoke(func, input_args, outputs_[func_name]); + set_outputs_enabled_[func_name] = false; + } else { + *rv = Invoke(func, input_args); + } } }); } else if (name == "invoke_stateful") { @@ -275,24 +281,47 @@ void VirtualMachine::SetOneInput(std::string func_name, const TVMArgValue& tag, SetInputTensorWithIndex(inputs_[func_name], tensor, inp_index, dev); } -void VirtualMachine::SetOutputs(std::string name, TVMArgs args) { - set_outputs_enabled_ = true; - std::vector external_output_arrays; - for (int i = 0; i < args.size(); ++i) { - TVMArgValue output_tensor = args[i]; +void VirtualMachine::SetOutputs(std::string func_name, TVMArgs args) { + set_outputs_enabled_[func_name] = true; + size_t outputs_size = args.size(); + // First args is func_name + ICHECK_GT(outputs_size, 1) + << "There is no output arguments set"; + + std::vector func_args(outputs_size - 1); + for (size_t i = 1; i < outputs_size; ++i) { + // TODO(vvchernov): device? + // TODO(vvchernov): correct index sequence for multiple outputs? 
+ func_args[i-1] = TensorFromTVMArgValueToObjectRef(args[i]); + } + outputs_.erase(func_name); + outputs_.emplace(func_name, func_args); +} + +void VirtualMachine::SetOutputTensorsToRegister(const std::vector& outputs) { + size_t size = outputs.size(); + + Index res_ind = GetResultRegisterIndex(); + if (size == 1) { + WriteRegister(res_ind, outputs[0]); + } else { + // TODO(vvchernov): I'm not sure we need any tag here. Nevertheless it is required + auto output_set = ADT(0, outputs); + WriteRegister(res_ind, output_set); + } +} + +ObjectRef VirtualMachine::TensorFromTVMArgValueToObjectRef(const TVMArgValue& output_tensor) const { if (output_tensor.type_code() == kTVMDLTensorHandle) { DLTensor* dl_tensor = output_tensor; - external_output_arrays.emplace_back(NDArray::FromExternalDLTensor(*dl_tensor)); + return NDArray::FromExternalDLTensor(*dl_tensor); } else if (output_tensor.type_code() == kTVMNDArrayHandle) { - // TODO(vvchernov): emplace_back? - external_output_arrays.push_back(output_tensor.AsObjectRef()); + return output_tensor.AsObjectRef(); } else { - LOG(FATAL) << "Output tensors of not DLTensor or NDArray type are not supported now!"; + LOG(FATAL) << "It supports tensor of DLTensor or NDArray type only! Given type is " + << output_tensor.type_code(); } - } - // TODO(vvchernov): I'm not sure we need any tag here. 
Nevertheless it is required - auto output_set = ADT(0, external_output_arrays); - WriteRegister(GetResultRegisterIndex(), output_set); + return ObjectRef(); } int64_t VirtualMachine::GetInputIndexFromVMFunction(const std::string& func_name, @@ -403,6 +432,22 @@ ObjectRef VirtualMachine::Invoke(const std::string& name, const std::vectorfunctions[func_index], args); } +ObjectRef VirtualMachine::Invoke(const VMFunction& func, + const std::vector& input_args, + const std::vector& output_args) { + DLOG(INFO) << "Executing Function: " << std::endl << func; + for (int i = 0; i < static_cast(devices_.size()); ++i) { + DLOG(INFO) << "Device " << i << " has device type " << devices_[i].device_type + << " and device id " << devices_[i].device_id + << (i == exec_->host_device_index ? " (using as host device)" : ""); + } + + InvokeGlobal(func, input_args); + SetOutputTensorsToRegister(output_args); + RunLoop(set_outputs_enabled_[func.name]); + return return_register_; +} + void VirtualMachine::InvokePacked(Index packed_index, const PackedFunc& func, Index arg_count, Index output_size, const std::vector& args) { size_t arity = 0; @@ -541,7 +586,7 @@ int64_t VirtualMachine::LoadScalarInt(Index r) const { return result; } -Index VirtualMachine::GetResultRegisterIndex() { +Index VirtualMachine::GetResultRegisterIndex() const { Index op_index = 0; while (code_[op_index].op != Opcode::Ret) { ++op_index; @@ -550,11 +595,12 @@ Index VirtualMachine::GetResultRegisterIndex() { return code_[op_index].result; } -void VirtualMachine::RunLoop() { +void VirtualMachine::RunLoop(bool set_output_enabled) { ICHECK(this->exec_); ICHECK(this->code_); pc_ = 0; Index frame_start = frames_.size(); + Index res_reg_index = GetResultRegisterIndex(); while (true) { main_loop: auto const& instr = code_[this->pc_]; @@ -698,8 +744,8 @@ void VirtualMachine::RunLoop() { } case Opcode::AllocTensor: { OpStartHook(instr); - if (set_outputs_enabled_) { - WriteAllocatedTensorFromOutside(instr); + if 
(set_output_enabled) { + WriteAllocatedTensorFromOutside(instr, res_reg_index); } else { WriteAllocatedTensor(instr); } @@ -787,7 +833,6 @@ void VirtualMachine::RunLoop() { auto caller_return_register = frames_.back().caller_return_register; if (PopFrame() == frame_start) { - set_outputs_enabled_ = false; return; // Otherwise we are just returning from a local call. } else { @@ -866,8 +911,8 @@ void VirtualMachine::WriteAllocatedTensor(const Instruction& instr) { WriteRegister(instr.dst, obj); } -void VirtualMachine::WriteAllocatedTensorFromOutside(const Instruction& instr) { - if (instr.dst == GetResultRegisterIndex()) { +void VirtualMachine::WriteAllocatedTensorFromOutside(const Instruction& instr, Index res_index) { + if (instr.dst == res_index) { // TODO(vvchernov): check shape } else { WriteAllocatedTensor(instr); From 82d1bb46e5582d56af31ff0bdbcafcded90326e0 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Wed, 18 May 2022 16:13:46 +0300 Subject: [PATCH 06/21] clean duplicated code in Invoke methods --- include/tvm/runtime/vm/vm.h | 7 +++++++ src/runtime/vm/vm.cc | 30 ++++++++++++++---------------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h index 31731c8b78b4..0b04c9ec974b 100644 --- a/include/tvm/runtime/vm/vm.h +++ b/include/tvm/runtime/vm/vm.h @@ -299,6 +299,13 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { */ void SetOutputs(std::string name, TVMArgs args); + /*! + * \brief Preparation part of Invoke method before RunLoop. + * \param func the function. + * \param args input args + */ + void PrintInfoAndSetInputArgs(const VMFunction& func, const std::vector& args); + /*! * \brief Set pre-allocated outputs to register for specified function. * \param outputs set of output tensors. 
diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 228d7840bdfc..e50c44f6665a 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -298,6 +298,18 @@ void VirtualMachine::SetOutputs(std::string func_name, TVMArgs args) { outputs_.emplace(func_name, func_args); } + +void VirtualMachine::PrintInfoAndSetInputArgs(const VMFunction& func, const std::vector& args) { + VLOG(2) << "Executing Function: " << std::endl << func; + for (int i = 0; i < static_cast(devices_.size()); ++i) { + VLOG(2) << "Device " << i << " has device type " << devices_[i].device_type + << " and device id " << devices_[i].device_id + << (i == exec_->host_device_index ? " (using as host device)" : ""); + } + + InvokeGlobal(func, args); +} + void VirtualMachine::SetOutputTensorsToRegister(const std::vector& outputs) { size_t size = outputs.size(); @@ -411,14 +423,7 @@ void VirtualMachine::InvokeGlobal(const VMFunction& func, const std::vector& args) { - VLOG(2) << "Executing Function: " << std::endl << func; - for (int i = 0; i < static_cast(devices_.size()); ++i) { - VLOG(2) << "Device " << i << " has device type " << devices_[i].device_type << " and device id " - << devices_[i].device_id - << (i == exec_->host_device_index ? " (using as host device)" : ""); - } - - InvokeGlobal(func, args); + PrintInfoAndSetInputArgs(func, args); RunLoop(); return return_register_; } @@ -435,14 +440,7 @@ ObjectRef VirtualMachine::Invoke(const std::string& name, const std::vector& input_args, const std::vector& output_args) { - DLOG(INFO) << "Executing Function: " << std::endl << func; - for (int i = 0; i < static_cast(devices_.size()); ++i) { - DLOG(INFO) << "Device " << i << " has device type " << devices_[i].device_type - << " and device id " << devices_[i].device_id - << (i == exec_->host_device_index ? 
" (using as host device)" : ""); - } - - InvokeGlobal(func, input_args); + PrintInfoAndSetInputArgs(func, input_args); SetOutputTensorsToRegister(output_args); RunLoop(set_outputs_enabled_[func.name]); return return_register_; From 830d2d33a0dbfb72537c832dc5a19e3a52064917 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Wed, 18 May 2022 16:35:57 +0300 Subject: [PATCH 07/21] support multiple outputs --- include/tvm/runtime/vm/vm.h | 23 +++++++++++++++++----- src/runtime/vm/vm.cc | 38 +++++++++++++++++++++++++++++-------- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h index 0b04c9ec974b..da4ff274a939 100644 --- a/include/tvm/runtime/vm/vm.h +++ b/include/tvm/runtime/vm/vm.h @@ -308,9 +308,10 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { /*! * \brief Set pre-allocated outputs to register for specified function. + * \param func_name The function's name. * \param outputs set of output tensors. */ - void SetOutputTensorsToRegister(const std::vector& outputs); + void SetOutputTensorsToRegister(const std::string& func_name, const std::vector& outputs); /*! * \brief Internal hook for profiling the start of an op. @@ -385,8 +386,16 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { Index GetResultRegisterIndex() const; /*! - * \brief Write new allocated tensor to register_file of frame - * \param instr current instruction containing shape and storage info + * \brief Collect indices from register_file for output tensors. + * It helps to replace output tensors allocated in RunLoop by + * tensors pre-allocated outside. Scenario is when `set_output` is used + * \param func_name The function's name. + */ + void CollectOutputTensorRegIndices(const std::string& func_name); + + /*! + * \brief Write new allocated tensor to register_file of frame. + * \param instr current instruction containing shape and storage info. 
*/ void WriteAllocatedTensor(const Instruction& instr); @@ -394,8 +403,9 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { * \brief 'set_outputs_enabled' is assumed true for using this method. * It is expected that result register has already contained tensor from outside, * new tensor is not allocated and write, but expected shape is checked. - * For other register WriteAllocatedMethod is used. - * \param instr current instruction containing shape and storage info + * For other register WriteAllocatedTensor method is used. + * \param instr current instruction containing shape and storage info. + * \param res_index register index of result. */ void WriteAllocatedTensorFromOutside(const Instruction& instr, Index res_index); @@ -416,7 +426,10 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { ObjectPtr exec_; /*! \brief The function name to inputs mapping. */ std::unordered_map> inputs_; + /*! \brief The function name to flag enabling scenario with set outputs. */ std::unordered_map set_outputs_enabled_; + /*! \brief The function name to indices of output tensors in register file. */ + std::unordered_map> output_tensor_reg_indices_; /*! \brief The function name to pre-allocated outputs mapping. */ std::unordered_map> outputs_; /*! diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index e50c44f6665a..167a7e24bbdb 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -310,16 +310,16 @@ void VirtualMachine::PrintInfoAndSetInputArgs(const VMFunction& func, const std: InvokeGlobal(func, args); } -void VirtualMachine::SetOutputTensorsToRegister(const std::vector& outputs) { +void VirtualMachine::SetOutputTensorsToRegister(const std::string& func_name, const std::vector& outputs) { size_t size = outputs.size(); Index res_ind = GetResultRegisterIndex(); - if (size == 1) { - WriteRegister(res_ind, outputs[0]); - } else { - // TODO(vvchernov): I'm not sure we need any tag here. 
Nevertheless it is required - auto output_set = ADT(0, outputs); - WriteRegister(res_ind, output_set); + CollectOutputTensorRegIndices(func_name); + auto& reg_indices = output_tensor_reg_indices_[func_name]; + ICHECK_EQ(reg_indices.size(), size) + << "Number of outside output tensors should be equal to model outputs number"; + for (size_t i = 0; i < size; ++i) { + WriteRegister(reg_indices[i], outputs[i]); } } @@ -441,7 +441,7 @@ ObjectRef VirtualMachine::Invoke(const VMFunction& func, const std::vector& input_args, const std::vector& output_args) { PrintInfoAndSetInputArgs(func, input_args); - SetOutputTensorsToRegister(output_args); + SetOutputTensorsToRegister(func.name, output_args); RunLoop(set_outputs_enabled_[func.name]); return return_register_; } @@ -593,6 +593,28 @@ Index VirtualMachine::GetResultRegisterIndex() const { return code_[op_index].result; } +void VirtualMachine::CollectOutputTensorRegIndices(const std::string& func_name) { + if (!output_tensor_reg_indices_[func_name].empty()) { + return; + } + + auto& reg_indices = output_tensor_reg_indices_[func_name]; + Index res_index = GetResultRegisterIndex(); + Index op_index = 0; + while (code_[op_index].dst != res_index) { + ++op_index; + } + if (code_[op_index].op == Opcode::AllocTensor) { + reg_indices.emplace_back(res_index); + } else if (code_[op_index].op == Opcode::AllocADT) { + for (Index i = 0; i < code_[op_index].num_fields; ++i) { + reg_indices.push_back(code_[op_index].datatype_fields[i]); + } + } else { + // TODO(vvchernov): possible extension + } +} + void VirtualMachine::RunLoop(bool set_output_enabled) { ICHECK(this->exec_); ICHECK(this->code_); From 31fc06961bd0566be3b17aa1b562265871d0a26d Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Wed, 18 May 2022 20:19:15 +0300 Subject: [PATCH 08/21] lint fix --- include/tvm/runtime/vm/vm.h | 6 +++--- src/runtime/vm/vm.cc | 43 ++++++++++++++++++------------------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git 
a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h index da4ff274a939..d2dc9b03324f 100644 --- a/include/tvm/runtime/vm/vm.h +++ b/include/tvm/runtime/vm/vm.h @@ -233,8 +233,7 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { * \param output_args The pre-allocated output arguments of the function. * \return The object(s) representing the result. */ - ObjectRef Invoke(const VMFunction& func, - const std::vector& input_args, + ObjectRef Invoke(const VMFunction& func, const std::vector& input_args, const std::vector& output_args); /*! @@ -311,7 +310,8 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { * \param func_name The function's name. * \param outputs set of output tensors. */ - void SetOutputTensorsToRegister(const std::string& func_name, const std::vector& outputs); + void SetOutputTensorsToRegister(const std::string& func_name, + const std::vector& outputs); /*! * \brief Internal hook for profiling the start of an op. diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 167a7e24bbdb..4dcadd59c213 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -145,7 +145,8 @@ PackedFunc VirtualMachine::GetFunction(const std::string& name, ICHECK(it != inputs_.end()) << "Input has not been set for function " << func_name; const std::vector& input_args = it->second; if (set_outputs_enabled_.count(func_name) && set_outputs_enabled_[func_name]) { - ICHECK(outputs_.count(func_name)) << "Outputs have not been set for function " << func_name; + ICHECK(outputs_.count(func_name)) + << "Outputs have not been set for function " << func_name; *rv = Invoke(func, input_args, outputs_[func_name]); set_outputs_enabled_[func_name] = false; } else { @@ -285,35 +286,34 @@ void VirtualMachine::SetOutputs(std::string func_name, TVMArgs args) { set_outputs_enabled_[func_name] = true; size_t outputs_size = args.size(); // First args is func_name - ICHECK_GT(outputs_size, 1) - << "There is no output arguments set"; + 
ICHECK_GT(outputs_size, 1) << "There is no output arguments set"; std::vector func_args(outputs_size - 1); for (size_t i = 1; i < outputs_size; ++i) { // TODO(vvchernov): device? // TODO(vvchernov): correct index sequence for multiple outputs? - func_args[i-1] = TensorFromTVMArgValueToObjectRef(args[i]); + func_args[i - 1] = TensorFromTVMArgValueToObjectRef(args[i]); } outputs_.erase(func_name); outputs_.emplace(func_name, func_args); } - -void VirtualMachine::PrintInfoAndSetInputArgs(const VMFunction& func, const std::vector& args) { +void VirtualMachine::PrintInfoAndSetInputArgs(const VMFunction& func, + const std::vector& args) { VLOG(2) << "Executing Function: " << std::endl << func; for (int i = 0; i < static_cast(devices_.size()); ++i) { - VLOG(2) << "Device " << i << " has device type " << devices_[i].device_type - << " and device id " << devices_[i].device_id + VLOG(2) << "Device " << i << " has device type " << devices_[i].device_type << " and device id " + << devices_[i].device_id << (i == exec_->host_device_index ? 
" (using as host device)" : ""); } InvokeGlobal(func, args); } -void VirtualMachine::SetOutputTensorsToRegister(const std::string& func_name, const std::vector& outputs) { +void VirtualMachine::SetOutputTensorsToRegister(const std::string& func_name, + const std::vector& outputs) { size_t size = outputs.size(); - Index res_ind = GetResultRegisterIndex(); CollectOutputTensorRegIndices(func_name); auto& reg_indices = output_tensor_reg_indices_[func_name]; ICHECK_EQ(reg_indices.size(), size) @@ -324,16 +324,16 @@ void VirtualMachine::SetOutputTensorsToRegister(const std::string& func_name, co } ObjectRef VirtualMachine::TensorFromTVMArgValueToObjectRef(const TVMArgValue& output_tensor) const { - if (output_tensor.type_code() == kTVMDLTensorHandle) { - DLTensor* dl_tensor = output_tensor; - return NDArray::FromExternalDLTensor(*dl_tensor); - } else if (output_tensor.type_code() == kTVMNDArrayHandle) { - return output_tensor.AsObjectRef(); - } else { - LOG(FATAL) << "It supports tensor of DLTensor or NDArray type only! Given type is " - << output_tensor.type_code(); - } - return ObjectRef(); + if (output_tensor.type_code() == kTVMDLTensorHandle) { + DLTensor* dl_tensor = output_tensor; + return NDArray::FromExternalDLTensor(*dl_tensor); + } else if (output_tensor.type_code() == kTVMNDArrayHandle) { + return output_tensor.AsObjectRef(); + } else { + LOG(FATAL) << "It supports tensor of DLTensor or NDArray type only! 
Given type is " + << output_tensor.type_code(); + } + return ObjectRef(); } int64_t VirtualMachine::GetInputIndexFromVMFunction(const std::string& func_name, @@ -437,8 +437,7 @@ ObjectRef VirtualMachine::Invoke(const std::string& name, const std::vectorfunctions[func_index], args); } -ObjectRef VirtualMachine::Invoke(const VMFunction& func, - const std::vector& input_args, +ObjectRef VirtualMachine::Invoke(const VMFunction& func, const std::vector& input_args, const std::vector& output_args) { PrintInfoAndSetInputArgs(func, input_args); SetOutputTensorsToRegister(func.name, output_args); From 156202f54fbf73fa4b13912781fd5e0e52b2a80f Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Thu, 18 Aug 2022 11:49:17 +0300 Subject: [PATCH 09/21] update for support multi output network --- include/tvm/runtime/vm/vm.h | 9 ++++++--- src/runtime/vm/vm.cc | 27 +++++++++++++++++---------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h index d2dc9b03324f..8f6ea8619b3e 100644 --- a/include/tvm/runtime/vm/vm.h +++ b/include/tvm/runtime/vm/vm.h @@ -259,7 +259,7 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { const std::vector& alloc_types); /*! \brief Run VM dispatch loop. */ - void RunLoop(bool set_output_enabled = false); + void RunLoop(const std::vector& output_tensor_reg_indices = {}); /*! \brief Get device from the device list based on a given device index. */ Device GetDevice(Index device_index) const; @@ -405,9 +405,12 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { * new tensor is not allocated and write, but expected shape is checked. * For other register WriteAllocatedTensor method is used. * \param instr current instruction containing shape and storage info. - * \param res_index register index of result. + * \param output_tensor_reg_indices register indices of output tensors. 
*/ - void WriteAllocatedTensorFromOutside(const Instruction& instr, Index res_index); + void WriteAllocatedTensorFromOutside(const Instruction& instr, + const std::vector& output_tensor_reg_indices); + + bool FindIndex(const std::vector& indices, Index val) const; protected: /*! \brief The virtual machine's packed function table. */ diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 4dcadd59c213..c810d00942a6 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -318,8 +318,9 @@ void VirtualMachine::SetOutputTensorsToRegister(const std::string& func_name, auto& reg_indices = output_tensor_reg_indices_[func_name]; ICHECK_EQ(reg_indices.size(), size) << "Number of outside output tensors should be equal to model outputs number"; - for (size_t i = 0; i < size; ++i) { - WriteRegister(reg_indices[i], outputs[i]); + size_t i = 0; + for (auto it = reg_indices.begin(); it != reg_indices.end(); ++it, ++i) { + WriteRegister(*it, outputs[i]); } } @@ -441,7 +442,7 @@ ObjectRef VirtualMachine::Invoke(const VMFunction& func, const std::vector& output_args) { PrintInfoAndSetInputArgs(func, input_args); SetOutputTensorsToRegister(func.name, output_args); - RunLoop(set_outputs_enabled_[func.name]); + RunLoop(output_tensor_reg_indices_[func.name]); return return_register_; } @@ -614,12 +615,11 @@ void VirtualMachine::CollectOutputTensorRegIndices(const std::string& func_name) } } -void VirtualMachine::RunLoop(bool set_output_enabled) { +void VirtualMachine::RunLoop(const std::vector& output_tensor_reg_indices) { ICHECK(this->exec_); ICHECK(this->code_); pc_ = 0; Index frame_start = frames_.size(); - Index res_reg_index = GetResultRegisterIndex(); while (true) { main_loop: auto const& instr = code_[this->pc_]; @@ -763,10 +763,10 @@ void VirtualMachine::RunLoop(bool set_output_enabled) { } case Opcode::AllocTensor: { OpStartHook(instr); - if (set_output_enabled) { - WriteAllocatedTensorFromOutside(instr, res_reg_index); - } else { + if 
(output_tensor_reg_indices.empty()) { WriteAllocatedTensor(instr); + } else { + WriteAllocatedTensorFromOutside(instr, output_tensor_reg_indices); } OpStopHook(); pc_++; @@ -930,14 +930,21 @@ void VirtualMachine::WriteAllocatedTensor(const Instruction& instr) { WriteRegister(instr.dst, obj); } -void VirtualMachine::WriteAllocatedTensorFromOutside(const Instruction& instr, Index res_index) { - if (instr.dst == res_index) { +void VirtualMachine::WriteAllocatedTensorFromOutside( + const Instruction& instr, const std::vector& output_tensor_reg_indices) { + if (FindIndex(output_tensor_reg_indices, instr.dst)) { // TODO(vvchernov): check shape } else { + LOG(WARNING) << "Writting of allocated tensor from outside fails. Usual approach is used"; WriteAllocatedTensor(instr); } } +bool VirtualMachine::FindIndex(const std::vector& indices, Index val) const { + auto it = std::find(indices.begin(), indices.end(), val); + return it != indices.end(); +} + runtime::Module CreateVirtualMachine(Executable* exec) { auto vm = make_object(); vm->LoadExecutable(GetObjectPtr(exec)); From 605eeafaa9276931130aada7eb6013c1257c2de9 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Fri, 19 Aug 2022 08:30:04 +0300 Subject: [PATCH 10/21] extend set_output method for ReshapeTensor Op in VM --- include/tvm/runtime/vm/vm.h | 8 ++++ src/runtime/vm/vm.cc | 76 ++++++++++++++++++++++++++++--------- 2 files changed, 67 insertions(+), 17 deletions(-) diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h index 8f6ea8619b3e..b07dce900987 100644 --- a/include/tvm/runtime/vm/vm.h +++ b/include/tvm/runtime/vm/vm.h @@ -385,6 +385,12 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { */ Index GetResultRegisterIndex() const; + /*! + * \brief Calculate the index of operation which destination is result + * \param res_index is the index of op returning result + */ + void CalculatePreResultOpIndex(Index res_index); + /*! * \brief Collect indices from register_file for output tensors. 
* It helps to replace output tensors allocated in RunLoop by @@ -431,6 +437,8 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { std::unordered_map> inputs_; /*! \brief The function name to flag enabling scenario with set outputs. */ std::unordered_map set_outputs_enabled_; + /*! \brief The index of operation which destination is result. */ + Index preresult_op_index_ = -1; /*! \brief The function name to indices of output tensors in register file. */ std::unordered_map> output_tensor_reg_indices_; /*! \brief The function name to pre-allocated outputs mapping. */ diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index c810d00942a6..541e6d2ddfae 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -593,6 +593,15 @@ Index VirtualMachine::GetResultRegisterIndex() const { return code_[op_index].result; } +void VirtualMachine::CalculatePreResultOpIndex(Index res_index) { + if (preresult_op_index_ == -1) { + preresult_op_index_ = 0; + while (code_[preresult_op_index_].dst != res_index) { + ++preresult_op_index_; + } + } +} + void VirtualMachine::CollectOutputTensorRegIndices(const std::string& func_name) { if (!output_tensor_reg_indices_[func_name].empty()) { return; @@ -600,18 +609,19 @@ void VirtualMachine::CollectOutputTensorRegIndices(const std::string& func_name) auto& reg_indices = output_tensor_reg_indices_[func_name]; Index res_index = GetResultRegisterIndex(); - Index op_index = 0; - while (code_[op_index].dst != res_index) { - ++op_index; - } - if (code_[op_index].op == Opcode::AllocTensor) { + CalculatePreResultOpIndex(res_index); + auto& preres_instr = code_[preresult_op_index_]; + auto op_code = preres_instr.op; + if (op_code == Opcode::AllocTensor) { reg_indices.emplace_back(res_index); - } else if (code_[op_index].op == Opcode::AllocADT) { - for (Index i = 0; i < code_[op_index].num_fields; ++i) { - reg_indices.push_back(code_[op_index].datatype_fields[i]); + } else if (op_code == Opcode::AllocADT) { + for (Index i = 0; i < 
preres_instr.num_fields; ++i) { + reg_indices.push_back(preres_instr.datatype_fields[i]); } + } else if (op_code == Opcode::ReshapeTensor) { + reg_indices.push_back(preres_instr.reshape_tensor.tensor); } else { - // TODO(vvchernov): possible extension + LOG(WARNING) << "Operation " << size_t(op_code) << " is not supported for set_outputs method"; } } @@ -763,10 +773,10 @@ void VirtualMachine::RunLoop(const std::vector& output_tensor_reg_indices } case Opcode::AllocTensor: { OpStartHook(instr); - if (output_tensor_reg_indices.empty()) { - WriteAllocatedTensor(instr); - } else { + if (!output_tensor_reg_indices.empty() && FindIndex(output_tensor_reg_indices, instr.dst)) { WriteAllocatedTensorFromOutside(instr, output_tensor_reg_indices); + } else { + WriteAllocatedTensor(instr); } OpStopHook(); pc_++; @@ -932,11 +942,43 @@ void VirtualMachine::WriteAllocatedTensor(const Instruction& instr) { void VirtualMachine::WriteAllocatedTensorFromOutside( const Instruction& instr, const std::vector& output_tensor_reg_indices) { - if (FindIndex(output_tensor_reg_indices, instr.dst)) { - // TODO(vvchernov): check shape - } else { - LOG(WARNING) << "Writting of allocated tensor from outside fails. 
Usual approach is used"; - WriteAllocatedTensor(instr); + for (auto res_index : output_tensor_reg_indices) { + auto arr = Downcast(ReadRegister(res_index)); + auto shape = arr.Shape(); + size_t size = shape.size(); + bool size_check = false; + if (size != instr.alloc_tensor.ndim) { + size_check = true; + } else { + for (size_t i = 0; i < size; ++i) { + if (shape[i] != instr.alloc_tensor.shape[i]) { + size_check = true; + break; + } + } + } + + if (size_check) { + // Match element number + size_t in_el_num = 1, ex_el_num = 1; + for (size_t i = 0; i < size; ++i) { + in_el_num *= shape[i]; + } + for (size_t i = 0; i < instr.alloc_tensor.ndim; ++i) { + ex_el_num *= instr.alloc_tensor.shape[i]; + } + ICHECK_EQ(in_el_num, ex_el_num) + << "Element number mismatching of internal and external output tensors"; + if (code_[preresult_op_index_].op == Opcode::ReshapeTensor) { + int64_t* dims = instr.alloc_tensor.shape; + int64_t ndim = instr.alloc_tensor.ndim; + std::vector ref_shape(dims, dims + ndim); + auto reshaped_tensor = arr.CreateView(ref_shape, arr->dtype); + WriteRegister(res_index, reshaped_tensor); + } else { + LOG_ERROR << "Internal and external output tensor shapes are mismatched"; + } + } } } From dec50224b863d89c6d0aa11974ba7e71dea6bf63 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Mon, 29 Aug 2022 17:21:58 +0300 Subject: [PATCH 11/21] small fix. 
code cleaning --- src/runtime/vm/vm.cc | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 541e6d2ddfae..504aada7d83a 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -943,15 +943,23 @@ void VirtualMachine::WriteAllocatedTensor(const Instruction& instr) { void VirtualMachine::WriteAllocatedTensorFromOutside( const Instruction& instr, const std::vector& output_tensor_reg_indices) { for (auto res_index : output_tensor_reg_indices) { - auto arr = Downcast(ReadRegister(res_index)); - auto shape = arr.Shape(); - size_t size = shape.size(); + // External tensor(s) has been already written to the register + auto ex_arr = Downcast(ReadRegister(res_index)); + auto ex_shape = ex_arr.Shape(); + auto ex_size = ex_shape.size(); + auto ex_dtype = ex_arr->dtype; + + auto in_size = instr.alloc_tensor.ndim; + auto in_dtype = instr.alloc_tensor.dtype; + ICHECK_EQ(TypeEqual(in_dtype, ex_dtype), true) + << "Data types mismatching for internal and external output tensors"; + bool size_check = false; - if (size != instr.alloc_tensor.ndim) { + if (ex_size != in_size) { size_check = true; } else { - for (size_t i = 0; i < size; ++i) { - if (shape[i] != instr.alloc_tensor.shape[i]) { + for (size_t i = 0; i < in_size; ++i) { + if (ex_shape[i] != instr.alloc_tensor.shape[i]) { size_check = true; break; } @@ -961,19 +969,18 @@ void VirtualMachine::WriteAllocatedTensorFromOutside( if (size_check) { // Match element number size_t in_el_num = 1, ex_el_num = 1; - for (size_t i = 0; i < size; ++i) { - in_el_num *= shape[i]; + for (size_t i = 0; i < ex_size; ++i) { + ex_el_num *= ex_shape[i]; } - for (size_t i = 0; i < instr.alloc_tensor.ndim; ++i) { - ex_el_num *= instr.alloc_tensor.shape[i]; + for (size_t i = 0; i < in_size; ++i) { + in_el_num *= instr.alloc_tensor.shape[i]; } ICHECK_EQ(in_el_num, ex_el_num) << "Element number mismatching of internal and external output tensors"; if 
(code_[preresult_op_index_].op == Opcode::ReshapeTensor) { int64_t* dims = instr.alloc_tensor.shape; - int64_t ndim = instr.alloc_tensor.ndim; - std::vector ref_shape(dims, dims + ndim); - auto reshaped_tensor = arr.CreateView(ref_shape, arr->dtype); + std::vector ref_shape(dims, dims + int64_t(in_size)); + auto reshaped_tensor = ex_arr.CreateView(ref_shape, ex_dtype); WriteRegister(res_index, reshaped_tensor); } else { LOG_ERROR << "Internal and external output tensor shapes are mismatched"; From 6318515885d86455a705d82dbc743e91173e0a94 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Mon, 29 Aug 2022 18:43:34 +0300 Subject: [PATCH 12/21] fix excess passing during shape and data type check for multiple outputs networks --- include/tvm/runtime/vm/vm.h | 6 +-- src/runtime/vm/vm.cc | 79 ++++++++++++++++++------------------- 2 files changed, 40 insertions(+), 45 deletions(-) diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h index b07dce900987..8d585f6704a6 100644 --- a/include/tvm/runtime/vm/vm.h +++ b/include/tvm/runtime/vm/vm.h @@ -408,13 +408,11 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { /*! * \brief 'set_outputs_enabled' is assumed true for using this method. * It is expected that result register has already contained tensor from outside, - * new tensor is not allocated and write, but expected shape is checked. + * new memory is not allocated and write, but expected shape and data type are checked. * For other register WriteAllocatedTensor method is used. * \param instr current instruction containing shape and storage info. - * \param output_tensor_reg_indices register indices of output tensors. 
*/ - void WriteAllocatedTensorFromOutside(const Instruction& instr, - const std::vector& output_tensor_reg_indices); + void WriteAllocatedTensorFromOutside(const Instruction& instr); bool FindIndex(const std::vector& indices, Index val) const; diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 504aada7d83a..80a566415634 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -774,7 +774,7 @@ void VirtualMachine::RunLoop(const std::vector& output_tensor_reg_indices case Opcode::AllocTensor: { OpStartHook(instr); if (!output_tensor_reg_indices.empty() && FindIndex(output_tensor_reg_indices, instr.dst)) { - WriteAllocatedTensorFromOutside(instr, output_tensor_reg_indices); + WriteAllocatedTensorFromOutside(instr); } else { WriteAllocatedTensor(instr); } @@ -940,51 +940,48 @@ void VirtualMachine::WriteAllocatedTensor(const Instruction& instr) { WriteRegister(instr.dst, obj); } -void VirtualMachine::WriteAllocatedTensorFromOutside( - const Instruction& instr, const std::vector& output_tensor_reg_indices) { - for (auto res_index : output_tensor_reg_indices) { - // External tensor(s) has been already written to the register - auto ex_arr = Downcast(ReadRegister(res_index)); - auto ex_shape = ex_arr.Shape(); - auto ex_size = ex_shape.size(); - auto ex_dtype = ex_arr->dtype; +void VirtualMachine::WriteAllocatedTensorFromOutside(const Instruction& instr) { + // External tensor(s) has been already written to the register (instr.dst) + auto ex_arr = Downcast(ReadRegister(instr.dst)); + auto ex_shape = ex_arr.Shape(); + auto ex_size = ex_shape.size(); + auto ex_dtype = ex_arr->dtype; - auto in_size = instr.alloc_tensor.ndim; - auto in_dtype = instr.alloc_tensor.dtype; - ICHECK_EQ(TypeEqual(in_dtype, ex_dtype), true) - << "Data types mismatching for internal and external output tensors"; + auto in_size = instr.alloc_tensor.ndim; + auto in_dtype = instr.alloc_tensor.dtype; + ICHECK_EQ(TypeEqual(in_dtype, ex_dtype), true) + << "Data types mismatching for 
internal and external output tensors"; - bool size_check = false; - if (ex_size != in_size) { - size_check = true; - } else { - for (size_t i = 0; i < in_size; ++i) { - if (ex_shape[i] != instr.alloc_tensor.shape[i]) { - size_check = true; - break; - } + bool size_check = false; + if (ex_size != in_size) { + size_check = true; + } else { + for (size_t i = 0; i < in_size; ++i) { + if (ex_shape[i] != instr.alloc_tensor.shape[i]) { + size_check = true; + break; } } + } - if (size_check) { - // Match element number - size_t in_el_num = 1, ex_el_num = 1; - for (size_t i = 0; i < ex_size; ++i) { - ex_el_num *= ex_shape[i]; - } - for (size_t i = 0; i < in_size; ++i) { - in_el_num *= instr.alloc_tensor.shape[i]; - } - ICHECK_EQ(in_el_num, ex_el_num) - << "Element number mismatching of internal and external output tensors"; - if (code_[preresult_op_index_].op == Opcode::ReshapeTensor) { - int64_t* dims = instr.alloc_tensor.shape; - std::vector ref_shape(dims, dims + int64_t(in_size)); - auto reshaped_tensor = ex_arr.CreateView(ref_shape, ex_dtype); - WriteRegister(res_index, reshaped_tensor); - } else { - LOG_ERROR << "Internal and external output tensor shapes are mismatched"; - } + if (size_check) { + // Match element number + size_t in_el_num = 1, ex_el_num = 1; + for (size_t i = 0; i < ex_size; ++i) { + ex_el_num *= ex_shape[i]; + } + for (size_t i = 0; i < in_size; ++i) { + in_el_num *= instr.alloc_tensor.shape[i]; + } + ICHECK_EQ(in_el_num, ex_el_num) + << "Element number mismatching of internal and external output tensors"; + if (code_[preresult_op_index_].op == Opcode::ReshapeTensor) { + int64_t* dims = instr.alloc_tensor.shape; + std::vector ref_shape(dims, dims + int64_t(in_size)); + auto reshaped_tensor = ex_arr.CreateView(ref_shape, ex_dtype); + WriteRegister(instr.dst, reshaped_tensor); + } else { + LOG_ERROR << "Internal and external output tensor shapes are mismatched"; } } } From ef3e0963fcf1f3aa8494bc74f159e73585493a37 Mon Sep 17 00:00:00 2001 From: Valery 
Chernov Date: Fri, 9 Sep 2022 12:04:27 +0300 Subject: [PATCH 13/21] update fatal error logs --- src/runtime/vm/vm.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 80a566415634..1fe9e74d4cd1 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -621,7 +621,7 @@ void VirtualMachine::CollectOutputTensorRegIndices(const std::string& func_name) } else if (op_code == Opcode::ReshapeTensor) { reg_indices.push_back(preres_instr.reshape_tensor.tensor); } else { - LOG(WARNING) << "Operation " << size_t(op_code) << " is not supported for set_outputs method"; + LOG(FATAL) << "Operation " << size_t(op_code) << " is not supported for set_outputs method"; } } @@ -981,7 +981,7 @@ void VirtualMachine::WriteAllocatedTensorFromOutside(const Instruction& instr) { auto reshaped_tensor = ex_arr.CreateView(ref_shape, ex_dtype); WriteRegister(instr.dst, reshaped_tensor); } else { - LOG_ERROR << "Internal and external output tensor shapes are mismatched"; + LOG(FATAL) << "Internal and external output tensor shapes are mismatched"; } } } From d8df1d54663b7b2b5476fdd361461402b59efa04 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Fri, 9 Sep 2022 12:32:00 +0300 Subject: [PATCH 14/21] clean CollectOutputTensorRegIndices method --- include/tvm/runtime/vm/vm.h | 6 +++--- src/runtime/vm/vm.cc | 13 ++++++------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h index 8d585f6704a6..f50794f0657d 100644 --- a/include/tvm/runtime/vm/vm.h +++ b/include/tvm/runtime/vm/vm.h @@ -392,12 +392,12 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { void CalculatePreResultOpIndex(Index res_index); /*! - * \brief Collect indices from register_file for output tensors. + * \brief Get indices from register_file for output tensors. * It helps to replace output tensors allocated in RunLoop by * tensors pre-allocated outside. 
Scenario is when `set_output` is used - * \param func_name The function's name. + * \return indices from register_file for output tensors. */ - void CollectOutputTensorRegIndices(const std::string& func_name); + std::vector GetOutputTensorRegIndices(); /*! * \brief Write new allocated tensor to register_file of frame. diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 1fe9e74d4cd1..57c77c730fe3 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -314,7 +314,9 @@ void VirtualMachine::SetOutputTensorsToRegister(const std::string& func_name, const std::vector& outputs) { size_t size = outputs.size(); - CollectOutputTensorRegIndices(func_name); + if (output_tensor_reg_indices_[func_name].empty()) { + output_tensor_reg_indices_[func_name] = GetOutputTensorRegIndices(); + } auto& reg_indices = output_tensor_reg_indices_[func_name]; ICHECK_EQ(reg_indices.size(), size) << "Number of outside output tensors should be equal to model outputs number"; @@ -602,12 +604,8 @@ void VirtualMachine::CalculatePreResultOpIndex(Index res_index) { } } -void VirtualMachine::CollectOutputTensorRegIndices(const std::string& func_name) { - if (!output_tensor_reg_indices_[func_name].empty()) { - return; - } - - auto& reg_indices = output_tensor_reg_indices_[func_name]; +std::vector VirtualMachine::GetOutputTensorRegIndices() { + std::vector reg_indices; Index res_index = GetResultRegisterIndex(); CalculatePreResultOpIndex(res_index); auto& preres_instr = code_[preresult_op_index_]; @@ -623,6 +621,7 @@ void VirtualMachine::CollectOutputTensorRegIndices(const std::string& func_name) } else { LOG(FATAL) << "Operation " << size_t(op_code) << " is not supported for set_outputs method"; } + return reg_indices; } void VirtualMachine::RunLoop(const std::vector& output_tensor_reg_indices) { From 94635528074880059dc26f0dc7776477ad3404f5 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Fri, 9 Sep 2022 17:33:41 +0300 Subject: [PATCH 15/21] extend description --- 
include/tvm/runtime/vm/vm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h index f50794f0657d..ecb7223ce77e 100644 --- a/include/tvm/runtime/vm/vm.h +++ b/include/tvm/runtime/vm/vm.h @@ -293,6 +293,8 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { /*! * \brief Set pre-allocated outputs to a function. + * It is native implementation of 'set_outputs' python method. + * It is used in scenario when output tensors are allocated outside. * \param name The function name * \param args outputs to the function. */ From 1493e59c5578504ebae8ebac9b3b0809c4a39267 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Mon, 12 Sep 2022 21:00:24 +0300 Subject: [PATCH 16/21] clear outputs_ after invoke --- src/runtime/vm/vm.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 57c77c730fe3..976a1cde0b89 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -148,6 +148,7 @@ PackedFunc VirtualMachine::GetFunction(const std::string& name, ICHECK(outputs_.count(func_name)) << "Outputs have not been set for function " << func_name; *rv = Invoke(func, input_args, outputs_[func_name]); + outputs_[func_name].clear(); set_outputs_enabled_[func_name] = false; } else { *rv = Invoke(func, input_args); From 2dc148a578f9ebba69e7e8da9c09a2143fa2b556 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Mon, 12 Sep 2022 21:21:27 +0300 Subject: [PATCH 17/21] update invoke_with_outputs by input args --- python/tvm/runtime/vm.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/python/tvm/runtime/vm.py b/python/tvm/runtime/vm.py index 3415f277f786..25dbfde37040 100644 --- a/python/tvm/runtime/vm.py +++ b/python/tvm/runtime/vm.py @@ -561,9 +561,9 @@ def invoke_stateful(self, func_name, *args, **kwargs): self.set_input(func_name, *args, **kwargs) self._invoke_stateful(func_name) - def invoke_with_outputs(self, func_name, 
*args): + def invoke_with_outputs(self, func_name, input_args, output_args): """Invoke a function with pre-allocated outputs tensors. - It requires use set_input method before. + input_args can be None if set_input method was used before. This invoke method allows to avoid excess copying if memory for output tensors was allocated before inference. @@ -573,10 +573,31 @@ def invoke_with_outputs(self, func_name, *args): func_name : str The name of the function. - args : list[tvm.runtime.NDArray] or list[DLTensor] + input_args: dict of str to tvm.runtime.NDArray or np.ndarray + Named arguments to the function. + + output_args : list[tvm.runtime.NDArray] or list[DLTensor] The output tensors of the function. """ - self._set_outputs(func_name, *args) + if input_args: + func_params = self._exec.get_function_params(func_name) + new_args = [None] * len(func_params) + cnt = 0 + for k in input_args: + if k in func_params: + idx = func_params.index(k) + new_args[idx] = input_args[k] + cnt += 1 + assert len(args) + cnt == len(func_params) + idx = 0 + for i, arg in enumerate(new_args): + if arg is None: + new_args[i] = args[idx] + idx += 1 + args = new_args + cargs = convert(args) + self._set_input(func_name, *cargs) + self._set_outputs(func_name, *output_args) self._invoke(func_name) def get_outputs(self): From 4c117d3ca4a2c094023ad274ffaf955fbec40499 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Thu, 29 Sep 2022 10:40:53 +0300 Subject: [PATCH 18/21] small fix in invoke_with_outputs method of VM. 
rpc test for this method was implemented --- python/tvm/runtime/vm.py | 10 ++---- tests/python/relay/test_vm.py | 66 +++++++++++++++++++++++++++++++---- 2 files changed, 61 insertions(+), 15 deletions(-) diff --git a/python/tvm/runtime/vm.py b/python/tvm/runtime/vm.py index 25dbfde37040..65f971f261e8 100644 --- a/python/tvm/runtime/vm.py +++ b/python/tvm/runtime/vm.py @@ -588,14 +588,8 @@ def invoke_with_outputs(self, func_name, input_args, output_args): idx = func_params.index(k) new_args[idx] = input_args[k] cnt += 1 - assert len(args) + cnt == len(func_params) - idx = 0 - for i, arg in enumerate(new_args): - if arg is None: - new_args[i] = args[idx] - idx += 1 - args = new_args - cargs = convert(args) + assert cnt == len(func_params) + cargs = convert(new_args) self._set_input(func_name, *cargs) self._set_outputs(func_name, *output_args) self._invoke(func_name) diff --git a/tests/python/relay/test_vm.py b/tests/python/relay/test_vm.py index 0b62db85c904..dbc10d02124a 100644 --- a/tests/python/relay/test_vm.py +++ b/tests/python/relay/test_vm.py @@ -846,26 +846,38 @@ def relay_ext_test(func): assert "shape_func" in opt_mod.astext(False) -def test_vm_rpc(): +def prepare_vm_model(path, tensor_shape): """ - This test checks to make sure you can export a VMExecutable, - upload it to a remote machine using RPC and then execute it - on the other machine. + Virtual Machine is compiled for simple topology and + exported as library to given path """ target = tvm.target.Target("llvm --host=llvm") # Build a IRModule. - x = relay.var("x", shape=(10, 1)) + x = relay.var("x", shape=tensor_shape) f = relay.Function([x], x + x) mod = IRModule.from_expr(f) # Compile to VMExecutable. vm_exec = vm.compile(mod, target=target) + # Export to Disk + vm_exec.mod.export_library(path) + + +def test_vm_rpc(): + """ + This test checks to make sure you can export a VMExecutable, + upload it to a remote machine using RPC and then execute it + on the other machine. 
+ """ + # Shape for input and output tensors + shape = (10,1) + # Export to Disk temp = utils.tempdir() path = temp.relpath("vm_library.so") - vm_exec.mod.export_library(path) + prepare_vm_model(path, shape) # Use local rpc server for testing. # Server must use popen so it doesn't inherit the current process state. It @@ -881,7 +893,7 @@ def check_remote(server): device = remote.cpu() # Build a VM out of the executable and context. vm_factory = runtime.vm.VirtualMachine(rexec, device) - np_input = np.random.uniform(size=(10, 1)).astype("float32") + np_input = np.random.uniform(size=shape).astype("float32") input_tensor = tvm.nd.array(np_input, device) # Invoke its "main" function. out = vm_factory.invoke("main", input_tensor) @@ -891,6 +903,46 @@ def check_remote(server): check_remote(rpc.Server("127.0.0.1")) +def test_vm_invoke_with_outputs_rpc(): + """ + This test checks to make sure you can export a VMExecutable, + upload it to a remote machine using RPC and then execute it + on the other machine with preallocated outputs. + """ + # Shape for input and output tensors + shape = (3,2) + + # Export to Disk + temp = utils.tempdir() + path = temp.relpath("vm_library.so") + prepare_vm_model(path, shape) + + # Use local rpc server for testing. + # Server must use popen so it doesn't inherit the current process state. It + # will crash otherwise. + def check_remote_invoke_with_outputs(server): + remote = rpc.connect(server.host, server.port, session_timeout=10) + + # Upload the serialized Executable. + remote.upload(path) + # Get a handle to remote Executable. + rexec = remote.load_module("vm_library.so") + + device = remote.cpu() + # Build a VM out of the executable and context. + vm_factory = runtime.vm.VirtualMachine(rexec, device) + np_input = np.random.uniform(size=shape).astype("float32") + input_tensor = tvm.nd.array(np_input, device) + np_output = np.empty(shape, dtype="float32") + output_tensor = tvm.nd.array(np_output, device) + # Invoke its "main" function. 
+ vm_factory.invoke_with_outputs("main", input_args={"x": input_tensor}, output_args=[output_tensor]) + # Check the result. + np.testing.assert_allclose(output_tensor.numpy(), np_input + np_input) + + check_remote_invoke_with_outputs(rpc.Server("127.0.0.1")) + + def test_get_output_single(): target = tvm.target.Target("llvm") From 2a9d1b378e97169b0d2f33db7cebf9a3b8186897 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Thu, 29 Sep 2022 11:07:00 +0300 Subject: [PATCH 19/21] local test of invoke_with_outputs of VM was implemented --- tests/python/relay/test_vm.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/python/relay/test_vm.py b/tests/python/relay/test_vm.py index dbc10d02124a..ce700c971ac4 100644 --- a/tests/python/relay/test_vm.py +++ b/tests/python/relay/test_vm.py @@ -943,6 +943,28 @@ def check_remote_invoke_with_outputs(server): check_remote_invoke_with_outputs(rpc.Server("127.0.0.1")) +def test_vm_invoke_with_outputs(): + target = tvm.target.Target("llvm") + shape=(3, 2) + + # Build a IRModule. + x = relay.var("x", shape=shape) + f = relay.Function([x], x + x) + mod = IRModule.from_expr(f) + + # Compile to VMExecutable. + vm_exec = vm.compile(mod, target=target) + vm_factory = runtime.vm.VirtualMachine(vm_exec, tvm.cpu()) + np_input = np.random.uniform(size=shape).astype("float32") + input_tensor = tvm.nd.array(np_input) + np_output = np.empty(shape, dtype="float32") + output_tensor = tvm.nd.array(np_output) + # Invoke + vm_factory.invoke_with_outputs("main", input_args={"x": input_tensor}, output_args=[output_tensor]) + # Check the result. 
+ np.testing.assert_allclose(output_tensor.numpy(), np_input + np_input) + + def test_get_output_single(): target = tvm.target.Target("llvm") From 6a34710719934f5f6fc891d9a7b1edcef5214185 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Thu, 29 Sep 2022 11:25:40 +0300 Subject: [PATCH 20/21] update description for set_outputs scenario --- include/tvm/runtime/vm/vm.h | 6 ++++-- python/tvm/runtime/vm.py | 4 +++- src/runtime/vm/vm.cc | 1 - 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/include/tvm/runtime/vm/vm.h b/include/tvm/runtime/vm/vm.h index ecb7223ce77e..6fa91832a731 100644 --- a/include/tvm/runtime/vm/vm.h +++ b/include/tvm/runtime/vm/vm.h @@ -292,9 +292,11 @@ class TVM_DLL VirtualMachine : public runtime::ModuleNode { void SetOneInput(std::string name, const TVMArgValue& tag, const TVMArgValue& tensor); /*! - * \brief Set pre-allocated outputs to a function. + * \brief Set pre-allocated output tensors to a function. * It is native implementation of 'set_outputs' python method. - * It is used in scenario when output tensors are allocated outside. + * It is used in scenario when output tensors are allocated outside each invocation. + * Note: it sets set_outputs_enabled_[name] true and fill outputs_[name] + * but after invocation the first is switched off and the second is cleared * \param name The function name * \param args outputs to the function. */ diff --git a/python/tvm/runtime/vm.py b/python/tvm/runtime/vm.py index 65f971f261e8..20778c40fd51 100644 --- a/python/tvm/runtime/vm.py +++ b/python/tvm/runtime/vm.py @@ -562,7 +562,9 @@ def invoke_stateful(self, func_name, *args, **kwargs): self._invoke_stateful(func_name) def invoke_with_outputs(self, func_name, input_args, output_args): - """Invoke a function with pre-allocated outputs tensors. + # TODO(vvchernov): consider scenario then output tensors set once + """Invoke a function with pre-allocated output tensors. + The output tensors should be set every invocation. 
input_args can be None if set_input method was used before. This invoke method allows to avoid excess copying if memory for output tensors diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index 976a1cde0b89..aaf4675733a8 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -292,7 +292,6 @@ void VirtualMachine::SetOutputs(std::string func_name, TVMArgs args) { std::vector func_args(outputs_size - 1); for (size_t i = 1; i < outputs_size; ++i) { // TODO(vvchernov): device? - // TODO(vvchernov): correct index sequence for multiple outputs? func_args[i - 1] = TensorFromTVMArgValueToObjectRef(args[i]); } outputs_.erase(func_name); From c335fdb414203bf7e2c4b13e2674ecadf1a37318 Mon Sep 17 00:00:00 2001 From: Valery Chernov Date: Thu, 29 Sep 2022 12:19:52 +0300 Subject: [PATCH 21/21] lint fixes --- tests/python/relay/test_vm.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/python/relay/test_vm.py b/tests/python/relay/test_vm.py index ce700c971ac4..45e305c9a195 100644 --- a/tests/python/relay/test_vm.py +++ b/tests/python/relay/test_vm.py @@ -872,7 +872,7 @@ def test_vm_rpc(): on the other machine. """ # Shape for input and output tensors - shape = (10,1) + shape = (10, 1) # Export to Disk temp = utils.tempdir() @@ -910,7 +910,7 @@ def test_vm_invoke_with_outputs_rpc(): on the other machine with preallocated outputs. """ # Shape for input and output tensors - shape = (3,2) + shape = (3, 2) # Export to Disk temp = utils.tempdir() @@ -936,7 +936,9 @@ def check_remote_invoke_with_outputs(server): np_output = np.empty(shape, dtype="float32") output_tensor = tvm.nd.array(np_output, device) # Invoke its "main" function. - vm_factory.invoke_with_outputs("main", input_args={"x": input_tensor}, output_args=[output_tensor]) + vm_factory.invoke_with_outputs( + "main", input_args={"x": input_tensor}, output_args=[output_tensor] + ) # Check the result. 
np.testing.assert_allclose(output_tensor.numpy(), np_input + np_input) @@ -945,7 +947,7 @@ def check_remote_invoke_with_outputs(server): def test_vm_invoke_with_outputs(): target = tvm.target.Target("llvm") - shape=(3, 2) + shape = (3, 2) # Build a IRModule. x = relay.var("x", shape=shape) @@ -960,7 +962,9 @@ def test_vm_invoke_with_outputs(): np_output = np.empty(shape, dtype="float32") output_tensor = tvm.nd.array(np_output) # Invoke - vm_factory.invoke_with_outputs("main", input_args={"x": input_tensor}, output_args=[output_tensor]) + vm_factory.invoke_with_outputs( + "main", input_args={"x": input_tensor}, output_args=[output_tensor] + ) # Check the result. np.testing.assert_allclose(output_tensor.numpy(), np_input + np_input)