diff --git a/python/gen_requirements.py b/python/gen_requirements.py
index 1a55dccd1130..55f2cad8411d 100644
--- a/python/gen_requirements.py
+++ b/python/gen_requirements.py
@@ -71,6 +71,7 @@
                 "psutil",
                 "scipy",
                 "tornado",
+                "typing_extensions",
             ],
         ),
     ),
@@ -276,6 +277,7 @@
     ("torch", None),
     ("torchvision", None),
     ("tornado", None),
+    ("typing_extensions", None),
     ("xgboost", ">=1.1.0"),  # From PR #4953 & Issue #12009
 ]
diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc
index 3c3d931df5d9..d72e198fec0d 100644
--- a/src/runtime/graph_executor/graph_executor.cc
+++ b/src/runtime/graph_executor/graph_executor.cc
@@ -475,8 +475,13 @@ void GraphExecutor::SetupStorage() {
   // is mapped to this pool.
   data_entry_.resize(num_node_entries());
   data_alignment_.resize(num_node_entries());
+  // sid_to_eid_ has one slot per storage_id, i.e. the size of storage_pool_.
+  sid_to_eid_.resize(storage_pool_.size());
   for (size_t i = 0; i < data_entry_.size(); ++i) {
     int storage_id = attrs_.storage_id[i];
+    // Record the "storage_id -> entry_id" mapping.
+    sid_to_eid_[storage_id].push_back(i);
+
     ICHECK_LT(static_cast<size_t>(storage_id), storage_pool_.size());
     data_entry_[i] = storage_pool_[storage_id].CreateView(attrs_.shape[i], vtype[i]);
@@ -504,14 +509,14 @@ void GraphExecutor::SetupOpExecs() {
   for (uint32_t nid = 0; nid < this->GetNumOfNodes(); ++nid) {
     const auto& inode = nodes_[nid];
     if (inode.op_type == "null") continue;
-    std::vector<DLTensor> args;
+    std::vector<DLTensor*> args;
     for (const auto& e : inode.inputs) {
       uint32_t eid = this->entry_id(e);
-      args.push_back(*(data_entry_[eid].operator->()));
+      args.push_back(const_cast<DLTensor*>(data_entry_[eid].operator->()));
     }
     for (uint32_t index = 0; index < inode.param.num_outputs; ++index) {
       uint32_t eid = this->entry_id(nid, index);
-      args.push_back(*(data_entry_[eid].operator->()));
+      args.push_back(const_cast<DLTensor*>(data_entry_[eid].operator->()));
     }
     ICHECK(inode.op_type == "tvm_op") << "Can only take tvm_op as op";
@@ -524,6 +529,16 @@
       if (input_node_eids.count(input_eid) > 0) {
         input_dltensors_[input_eid].push_back(
             static_cast<DLTensor*>(op_args->arg_values[i].v_handle));
+
+        // Data entries that share this storage_id should also be pushed into "input_dltensors_"
+        // so that "SetInputZeroCopy()" can update them as well. This handles the situation where
+        // a "relay.reshape" immediately follows an input, and the input dltensor and the
+        // reshape's output dltensor point to the same data_entry.
+        auto storage_id = attrs_.storage_id[input_eid];
+        for (auto eid : sid_to_eid_[storage_id]) {
+          input_dltensors_[input_eid].push_back(
+              const_cast<DLTensor*>(data_entry_[eid].operator->()));
+        }
       }
       // check if any model output is the input of the op
       if (output_node_eids.count(input_eid) > 0) {
@@ -544,7 +559,7 @@ std::pair<std::function<void()>, std::shared_ptr<GraphExecutor::OpArgs> > GraphExecutor::CreateTVMOp(
-    const TVMOpParam& param, const std::vector<DLTensor>& args) {
+    const TVMOpParam& param, const std::vector<DLTensor*>& args) {
   std::shared_ptr<GraphExecutor::OpArgs> arg_ptr = std::make_shared<GraphExecutor::OpArgs>();
   // setup address.
   arg_ptr->args = args;
@@ -553,7 +568,7 @@ std::pair<std::function<void()>, std::shared_ptr<GraphExecutor::OpArgs> > GraphEx
   }
   for (size_t i = 0; i < arg_ptr->args.size(); ++i) {
     TVMValue v;
-    DLTensor* t = &arg_ptr->args[i];
+    DLTensor* t = arg_ptr->args[i];
     v.v_handle = t;
     arg_ptr->arg_values.push_back(v);
     arg_ptr->arg_tcodes.push_back(kTVMDLTensorHandle);
diff --git a/src/runtime/graph_executor/graph_executor.h b/src/runtime/graph_executor/graph_executor.h
index 0a7086c9f125..c93f35976cc0 100644
--- a/src/runtime/graph_executor/graph_executor.h
+++ b/src/runtime/graph_executor/graph_executor.h
@@ -66,7 +66,7 @@ struct TVMOpParam {
  */
 class TVM_DLL GraphExecutor : public ModuleNode {
   struct OpArgs {
-    std::vector<DLTensor> args;
+    std::vector<DLTensor*> args;
     std::vector<TVMValue> arg_values;
     std::vector<int> arg_tcodes;
     std::vector<int64_t> shape_data;
@@ -437,7 +437,7 @@ class TVM_DLL GraphExecutor : public ModuleNode {
    * \return The created executor.
    */
   std::pair<std::function<void()>, std::shared_ptr<OpArgs> > CreateTVMOp(
-      const TVMOpParam& attrs, const std::vector<DLTensor>& args);
+      const TVMOpParam& attrs, const std::vector<DLTensor*>& args);
   // Get node entry index.
   uint32_t entry_id(uint32_t nid, uint32_t index) const { return node_row_ptr_[nid] + index; }
   // Get node entry index.
@@ -460,6 +460,8 @@ class TVM_DLL GraphExecutor : public ModuleNode {
   std::vector<std::vector<DLTensor*>> output_dltensors_;
   /*! \brief Used for quick node(both model output and op input) DLTensor* lookup given an eid. */
   std::vector<std::vector<DLTensor*>> both_output_opinput_dltensors_;
+  /*! \brief Used for quick entry_id lookup given a storage_id. */
+  std::vector<std::vector<uint32_t>> sid_to_eid_;
   /*! \brief Used for quick entry indexing. */
   std::vector<uint32_t> node_row_ptr_;
   /*! \brief Output entries. */
diff --git a/tests/python/unittest/test_set_input_zero_copy.py b/tests/python/unittest/test_set_input_zero_copy.py
new file mode 100644
index 000000000000..3effbaed152f
--- /dev/null
+++ b/tests/python/unittest/test_set_input_zero_copy.py
@@ -0,0 +1,137 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=missing-function-docstring,missing-module-docstring
+import tvm
+from tvm import relay
+import numpy as np
+from tvm.contrib import graph_executor
+from tvm import testing
+import pytest
+
+
+dev = tvm.cpu(0)
+target = tvm.target.Target("llvm")
+
+
+def build_relay_module(func):
+    mod = tvm.IRModule()
+    mod["main"] = func
+    lib = relay.build(mod, target=target)
+
+    return graph_executor.GraphModule(lib["default"](dev))
+
+
+@testing.requires_llvm
+def test_simple_graph():
+    # Simple relay func:
+    # 1. y = x + 1
+    # 2. return y
+    shape = (2, 2)
+    x = relay.var("x", shape=shape, dtype="float32")
+    y = relay.add(x, relay.ones(shape, dtype="float32"))
+    func = relay.Function([x], y)
+
+    # Build two identical relay modules.
+    mod = build_relay_module(func)
+    mod_zero_copy = build_relay_module(func)
+    x_np = np.random.uniform(size=shape).astype(np.float32)
+
+    # Use set_input()
+    x_nd = tvm.nd.array(x_np, device=dev)
+    mod.set_input("x", x_nd)
+    mod.run()
+
+    # Use set_input_zero_copy()
+    x_nd_zero_copy = tvm.nd.array(x_np, device=dev)
+    index = mod_zero_copy.get_input_index("x")
+    mod_zero_copy.module["set_input_zero_copy"](index, x_nd_zero_copy)
+    mod_zero_copy.run()
+
+    # Expect the same output.
+    testing.assert_allclose(mod.get_output(0).numpy(), mod_zero_copy.get_output(0).numpy())
+
+
+@testing.requires_llvm
+def test_input_in_output():
+    # Relay func whose input also appears in the output:
+    # 1. y = x + 1
+    # 2. return [x, y]
+    shape = (3, 4)
+    x = relay.var("x", shape=shape, dtype="float32")
+    y = relay.add(x, relay.ones(shape, dtype="float32"))
+    func = relay.Function([x], relay.expr.Tuple([x, y]))
+
+    # Build two identical relay modules.
+    mod = build_relay_module(func)
+    mod_zero_copy = build_relay_module(func)
+
+    x_np = np.random.uniform(size=shape).astype(np.float32)
+
+    # Use set_input()
+    x_nd = tvm.nd.array(x_np, device=dev)
+    mod.set_input("x", x_nd)
+    mod.run()
+
+    # Use set_input_zero_copy()
+    x_nd_zero_copy = tvm.nd.array(x_np, device=dev)
+    index = mod_zero_copy.get_input_index("x")
+    mod_zero_copy.module["set_input_zero_copy"](index, x_nd_zero_copy)
+    mod_zero_copy.run()
+
+    # Expect the same output "x".
+    testing.assert_allclose(mod.get_output(0).numpy(), mod_zero_copy.get_output(0).numpy())
+
+
+@testing.requires_llvm
+def test_reshape_after_input():
+    # Relay func where a reshape op immediately follows the input:
+    # 1. y = reshape(x)
+    # 2. z = y + 1
+    # 3. return [x, y, z]
+    shape = (3, 4)
+    x = relay.var("x", shape=shape, dtype="float32")
+    y = relay.reshape(x, (1, 12))
+    z = relay.add(y, relay.ones((1, 12), dtype="float32"))
+    func = relay.Function([x], relay.expr.Tuple([x, y, z]))
+
+    # Build two identical relay modules.
+    mod = build_relay_module(func)
+    mod_zero_copy = build_relay_module(func)
+
+    x_np = np.random.uniform(size=shape).astype(np.float32)
+
+    # Use set_input()
+    x_nd = tvm.nd.array(x_np, device=dev)
+    mod.set_input("x", x_nd)
+    mod.run()
+
+    # Use set_input_zero_copy()
+    x_nd_zero_copy = tvm.nd.array(x_np, device=dev)
+    index = mod_zero_copy.get_input_index("x")
+    mod_zero_copy.module["set_input_zero_copy"](index, x_nd_zero_copy)
+    mod_zero_copy.run()
+
+    # Expect the same output "x".
+    testing.assert_allclose(mod.get_output(0).numpy(), mod_zero_copy.get_output(0).numpy())
+    # Expect the same output "y".
+    testing.assert_allclose(mod.get_output(1).numpy(), mod_zero_copy.get_output(1).numpy())
+
+
+if __name__ == "__main__":
+    test_simple_graph()
+    test_input_in_output()
+    test_reshape_after_input()