From a436c433bcc134be9089299f42281c26aa3fe90b Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Wed, 21 Oct 2020 16:42:01 +0800 Subject: [PATCH 01/19] sync vta-hw --- 3rdparty/vta-hw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/vta-hw b/3rdparty/vta-hw index 87ce9acfae55..db65157208ec 160000 --- a/3rdparty/vta-hw +++ b/3rdparty/vta-hw @@ -1 +1 @@ -Subproject commit 87ce9acfae550d1a487746e9d06c2e250076e54c +Subproject commit db65157208ec8fabb7b548c94596211b9db04190 From a4abee12b80c98672c4e93e84358c6cfb8d0eb8a Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Wed, 2 Dec 2020 10:51:49 +0800 Subject: [PATCH 02/19] sync for vta-hw --- 3rdparty/vta-hw | 1 + 1 file changed, 1 insertion(+) create mode 160000 3rdparty/vta-hw diff --git a/3rdparty/vta-hw b/3rdparty/vta-hw new file mode 160000 index 000000000000..87ce9acfae55 --- /dev/null +++ b/3rdparty/vta-hw @@ -0,0 +1 @@ +Subproject commit 87ce9acfae550d1a487746e9d06c2e250076e54c From 2fbfba49f1d8960aebe10b84bd9e62c9003317c3 Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Wed, 2 Dec 2020 10:53:32 +0800 Subject: [PATCH 03/19] sync for tvm --- 3rdparty/vta-hw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/vta-hw b/3rdparty/vta-hw index 87ce9acfae55..12fb486a491b 160000 --- a/3rdparty/vta-hw +++ b/3rdparty/vta-hw @@ -1 +1 @@ -Subproject commit 87ce9acfae550d1a487746e9d06c2e250076e54c +Subproject commit 12fb486a491b75d70ec4c5e0a0cd112ab49a95bc From e72cac1cebb46787826522376512ecd132bdb1e7 Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Wed, 2 Dec 2020 11:33:06 +0800 Subject: [PATCH 04/19] upgrade dmlc-core --- 3rdparty/dmlc-core | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/dmlc-core b/3rdparty/dmlc-core index bfad207b4484..6c401e242c59 160000 --- a/3rdparty/dmlc-core +++ b/3rdparty/dmlc-core @@ -1 +1 @@ -Subproject commit bfad207b448480783a1f428ae3d93d87032d8349 +Subproject commit 6c401e242c59a1f4c913918246591bb13fd714e7 From de91b6a250518f5f0d9b97326f91c4f3c9276435 Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Tue, 8 Dec 2020 10:35:50 +0800 Subject: [PATCH 05/19] syn with upstream --- 3rdparty/vta-hw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/vta-hw b/3rdparty/vta-hw index 87ce9acfae55..12fb486a491b 160000 --- a/3rdparty/vta-hw +++ b/3rdparty/vta-hw @@ -1 +1 @@ -Subproject commit 87ce9acfae550d1a487746e9d06c2e250076e54c +Subproject commit 12fb486a491b75d70ec4c5e0a0cd112ab49a95bc From 4dee78289a39bf2cc9dc3a6adacd469415881037 Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Wed, 23 Dec 2020 14:59:13 +0800 Subject: [PATCH 06/19] sync --- 3rdparty/vta-hw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/vta-hw b/3rdparty/vta-hw index 57db5a718c74..12fb486a491b 160000 --- a/3rdparty/vta-hw +++ b/3rdparty/vta-hw @@ -1 +1 @@ -Subproject commit 57db5a718c74a788c98120ebbe1230797be698c8 +Subproject commit 12fb486a491b75d70ec4c5e0a0cd112ab49a95bc From 98f8eb804c89eb162631e8fbd5a4a5093f4bc9a3 Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Thu, 24 Dec 2020 17:13:11 +0800 Subject: [PATCH 07/19] fix tensorrt runtime error when load from cache --- .../contrib/tensorrt/tensorrt_builder.cc | 11 ++++++++++ .../contrib/tensorrt/tensorrt_builder.h | 6 +++++ .../contrib/tensorrt/tensorrt_runtime.cc | 22 +++++++++++++++++-- 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.cc b/src/runtime/contrib/tensorrt/tensorrt_builder.cc index 4060b240cf8e..4a15b3075103 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_builder.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_builder.cc @@ -185,6 +185,17 @@ TensorRTEngineAndContext TensorRTBuilder::BuildEngine() { return {engine, context, network_input_names_, network_output_names_, device_buffers}; } +void TensorRTBuilder::CreateDeviceBuffers(TensorRTEngineAndContext& engine_and_context) { + std::vector device_buffers(engine_and_context.engine->getNbBindings()); + for (size_t i = 0; i < network_input_names_.size(); ++i) { + AllocateDeviceBuffer(engine_and_context.engine, network_input_names_[i], &device_buffers); + } + for (size_t i = 0; i < network_output_names_.size(); ++i) { + AllocateDeviceBuffer(engine_and_context.engine, network_output_names_[i], &device_buffers); + } + engine_and_context.device_buffers = device_buffers; +} + nvinfer1::Weights TensorRTBuilder::GetDLTensorAsWeights(const DLTensor* dptr, DLDeviceType src_device) { ICHECK_EQ(dptr->ctx.device_type, src_device); diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.h b/src/runtime/contrib/tensorrt/tensorrt_builder.h index 4926a4d02685..8c9eee9f6a0b 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_builder.h +++ b/src/runtime/contrib/tensorrt/tensorrt_builder.h @@ -113,6 +113,12 @@ class TensorRTBuilder { */ TensorRTEngineAndContext BuildEngine(); + /*! + * \brief Create Device Buffers for TensorRTEngineAndContext when Loading from Cache + * \param engine_and_context The TensorRTEngineAndContext + */ + void CreateDeviceBuffers(TensorRTEngineAndContext& engine_and_context); + private: /*! \brief Convert a DLTensor to a TensorRT weight. */ nvinfer1::Weights GetDLTensorAsWeights(const DLTensor* dptr, DLDeviceType src_device); diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 3f87f8d00ee6..60334892a827 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -83,8 +83,8 @@ class TensorRTRuntime : public JSONRuntimeBase { ICHECK_EQ(consts.size(), const_idx_.size()) << "The number of input constants must match the number of required."; LoadGlobalAttributes(); - if (GetCachedEnginesFromDisk()) return; SetupConstants(consts); + if (GetCachedEnginesFromDisk()) return; } void LoadGlobalAttributes() { @@ -178,7 +178,14 @@ class TensorRTRuntime : public JSONRuntimeBase { */ void BuildEngine() { batch_size_ = data_entry_[input_var_eid_[0]]->shape[0]; - if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) return; + if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) { + TensorRTEngineAndContext& engine_and_context = + trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_)); + size_t binding_num = engine_and_context.engine->getNbBindings(); + if(engine_and_context.device_buffers.size() == binding_num) { + return; + } + } DLOG(INFO) << "Building new TensorRT engine for subgraph " << symbol_name_ << " with batch size " << batch_size_; const bool use_fp16 = dmlc::GetEnv("TVM_TENSORRT_USE_FP16", false); @@ -210,6 +217,17 @@ class TensorRTRuntime : public JSONRuntimeBase { for (size_t i = 0; i < outputs_.size(); ++i) { builder.AddOutput(outputs_[i], EntryID(outputs_[i])); } + + // Allocate Device Buffers + + if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) { + TensorRTEngineAndContext& engine_and_context = + trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_)); + if(engine_and_context.device_buffers.size() == 0) { + builder.CreateDeviceBuffers(engine_and_context); + return; + } + } // Build engine. trt_engine_cache_[std::make_pair(symbol_name_, batch_size_)] = builder.BuildEngine(); From 38d59e5779813260c7f49d7b3b696e6ec7db2eb4 Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Thu, 24 Dec 2020 17:21:45 +0800 Subject: [PATCH 08/19] sync 3rd party --- 3rdparty/vta-hw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/vta-hw b/3rdparty/vta-hw index 12fb486a491b..57db5a718c74 160000 --- a/3rdparty/vta-hw +++ b/3rdparty/vta-hw @@ -1 +1 @@ -Subproject commit 12fb486a491b75d70ec4c5e0a0cd112ab49a95bc +Subproject commit 57db5a718c74a788c98120ebbe1230797be698c8 From 4114555eca3270c1695d463beb541cbb2f53502a Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Thu, 24 Dec 2020 18:00:04 +0800 Subject: [PATCH 09/19] fix lint --- src/runtime/contrib/tensorrt/tensorrt_builder.cc | 10 +++++----- src/runtime/contrib/tensorrt/tensorrt_builder.h | 2 +- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 13 ++++++------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.cc b/src/runtime/contrib/tensorrt/tensorrt_builder.cc index 4a15b3075103..16d3629f2345 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_builder.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_builder.cc @@ -185,15 +185,15 @@ TensorRTEngineAndContext TensorRTBuilder::BuildEngine() { return {engine, context, network_input_names_, network_output_names_, device_buffers}; } -void TensorRTBuilder::CreateDeviceBuffers(TensorRTEngineAndContext& engine_and_context) { - std::vector device_buffers(engine_and_context.engine->getNbBindings()); +void TensorRTBuilder::CreateDeviceBuffers(TensorRTEngineAndContext* engine_and_context) { + std::vector device_buffers(engine_and_context->engine->getNbBindings()); for (size_t i = 0; i < network_input_names_.size(); ++i) { - AllocateDeviceBuffer(engine_and_context.engine, network_input_names_[i], &device_buffers); + AllocateDeviceBuffer(engine_and_context->engine, network_input_names_[i], &device_buffers); } for (size_t i = 0; i < network_output_names_.size(); ++i) { - AllocateDeviceBuffer(engine_and_context.engine, network_output_names_[i], &device_buffers); + AllocateDeviceBuffer(engine_and_context->engine, network_output_names_[i], &device_buffers); } - engine_and_context.device_buffers = device_buffers; + engine_and_context->device_buffers = device_buffers; } nvinfer1::Weights TensorRTBuilder::GetDLTensorAsWeights(const DLTensor* dptr, diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.h b/src/runtime/contrib/tensorrt/tensorrt_builder.h index 8c9eee9f6a0b..439a4c13ea26 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_builder.h +++ b/src/runtime/contrib/tensorrt/tensorrt_builder.h @@ -117,7 +117,7 @@ class TensorRTBuilder { * \brief Create Device Buffers for TensorRTEngineAndContext when Loading from Cache * \param engine_and_context The TensorRTEngineAndContext */ - void CreateDeviceBuffers(TensorRTEngineAndContext& engine_and_context); + void CreateDeviceBuffers(TensorRTEngineAndContext* engine_and_context); private: /*! \brief Convert a DLTensor to a TensorRT weight. */ diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 60334892a827..2751f7bca8de 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -179,10 +179,10 @@ class TensorRTRuntime : public JSONRuntimeBase { void BuildEngine() { batch_size_ = data_entry_[input_var_eid_[0]]->shape[0]; if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) { - TensorRTEngineAndContext& engine_and_context = + TensorRTEngineAndContext& engine_and_context = trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_)); size_t binding_num = engine_and_context.engine->getNbBindings(); - if(engine_and_context.device_buffers.size() == binding_num) { + if (engine_and_context.device_buffers.size() == binding_num) { return; } } @@ -217,14 +217,13 @@ class TensorRTRuntime : public JSONRuntimeBase { for (size_t i = 0; i < outputs_.size(); ++i) { builder.AddOutput(outputs_[i], EntryID(outputs_[i])); } - + // Allocate Device Buffers - if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) { - TensorRTEngineAndContext& engine_and_context = + TensorRTEngineAndContext& engine_and_context = trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_)); - if(engine_and_context.device_buffers.size() == 0) { - builder.CreateDeviceBuffers(engine_and_context); + if (engine_and_context.device_buffers.size() == 0) { + builder.CreateDeviceBuffers(&engine_and_context); return; } } From 22169082518b88b433b0fa422c538599d42f8bcd Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Thu, 24 Dec 2020 18:13:38 +0800 Subject: [PATCH 10/19] fix clang format --- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 2751f7bca8de..8a418a032981 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -180,7 +180,7 @@ class TensorRTRuntime : public JSONRuntimeBase { batch_size_ = data_entry_[input_var_eid_[0]]->shape[0]; if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) { TensorRTEngineAndContext& engine_and_context = - trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_)); + trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_)); size_t binding_num = engine_and_context.engine->getNbBindings(); if (engine_and_context.device_buffers.size() == binding_num) { return; @@ -221,7 +221,7 @@ class TensorRTRuntime : public JSONRuntimeBase { // Allocate Device Buffers if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) { TensorRTEngineAndContext& engine_and_context = - trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_)); + trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_)); if (engine_and_context.device_buffers.size() == 0) { builder.CreateDeviceBuffers(&engine_and_context); return; From 08a369d7757c141f043521a29f6129c672b84e08 Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Thu, 24 Dec 2020 18:15:49 +0800 Subject: [PATCH 11/19] clang format --- src/runtime/contrib/tensorrt/tensorrt_builder.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.h b/src/runtime/contrib/tensorrt/tensorrt_builder.h index 439a4c13ea26..a7700c4fe1b5 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_builder.h +++ b/src/runtime/contrib/tensorrt/tensorrt_builder.h @@ -114,8 +114,8 @@ class TensorRTBuilder { TensorRTEngineAndContext BuildEngine(); /*! - * \brief Create Device Buffers for TensorRTEngineAndContext when Loading from Cache - * \param engine_and_context The TensorRTEngineAndContext + * \brief Create Device Buffers for TensorRTEngineAndContext when Loading from Cache. + * \param engine_and_context The TensorRTEngineAndContext. */ void CreateDeviceBuffers(TensorRTEngineAndContext* engine_and_context); From aac7620a964678c0ff305b21e19091d29ff45126 Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Thu, 24 Dec 2020 18:18:56 +0800 Subject: [PATCH 12/19] clang format --- src/runtime/contrib/tensorrt/tensorrt_builder.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.h b/src/runtime/contrib/tensorrt/tensorrt_builder.h index a7700c4fe1b5..39d237da0f75 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_builder.h +++ b/src/runtime/contrib/tensorrt/tensorrt_builder.h @@ -114,8 +114,8 @@ class TensorRTBuilder { TensorRTEngineAndContext BuildEngine(); /*! - * \brief Create Device Buffers for TensorRTEngineAndContext when Loading from Cache. - * \param engine_and_context The TensorRTEngineAndContext. + * \brief Create device buffers. + * \param engine_and_context The pointer pointing at TensorRTEngineAndContext. */ void CreateDeviceBuffers(TensorRTEngineAndContext* engine_and_context); From bee04503cb6b894c9eeec45a96e1705cf5326fb7 Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Mon, 4 Jan 2021 16:20:30 +0800 Subject: [PATCH 13/19] fix reshape attr loss error --- 3rdparty/vta-hw | 2 +- src/runtime/contrib/json/json_node.h | 2 +- src/runtime/contrib/tensorrt/tensorrt_ops.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/3rdparty/vta-hw b/3rdparty/vta-hw index 87ce9acfae55..57db5a718c74 160000 --- a/3rdparty/vta-hw +++ b/3rdparty/vta-hw @@ -1 +1 @@ -Subproject commit 87ce9acfae550d1a487746e9d06c2e250076e54c +Subproject commit 57db5a718c74a788c98120ebbe1230797be698c8 diff --git a/src/runtime/contrib/json/json_node.h b/src/runtime/contrib/json/json_node.h index 77c289b04c6d..d57eeb08df10 100644 --- a/src/runtime/contrib/json/json_node.h +++ b/src/runtime/contrib/json/json_node.h @@ -256,7 +256,7 @@ class JSONGraphNode { */ template T GetAttr(const std::string& key) const { - ICHECK_GT(attrs_.count(key), 0U) << "Key: " << key << "is not found"; + ICHECK_GT(attrs_.count(key), 0U) << "Key: " << key << " is not found"; return dmlc::get(attrs_.at(key)); } diff --git a/src/runtime/contrib/tensorrt/tensorrt_ops.cc b/src/runtime/contrib/tensorrt/tensorrt_ops.cc index 1e6867b83cff..d0fd57ead204 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_ops.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_ops.cc @@ -921,7 +921,7 @@ class ReshapeOpConverter : public TensorRTOpConverter { void Convert(TensorRTOpConverterParams* params) const { auto input = params->inputs.at(0).tensor; - ICHECK_EQ(std::stoi(params->node.GetAttr>("reverse")[0]), false); + //ICHECK_EQ(std::stoi(params->node.GetAttr>("reverse")[0]), false); auto str_newshape = params->node.GetAttr>("newshape"); std::vector new_shape; const int start_index = TRT_HAS_IMPLICIT_BATCH(params) ? 1 : 0; From 1256f1cf1a1ed35988cf49bb26a4fbaa2a4dd9e5 Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Mon, 4 Jan 2021 16:22:07 +0800 Subject: [PATCH 14/19] sync upstream --- 3rdparty/vta-hw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/vta-hw b/3rdparty/vta-hw index 57db5a718c74..87ce9acfae55 160000 --- a/3rdparty/vta-hw +++ b/3rdparty/vta-hw @@ -1 +1 @@ -Subproject commit 57db5a718c74a788c98120ebbe1230797be698c8 +Subproject commit 87ce9acfae550d1a487746e9d06c2e250076e54c From 5598d6e76493192b210bace8c8f8c8127056259e Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Mon, 1 Feb 2021 11:06:37 +0800 Subject: [PATCH 15/19] refactor --- .../contrib/tensorrt/tensorrt_builder.cc | 18 ++++++------------ .../contrib/tensorrt/tensorrt_builder.h | 2 +- .../contrib/tensorrt/tensorrt_runtime.cc | 10 +++++----- 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.cc b/src/runtime/contrib/tensorrt/tensorrt_builder.cc index f9e6a4a7b31d..95c6bd18ef84 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_builder.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_builder.cc @@ -171,25 +171,19 @@ TensorRTEngineAndContext TensorRTBuilder::BuildEngine() { CleanUp(); // Allocate I/O buffers on GPU for TVM inputs which are on a different context. - std::vector device_buffers(engine->getNbBindings()); - for (size_t i = 0; i < network_input_names_.size(); ++i) { - AllocateDeviceBuffer(engine, network_input_names_[i], &device_buffers); - } - for (size_t i = 0; i < network_output_names_.size(); ++i) { - AllocateDeviceBuffer(engine, network_output_names_[i], &device_buffers); - } + std::vector device_buffers = CreateDeviceBuffers(engine); return {engine, context, network_input_names_, network_output_names_, device_buffers}; } -void TensorRTBuilder::CreateDeviceBuffers(TensorRTEngineAndContext* engine_and_context) { - std::vector device_buffers(engine_and_context->engine->getNbBindings()); +std::vector TensorRTBuilder::CreateDeviceBuffers(nvinfer1::ICudaEngine* engine) { + std::vector device_buffers(engine->getNbBindings()); for (size_t i = 0; i < network_input_names_.size(); ++i) { - AllocateDeviceBuffer(engine_and_context->engine, network_input_names_[i], &device_buffers); + AllocateDeviceBuffer(engine, network_input_names_[i], &device_buffers); } for (size_t i = 0; i < network_output_names_.size(); ++i) { - AllocateDeviceBuffer(engine_and_context->engine, network_output_names_[i], &device_buffers); + AllocateDeviceBuffer(engine, network_output_names_[i], &device_buffers); } - engine_and_context->device_buffers = device_buffers; + return device_buffers; } nvinfer1::Weights TensorRTBuilder::GetDLTensorAsWeights(const DLTensor* dptr, diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.h b/src/runtime/contrib/tensorrt/tensorrt_builder.h index 39d237da0f75..fcae52f7e994 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_builder.h +++ b/src/runtime/contrib/tensorrt/tensorrt_builder.h @@ -117,7 +117,7 @@ class TensorRTBuilder { * \brief Create device buffers. * \param engine_and_context The pointer pointing at TensorRTEngineAndContext. */ - void CreateDeviceBuffers(TensorRTEngineAndContext* engine_and_context); + std::vector CreateDeviceBuffers(nvinfer1::ICudaEngine* engine); private: /*! \brief Convert a DLTensor to a TensorRT weight. */ diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 8a418a032981..a6c7271483cb 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -84,7 +84,7 @@ class TensorRTRuntime : public JSONRuntimeBase { << "The number of input constants must match the number of required."; LoadGlobalAttributes(); SetupConstants(consts); - if (GetCachedEnginesFromDisk()) return; + GetCachedEnginesFromDisk(); } void LoadGlobalAttributes() { @@ -181,8 +181,7 @@ class TensorRTRuntime : public JSONRuntimeBase { if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) { TensorRTEngineAndContext& engine_and_context = trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_)); - size_t binding_num = engine_and_context.engine->getNbBindings(); - if (engine_and_context.device_buffers.size() == binding_num) { + if (!engine_and_context.device_buffers.empty()) { return; } } @@ -222,8 +221,9 @@ class TensorRTRuntime : public JSONRuntimeBase { if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) { TensorRTEngineAndContext& engine_and_context = trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_)); - if (engine_and_context.device_buffers.size() == 0) { - builder.CreateDeviceBuffers(&engine_and_context); + if (engine_and_context.device_buffers.empty()) { + engine_and_context.device_buffers = builder.CreateDeviceBuffers(engine_and_context.engine); + builder.CleanUp(); return; } } From fb273d3940e6419ad2830b96dda64e71c9697a5d Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Mon, 1 Feb 2021 11:07:53 +0800 Subject: [PATCH 16/19] make cleanup public --- src/runtime/contrib/tensorrt/tensorrt_builder.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.h b/src/runtime/contrib/tensorrt/tensorrt_builder.h index fcae52f7e994..49de6d96a9fd 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_builder.h +++ b/src/runtime/contrib/tensorrt/tensorrt_builder.h @@ -119,15 +119,15 @@ class TensorRTBuilder { */ std::vector CreateDeviceBuffers(nvinfer1::ICudaEngine* engine); + /*! \brief Clean up resources used to create engine. */ + void CleanUp(); + private: /*! \brief Convert a DLTensor to a TensorRT weight. */ nvinfer1::Weights GetDLTensorAsWeights(const DLTensor* dptr, DLDeviceType src_device); /*! \brief Convert an input to a Tensor if it is a Weight */ - nvinfer1::ITensor* GetInputAsTensor(const TensorRTOpInput& input); - - /*! \brief Clean up resources used to create engine. */ - void CleanUp(); + nvinfer1::ITensor* GetInputAsTensor(const TensorRTOpInput& input); /*! \brief Allocate a GPU buffer for input or output DLTensor, only if the context is not GPU * already. Inputs that are already on the GPU can be passed directly to TensorRT and will not From 75ca51beae67997801e7281c4fce5748269f299d Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Mon, 1 Feb 2021 11:11:12 +0800 Subject: [PATCH 17/19] fix lint --- src/runtime/contrib/tensorrt/tensorrt_builder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.h b/src/runtime/contrib/tensorrt/tensorrt_builder.h index 49de6d96a9fd..92932e9da200 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_builder.h +++ b/src/runtime/contrib/tensorrt/tensorrt_builder.h @@ -127,7 +127,7 @@ class TensorRTBuilder { nvinfer1::Weights GetDLTensorAsWeights(const DLTensor* dptr, DLDeviceType src_device); /*! \brief Convert an input to a Tensor if it is a Weight */ - nvinfer1::ITensor* GetInputAsTensor(const TensorRTOpInput& input); + nvinfer1::ITensor* GetInputAsTensor(const TensorRTOpInput& input); /*! \brief Allocate a GPU buffer for input or output DLTensor, only if the context is not GPU * already. Inputs that are already on the GPU can be passed directly to TensorRT and will not From 236fb410925a5af54bde053bf8f334303165cacf Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Mon, 1 Feb 2021 11:20:00 +0800 Subject: [PATCH 18/19] sync vta-hw --- 3rdparty/vta-hw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/3rdparty/vta-hw b/3rdparty/vta-hw index 87ce9acfae55..57db5a718c74 160000 --- a/3rdparty/vta-hw +++ b/3rdparty/vta-hw @@ -1 +1 @@ -Subproject commit 87ce9acfae550d1a487746e9d06c2e250076e54c +Subproject commit 57db5a718c74a788c98120ebbe1230797be698c8 From 2c9af6ea97e3ce542a1273be110d4c0cfa9c46b1 Mon Sep 17 00:00:00 2001 From: lisiyuan Date: Mon, 1 Feb 2021 14:49:51 +0800 Subject: [PATCH 19/19] remove cleanup --- src/runtime/contrib/tensorrt/tensorrt_builder.h | 6 +++--- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.h b/src/runtime/contrib/tensorrt/tensorrt_builder.h index 92932e9da200..fcae52f7e994 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_builder.h +++ b/src/runtime/contrib/tensorrt/tensorrt_builder.h @@ -119,9 +119,6 @@ class TensorRTBuilder { */ std::vector CreateDeviceBuffers(nvinfer1::ICudaEngine* engine); - /*! \brief Clean up resources used to create engine. */ - void CleanUp(); - private: /*! \brief Convert a DLTensor to a TensorRT weight. */ nvinfer1::Weights GetDLTensorAsWeights(const DLTensor* dptr, DLDeviceType src_device); @@ -129,6 +126,9 @@ class TensorRTBuilder { /*! \brief Convert an input to a Tensor if it is a Weight */ nvinfer1::ITensor* GetInputAsTensor(const TensorRTOpInput& input); + /*! \brief Clean up resources used to create engine. */ + void CleanUp(); + /*! \brief Allocate a GPU buffer for input or output DLTensor, only if the context is not GPU * already. Inputs that are already on the GPU can be passed directly to TensorRT and will not * need a buffer. */ diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index a6c7271483cb..79c01e018d82 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -223,7 +223,6 @@ class TensorRTRuntime : public JSONRuntimeBase { trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_)); if (engine_and_context.device_buffers.empty()) { engine_and_context.device_buffers = builder.CreateDeviceBuffers(engine_and_context.engine); - builder.CleanUp(); return; } }