From 2375c0b04e0d822841fc718096804ee6a49a27e0 Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Sat, 21 Aug 2021 06:17:38 +0800
Subject: [PATCH 01/58] update trt

---
 .../contrib/tensorrt/tensorrt_builder.cc      |  48 ++++--
 .../contrib/tensorrt/tensorrt_builder.h       |  15 +-
 .../contrib/tensorrt/tensorrt_calibrator.h    | 141 +++++++++++++++++
 .../contrib/tensorrt/tensorrt_runtime.cc      | 146 +++++++++++++++++-
 4 files changed, 327 insertions(+), 23 deletions(-)
 create mode 100755 src/runtime/contrib/tensorrt/tensorrt_calibrator.h

diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.cc b/src/runtime/contrib/tensorrt/tensorrt_builder.cc
index 08ac2ae0ec45..0dc1a4f6c18d 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_builder.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_builder.cc
@@ -40,7 +40,7 @@ namespace contrib {
 TensorRTBuilder::TensorRTBuilder(TensorRTLogger* logger,
                                  const std::vector<const DLTensor*>& data_entry,
                                  size_t max_workspace_size, bool use_implicit_batch, bool use_fp16,
-                                 int batch_size)
+                                 int batch_size, nvinfer1::IInt8Calibrator* calibrator)
     : data_entry_(data_entry),
       max_workspace_size_(max_workspace_size),
       use_implicit_batch_(use_implicit_batch),
@@ -48,22 +48,22 @@ TensorRTBuilder::TensorRTBuilder(TensorRTLogger* logger,
       batch_size_(batch_size) {
   // Create TRT builder and network.
   builder_ = nvinfer1::createInferBuilder(*logger);
-#if TRT_VERSION_GE(6, 0, 1)
-  // Use INetworkV2.
-  auto flags =
-      1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
-  if (use_implicit_batch_) {
-    flags = 0U;
-    builder_->setMaxBatchSize(batch_size_);
-  }
-  network_ = builder_->createNetworkV2(flags);
-#else
+  LOG(INFO) << "create a builder_ ";
+  use_int8_ = false;
   // Use INetwork with implicit batch.
   builder_->setMaxBatchSize(batch_size_);
   builder_->setMaxWorkspaceSize(max_workspace_size_);
   builder_->setFp16Mode(use_fp16_);
+
+  this->calibrator_ = calibrator;
+  if (calibrator != nullptr) {
+    LOG(INFO) << "calibrator is not null, and setting up int8 mode ... ";
+    set_use_int8();
+    builder_->setFp16Mode(true);
+    builder_->setInt8Mode(true);
+    builder_->setInt8Calibrator(calibrator);
+  }
   network_ = builder_->createNetwork();
-#endif
 }
 
 void TensorRTBuilder::AddInput(int nid, uint32_t entry_id, const JSONGraphNode& node) {
@@ -88,6 +88,11 @@ void TensorRTBuilder::AddInput(int nid, uint32_t entry_id, const JSONGraphNode&
   }
 }
 
+
+ void TensorRTBuilder::set_use_int8(){
+   use_int8_ = true;
+ }
+
 void TensorRTBuilder::AddConstant(int nid, const DLTensor* data) {
   nvinfer1::Weights weight = GetDLTensorAsWeights(data, kDLCPU);
   std::vector<int64_t> shape(data->shape, data->shape + data->ndim);
@@ -156,8 +161,18 @@ TensorRTEngineAndContext TensorRTBuilder::BuildEngine() {
   config_ = builder_->createBuilderConfig();
   config_->setMaxWorkspaceSize(max_workspace_size_);
   if (use_fp16_) {
+    config_->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK);
     config_->setFlag(nvinfer1::BuilderFlag::kFP16);
+    // config_->setDefaultDeviceType(nvinfer1::DeviceType::kDLA);
   }
+
+  LOG(INFO) << "use_int8_ " << use_int8_;
+  if(use_int8_){
+    config_->setFlag(nvinfer1::BuilderFlag::kINT8);
+    config_->setInt8Calibrator(calibrator_);
+    LOG(INFO)<<"config finishes setting calibrator for int8 mode ... ";
+  }
+
   // Add profiles.
   if (!use_implicit_batch_) {
     auto profile = builder_->createOptimizationProfile();
@@ -179,7 +194,9 @@ TensorRTEngineAndContext TensorRTBuilder::BuildEngine() {
     }
     config_->addOptimizationProfile(profile);
   }
+  LOG(INFO)<<"start building up engine with new network and builder ... ";
"; nvinfer1::ICudaEngine* engine = builder_->buildEngineWithConfig(*network_, *config_); + LOG(INFO)<<"finished building up engine with new network and builder ... "; #else nvinfer1::ICudaEngine* engine = builder_->buildCudaEngine(*network_); #endif @@ -238,9 +255,10 @@ void TensorRTBuilder::CleanUp() { #endif builder_->destroy(); for (auto weight : trt_weights_) { - if (weight.type == nvinfer1::DataType::kFLOAT) { + if (weight.type == nvinfer1::DataType::kFLOAT) + { delete[] static_cast(weight.values); - } else { + }else{ delete[] static_cast(weight.values); } } @@ -248,4 +266,4 @@ void TensorRTBuilder::CleanUp() { } // namespace contrib } // namespace runtime -} // namespace tvm +} // namespace tvm \ No newline at end of file diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.h b/src/runtime/contrib/tensorrt/tensorrt_builder.h index 0b1c3997ec57..161fbd21f44d 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_builder.h +++ b/src/runtime/contrib/tensorrt/tensorrt_builder.h @@ -73,7 +73,7 @@ class TensorRTBuilder { */ TensorRTBuilder(TensorRTLogger* logger, const std::vector& data_entry, size_t max_workspace_size, bool use_implicit_batch, bool use_fp16, - int batch_size); + int batch_size, nvinfer1::IInt8Calibrator* calibrator = nullptr); /*! * \brief Add TensorRT input(s) for input node in network definition. @@ -97,6 +97,12 @@ class TensorRTBuilder { */ void AddLayer(int nid, const JSONGraphNode& node); + + /* + set int8 flag for calibrating data + */ + void set_use_int8(); + /*! * \brief Mark TensorRT output in network definition. * \param entry The output node entry. @@ -153,6 +159,8 @@ class TensorRTBuilder { /*! \brief Whether to automatically convert model to 16-bit floating point precision. */ bool use_fp16_; + bool use_int8_; + /*! \brief Batch size to optimize for. */ int batch_size_; @@ -161,10 +169,13 @@ class TensorRTBuilder { /*! \brief Output names. */ std::vector network_output_names_; + + // calibrator pointer + nvinfer1::IInt8Calibrator* calibrator_; }; } // namespace contrib } // namespace runtime } // namespace tvm -#endif // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_BUILDER_H_ +#endif // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_BUILDER_H_ \ No newline at end of file diff --git a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h new file mode 100755 index 000000000000..91b2ce107da3 --- /dev/null +++ b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h @@ -0,0 +1,141 @@ +/* * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! +* \file runtime/contrib/tensorrt/tensorrt_builder.h +* \brief Contains TensorRTBuilder class which can be used to convert a relay +* program into a TRT engine which can be used for inference. 
+*/ + +#ifndef TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ +#define TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ + +#include "../../cuda/cuda_common.h" +#include "NvInfer.h" + +namespace tvm { +namespace runtime { + +class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { + public: + TensorRTCalibrator(int batch_size, + const std::vector& input_names) + : batch_size_(batch_size), + num_batches_calibrated_(0), + input_names_(input_names) {} + + ~TensorRTCalibrator() { + // Free calibration data + for (auto& inputs : data_) { + for (size_t i = 0; i < inputs.size(); ++i) { + delete[] inputs[i]; + } + } + // Free buffers + for (size_t i = 0; i < buffers_.size(); ++i) { + CUDA_CALL(cudaFree(buffers_[i])); + } + } + + /* + */ + void AddBatchData(const std::vector& bindings, + const std::vector& binding_sizes) { + // Copy data from GPU + std::vector data_host(bindings.size(), nullptr); + // LOG(INFO) << "bindings.size() is : " << bindings.size(); + // LOG(INFO) << "binding_sizes.size() is : " << binding_sizes.size(); + for (size_t i = 0; i < bindings.size(); ++i) { + data_host[i] = new float[batch_size_ * binding_sizes[i]]; + CUDA_CALL(cudaMemcpy(static_cast(data_host[i]), bindings[i], + batch_size_ * binding_sizes[i] * sizeof(float), + cudaMemcpyDeviceToHost)); + } + data_.push_back(data_host); + data_sizes_.push_back(binding_sizes); + } + + int getBatchSize() const override { return batch_size_; } + + /*! + * \brief TensorRT will call this method to get next batch of data to + * calibrate with. + */ + bool getBatch(void* bindings[], const char* names[], + int nbBindings) override { + AllocateBuffersIfNotAllocated(); + CHECK_EQ(input_names_.size(), nbBindings); + for (size_t i = 0; i < input_names_.size(); ++i) { + CHECK_EQ(input_names_[i], names[i]); + CUDA_CALL(cudaMemcpy( + buffers_[i], data_[num_batches_calibrated_][i], + batch_size_ * data_sizes_[num_batches_calibrated_][i] * sizeof(float), + cudaMemcpyHostToDevice)); + bindings[i] = buffers_[i]; + } + num_batches_calibrated_++; + // TODO(trevmorr): Free data from previous batch? + return (num_batches_calibrated_ < data_.size()); + } + + const void* readCalibrationCache(size_t& length) override { + if (calibration_cache_.empty()) return nullptr; + length = calibration_cache_.size(); + return calibration_cache_.data(); + } + + void writeCalibrationCache(const void* cache, size_t length) override { + calibration_cache_.assign(static_cast(cache), length); + } + + private: + /*! \brief Batch size. */ + int batch_size_; + /*! \brief Number of batches already fed to calibrator. */ + int num_batches_calibrated_; + /*! \brief Storage for calibration cache. */ + std::string calibration_cache_; + + /*! \brief Data to be used for calibration. */ + std::vector> data_; + /*! \brief Number of elements for data to be used for calibration. */ + std::vector> data_sizes_; + + /*! \brief Device buffers to be used for calibration. */ + std::vector buffers_; + + /*! \brief Names of inputs */ + const std::vector input_names_; + + /*! \brief Allocate device memory buffers. data_sizes_ must already have one + * entry. 
+   */
+  void AllocateBuffersIfNotAllocated() {
+    if (!buffers_.empty()) return;
+    CHECK_GE(data_sizes_.size(), 1);
+    const int num_inputs = data_sizes_[0].size();
+    buffers_.assign(num_inputs, nullptr);
+    for (int i = 0; i < num_inputs; ++i) {
+      CUDA_CALL(cudaMalloc(&buffers_[i], data_sizes_[0][i] * sizeof(float)));
+    }
+  }
+};
+
+}  // namespace runtime
+}  // namespace tvm
+
+#endif  // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_
\ No newline at end of file

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index 5562f853383c..547b85bdeb44 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -27,6 +27,11 @@
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
+#include 
 
 #include "../../file_utils.h"
 #include "../json/json_node.h"
@@ -35,6 +40,7 @@
 #ifdef TVM_GRAPH_EXECUTOR_TENSORRT
 #include "NvInfer.h"
 #include "tensorrt_builder.h"
+#include "tensorrt_calibrator.h"
 #endif
 
 namespace tvm {
@@ -48,6 +54,12 @@ struct PairHash {
   }
 };
 
+std::string getEnvVar( std::string const & key )
+{
+    char * val = getenv( key.c_str() );
+    return val == NULL ? std::string("") : std::string(val);
+}
+
 using namespace tvm::runtime::json;
 
 class TensorRTRuntime : public JSONRuntimeBase {
@@ -66,7 +78,16 @@ class TensorRTRuntime : public JSONRuntimeBase {
         use_implicit_batch_(true),
         max_workspace_size_(size_t(1) << 30),
         max_batch_size_(-1),
-        multi_engine_mode_(false) {}
+        multi_engine_mode_(false) {
+    const bool use_int8 = dmlc::GetEnv("TVM_TENSORRT_USE_INT8", false);
+    if(use_int8){
+      std::string num_cali_var("TENSORRT_NUM_CALI_INT8");
+      std::string num = getEnvVar(num_cali_var);
+      num_calibration_batches_remaining_ = stoi(num);
+      LOG(INFO) << "set up num_calibration_batches_remaining_ : " << num_calibration_batches_remaining_;
+      // num_calibration_batches_remaining_ = 10;
+    }
+  }
 
   /*!
    * \brief The type key of the module.
@@ -82,6 +103,7 @@ class TensorRTRuntime : public JSONRuntimeBase {
    * \param consts The constant params from compiled model.
    */
   void Init(const Array<NDArray>& consts) override {
+    LOG(INFO) << "calling Init function in tensorrt runtime ... ";
     ICHECK_EQ(consts.size(), const_idx_.size())
         << "The number of input constants must match the number of required.";
     LoadGlobalAttributes();
@@ -125,17 +147,26 @@ class TensorRTRuntime : public JSONRuntimeBase {
 
   /*! \brief Run inference using built engine. */
   void Run() override {
+
     auto& engine_and_context = GetOrBuildEngine();
+    // LOG(INFO) << "start running inference";
+    // this->CreateCalibratorIfUsingInt8(engine_and_context, data_entry_);
     int batch_size = GetBatchSize();
     if (batch_size == 0) return;
     auto engine = engine_and_context.engine;
     auto context = engine_and_context.context;
-    std::vector<void*> bindings(engine->getNbBindings(), nullptr);
+    const int num_bindings = engine->getNbBindings();
+    std::vector<void*> bindings(num_bindings, nullptr);
+    std::vector<size_t> binding_sizes(num_bindings, 0);
     // Setup input bindings.
+    const size_t num_inputs = input_nodes_.size();
+    int count_inputs = 0;
     for (size_t i = 0; i < input_nodes_.size(); ++i) {
       auto nid = input_nodes_[i];
       if (nodes_[nid].GetOpType() == "input") {
+
         for (size_t j = 0; j < nodes_[nid].GetOpShape().size(); ++j) {
+          count_inputs++;
           uint32_t eid = EntryID(nid, j);
           const std::string name = nodes_[nid].GetOpName() + "_" + std::to_string(j);
           int binding_index = engine->getBindingIndex(name.c_str());
@@ -153,9 +184,31 @@ class TensorRTRuntime : public JSONRuntimeBase {
           device_buffer.CopyFrom(data_entry_[eid]);
           bindings[binding_index] = device_buffer->data;
         }
+
+        auto dims = engine->getBindingDimensions(binding_index);
+        int num_elements = 1;
+        for (int i = 0; i < dims.nbDims; ++i) num_elements *= dims.d[i];
+        binding_sizes[binding_index] = num_elements;
+        }
       }
     }
 
+
+    // add batch data to calibrator
+    if(num_calibration_batches_remaining_ != 0){
+      if(calibrator_ != nullptr){
+        LOG(INFO) << "starting adding last " << num_calibration_batches_remaining_ << "-th batch data to calibrator";
+        std::vector<void*> input_bindings(bindings.begin(),
+                                          bindings.begin() + count_inputs);
+        std::vector<size_t> input_sizes(binding_sizes.begin(),
+                                        binding_sizes.begin() + count_inputs);
+        calibrator_->AddBatchData(input_bindings, input_sizes);
+        num_calibration_batches_remaining_--;
+      }
+      return;
+    }
+
+
     // Setup output bindings.
     for (size_t i = 0; i < outputs_.size(); ++i) {
       uint32_t eid = EntryID(outputs_[i]);
@@ -170,6 +223,7 @@ class TensorRTRuntime : public JSONRuntimeBase {
       }
     }
 
+
 #if TRT_VERSION_GE(6, 0, 1)
     if (use_implicit_batch_) {
       ICHECK(context->execute(batch_size, bindings.data())) << "Running TensorRT failed.";
@@ -225,10 +279,12 @@ class TensorRTRuntime : public JSONRuntimeBase {
   TensorRTEngineAndContext& GetOrBuildEngine() {
     int batch_size = GetBatchSize();
     int compatible_engine_batch_size = -1;
-    if (FindCompatibleEngine(batch_size, &compatible_engine_batch_size)) {
+    bool find_engine_flag = FindCompatibleEngine(batch_size, &compatible_engine_batch_size);
+    if (find_engine_flag && calibrator_ == nullptr){
       // A compatible engine already exists.
       return trt_engine_cache_.at(std::make_pair(symbol_name_, compatible_engine_batch_size));
     }
+
     // For single engine mode, remove previous engine and update max_batch_size.
     if (!multi_engine_mode_) {
       DestroyEngines();
@@ -267,13 +323,68 @@ class TensorRTRuntime : public JSONRuntimeBase {
     }
 
     // Build engine.
-    trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = builder.BuildEngine();
-    DLOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_
+    // trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = builder.BuildEngine();
+    const bool use_int8 = (dmlc::GetEnv("TVM_TENSORRT_USE_INT8", 0) != 0);
+    TensorRTEngineAndContext engine_and_context = builder.BuildEngine();
+    trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context;
+    if(use_int8 == true){
+      if(calibrator_ == nullptr){
+        this->CreateCalibratorIfUsingInt8(engine_and_context);
+      }
+
+      if(num_calibration_batches_remaining_ == 0){
+        engine_and_context.context->destroy();
+        engine_and_context.engine->destroy();
+
+        LOG(INFO)<<"rebuild builder using int8 mode";
+        TensorRTBuilder builder2(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_,
+                                 use_fp16, batch_size, calibrator_.get());
+        set_up_input_output(builder2);
+        TensorRTEngineAndContext new_engine_and_context = builder2.BuildEngine();
+        trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = new_engine_and_context;
+        calibrator_.reset(nullptr);
+        LOG(INFO) <<"finished rebuilding using int8 mode ... ";
+      }
+
+    }
+
+    LOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_
               << " with batch size " << batch_size;
     CacheEngineToDisk();
     return trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size));
   }
 
+
+  void set_up_input_output(TensorRTBuilder& builder){
+    for (size_t i = 0; i < input_nodes_.size(); ++i) {
+      auto nid = input_nodes_[i];
+      const auto& node = nodes_[nid];
+      std::string name = node.GetOpName();
+      if (node.GetOpType() == "input") {
+        builder.AddInput(nid, EntryID(nid, 0), node);
+      } else {
+        ICHECK_EQ(node.GetOpType(), "const");
+        uint32_t eid = EntryID(nid, 0);
+        builder.AddConstant(nid, data_entry_[eid]);
+      }
+    }
+
+    // Add layers.
+    for (size_t nid = 0; nid < nodes_.size(); ++nid) {
+      const auto& node = nodes_[nid];
+      if (node.GetOpType() != "kernel") continue;
+      builder.AddLayer(nid, node);
+    }
+
+    // Add outputs.
+    for (size_t i = 0; i < outputs_.size(); ++i) {
+      builder.AddOutput(outputs_[i], EntryID(outputs_[i]));
+    }
+
+  }
+
+
+
   /*! \brief If TVM_TENSORRT_CACHE_DIR is set, will check that directory for
    * already built TRT engines and load into trt_engine_cache_ so they don't
    * have to be built at first inference.
@@ -286,7 +397,7 @@ class TensorRTRuntime : public JSONRuntimeBase {
     // Check if engine is in the cache.
     std::ifstream infile(path, std::ios::binary);
     if (!infile.good()) return false;
-    DLOG(INFO) << "Loading cached TensorRT engine from " << path;
+    LOG(INFO) << "Loading cached TensorRT engine from " << path;
     infile.close();
     std::string serialized_engine;
     LoadBinaryFromFile(path, &serialized_engine);
@@ -306,8 +417,11 @@ class TensorRTRuntime : public JSONRuntimeBase {
     helper.DeclareField("inputs", &engine_and_context.inputs);
     helper.DeclareField("outputs", &engine_and_context.outputs);
     helper.ReadAllFields(&reader);
+    LOG(INFO) << "reader helper ends";
     const int batch_size = GetBatchSize();
+    LOG(INFO) << "got batch size";
     trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context;
+    LOG(INFO) << "finished saving engine and context ... ";
     return true;
   }
 
@@ -369,9 +483,29 @@ class TensorRTRuntime : public JSONRuntimeBase {
     return device_buffers_.at(binding_index);
   }
 
+  void CreateCalibratorIfUsingInt8(const TensorRTEngineAndContext& engine_and_context) {
+    LOG(INFO) << "Using INT8.
Now in calibration mode, will create inference engine after " << num_calibration_batches_remaining_ << " input batches are provided.";
+    // Get input names in binding order.
+    std::vector<std::string> input_names;
+    for(size_t i=0; i<engine_and_context.inputs.size(); i++){
+      input_names.push_back(engine_and_context.inputs[i]);
+    }
+    const int batch_size = GetBatchSize();
+    calibrator_.reset(new TensorRTCalibrator(batch_size, input_names));
+  }
+
   /*! \brief Map of function name and max batch size to TRT engine if built already. */
   std::unordered_map<std::pair<std::string, int>, TensorRTEngineAndContext, PairHash>
       trt_engine_cache_;
 
+
+  /*! \brief Calibrator for INT8 mode. */
+  std::unique_ptr<TensorRTCalibrator> calibrator_;
+  /*! \brief Number of calibration batches until we are done. */
+  int num_calibration_batches_remaining_;
 
   /*! \brief Map of binding index to GPU buffers for inputs and outputs. Only used when target
    * device is not "cuda". Since TensorRT execution can only read data from GPU, we need to copy
   data from

From 911365982ba2c000dbc0797f8c9eee99d037fc0f Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Tue, 24 Aug 2021 13:09:20 +0800
Subject: [PATCH 02/58] clean codes

---
 src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index 547b85bdeb44..355c56274c5a 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -85,7 +85,6 @@ class TensorRTRuntime : public JSONRuntimeBase {
       std::string num = getEnvVar(num_cali_var);
       num_calibration_batches_remaining_ = stoi(num);
       LOG(INFO) << "set up num_calibration_batches_remaining_ : " << num_calibration_batches_remaining_;
-      // num_calibration_batches_remaining_ = 10;
     }
   }
 

From 6671366114bf8439f7c26744c683680e6c5665b2 Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Tue, 24 Aug 2021 13:12:42 +0800
Subject: [PATCH 03/58] testing running trt

---
 src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index 355c56274c5a..1182073f5bf3 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -148,8 +148,6 @@ class TensorRTRuntime : public JSONRuntimeBase {
   void Run() override {
 
     auto& engine_and_context = GetOrBuildEngine();
-    // LOG(INFO) << "start running inference";
-    // this->CreateCalibratorIfUsingInt8(engine_and_context, data_entry_);
     int batch_size = GetBatchSize();
     if (batch_size == 0) return;
 

From 99c0a5716a5b1f5ed7db4eabb8c59439d6700b02 Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Tue, 24 Aug 2021 13:13:49 +0800
Subject: [PATCH 04/58] clean data

---
 src/runtime/contrib/tensorrt/tensorrt_builder.cc | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.cc b/src/runtime/contrib/tensorrt/tensorrt_builder.cc
index 0dc1a4f6c18d..d669bb67fa2d 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_builder.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_builder.cc
@@ -161,9 +161,7 @@ TensorRTEngineAndContext TensorRTBuilder::BuildEngine() {
   config_ = builder_->createBuilderConfig();
   config_->setMaxWorkspaceSize(max_workspace_size_);
   if (use_fp16_) {
-    config_->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK);
     config_->setFlag(nvinfer1::BuilderFlag::kFP16);
-    // config_->setDefaultDeviceType(nvinfer1::DeviceType::kDLA);
   }
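[The flow these first four patches set up is driven entirely by two environment
variables read in the TensorRTRuntime constructor. A minimal user-side sketch
follows, assuming a TVM build with the TensorRT runtime enabled
(USE_TENSORRT_RUNTIME=ON); `mod`, `params`, and the "data" input name are
placeholders, not anything defined in the patches.]

    import os
    import tvm
    from tvm import relay
    from tvm.contrib import graph_executor
    from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt

    # Read by the TensorRTRuntime constructor added in PATCH 01.
    os.environ["TVM_TENSORRT_USE_INT8"] = "1"
    os.environ["TENSORRT_NUM_CALI_INT8"] = "10"  # calibration batches to feed

    # Offload supported subgraphs to TensorRT and build as usual.
    mod, config = partition_for_tensorrt(mod, params)
    with tvm.transform.PassContext(
            opt_level=3, config={"relay.ext.tensorrt.options": config}):
        lib = relay.build(mod, target="cuda", params=params)

    dev = tvm.cuda(0)
    gen_module = graph_executor.GraphModule(lib["default"](dev))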
From 525af93d9c410d967d0d7cbc366a39c796c11dc5 Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Fri, 27 Aug 2021 03:30:14 +0800
Subject: [PATCH 05/58] clean codes?

---
 src/runtime/contrib/tensorrt/tensorrt_builder.cc | 13 +++----------
 src/runtime/contrib/tensorrt/tensorrt_builder.h  |  7 +------
 2 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.cc b/src/runtime/contrib/tensorrt/tensorrt_builder.cc
index d669bb67fa2d..8abddb047cf6 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_builder.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_builder.cc
@@ -48,7 +48,6 @@ TensorRTBuilder::TensorRTBuilder(TensorRTLogger* logger,
       batch_size_(batch_size) {
   // Create TRT builder and network.
   builder_ = nvinfer1::createInferBuilder(*logger);
-  LOG(INFO) << "create a builder_ ";
   use_int8_ = false;
   // Use INetwork with implicit batch.
   builder_->setMaxBatchSize(batch_size_);
@@ -56,9 +55,9 @@ TensorRTBuilder::TensorRTBuilder(TensorRTLogger* logger,
   builder_->setFp16Mode(use_fp16_);
 
   this->calibrator_ = calibrator;
-  if (calibrator != nullptr) {
-    LOG(INFO) << "calibrator is not null, and setting up int8 mode ... ";
-    set_use_int8();
+  if (calibrator != nullptr)
+  {
+    use_int8_ = true;
     builder_->setFp16Mode(true);
     builder_->setInt8Mode(true);
     builder_->setInt8Calibrator(calibrator);
@@ -88,11 +87,6 @@ void TensorRTBuilder::AddInput(int nid, uint32_t entry_id, const JSONGraphNode&
   }
 }
 
-
- void TensorRTBuilder::set_use_int8(){
-   use_int8_ = true;
- }
-
 void TensorRTBuilder::AddConstant(int nid, const DLTensor* data) {
   nvinfer1::Weights weight = GetDLTensorAsWeights(data, kDLCPU);
   std::vector<int64_t> shape(data->shape, data->shape + data->ndim);
@@ -164,7 +158,6 @@ TensorRTEngineAndContext TensorRTBuilder::BuildEngine() {
     config_->setFlag(nvinfer1::BuilderFlag::kFP16);
   }
 
-  LOG(INFO) << "use_int8_ " << use_int8_;
   if(use_int8_){
     config_->setFlag(nvinfer1::BuilderFlag::kINT8);
     config_->setInt8Calibrator(calibrator_);

diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.h b/src/runtime/contrib/tensorrt/tensorrt_builder.h
index 161fbd21f44d..054e409cc005 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_builder.h
+++ b/src/runtime/contrib/tensorrt/tensorrt_builder.h
@@ -97,12 +97,6 @@ class TensorRTBuilder {
    */
   void AddLayer(int nid, const JSONGraphNode& node);
 
-
-  /*
-  set int8 flag for calibrating data
-  */
-  void set_use_int8();
-
   /*!
    * \brief Mark TensorRT output in network definition.
    * \param entry The output node entry.
@@ -153,6 +153,7 @@ class TensorRTBuilder {
   /*! \brief Whether to automatically convert model to 16-bit floating point precision. */
   bool use_fp16_;
 
+  /*! \brief whether to automatically convert model to int8 precision */
   bool use_int8_;
 
   /*! \brief Batch size to optimize for. */

From 0eda37236a4dec08e3e8c8c138d1d6f28081d47b Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Fri, 27 Aug 2021 03:56:26 +0800
Subject: [PATCH 06/58] remove env func

---
 src/runtime/contrib/tensorrt/tensorrt_builder.cc |  5 ++---
 src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 12 +++---------
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.cc b/src/runtime/contrib/tensorrt/tensorrt_builder.cc
index 8abddb047cf6..09455ffc1a8c 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_builder.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_builder.cc
@@ -158,7 +158,7 @@ TensorRTEngineAndContext TensorRTBuilder::BuildEngine() {
     config_->setFlag(nvinfer1::BuilderFlag::kFP16);
   }
 
-  if(use_int8_){
+  if(use_int8_) {
     config_->setFlag(nvinfer1::BuilderFlag::kINT8);
     config_->setInt8Calibrator(calibrator_);
     LOG(INFO)<<"config finishes setting calibrator for int8 mode ... ";
"; @@ -246,8 +246,7 @@ void TensorRTBuilder::CleanUp() { #endif builder_->destroy(); for (auto weight : trt_weights_) { - if (weight.type == nvinfer1::DataType::kFLOAT) - { + if (weight.type == nvinfer1::DataType::kFLOAT) { delete[] static_cast(weight.values); }else{ delete[] static_cast(weight.values); diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 1182073f5bf3..534221d67741 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -54,11 +54,6 @@ struct PairHash { } }; -std::string getEnvVar( std::string const & key ) -{ - char * val = getenv( key.c_str() ); - return val == NULL ? std::string("") : std::string(val); -} using namespace tvm::runtime::json; @@ -81,10 +76,9 @@ class TensorRTRuntime : public JSONRuntimeBase { multi_engine_mode_(false) { const bool use_int8 = dmlc::GetEnv("TVM_TENSORRT_USE_INT8", false); if(use_int8){ - std::string num_cali_var("TENSORRT_NUM_CALI_INT8"); - std::string num = getEnvVar(num_cali_var); - num_calibration_batches_remaining_ = stoi(num); - LOG(INFO) << "set up num_calibration_batches_remaining_ : " << num_calibration_batches_remaining_; + const int extract_cali_num = dmlc::GetEnv("TENSORRT_NUM_CALI_INT8", 0); + num_calibration_batches_remaining_ = extract_cali_num; + LOG(INFO) << "settiing up num_calibration_batches_remaining_ as " << num_calibration_batches_remaining_ << " for calibrating data ... "; } } From 7ec05868c6d794e40bc2ee8ca9169c191d553221 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Fri, 27 Aug 2021 04:39:20 +0800 Subject: [PATCH 07/58] fix num_bings --- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 534221d67741..f7aa6453fb5e 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -96,7 +96,6 @@ class TensorRTRuntime : public JSONRuntimeBase { * \param consts The constant params from compiled model. */ void Init(const Array& consts) override { - LOG(INFO) << "calling Init function in tensorrt runtime ... "; ICHECK_EQ(consts.size(), const_idx_.size()) << "The number of input constants must match the number of required."; LoadGlobalAttributes(); @@ -151,13 +150,10 @@ class TensorRTRuntime : public JSONRuntimeBase { std::vector binding_sizes(num_bindings, 0); // Setup input bindings. 
     const size_t num_inputs = input_nodes_.size();
-    int count_inputs = 0;
     for (size_t i = 0; i < input_nodes_.size(); ++i) {
       auto nid = input_nodes_[i];
       if (nodes_[nid].GetOpType() == "input") {
-
         for (size_t j = 0; j < nodes_[nid].GetOpShape().size(); ++j) {
-          count_inputs++;
           uint32_t eid = EntryID(nid, j);
           const std::string name = nodes_[nid].GetOpName() + "_" + std::to_string(j);
           int binding_index = engine->getBindingIndex(name.c_str());
@@ -186,13 +182,13 @@ class TensorRTRuntime : public JSONRuntimeBase {
     }
 
     // add batch data to calibrator
-    if(num_calibration_batches_remaining_ != 0){
+    if(num_calibration_batches_remaining_ > 0){
       if(calibrator_ != nullptr){
         LOG(INFO) << "starting adding last " << num_calibration_batches_remaining_ << "-th batch data to calibrator";
         std::vector<void*> input_bindings(bindings.begin(),
-                                          bindings.begin() + count_inputs);
+                                          bindings.begin() + num_bindings);
         std::vector<size_t> input_sizes(binding_sizes.begin(),
-                                        binding_sizes.begin() + count_inputs);
+                                        binding_sizes.begin() + num_bindings);
         calibrator_->AddBatchData(input_bindings, input_sizes);
         num_calibration_batches_remaining_--;
       }
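[One detail behind the count_inputs -> num_bindings change above: a TensorRT
engine's binding table covers outputs as well as inputs, so slicing the first
num_bindings entries hands the whole table to the calibrator. A short sketch
with the TensorRT Python API shows how input slots can be told apart; the
`engine` object is assumed to be an already built ICudaEngine.]

    import tensorrt as trt  # noqa: F401  (for the types referenced above)

    def split_bindings(engine):
        """Partition binding indices into engine inputs and outputs."""
        inputs, outputs = [], []
        for i in range(engine.num_bindings):
            (inputs if engine.binding_is_input(i) else outputs).append(i)
        return inputs, outputs

[The calibrator added in PATCH 01 guards against mismatches anyway: its
getBatch() checks each binding name against input_names_.]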
"; #else nvinfer1::ICudaEngine* engine = builder_->buildCudaEngine(*network_); #endif diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index f7aa6453fb5e..7a9e3f536617 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -185,11 +185,9 @@ class TensorRTRuntime : public JSONRuntimeBase { if(num_calibration_batches_remaining_ > 0){ if(calibrator_ != nullptr){ LOG(INFO) << "starting adding last " << num_calibration_batches_remaining_ << "-th batch data to calibrator"; - std::vector input_bindings(bindings.begin(), - bindings.begin() + num_bindings); std::vector input_sizes(binding_sizes.begin(), binding_sizes.begin() + num_bindings); - calibrator_->AddBatchData(input_bindings, input_sizes); + calibrator_->AddBatchData(bindings, input_sizes); num_calibration_batches_remaining_--; } return; @@ -314,7 +312,19 @@ class TensorRTRuntime : public JSONRuntimeBase { const bool use_int8 = (dmlc::GetEnv("TVM_TENSORRT_USE_INT8", 0) != 0); TensorRTEngineAndContext engine_and_context = builder.BuildEngine(); trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context; - if(use_int8 == true){ + if(use_int8){ + BuildEngineFromJson(engine_and_context, use_fp16, batch_size); + } + + LOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_ + << " with batch size " << batch_size; + CacheEngineToDisk(); + return trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size)); + } + + + + void BuildEngineFromJson(TensorRTEngineAndContext& engine_and_context, bool use_fp16, int batch_size){ if(calibrator_ == nullptr){ this->CreateCalibratorIfUsingInt8(engine_and_context); } @@ -323,22 +333,15 @@ class TensorRTRuntime : public JSONRuntimeBase { engine_and_context.context->destroy(); engine_and_context.engine->destroy(); - LOG(INFO)<<"rebuild builder using int8 mode"; + LOG(INFO)<<"rebuild builder using INT8 mode"; TensorRTBuilder builder2(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_, use_fp16, batch_size, calibrator_.get()); set_up_input_output(builder2); TensorRTEngineAndContext new_engine_and_context = builder2.BuildEngine(); trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = new_engine_and_context; calibrator_.reset(nullptr); - LOG(INFO) <<"finished rebuilding using int8 mode ... "; + LOG(INFO) <<"finished rebuilding using INT8 mode ... 
"; } - - } - - LOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_ - << " with batch size " << batch_size; - CacheEngineToDisk(); - return trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size)); } From eada412a893172797f2816ed52f4e37a564002d0 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Sat, 28 Aug 2021 07:41:21 +0800 Subject: [PATCH 09/58] change condition --- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 7a9e3f536617..1d20b258d0ae 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -265,7 +265,10 @@ class TensorRTRuntime : public JSONRuntimeBase { int batch_size = GetBatchSize(); int compatible_engine_batch_size = -1; bool find_engine_flag = FindCompatibleEngine(batch_size, &compatible_engine_batch_size); - if (find_engine_flag && calibrator_ == nullptr){ + const bool use_int8 = (dmlc::GetEnv("TVM_TENSORRT_USE_INT8", 0) != 0); + if (find_engine_flag && + (!use_int8 || calibrator_ == nullptr + || (calibrator_ != nullptr && num_calibration_batches_remaining_!=0))){ // A compatible engine already exists. return trt_engine_cache_.at(std::make_pair(symbol_name_, compatible_engine_batch_size)); } @@ -308,8 +311,7 @@ class TensorRTRuntime : public JSONRuntimeBase { } // Build engine. - // trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = builder.BuildEngine(); - const bool use_int8 = (dmlc::GetEnv("TVM_TENSORRT_USE_INT8", 0) != 0); + TensorRTEngineAndContext engine_and_context = builder.BuildEngine(); trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context; if(use_int8){ From 0a22eff99423c3e28a6fec07387257f9f885127d Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Sat, 28 Aug 2021 08:34:23 +0800 Subject: [PATCH 10/58] reset input and output func --- .../contrib/tensorrt/tensorrt_runtime.cc | 32 +++---------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 1d20b258d0ae..bf970e2e16d7 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -285,33 +285,9 @@ class TensorRTRuntime : public JSONRuntimeBase { use_fp16, batch_size); // Add inputs and constants. - for (size_t i = 0; i < input_nodes_.size(); ++i) { - auto nid = input_nodes_[i]; - const auto& node = nodes_[nid]; - std::string name = node.GetOpName(); - if (node.GetOpType() == "input") { - builder.AddInput(nid, EntryID(nid, 0), node); - } else { - ICHECK_EQ(node.GetOpType(), "const"); - uint32_t eid = EntryID(nid, 0); - builder.AddConstant(nid, data_entry_[eid]); - } - } + SetUpInputAndOutput(builder); - // Add layers. - for (size_t nid = 0; nid < nodes_.size(); ++nid) { - const auto& node = nodes_[nid]; - if (node.GetOpType() != "kernel") continue; - builder.AddLayer(nid, node); - } - - // Add outputs. - for (size_t i = 0; i < outputs_.size(); ++i) { - builder.AddOutput(outputs_[i], EntryID(outputs_[i])); - } - - // Build engine. - + // Build engine. 
From 0a22eff99423c3e28a6fec07387257f9f885127d Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Sat, 28 Aug 2021 08:34:23 +0800
Subject: [PATCH 10/58] reset input and output func

---
 .../contrib/tensorrt/tensorrt_runtime.cc | 32 +++----------------
 1 file changed, 4 insertions(+), 28 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index 1d20b258d0ae..bf970e2e16d7 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -285,33 +285,9 @@ class TensorRTRuntime : public JSONRuntimeBase {
                             use_fp16, batch_size);
 
     // Add inputs and constants.
-    for (size_t i = 0; i < input_nodes_.size(); ++i) {
-      auto nid = input_nodes_[i];
-      const auto& node = nodes_[nid];
-      std::string name = node.GetOpName();
-      if (node.GetOpType() == "input") {
-        builder.AddInput(nid, EntryID(nid, 0), node);
-      } else {
-        ICHECK_EQ(node.GetOpType(), "const");
-        uint32_t eid = EntryID(nid, 0);
-        builder.AddConstant(nid, data_entry_[eid]);
-      }
-    }
+    SetUpInputAndOutput(builder);
 
-    // Add layers.
-    for (size_t nid = 0; nid < nodes_.size(); ++nid) {
-      const auto& node = nodes_[nid];
-      if (node.GetOpType() != "kernel") continue;
-      builder.AddLayer(nid, node);
-    }
-
-    // Add outputs.
-    for (size_t i = 0; i < outputs_.size(); ++i) {
-      builder.AddOutput(outputs_[i], EntryID(outputs_[i]));
-    }
-
-    // Build engine.
-
+    // Build engine.
     TensorRTEngineAndContext engine_and_context = builder.BuildEngine();
     trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context;
     if(use_int8){
@@ -338,7 +314,7 @@ class TensorRTRuntime : public JSONRuntimeBase {
         LOG(INFO)<<"rebuild builder using INT8 mode";
         TensorRTBuilder builder2(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_,
                                  use_fp16, batch_size, calibrator_.get());
-        set_up_input_output(builder2);
+        SetUpInputAndOutput(builder2);
         TensorRTEngineAndContext new_engine_and_context = builder2.BuildEngine();
         trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = new_engine_and_context;
         calibrator_.reset(nullptr);
@@ -347,7 +323,7 @@ class TensorRTRuntime : public JSONRuntimeBase {
     }
 
 
-  void set_up_input_output(TensorRTBuilder& builder){
+  void SetUpInputAndOutput(TensorRTBuilder& builder){
     for (size_t i = 0; i < input_nodes_.size(); ++i) {
       auto nid = input_nodes_[i];
       const auto& node = nodes_[nid];
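[The shape this refactor is converging on — build once, feed batches, then
rebuild with the calibrator attached — mirrors TensorRT's standard two-phase
INT8 recipe. For comparison, a standalone sketch of that recipe with the
TensorRT Python API; `make_network` and `batches` are placeholders, and pycuda
is used only for the device scratch buffer.]

    import numpy as np
    import pycuda.autoinit  # noqa: F401  (creates a CUDA context)
    import pycuda.driver as cuda
    import tensorrt as trt

    class Calibrator(trt.IInt8EntropyCalibrator2):
        def __init__(self, batches):
            trt.IInt8EntropyCalibrator2.__init__(self)
            self.batches, self.scratch = iter(batches), None

        def get_batch_size(self):
            return 1

        def get_batch(self, names):
            batch = next(self.batches, None)
            if batch is None:
                return None  # calibration data exhausted
            if self.scratch is None:
                self.scratch = cuda.mem_alloc(batch.nbytes)
            cuda.memcpy_htod(self.scratch, np.ascontiguousarray(batch))
            return [int(self.scratch)]

        def read_calibration_cache(self):
            return None

        def write_calibration_cache(self, cache):
            pass

    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    network = make_network(builder)  # placeholder: populate an INetworkDefinition
    config = builder.create_builder_config()
    config.set_flag(trt.BuilderFlag.INT8)
    config.int8_calibrator = Calibrator(batches)
    engine = builder.build_engine(network, config)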
From 3f9fec27dc97d557527f4a611f6f86840b886849 Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Tue, 31 Aug 2021 03:07:35 +0800
Subject: [PATCH 11/58] re-config func

---
 .../contrib/tensorrt/tensorrt_runtime.cc | 80 +++++++++----------
 1 file changed, 38 insertions(+), 42 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index bf970e2e16d7..7ab6f7125c4b 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -267,8 +267,9 @@ class TensorRTRuntime : public JSONRuntimeBase {
     bool find_engine_flag = FindCompatibleEngine(batch_size, &compatible_engine_batch_size);
     const bool use_int8 = (dmlc::GetEnv("TVM_TENSORRT_USE_INT8", 0) != 0);
     if (find_engine_flag &&
-        (!use_int8 || calibrator_ == nullptr
-         || (calibrator_ != nullptr && num_calibration_batches_remaining_!=0))){
+        (!use_int8 ||
+         calibrator_ == nullptr ||
+         (calibrator_ != nullptr && num_calibration_batches_remaining_!=0))){
       // A compatible engine already exists.
       return trt_engine_cache_.at(std::make_pair(symbol_name_, compatible_engine_batch_size));
     }
@@ -284,25 +285,13 @@ class TensorRTRuntime : public JSONRuntimeBase {
     TensorRTBuilder builder(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_,
                             use_fp16, batch_size);
 
-    // Add inputs and constants.
-    SetUpInputAndOutput(builder);
-
     // Build engine.
-    TensorRTEngineAndContext engine_and_context = builder.BuildEngine();
-    trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context;
-    if(use_int8){
-      BuildEngineFromJson(engine_and_context, use_fp16, batch_size);
+    if(trt_engine_cache_.find(std::make_pair(symbol_name_, batch_size)) == trt_engine_cache_.end()){
+      BuildEngineFromJson(use_fp16, batch_size);
     }
+
+    if(use_int8){
+      TensorRTEngineAndContext& engine_and_context = trt_engine_cache_[std::make_pair(symbol_name_, batch_size)];
+      if(calibrator_ == nullptr){
+        this->CreateCalibratorIfUsingInt8(engine_and_context);
+      }
+
+      if(num_calibration_batches_remaining_ == 0){
+        engine_and_context.context->destroy();
+        engine_and_context.engine->destroy();
+        LOG(INFO)<<"rebuild builder using INT8 mode";
+        BuildEngineFromJson(use_fp16, batch_size);
+        calibrator_.reset(nullptr);
+        LOG(INFO) <<"finished rebuilding using INT8 mode ... ";
+      }
+    }
 
     LOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_
               << " with batch size " << batch_size;
     CacheEngineToDisk();
     return trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size));
   }
 
-  void BuildEngineFromJson(TensorRTEngineAndContext& engine_and_context, bool use_fp16, int batch_size){
-    if(calibrator_ == nullptr){
-      this->CreateCalibratorIfUsingInt8(engine_and_context);
-    }
-
-    if(num_calibration_batches_remaining_ == 0){
-      engine_and_context.context->destroy();
-      engine_and_context.engine->destroy();
-
-      LOG(INFO)<<"rebuild builder using INT8 mode";
-      TensorRTBuilder builder2(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_,
-                               use_fp16, batch_size, calibrator_.get());
-      SetUpInputAndOutput(builder2);
-      TensorRTEngineAndContext new_engine_and_context = builder2.BuildEngine();
-      trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = new_engine_and_context;
-      calibrator_.reset(nullptr);
-      LOG(INFO) <<"finished rebuilding using INT8 mode ... ";
-    }
-  }
-
-  void SetUpInputAndOutput(TensorRTBuilder& builder){
-    for (size_t i = 0; i < input_nodes_.size(); ++i) {
+  void BuildEngineFromJson(bool use_fp16, int batch_size){
+    TensorRTBuilder builder(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_,
+                            use_fp16, batch_size, calibrator_.get());
+    for (size_t i = 0; i < input_nodes_.size(); ++i) {
       auto nid = input_nodes_[i];
       const auto& node = nodes_[nid];
       std::string name = node.GetOpName();
       if (node.GetOpType() == "input") {
         builder.AddInput(nid, EntryID(nid, 0), node);
       } else {
         ICHECK_EQ(node.GetOpType(), "const");
         uint32_t eid = EntryID(nid, 0);
         builder.AddConstant(nid, data_entry_[eid]);
       }
     }
 
     // Add layers.
     for (size_t nid = 0; nid < nodes_.size(); ++nid) {
       const auto& node = nodes_[nid];
       if (node.GetOpType() != "kernel") continue;
       builder.AddLayer(nid, node);
     }
 
     // Add outputs.
     for (size_t i = 0; i < outputs_.size(); ++i) {
       builder.AddOutput(outputs_[i], EntryID(outputs_[i]));
     }
+
+    TensorRTEngineAndContext engine_and_context = builder.BuildEngine();
+    trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context;
+
+  }
 
@@ -383,8 +372,6 @@ class TensorRTRuntime : public JSONRuntimeBase {
     helper.DeclareField("inputs", &engine_and_context.inputs);
     helper.DeclareField("outputs", &engine_and_context.outputs);
     helper.ReadAllFields(&reader);
-    LOG(INFO) << "reader helper ends";
     const int batch_size = GetBatchSize();
-    LOG(INFO) << "got batch size";
     trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context;
"; return true; From 7f7343c63dcdf834ff73f8ff7d439ab673a9643c Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Tue, 31 Aug 2021 03:55:39 +0800 Subject: [PATCH 12/58] re-added trt version check --- .../contrib/tensorrt/tensorrt_builder.cc | 20 ++++++++++++++----- .../contrib/tensorrt/tensorrt_runtime.cc | 6 +++--- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.cc b/src/runtime/contrib/tensorrt/tensorrt_builder.cc index 2fe1e6d83fd4..96ac5cb2c3fe 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_builder.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_builder.cc @@ -49,11 +49,15 @@ TensorRTBuilder::TensorRTBuilder(TensorRTLogger* logger, // Create TRT builder and network. builder_ = nvinfer1::createInferBuilder(*logger); use_int8_ = false; - // Use INetwork with implicit batch. - builder_->setMaxBatchSize(batch_size_); - builder_->setMaxWorkspaceSize(max_workspace_size_); - builder_->setFp16Mode(use_fp16_); - + +#if TRT_VERSION_GE(6, 0, 1) + // Use INetworkV2. + auto flags = + 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); + if (use_implicit_batch_) { + flags = 0U; + builder_->setMaxBatchSize(batch_size_); + } this->calibrator_ = calibrator; if (calibrator != nullptr) { @@ -62,7 +66,13 @@ TensorRTBuilder::TensorRTBuilder(TensorRTLogger* logger, builder_->setInt8Mode(true); builder_->setInt8Calibrator(calibrator); } + network_ = builder_->createNetworkV2(flags); +#else + builder_->setMaxBatchSize(batch_size_); + builder_->setMaxWorkspaceSize(max_workspace_size_); + builder_->setFp16Mode(use_fp16_); network_ = builder_->createNetwork(); +#endif } void TensorRTBuilder::AddInput(int nid, uint32_t entry_id, const JSONGraphNode& node) { diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 7ab6f7125c4b..f55b6fb1cc1d 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -41,6 +41,7 @@ #include "NvInfer.h" #include "tensorrt_builder.h" #include "tensorrt_calibrator.h" +#include "tensorrt_utils.h" #endif namespace tvm { @@ -77,6 +78,7 @@ class TensorRTRuntime : public JSONRuntimeBase { const bool use_int8 = dmlc::GetEnv("TVM_TENSORRT_USE_INT8", false); if(use_int8){ const int extract_cali_num = dmlc::GetEnv("TENSORRT_NUM_CALI_INT8", 0); + ICHECK(extract_cali_num != 0); num_calibration_batches_remaining_ = extract_cali_num; LOG(INFO) << "settiing up num_calibration_batches_remaining_ as " << num_calibration_batches_remaining_ << " for calibrating data ... "; } @@ -149,7 +151,6 @@ class TensorRTRuntime : public JSONRuntimeBase { std::vector bindings(num_bindings, nullptr); std::vector binding_sizes(num_bindings, 0); // Setup input bindings. - const size_t num_inputs = input_nodes_.size(); for (size_t i = 0; i < input_nodes_.size(); ++i) { auto nid = input_nodes_[i]; if (nodes_[nid].GetOpType() == "input") { @@ -448,8 +449,7 @@ class TensorRTRuntime : public JSONRuntimeBase { } void CreateCalibratorIfUsingInt8(const TensorRTEngineAndContext& engine_and_context) { - LOG(INFO) << "Using INT8. Now in calibration mode, will create inference engine after " << num_calibration_batches_remaining_ << " input batches are provided."; - // Get input names in binding order. + // Get input names in binding order. 
     std::vector<std::string> input_names;
     for(size_t i=0; i<engine_and_context.inputs.size(); i++){
       input_names.push_back(engine_and_context.inputs[i]);
     }

From 8566cc6f2bfc359bad5576e5de92d18813edba6e Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Tue, 31 Aug 2021 04:09:19 +0800
Subject: [PATCH 13/58] checking sanity

---
 src/runtime/contrib/tensorrt/tensorrt_builder.cc   | 11 +++++------
 src/runtime/contrib/tensorrt/tensorrt_builder.h    |  2 +-
 src/runtime/contrib/tensorrt/tensorrt_calibrator.h |  4 +++-
 src/runtime/contrib/tensorrt/tensorrt_runtime.cc   |  1 -
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.cc b/src/runtime/contrib/tensorrt/tensorrt_builder.cc
index 96ac5cb2c3fe..e4ea015e91ca 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_builder.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_builder.cc
@@ -59,8 +59,7 @@ TensorRTBuilder::TensorRTBuilder(TensorRTLogger* logger,
     builder_->setMaxBatchSize(batch_size_);
   }
   this->calibrator_ = calibrator;
-  if (calibrator != nullptr)
-  {
+  if (calibrator != nullptr){
     use_int8_ = true;
     builder_->setFp16Mode(true);
     builder_->setInt8Mode(true);
@@ -168,10 +167,10 @@ TensorRTEngineAndContext TensorRTBuilder::BuildEngine() {
     config_->setFlag(nvinfer1::BuilderFlag::kFP16);
   }
 
-  if(use_int8_) {
+  if (use_int8_) {
     config_->setFlag(nvinfer1::BuilderFlag::kINT8);
     config_->setInt8Calibrator(calibrator_);
-    LOG(INFO)<<"config finishes setting up calibrator as INT8 mode ... ";
+    LOG(INFO) << "config finishes setting up calibrator as INT8 mode ... ";
   }
@@ -256,7 +255,7 @@ void TensorRTBuilder::CleanUp() {
   for (auto weight : trt_weights_) {
     if (weight.type == nvinfer1::DataType::kFLOAT) {
       delete[] static_cast<const float*>(weight.values);
-    }else{
+    } else {
       delete[] static_cast<const uint16_t*>(weight.values);
     }
   }
@@ -264,4 +263,4 @@ void TensorRTBuilder::CleanUp() {
 
 }  // namespace contrib
 }  // namespace runtime
-}  // namespace tvm
\ No newline at end of file
+}  // namespace tvm

diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.h b/src/runtime/contrib/tensorrt/tensorrt_builder.h
index 054e409cc005..7b950f0dffb0 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_builder.h
+++ b/src/runtime/contrib/tensorrt/tensorrt_builder.h
@@ -173,4 +173,4 @@ class TensorRTBuilder {
 } // namespace runtime
 } // namespace tvm
 
-#endif  // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_BUILDER_H_
\ No newline at end of file
+#endif  // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_BUILDER_H_

diff --git a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h
index 91b2ce107da3..25a214e4e208 100755
--- a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h
+++ b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h
@@ -27,6 +27,8 @@
 #include "../../cuda/cuda_common.h"
 #include "NvInfer.h"
+#include 
+#include 
 
 namespace tvm {
 namespace runtime {
@@ -138,4 +140,4 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 {
 } // namespace runtime
 } // namespace tvm
 
-#endif  // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_
\ No newline at end of file
+#endif  // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index f55b6fb1cc1d..7c7c94abbac6 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -27,7 +27,6 @@
 #include 
 #include 
-#include 
 #include 
 #include 

From 7a1f3ffb95830d9a77fe127f130b5c8f5821ab47 Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Tue, 31 Aug 2021 04:19:55 +0800
Subject: [PATCH 14/58] try to fix sanity issue

---
 .../contrib/tensorrt/tensorrt_builder.cc |  2 +-
 .../contrib/tensorrt/tensorrt_builder.h  |  8 ++----
 .../contrib/tensorrt/tensorrt_runtime.cc | 30 +++++++++----------
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.cc b/src/runtime/contrib/tensorrt/tensorrt_builder.cc
index e4ea015e91ca..23f7339605df 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_builder.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_builder.cc
@@ -59,7 +59,7 @@ TensorRTBuilder::TensorRTBuilder(TensorRTLogger* logger,
     builder_->setMaxBatchSize(batch_size_);
   }
   this->calibrator_ = calibrator;
-  if (calibrator != nullptr){
+  if (calibrator != nullptr) {
     use_int8_ = true;
     builder_->setFp16Mode(true);

diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.h b/src/runtime/contrib/tensorrt/tensorrt_builder.h
index 7b950f0dffb0..7de7b344b9c7 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_builder.h
+++ b/src/runtime/contrib/tensorrt/tensorrt_builder.h
@@ -27,15 +27,15 @@
 
 #include 
 
-#include 
-#include 
-#include 
-
 #include "../json/json_node.h"
 #include "NvInfer.h"
 #include "tensorrt_logger.h"
 #include "tensorrt_ops.h"
 
+#include 
+#include 
+#include 
+
 namespace tvm {
 namespace runtime {
 namespace contrib {

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index 7c7c94abbac6..62497386cdb5 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -75,11 +75,11 @@ class TensorRTRuntime : public JSONRuntimeBase {
         multi_engine_mode_(false) {
     const bool use_int8 = dmlc::GetEnv("TVM_TENSORRT_USE_INT8", false);
-    if(use_int8){
+    if (use_int8) {
       const int extract_cali_num = dmlc::GetEnv("TENSORRT_NUM_CALI_INT8", 0);
       ICHECK(extract_cali_num != 0);
       num_calibration_batches_remaining_ = extract_cali_num;
-      LOG(INFO) << "setting up num_calibration_batches_remaining_ as " << num_calibration_batches_remaining_ << " for calibrating data ... ";
+      LOG(INFO) << "setting up " << num_calibration_batches_remaining_ << " sample data to calibrate data ... ";
     }
   }
@@ -140,7 +140,6 @@ class TensorRTRuntime : public JSONRuntimeBase {
 
   /*! \brief Run inference using built engine.
   */
   void Run() override {
-
     auto& engine_and_context = GetOrBuildEngine();
     int batch_size = GetBatchSize();
     if (batch_size == 0) return;
@@ -176,15 +175,15 @@ class TensorRTRuntime : public JSONRuntimeBase {
           binding_sizes[binding_index] = num_elements;
-        }
       }
     }
 
     // add batch data to calibrator
-    if(num_calibration_batches_remaining_ > 0){
-      if(calibrator_ != nullptr){
-        LOG(INFO) << "starting adding last " << num_calibration_batches_remaining_ << "-th batch data to calibrator";
+    if (num_calibration_batches_remaining_ > 0) {
+      if (calibrator_ != nullptr) {
+        LOG(INFO) << "starting adding last " << num_calibration_batches_remaining_ <<
+            "-th batch data to calibrator";
@@ -265,10 +264,10 @@ class TensorRTRuntime : public JSONRuntimeBase {
     bool find_engine_flag = FindCompatibleEngine(batch_size, &compatible_engine_batch_size);
     const bool use_int8 = (dmlc::GetEnv("TVM_TENSORRT_USE_INT8", 0) != 0);
-    if (find_engine_flag &&
-        (!use_int8 ||
-         calibrator_ == nullptr ||
-         (calibrator_ != nullptr && num_calibration_batches_remaining_!=0))){
+    if (find_engine_flag &&
+        (!use_int8 ||
+         calibrator_ == nullptr ||
+         (calibrator_ != nullptr && num_calibration_batches_remaining_ != 0))) {
       // A compatible engine already exists.
       return trt_engine_cache_.at(std::make_pair(symbol_name_, compatible_engine_batch_size));

From 7a1f3ffb95830d9a77fe127f130b5c8f5821ab47 Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Tue, 31 Aug 2021 04:33:02 +0800
Subject: [PATCH 15/58] checking sanity

---
 src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index 62497386cdb5..b2662dca6eef 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -288,21 +288,20 @@ class TensorRTRuntime : public JSONRuntimeBase {
       BuildEngineFromJson(use_fp16, batch_size);
     }
     if (use_int8) {
-      TensorRTEngineAndContext& engine_and_context = trt_engine_cache_[std::make_pair(symbol_name_, batch_size)];
+      TensorRTEngineAndContext& engine_and_context =
+          trt_engine_cache_[std::make_pair(symbol_name_, batch_size)];
       if (calibrator_ == nullptr) {
         this->CreateCalibratorIfUsingInt8(engine_and_context);
       }
-      if
+      if (num_calibration_batches_remaining_ == 0) {
         engine_and_context.context->destroy();
         engine_and_context.engine->destroy();
-        LOG(INFO)<<"rebuild builder using INT8 mode";
+        LOG(INFO) << "rebuild builder using INT8 mode";
         BuildEngineFromJson(use_fp16, batch_size);
         calibrator_.reset(nullptr);
-        LOG(INFO) <<"finished rebuilding using INT8 mode ... ";
+        LOG(INFO) << "finished rebuilding using INT8 mode ... ";
       }
     }
@@ -314,10 +313,8 @@ class TensorRTRuntime : public JSONRuntimeBase {
     return trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size));
   }
 
-
-
   void BuildEngineFromJson(bool use_fp16, int batch_size){
     TensorRTBuilder builder(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_,
                             use_fp16, batch_size, calibrator_.get());
-
     for (size_t i = 0; i < input_nodes_.size(); ++i) {
@@ -450,16 +447,14 @@ class TensorRTRuntime : public JSONRuntimeBase {
   void CreateCalibratorIfUsingInt8(const TensorRTEngineAndContext& engine_and_context) {
     // Get input names in binding order.
     std::vector<std::string> input_names;
-    for(size_t i=0; i<engine_and_context.inputs.size(); i++){
-      input_names.push_back(engine_and_context.inputs[i]);
-    }
+    for (size_t i = 0; i < engine_and_context.inputs.size(); i++) {
+      input_names.push_back(engine_and_context.inputs[i]);
+    }
     const int batch_size = GetBatchSize();
     calibrator_.reset(new TensorRTCalibrator(batch_size, input_names));
   }
@@ -458,7 +456,6 @@ class TensorRTRuntime : public JSONRuntimeBase {
   /*! \brief Map of function name and max batch size to TRT engine if built already. */
   std::unordered_map<std::pair<std::string, int>, TensorRTEngineAndContext, PairHash>
       trt_engine_cache_;
-
-
+
   /*! \brief Calibrator for INT8 mode. */
   std::unique_ptr<TensorRTCalibrator> calibrator_;

From 89143490419fce27a8cc8a4944663fff4d3a546c Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Tue, 31 Aug 2021 04:44:32 +0800
Subject: [PATCH 16/58] fixing sanity issue

---
 src/runtime/contrib/tensorrt/tensorrt_builder.h  |  8 ++++----
 src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 10 ++++++----
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.h b/src/runtime/contrib/tensorrt/tensorrt_builder.h
index 7de7b344b9c7..7b950f0dffb0 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_builder.h
+++ b/src/runtime/contrib/tensorrt/tensorrt_builder.h
@@ -27,15 +27,15 @@
 
 #include 
 
+#include 
+#include 
+#include 
+
 #include "../json/json_node.h"
 #include "NvInfer.h"
 #include "tensorrt_logger.h"
 #include "tensorrt_ops.h"
 
-#include 
-#include 
-#include 
-
 namespace tvm {
 namespace runtime {
 namespace contrib {

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index b2662dca6eef..7809cae9c408 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -180,10 +180,11 @@ class TensorRTRuntime : public JSONRuntimeBase {
     // add batch data to calibrator
     if (num_calibration_batches_remaining_ > 0) {
       if (calibrator_ != nullptr) {
-        LOG(INFO) << "starting adding last " << num_calibration_batches_remaining_ <<
-            "-th batch data to calibrator";
+        LOG(INFO) << "Starting adding last " <<
+            num_calibration_batches_remaining_ <<
+            "-th batch data to the calibrator";
@@ -285,7 +286,8 @@ class TensorRTRuntime : public JSONRuntimeBase {
 
     // Build engine.
- if (trt_engine_cache_.find(std::make_pair(symbol_name_, batch_size)) == trt_engine_cache_.end()) { + if (trt_engine_cache_.find(std::make_pair(symbol_name_, batch_size)) == + trt_engine_cache_.end()) { BuildEngineFromJson(use_fp16, batch_size); } if (use_int8) { From cb8fe8f12946a51883fd6029110e8a263d3af1ad Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Tue, 31 Aug 2021 04:50:06 +0800 Subject: [PATCH 17/58] fixing sainity issue --- src/runtime/contrib/tensorrt/tensorrt_calibrator.h | 5 +++-- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 10 ++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h index 25a214e4e208..7e57c2947689 100755 --- a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h +++ b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h @@ -25,11 +25,12 @@ #ifndef TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ #define TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ -#include "../../cuda/cuda_common.h" -#include "NvInfer.h" #include #include +#include "../../cuda/cuda_common.h" +#include "NvInfer.h" + namespace tvm { namespace runtime { diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 7809cae9c408..7cfc48ea4dd6 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -79,7 +79,9 @@ class TensorRTRuntime : public JSONRuntimeBase { const int extract_cali_num = dmlc::GetEnv("TENSORRT_NUM_CALI_INT8", 0); ICHECK(extract_cali_num != 0); num_calibration_batches_remaining_ = extract_cali_num; - LOG(INFO) << "settiing up " << num_calibration_batches_remaining_ << " sample data to calibrate data ... "; + LOG(INFO) << "settiing up " << + num_calibration_batches_remaining_ << + " sample data to calibrate data ... "; } } @@ -296,7 +298,6 @@ class TensorRTRuntime : public JSONRuntimeBase { if (calibrator_ == nullptr) { this->CreateCalibratorIfUsingInt8(engine_and_context); } - if (num_calibration_batches_remaining_ == 0) { engine_and_context.context->destroy(); engine_and_context.engine->destroy(); @@ -315,7 +316,7 @@ class TensorRTRuntime : public JSONRuntimeBase { - void BuildEngineFromJson(bool use_fp16, int batch_size){ + void BuildEngineFromJson(bool use_fp16, int batch_size) { TensorRTBuilder builder(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_, use_fp16, batch_size, calibrator_.get()); for (size_t i = 0; i < input_nodes_.size(); ++i) { @@ -343,13 +344,10 @@ class TensorRTRuntime : public JSONRuntimeBase { builder.AddOutput(outputs_[i], EntryID(outputs_[i])); } - TensorRTEngineAndContext engine_and_context = builder.BuildEngine(); trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context; - } - /*! \brief If TVM_TENSORRT_CACHE_DIR is set, will check that directory for * already built TRT engines and load into trt_engine_cache_ so they don't * have to be built at first inference. 
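Note on the flow the runtime patches above implement: TensorRT INT8 calibration is a two-pass process. The runtime first builds an engine with a calibrator attached, and each Run() buffers one batch of input bindings into that calibrator until TENSORRT_NUM_CALI_INT8 batches have been seen; the engine is then destroyed and rebuilt, and during that rebuild TensorRT pulls the buffered batches back out of the calibrator to compute per-tensor dynamic ranges. A minimal sketch of the builder-side hookup, assuming the TensorRT 6+ IBuilderConfig API (names and error handling simplified; this is not the exact TVM code):

    #include "NvInfer.h"

    // Attach an IInt8Calibrator to an engine build. With kINT8 set, TensorRT
    // drives calibrator->getBatch() during buildEngineWithConfig() to collect
    // activation ranges before emitting INT8 kernels.
    nvinfer1::ICudaEngine* BuildInt8Engine(nvinfer1::IBuilder* builder,
                                           nvinfer1::INetworkDefinition* network,
                                           nvinfer1::IInt8Calibrator* calibrator,
                                           size_t max_workspace_size) {
      nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
      config->setMaxWorkspaceSize(max_workspace_size);
      config->setFlag(nvinfer1::BuilderFlag::kFP16);  // fallback for layers with no INT8 kernel
      config->setFlag(nvinfer1::BuilderFlag::kINT8);
      config->setInt8Calibrator(calibrator);
      nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
      config->destroy();
      return engine;
    }

This is why the runtime keeps the calibrator alive across two engine builds: the first build only exists to collect data, the second consumes it.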
From 6aa6051405a25c7b780dc7286076d97b5a06fd4c Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Tue, 31 Aug 2021 04:53:56 +0800 Subject: [PATCH 18/58] fixing sanity --- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 7cfc48ea4dd6..60a97779b139 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -314,8 +314,6 @@ class TensorRTRuntime : public JSONRuntimeBase { return trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size)); } - - void BuildEngineFromJson(bool use_fp16, int batch_size) { TensorRTBuilder builder(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_, use_fp16, batch_size, calibrator_.get()); @@ -347,7 +345,7 @@ class TensorRTRuntime : public JSONRuntimeBase { TensorRTEngineAndContext engine_and_context = builder.BuildEngine(); trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context; } - + /*! \brief If TVM_TENSORRT_CACHE_DIR is set, will check that directory for * already built TRT engines and load into trt_engine_cache_ so they don't * have to be built at first inference. @@ -458,7 +456,6 @@ class TensorRTRuntime : public JSONRuntimeBase { /*! \brief Map of function name and max batch size to TRT engine if built already. */ std::unordered_map, TensorRTEngineAndContext, PairHash> trt_engine_cache_; - /*! \brief Calibrator for INT8 mode. */ std::unique_ptr calibrator_; From f51ba116161266ee58299a5a4a290b293d11cd13 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Tue, 31 Aug 2021 05:38:44 +0800 Subject: [PATCH 19/58] clang format fixed --- src/runtime/contrib/tensorrt/tensorrt_builder.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.h b/src/runtime/contrib/tensorrt/tensorrt_builder.h index 7b950f0dffb0..6559e06cde0d 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_builder.h +++ b/src/runtime/contrib/tensorrt/tensorrt_builder.h @@ -72,8 +72,8 @@ class TensorRTBuilder { * \param batch_size If use_implicit_batch, */ TensorRTBuilder(TensorRTLogger* logger, const std::vector& data_entry, - size_t max_workspace_size, bool use_implicit_batch, bool use_fp16, - int batch_size, nvinfer1::IInt8Calibrator* calibrator = nullptr); + size_t max_workspace_size, bool use_implicit_batch, bool use_fp16, int batch_size, + nvinfer1::IInt8Calibrator* calibrator = nullptr); /*! * \brief Add TensorRT input(s) for input node in network definition. 
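The clang-format churn in the next several patches all revolves around tensorrt_calibrator.h, so it is worth spelling out the small contract that class fulfils. TensorRT calls getBatchSize() once, then getBatch() repeatedly — each call must point bindings[] at device memory holding one batch and return false once data is exhausted — and uses the read/write cache hooks to persist computed scales between builds. A stripped-down sketch of that interface (a hypothetical SketchCalibrator, simplified from the TensorRTCalibrator in this series; batches are assumed to be pre-staged on the GPU rather than copied per call):

    #include <cstddef>
    #include <string>
    #include <vector>

    #include "NvInfer.h"

    class SketchCalibrator : public nvinfer1::IInt8EntropyCalibrator2 {
     public:
      explicit SketchCalibrator(int batch_size) : batch_size_(batch_size) {}

      int getBatchSize() const override { return batch_size_; }

      // Hand TensorRT one pre-staged device batch per call; false ends calibration.
      bool getBatch(void* bindings[], const char* names[], int nbBindings) override {
        if (next_batch_ >= device_batches_.size()) return false;
        for (int i = 0; i < nbBindings; ++i) bindings[i] = device_batches_[next_batch_][i];
        ++next_batch_;
        return true;
      }

      const void* readCalibrationCache(size_t& length) override {
        length = cache_.size();
        return cache_.empty() ? nullptr : cache_.data();
      }

      void writeCalibrationCache(const void* cache, size_t length) override {
        cache_.assign(static_cast<const char*>(cache), length);
      }

     private:
      int batch_size_;
      size_t next_batch_ = 0;
      std::vector<std::vector<void*>> device_batches_;  // one device pointer per input binding
      std::string cache_;
    };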
From 19e151b63ec834955c766fe3be27d7e4a3a5c23f Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Tue, 31 Aug 2021 05:49:08 +0800 Subject: [PATCH 20/58] clang format fixing --- src/runtime/contrib/tensorrt/tensorrt_calibrator.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h index 7e57c2947689..15fa453c2527 100755 --- a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h +++ b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h @@ -61,8 +61,6 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { const std::vector& binding_sizes) { // Copy data from GPU std::vector data_host(bindings.size(), nullptr); - // LOG(INFO) << "bindings.size() is : " << bindings.size(); - // LOG(INFO) << "binding_sizes.size() is : " << binding_sizes.size(); for (size_t i = 0; i < bindings.size(); ++i) { data_host[i] = new float[batch_size_ * binding_sizes[i]]; CUDA_CALL(cudaMemcpy(static_cast(data_host[i]), bindings[i], From 3b6ef100897202028ac3299fa1bb51fac811ba2b Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Tue, 31 Aug 2021 06:06:38 +0800 Subject: [PATCH 21/58] clean trt cali --- src/runtime/contrib/tensorrt/tensorrt_calibrator.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h index 15fa453c2527..91b2ce107da3 100755 --- a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h +++ b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h @@ -25,9 +25,6 @@ #ifndef TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ #define TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ -#include -#include - #include "../../cuda/cuda_common.h" #include "NvInfer.h" @@ -61,6 +58,8 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { const std::vector& binding_sizes) { // Copy data from GPU std::vector data_host(bindings.size(), nullptr); + // LOG(INFO) << "bindings.size() is : " << bindings.size(); + // LOG(INFO) << "binding_sizes.size() is : " << binding_sizes.size(); for (size_t i = 0; i < bindings.size(); ++i) { data_host[i] = new float[batch_size_ * binding_sizes[i]]; CUDA_CALL(cudaMemcpy(static_cast(data_host[i]), bindings[i], @@ -139,4 +138,4 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { } // namespace runtime } // namespace tvm -#endif // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ +#endif // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ \ No newline at end of file From ecb43e0dd3874876d0a24db1a3c68e4ae2d9adb9 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Tue, 31 Aug 2021 06:26:34 +0800 Subject: [PATCH 22/58] try to fix clang format --- src/runtime/contrib/tensorrt/tensorrt_calibrator.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h index 91b2ce107da3..773ab79dce7f 100755 --- a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h +++ b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h @@ -25,6 +25,9 @@ #ifndef TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ #define TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ +#include +#include + #include "../../cuda/cuda_common.h" #include "NvInfer.h" @@ -137,5 +140,4 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { } // namespace runtime } // namespace tvm - -#endif // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ 
\ No newline at end of file +#endif // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ From 17bb56624b2497aeea80deb97cba9566a922509e Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Thu, 2 Sep 2021 05:07:47 +0800 Subject: [PATCH 23/58] fixed some comments --- .../contrib/tensorrt/tensorrt_builder.h | 3 +- .../contrib/tensorrt/tensorrt_calibrator.h | 55 +++++++++---------- .../contrib/tensorrt/tensorrt_runtime.cc | 28 ++++------ 3 files changed, 39 insertions(+), 47 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_builder.h b/src/runtime/contrib/tensorrt/tensorrt_builder.h index 6559e06cde0d..bf74630bce7f 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_builder.h +++ b/src/runtime/contrib/tensorrt/tensorrt_builder.h @@ -165,7 +165,8 @@ class TensorRTBuilder { /*! \brief Output names. */ std::vector network_output_names_; - // calibrator pointer + /*! \brief calibrator pointer to add batch data when using int8 mode */ + /*! \brief pointer will be nullptr when it is fp16 or fp32 precision */ nvinfer1::IInt8Calibrator* calibrator_; }; diff --git a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h index 773ab79dce7f..b6d87a6c0975 100755 --- a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h +++ b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h @@ -14,12 +14,10 @@ * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. - */ -/*! -* \file runtime/contrib/tensorrt/tensorrt_builder.h -* \brief Contains TensorRTBuilder class which can be used to convert a relay -* program into a TRT engine which can be used for inference. + * file runtime/contrib/tensorrt/tensorrt_builder.h + * brief Contains TensorRTBuilder class which can be used to convert a relay + * program into a TRT engine which can be used for inference. */ #ifndef TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ @@ -35,16 +33,15 @@ namespace tvm { namespace runtime { class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { - public: +public: TensorRTCalibrator(int batch_size, - const std::vector& input_names) - : batch_size_(batch_size), - num_batches_calibrated_(0), + const std::vector &input_names) + : batch_size_(batch_size), num_batches_calibrated_(0), input_names_(input_names) {} ~TensorRTCalibrator() { // Free calibration data - for (auto& inputs : data_) { + for (auto &inputs : data_) { for (size_t i = 0; i < inputs.size(); ++i) { delete[] inputs[i]; } @@ -55,17 +52,13 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { } } - /* - */ - void AddBatchData(const std::vector& bindings, - const std::vector& binding_sizes) { + void AddBatchData(const std::vector &bindings, + const std::vector &binding_sizes) { // Copy data from GPU - std::vector data_host(bindings.size(), nullptr); - // LOG(INFO) << "bindings.size() is : " << bindings.size(); - // LOG(INFO) << "binding_sizes.size() is : " << binding_sizes.size(); + std::vector data_host(bindings.size(), nullptr); for (size_t i = 0; i < bindings.size(); ++i) { data_host[i] = new float[batch_size_ * binding_sizes[i]]; - CUDA_CALL(cudaMemcpy(static_cast(data_host[i]), bindings[i], + CUDA_CALL(cudaMemcpy(static_cast(data_host[i]), bindings[i], batch_size_ * binding_sizes[i] * sizeof(float), cudaMemcpyDeviceToHost)); } @@ -79,7 +72,7 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { * \brief TensorRT will call this method to get next batch of data to * calibrate with. 
*/ - bool getBatch(void* bindings[], const char* names[], + bool getBatch(void *bindings[], const char *names[], int nbBindings) override { AllocateBuffersIfNotAllocated(); CHECK_EQ(input_names_.size(), nbBindings); @@ -96,17 +89,18 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { return (num_batches_calibrated_ < data_.size()); } - const void* readCalibrationCache(size_t& length) override { - if (calibration_cache_.empty()) return nullptr; + const void *readCalibrationCache(size_t &length) override { + if (calibration_cache_.empty()) + return nullptr; length = calibration_cache_.size(); return calibration_cache_.data(); } - void writeCalibrationCache(const void* cache, size_t length) override { - calibration_cache_.assign(static_cast(cache), length); + void writeCalibrationCache(const void *cache, size_t length) override { + calibration_cache_.assign(static_cast(cache), length); } - private: +private: /*! \brief Batch size. */ int batch_size_; /*! \brief Number of batches already fed to calibrator. */ @@ -115,12 +109,12 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { std::string calibration_cache_; /*! \brief Data to be used for calibration. */ - std::vector> data_; + std::vector> data_; /*! \brief Number of elements for data to be used for calibration. */ std::vector> data_sizes_; /*! \brief Device buffers to be used for calibration. */ - std::vector buffers_; + std::vector buffers_; /*! \brief Names of inputs */ const std::vector input_names_; @@ -128,7 +122,8 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { /*! \brief Allocate device memory buffers. data_sizes_ must already have one * entry. */ void AllocateBuffersIfNotAllocated() { - if (!buffers_.empty()) return; + if (!buffers_.empty()) + return; CHECK_GE(data_sizes_.size(), 1); const int num_inputs = data_sizes_[0].size(); buffers_.assign(num_inputs, nullptr); @@ -138,6 +133,6 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { } }; -} // namespace runtime -} // namespace tvm -#endif // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ +} // namespace runtime +} // namespace tvm +#endif // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 60a97779b139..3603d182a14c 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -77,7 +77,9 @@ class TensorRTRuntime : public JSONRuntimeBase { const bool use_int8 = dmlc::GetEnv("TVM_TENSORRT_USE_INT8", false); if (use_int8) { const int extract_cali_num = dmlc::GetEnv("TENSORRT_NUM_CALI_INT8", 0); - ICHECK(extract_cali_num != 0); + ICHECK(extract_cali_num != 0) << "When using INT8 mode, environment variable TENSORRT_NUM_CALI_INT8" + << "must also be set to specify the number of inputs which will be" + << " used for calibration."; num_calibration_batches_remaining_ = extract_cali_num; LOG(INFO) << "settiing up " << num_calibration_batches_remaining_ << @@ -187,9 +189,7 @@ class TensorRTRuntime : public JSONRuntimeBase { LOG(INFO) << "Starting adding last " << num_calibration_batches_remaining_ << "-th batch data to the calibrator"; - std::vector input_sizes(binding_sizes.begin(), - binding_sizes.begin() + num_bindings); - calibrator_->AddBatchData(bindings, input_sizes); + calibrator_->AddBatchData(bindings, binding_sizes); num_calibration_batches_remaining_--; } return; @@ -268,10 +268,8 @@ class TensorRTRuntime : 
public JSONRuntimeBase { int compatible_engine_batch_size = -1; bool find_engine_flag = FindCompatibleEngine(batch_size, &compatible_engine_batch_size); const bool use_int8 = (dmlc::GetEnv("TVM_TENSORRT_USE_INT8", 0) != 0); - if (find_engine_flag && - (!use_int8 || - calibrator_ == nullptr || - (calibrator_ != nullptr && num_calibration_batches_remaining_ != 0))) { + const bool int8_calibration_not_used_or_not_complete = (calibrator_ != nullptr && num_calibration_batches_remaining_ != 0); + if (find_engine_flag && (!use_int8 || calibrator_ == nullptr || int8_calibration_not_used_or_not_complete)) { // A compatible engine already exists. return trt_engine_cache_.at(std::make_pair(symbol_name_, compatible_engine_batch_size)); } @@ -283,26 +281,23 @@ class TensorRTRuntime : public JSONRuntimeBase { } DLOG(INFO) << "Building new TensorRT engine for subgraph " << symbol_name_ << " with batch size " << batch_size; - const bool use_fp16 = dmlc::GetEnv("TVM_TENSORRT_USE_FP16", false); - TensorRTBuilder builder(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_, - use_fp16, batch_size); // Build engine. if (trt_engine_cache_.find(std::make_pair(symbol_name_, batch_size)) == trt_engine_cache_.end()) { - BuildEngineFromJson(use_fp16, batch_size); + BuildEngineFromJson(batch_size); } if (use_int8) { TensorRTEngineAndContext& engine_and_context = trt_engine_cache_[std::make_pair(symbol_name_, batch_size)]; if (calibrator_ == nullptr) { - this->CreateCalibratorIfUsingInt8(engine_and_context); + this->CreateInt8Calibrator(engine_and_context); } if (num_calibration_batches_remaining_ == 0) { engine_and_context.context->destroy(); engine_and_context.engine->destroy(); LOG(INFO) << "rebuild builder using INT8 mode"; - BuildEngineFromJson(use_fp16, batch_size); + BuildEngineFromJson(batch_size); calibrator_.reset(nullptr); LOG(INFO) << "finished rebuilding using INT8 mode ... "; } @@ -314,7 +309,8 @@ class TensorRTRuntime : public JSONRuntimeBase { return trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size)); } - void BuildEngineFromJson(bool use_fp16, int batch_size) { + void BuildEngineFromJson(int batch_size) { + const bool use_fp16 = dmlc::GetEnv("TVM_TENSORRT_USE_FP16", false); TensorRTBuilder builder(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_, use_fp16, batch_size, calibrator_.get()); for (size_t i = 0; i < input_nodes_.size(); ++i) { @@ -442,7 +438,7 @@ class TensorRTRuntime : public JSONRuntimeBase { return device_buffers_.at(binding_index); } - void CreateCalibratorIfUsingInt8(const TensorRTEngineAndContext& engine_and_context) { + void CreateInt8Calibrator(const TensorRTEngineAndContext& engine_and_context) { // Get input names in binding order. std::vector input_names; for (size_t i = 0; i < engine_and_context.inputs.size(); i++) { From dbd159405768738e3b696ff6a884c0c0a51c0f03 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Fri, 3 Sep 2021 05:12:15 +0800 Subject: [PATCH 24/58] remove double destroy engine codes --- .../contrib/tensorrt/tensorrt_runtime.cc | 23 +++++++------------ 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 3603d182a14c..63886f93e3c0 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -283,24 +283,17 @@ class TensorRTRuntime : public JSONRuntimeBase { << " with batch size " << batch_size; // Build engine. 
- if (trt_engine_cache_.find(std::make_pair(symbol_name_, batch_size)) == - trt_engine_cache_.end()) { + if (calibrator_ != nullptr && num_calibration_batches_remaining_ == 0) { + // Calibration complete. Delete fp32 engine and build int8 engine BuildEngineFromJson(batch_size); - } - if (use_int8) { - TensorRTEngineAndContext& engine_and_context = - trt_engine_cache_[std::make_pair(symbol_name_, batch_size)]; - if (calibrator_ == nullptr) { + calibrator_.reset(nullptr); + } else { + // Build new engine + BuildEngineFromJson(batch_size); + TensorRTEngineAndContext& engine_and_context = trt_engine_cache_[std::make_pair(symbol_name_, batch_size)]; + if (use_int8) { this->CreateInt8Calibrator(engine_and_context); } - if (num_calibration_batches_remaining_ == 0) { - engine_and_context.context->destroy(); - engine_and_context.engine->destroy(); - LOG(INFO) << "rebuild builder using INT8 mode"; - BuildEngineFromJson(batch_size); - calibrator_.reset(nullptr); - LOG(INFO) << "finished rebuilding using INT8 mode ... "; - } } LOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_ From 411504fea3acc6243cf1a8df49cd9bf28c22d44a Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Fri, 3 Sep 2021 05:14:39 +0800 Subject: [PATCH 25/58] modify comments --- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 63886f93e3c0..9d4a4d44297a 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -284,7 +284,7 @@ class TensorRTRuntime : public JSONRuntimeBase { // Build engine. if (calibrator_ != nullptr && num_calibration_batches_remaining_ == 0) { - // Calibration complete. Delete fp32 engine and build int8 engine + // Calibration complete and build int8 engine BuildEngineFromJson(batch_size); calibrator_.reset(nullptr); } else { From 9ec455e6096bf6856f86bc7db4adddca095d9002 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Fri, 3 Sep 2021 05:25:51 +0800 Subject: [PATCH 26/58] add checking function --- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 9d4a4d44297a..cd3ce692e8f0 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -75,6 +75,7 @@ class TensorRTRuntime : public JSONRuntimeBase { max_batch_size_(-1), multi_engine_mode_(false) { const bool use_int8 = dmlc::GetEnv("TVM_TENSORRT_USE_INT8", false); + multi_engine_mode_ = dmlc::GetEnv("TVM_TENSORRT_MULTI_ENGINE", false); if (use_int8) { const int extract_cali_num = dmlc::GetEnv("TENSORRT_NUM_CALI_INT8", 0); ICHECK(extract_cali_num != 0) << "When using INT8 mode, environment variable TENSORRT_NUM_CALI_INT8" @@ -84,6 +85,7 @@ class TensorRTRuntime : public JSONRuntimeBase { LOG(INFO) << "settiing up " << num_calibration_batches_remaining_ << " sample data to calibrate data ... 
"; + ICHECK(multi_engine_mode_ == false) << "When using int8 mode, multi-engine is not allowed"; } } @@ -106,7 +108,6 @@ class TensorRTRuntime : public JSONRuntimeBase { LoadGlobalAttributes(); if (GetCachedEnginesFromDisk()) return; SetupConstants(consts); - multi_engine_mode_ = dmlc::GetEnv("TVM_TENSORRT_MULTI_ENGINE", false); } void LoadGlobalAttributes() { From 55ead8b3b085a7a67119ffea55f0b8bc301de4c5 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 02:47:56 +0800 Subject: [PATCH 27/58] add trt int8 test --- .../python/contrib/test_tensorrt_int8_exp.py | 138 ++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 tests/python/contrib/test_tensorrt_int8_exp.py diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py new file mode 100644 index 000000000000..5254462806c1 --- /dev/null +++ b/tests/python/contrib/test_tensorrt_int8_exp.py @@ -0,0 +1,138 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# FP16 TRT command to run : TVM_TENSORRT_USE_FP16=1 python test_trt.py +# INT8 TRT command to run : TVM_TENSORRT_USE_INT8=1 TENSORRT_NUM_CALI_INT8=10 python test_trt.py +import tvm +from tvm import relay +from tvm.contrib.download import download_testdata +from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt + +# PyTorch imports +import torch +import torchvision +from torchvision import transforms + +# additonal imports +import os +import numpy as np +import cv2 +from PIL import Image +from scipy.spatial import distance + +def compare_tvm_torch_output(tvm_res, torch_res): + tvm_res = tvm_res.flatten() + torch_res = torch_res.flatten() + return np.max(np.abs(tvm_res-torch_res)) + +def cosine_distance(matrix1 , matrix2): + res = distance.cosine(matrix1, matrix2) + return res + +# model_name = "resnet34" +model_name = "mobilenet_v2" +model = getattr(torchvision.models, model_name)(pretrained=True) +model = model.eval() + +# We grab the TorchScripted model via tracing +input_shape = [1, 3, 224, 224] +input_data = torch.randn(input_shape) +scripted_model = torch.jit.trace(model, input_data).eval() +scripted_model.save("mobilenet_v2.pt") + +img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true" +img_path = download_testdata(img_url, "cat.png", module="data") +img = Image.open(img_path).resize((224, 224)) +my_preprocess = transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] +) +img = my_preprocess(img) +img = np.expand_dims(img, 0) + +input_name = "input0" +shape_list = [(input_name, img.shape)] +mod, params = relay.frontend.from_pytorch(scripted_model, shape_list) + +# compile the model +target = "cuda" +dev = 
tvm.cuda(1) +mod, config = partition_for_tensorrt(mod, params) + +print("python script started building --------------") +with tvm.transform.PassContext(opt_level=3, config={'relay.ext.tensorrt.options': config}): + lib = relay.build(mod, target=target, params=params) +print("python script finsihed building -------------------") + + +dtype = "float32" +lib.export_library('compiled.so') +loaded_lib = tvm.runtime.load_module('compiled.so') +gen_module = tvm.contrib.graph_executor.GraphModule(loaded_lib['default'](dev)) + +num_cali_int8 = 0 +try: + num_cali_int8 = os.environ["TENSORRT_NUM_CALI_INT8"] + print("we are going to set {} times calibration in this case".format(num_cali_int8)) +except: + print("no TENSORRT_NUM_CALI_INT8 found in this case ... ") + +num_cali_int8 = int(num_cali_int8) +if num_cali_int8 != 0: + print("calibration steps ... ") + for i in range(num_cali_int8): + tvm_data = tvm.nd.array(img) + gen_module.set_input(input_name, tvm_data) + gen_module.run(data=tvm_data) + print("finished calibration step") + + +# get output of tvm model +print("test run ... ") +tvm_data = tvm.nd.array(img) +gen_module.set_input(input_name, tvm_data) +gen_module.run(data=tvm_data) +out = gen_module.get_output(0) + + +# check output of tvm and output of pytorch model are equal +data_np = img +device = "cuda:1" +torch_data = torch.from_numpy(data_np) +torch_data = torch_data.to(device) +model = scripted_model +model = model.eval() +model = model.to(device) +torch_output = model(torch_data) + + +max_diff = compare_tvm_torch_output(out.numpy(), torch_output.detach().cpu().numpy()) +print("the largest difference between two arrays: {}".format(str(max_diff))) +print("the cosine distance between torch output and trt int8 output of tvm : ") +cosine_distance_res = cosine_distance(out.numpy(), torch_output.detach().cpu().numpy()) +print(cosine_distance_res) + +# Evaluate +print("Evaluate inference time cost...") +ftimer = gen_module.module.time_evaluator("run", dev, repeat=10, min_repeat_ms=500) +prof_res = np.array(ftimer().results) * 1e3 # convert to millisecond +message = "Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)) +print(message) \ No newline at end of file From c613c4546116633146ff9f0520ace90c129feadc Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 03:52:03 +0800 Subject: [PATCH 28/58] update trt int8 test file --- tests/python/contrib/test_tensorrt_int8_exp.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py index 5254462806c1..308a30bd6349 100644 --- a/tests/python/contrib/test_tensorrt_int8_exp.py +++ b/tests/python/contrib/test_tensorrt_int8_exp.py @@ -34,17 +34,12 @@ from PIL import Image from scipy.spatial import distance -def compare_tvm_torch_output(tvm_res, torch_res): - tvm_res = tvm_res.flatten() - torch_res = torch_res.flatten() - return np.max(np.abs(tvm_res-torch_res)) - -def cosine_distance(matrix1 , matrix2): - res = distance.cosine(matrix1, matrix2) +def cosine_distance(a, b): + res = distance.cosine(a, b) return res -# model_name = "resnet34" -model_name = "mobilenet_v2" +# you can change model name into resnet18, mobilenet_v2 ... 
+model_name = "resnet34"
 model = getattr(torchvision.models, model_name)(pretrained=True)
 model = model.eval()
@@ -123,9 +118,6 @@ def cosine_distance(matrix1 , matrix2):
 model = model.to(device)
 torch_output = model(torch_data)
 
-
-max_diff = compare_tvm_torch_output(out.numpy(), torch_output.detach().cpu().numpy())
-print("the largest difference between two arrays: {}".format(str(max_diff)))
 print("the cosine distance between torch output and trt int8 output of tvm : ")
 cosine_distance_res = cosine_distance(out.numpy(), torch_output.detach().cpu().numpy())
 print(cosine_distance_res)

From 0afbcc7328cf83ffd5deb6edc88ee92a4476aa43 Mon Sep 17 00:00:00 2001
From: Cuiqing Li 
Date: Tue, 7 Sep 2021 14:18:46 -0700
Subject: [PATCH 29/58] Update test_tensorrt_int8_exp.py

---
 tests/python/contrib/test_tensorrt_int8_exp.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py
index 308a30bd6349..403108607ee3 100644
--- a/tests/python/contrib/test_tensorrt_int8_exp.py
+++ b/tests/python/contrib/test_tensorrt_int8_exp.py
@@ -38,7 +38,6 @@ def cosine_distance(a, b):
     res = distance.cosine(a, b)
     return res
 
-# you can change model name into resnet18, mobilenet_v2 ...
 model_name = "resnet34"
 model = getattr(torchvision.models, model_name)(pretrained=True)
 model = model.eval()
@@ -127,4 +126,4 @@ def cosine_distance(a, b):
 ftimer = gen_module.module.time_evaluator("run", dev, repeat=10, min_repeat_ms=500)
 prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
 message = "Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res))
-print(message)
\ No newline at end of file
+print(message)

From 2640ab344366a8e16593272fb3459281df64d994 Mon Sep 17 00:00:00 2001
From: "cuiqing.li" 
Date: Wed, 8 Sep 2021 10:45:07 +0800
Subject: [PATCH 30/58] update trt int8 file

---
 .../python/contrib/test_tensorrt_int8_exp.py | 191 +++++++++---------
 1 file changed, 93 insertions(+), 98 deletions(-)

diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py
index 308a30bd6349..d4c06ad95c8a 100644
--- a/tests/python/contrib/test_tensorrt_int8_exp.py
+++ b/tests/python/contrib/test_tensorrt_int8_exp.py
@@ -14,10 +14,15 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import pytest
+import os
+import numpy as np
+import cv2
+from PIL import Image
+from scipy.spatial import distance
 
-# FP16 TRT command to run : TVM_TENSORRT_USE_FP16=1 python test_trt.py
-# INT8 TRT command to run : TVM_TENSORRT_USE_INT8=1 TENSORRT_NUM_CALI_INT8=10 python test_trt.py
 import tvm
+import tvm.relay.testing
 from tvm import relay
 from tvm.contrib.download import download_testdata
 from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt
 
 # PyTorch imports
 import torch
 import torchvision
 from torchvision import transforms
 
-# additonal imports
-import os
-import numpy as np
-import cv2
-from PIL import Image
-from scipy.spatial import distance
 
 def cosine_distance(a, b):
     res = distance.cosine(a, b)
     return res
 
-# you can change model name into resnet18, mobilenet_v2 ...
-model_name = "resnet34" -model = getattr(torchvision.models, model_name)(pretrained=True) -model = model.eval() - -# We grab the TorchScripted model via tracing -input_shape = [1, 3, 224, 224] -input_data = torch.randn(input_shape) -scripted_model = torch.jit.trace(model, input_data).eval() -scripted_model.save("mobilenet_v2.pt") - -img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true" -img_path = download_testdata(img_url, "cat.png", module="data") -img = Image.open(img_path).resize((224, 224)) -my_preprocess = transforms.Compose( - [ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ] -) -img = my_preprocess(img) -img = np.expand_dims(img, 0) - -input_name = "input0" -shape_list = [(input_name, img.shape)] -mod, params = relay.frontend.from_pytorch(scripted_model, shape_list) - -# compile the model -target = "cuda" -dev = tvm.cuda(1) -mod, config = partition_for_tensorrt(mod, params) - -print("python script started building --------------") -with tvm.transform.PassContext(opt_level=3, config={'relay.ext.tensorrt.options': config}): - lib = relay.build(mod, target=target, params=params) -print("python script finsihed building -------------------") - - -dtype = "float32" -lib.export_library('compiled.so') -loaded_lib = tvm.runtime.load_module('compiled.so') -gen_module = tvm.contrib.graph_executor.GraphModule(loaded_lib['default'](dev)) - -num_cali_int8 = 0 -try: - num_cali_int8 = os.environ["TENSORRT_NUM_CALI_INT8"] - print("we are going to set {} times calibration in this case".format(num_cali_int8)) -except: - print("no TENSORRT_NUM_CALI_INT8 found in this case ... ") - -num_cali_int8 = int(num_cali_int8) -if num_cali_int8 != 0: - print("calibration steps ... ") - for i in range(num_cali_int8): - tvm_data = tvm.nd.array(img) - gen_module.set_input(input_name, tvm_data) - gen_module.run(data=tvm_data) - print("finished calibration step") - - -# get output of tvm model -print("test run ... 
") -tvm_data = tvm.nd.array(img) -gen_module.set_input(input_name, tvm_data) -gen_module.run(data=tvm_data) -out = gen_module.get_output(0) - - -# check output of tvm and output of pytorch model are equal -data_np = img -device = "cuda:1" -torch_data = torch.from_numpy(data_np) -torch_data = torch_data.to(device) -model = scripted_model -model = model.eval() -model = model.to(device) -torch_output = model(torch_data) - -print("the cosine distance between torch output and trt int8 output of tvm : ") -cosine_distance_res = cosine_distance(out.numpy(), torch_output.detach().cpu().numpy()) -print(cosine_distance_res) - -# Evaluate -print("Evaluate inference time cost...") -ftimer = gen_module.module.time_evaluator("run", dev, repeat=10, min_repeat_ms=500) -prof_res = np.array(ftimer().results) * 1e3 # convert to millisecond -message = "Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)) -print(message) \ No newline at end of file +def test_trt_int8(): + """ + This Function is used to use tensorrt int8 to compile a resnet34 model, + and compare cosine distance between the output of the original model and trt int8 tvm ouput + + """ + os.environ["TVM_TENSORRT_USE_INT8"] = "1" + os.environ["TENSORRT_NUM_CALI_INT8"] = "10" + model_name = "resnet34" + model = getattr(torchvision.models, model_name)(pretrained=True) + model = model.eval() + + # We grab the TorchScripted model via tracing + input_shape = [1, 3, 224, 224] + input_data = torch.randn(input_shape) + scripted_model = torch.jit.trace(model, input_data).eval() + + img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true" + img_path = download_testdata(img_url, "cat.png", module="data") + img = Image.open(img_path).resize((224, 224)) + my_preprocess = transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ) + img = my_preprocess(img) + img = np.expand_dims(img, 0) + + input_name = "input0" + shape_list = [(input_name, img.shape)] + mod, params = relay.frontend.from_pytorch(scripted_model, shape_list) + + # compile the model + target = "cuda" + dev = tvm.cuda(1) + mod, config = partition_for_tensorrt(mod, params) + with tvm.transform.PassContext(opt_level=3, config={'relay.ext.tensorrt.options': config}): + lib = relay.build(mod, target=target, params=params) + + + dtype = "float32" + lib.export_library('compiled.so') + loaded_lib = tvm.runtime.load_module('compiled.so') + gen_module = tvm.contrib.graph_executor.GraphModule(loaded_lib['default'](dev)) + + num_cali_int8 = int(os.environ["TENSORRT_NUM_CALI_INT8"]) + if num_cali_int8 != 0: + print("start calibrating data ... ") + for i in range(num_cali_int8): + tvm_data = tvm.nd.array(img) + gen_module.set_input(input_name, tvm_data) + gen_module.run(data=tvm_data) + print("finished calibrating data ... ") + + + # get output of tvm model + print("rebuild engine and test to run ... 
") + tvm_data = tvm.nd.array(img) + gen_module.set_input(input_name, tvm_data) + gen_module.run(data=tvm_data) + out = gen_module.get_output(0) + + + # check output of tvm and output of pytorch model are equal + torch_data = torch.from_numpy(img) + model = scripted_model.eval() + torch_output = model(torch_data) + + print("the cosine distance between torch output and trt int8 output of tvm : ") + cosine_distance_res = cosine_distance(out.numpy(), torch_output.detach().cpu().numpy()) + assert cosine_distance_res <= 0.02 + + # Evaluate + print("Evaluate inference time cost...") + ftimer = gen_module.module.time_evaluator("run", dev, repeat=10, min_repeat_ms=500) + prof_res = np.array(ftimer().results) * 1e3 # convert to millisecond + message = "Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)) + print(message) + + + +if __name__ == "__main__": + pytest.main([__file__]) From 2e4293a88fc0487b0cc201f3ae7074a4b0d67f05 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 10:48:18 +0800 Subject: [PATCH 31/58] change a little --- tests/python/contrib/test_tensorrt_int8_exp.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py index b2e3efa3e1cd..8673df3c89cc 100644 --- a/tests/python/contrib/test_tensorrt_int8_exp.py +++ b/tests/python/contrib/test_tensorrt_int8_exp.py @@ -108,9 +108,8 @@ def test_trt_int8(): model = scripted_model.eval() torch_output = model(torch_data) - print("the cosine distance between torch output and trt int8 output of tvm : ") cosine_distance_res = cosine_distance(out.numpy(), torch_output.detach().cpu().numpy()) - assert cosine_distance_res <= 0.02 + assert cosine_distance_res <= 0.01 # Evaluate print("Evaluate inference time cost...") From 728276263786c9ece1245f191862f2e7179bf2d5 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 10:55:09 +0800 Subject: [PATCH 32/58] upate trt int8 file --- tests/python/contrib/test_tensorrt_int8_exp.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py index 8673df3c89cc..756d2ceaec76 100644 --- a/tests/python/contrib/test_tensorrt_int8_exp.py +++ b/tests/python/contrib/test_tensorrt_int8_exp.py @@ -81,9 +81,7 @@ def test_trt_int8(): dtype = "float32" - lib.export_library('compiled.so') - loaded_lib = tvm.runtime.load_module('compiled.so') - gen_module = tvm.contrib.graph_executor.GraphModule(loaded_lib['default'](dev)) + gen_module = tvm.contrib.graph_executor.GraphModule(lib['default'](dev)) num_cali_int8 = int(os.environ["TENSORRT_NUM_CALI_INT8"]) if num_cali_int8 != 0: @@ -119,4 +117,5 @@ def test_trt_int8(): print(message) if __name__ == "__main__": - pytest.main([__file__]) + test_trt_int8() + # pytest.main([__file__]) From 5645dad5da90ee48c1661156c2c601ba3a94bee0 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 10:55:57 +0800 Subject: [PATCH 33/58] upate trt int8 file --- tests/python/contrib/test_tensorrt_int8_exp.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py index 756d2ceaec76..67067494488f 100644 --- a/tests/python/contrib/test_tensorrt_int8_exp.py +++ b/tests/python/contrib/test_tensorrt_int8_exp.py @@ -117,5 +117,4 @@ def test_trt_int8(): print(message) if __name__ == "__main__": - test_trt_int8() 
- # pytest.main([__file__]) + pytest.main([__file__]) From 9756f7b9087a50330455448283dcb84b4713d14b Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 11:42:30 +0800 Subject: [PATCH 34/58] fixing ci --- src/runtime/contrib/tensorrt/tensorrt_calibrator.h | 10 +++++----- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 12 +++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h index b6d87a6c0975..fdb49f71729a 100755 --- a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h +++ b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h @@ -33,7 +33,7 @@ namespace tvm { namespace runtime { class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { -public: + public: TensorRTCalibrator(int batch_size, const std::vector &input_names) : batch_size_(batch_size), num_batches_calibrated_(0), @@ -100,7 +100,7 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { calibration_cache_.assign(static_cast(cache), length); } -private: + private: /*! \brief Batch size. */ int batch_size_; /*! \brief Number of batches already fed to calibrator. */ @@ -133,6 +133,6 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { } }; -} // namespace runtime -} // namespace tvm -#endif // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ +} // namespace runtime +} // namespace tvm +#endif // TVM_RUNTIME_CONTRIB_TENSORRT_TENSORRT_CALIBRATOR_H_ diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index cd3ce692e8f0..8272c807d694 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -78,9 +78,9 @@ class TensorRTRuntime : public JSONRuntimeBase { multi_engine_mode_ = dmlc::GetEnv("TVM_TENSORRT_MULTI_ENGINE", false); if (use_int8) { const int extract_cali_num = dmlc::GetEnv("TENSORRT_NUM_CALI_INT8", 0); - ICHECK(extract_cali_num != 0) << "When using INT8 mode, environment variable TENSORRT_NUM_CALI_INT8" - << "must also be set to specify the number of inputs which will be" - << " used for calibration."; + ICHECK(extract_cali_num != 0) << "When using INT8 mode, " + << "environment variable TENSORRT_NUM_CALI_INT8" + << "must also be set to specify the number of calibration times"; num_calibration_batches_remaining_ = extract_cali_num; LOG(INFO) << "settiing up " << num_calibration_batches_remaining_ << @@ -269,8 +269,10 @@ class TensorRTRuntime : public JSONRuntimeBase { int compatible_engine_batch_size = -1; bool find_engine_flag = FindCompatibleEngine(batch_size, &compatible_engine_batch_size); const bool use_int8 = (dmlc::GetEnv("TVM_TENSORRT_USE_INT8", 0) != 0); - const bool int8_calibration_not_used_or_not_complete = (calibrator_ != nullptr && num_calibration_batches_remaining_ != 0); - if (find_engine_flag && (!use_int8 || calibrator_ == nullptr || int8_calibration_not_used_or_not_complete)) { + const bool int8_calibration_not_used_or_not_complete = + (calibrator_ != nullptr && num_calibration_batches_remaining_ != 0); + if (find_engine_flag && (!use_int8 || calibrator_ == nullptr + || int8_calibration_not_used_or_not_complete)) { // A compatible engine already exists. 
return trt_engine_cache_.at(std::make_pair(symbol_name_, compatible_engine_batch_size)); } From fddbd436704b15be949d31b6fc3cddd67f82c354 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 11:50:14 +0800 Subject: [PATCH 35/58] fixing ci --- .../contrib/tensorrt/tensorrt_runtime.cc | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 8272c807d694..b3018021e40d 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -74,18 +74,19 @@ class TensorRTRuntime : public JSONRuntimeBase { max_workspace_size_(size_t(1) << 30), max_batch_size_(-1), multi_engine_mode_(false) { - const bool use_int8 = dmlc::GetEnv("TVM_TENSORRT_USE_INT8", false); - multi_engine_mode_ = dmlc::GetEnv("TVM_TENSORRT_MULTI_ENGINE", false); - if (use_int8) { - const int extract_cali_num = dmlc::GetEnv("TENSORRT_NUM_CALI_INT8", 0); - ICHECK(extract_cali_num != 0) << "When using INT8 mode, " - << "environment variable TENSORRT_NUM_CALI_INT8" - << "must also be set to specify the number of calibration times"; - num_calibration_batches_remaining_ = extract_cali_num; - LOG(INFO) << "settiing up " << - num_calibration_batches_remaining_ << - " sample data to calibrate data ... "; - ICHECK(multi_engine_mode_ == false) << "When using int8 mode, multi-engine is not allowed"; + const bool use_int8 = dmlc::GetEnv("TVM_TENSORRT_USE_INT8", false); + multi_engine_mode_ = dmlc::GetEnv("TVM_TENSORRT_MULTI_ENGINE", false); + if (use_int8) { + const int extract_cali_num = dmlc::GetEnv("TENSORRT_NUM_CALI_INT8", 0); + ICHECK(extract_cali_num != 0) << "When using INT8 mode, " + << "environment variable TENSORRT_NUM_CALI_INT8" + << "must also be set to specify the number of " + << "calibration times"; + num_calibration_batches_remaining_ = extract_cali_num; + LOG(INFO) << "settiing up " << + num_calibration_batches_remaining_ << + " sample data to calibrate data ... "; + ICHECK(multi_engine_mode_ == false) << "When using int8 mode, multi-engine is not allowed"; } } @@ -271,7 +272,7 @@ class TensorRTRuntime : public JSONRuntimeBase { const bool use_int8 = (dmlc::GetEnv("TVM_TENSORRT_USE_INT8", 0) != 0); const bool int8_calibration_not_used_or_not_complete = (calibrator_ != nullptr && num_calibration_batches_remaining_ != 0); - if (find_engine_flag && (!use_int8 || calibrator_ == nullptr + if (find_engine_flag && (!use_int8 || calibrator_ == nullptr || int8_calibration_not_used_or_not_complete)) { // A compatible engine already exists. 
return trt_engine_cache_.at(std::make_pair(symbol_name_, compatible_engine_batch_size)); @@ -293,7 +294,8 @@ class TensorRTRuntime : public JSONRuntimeBase { } else { // Build new engine BuildEngineFromJson(batch_size); - TensorRTEngineAndContext& engine_and_context = trt_engine_cache_[std::make_pair(symbol_name_, batch_size)]; + TensorRTEngineAndContext& engine_and_context = + trt_engine_cache_[std::make_pair(symbol_name_, batch_size)]; if (use_int8) { this->CreateInt8Calibrator(engine_and_context); } From 4b56ac8fbeb3cec546eb11385a2fbfec1c266228 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 12:00:12 +0800 Subject: [PATCH 36/58] fixing ci --- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index b3018021e40d..50486360c784 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -86,7 +86,8 @@ class TensorRTRuntime : public JSONRuntimeBase { LOG(INFO) << "settiing up " << num_calibration_batches_remaining_ << " sample data to calibrate data ... "; - ICHECK(multi_engine_mode_ == false) << "When using int8 mode, multi-engine is not allowed"; + ICHECK(multi_engine_mode_ == false) << "When using int8 mode, " + << "multi-engine is not allowed"; } } @@ -294,7 +295,7 @@ class TensorRTRuntime : public JSONRuntimeBase { } else { // Build new engine BuildEngineFromJson(batch_size); - TensorRTEngineAndContext& engine_and_context = + TensorRTEngineAndContext& engine_and_context = trt_engine_cache_[std::make_pair(symbol_name_, batch_size)]; if (use_int8) { this->CreateInt8Calibrator(engine_and_context); From 06797a0ffa94f77c8a0a5447e55926c91c98f994 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 12:07:58 +0800 Subject: [PATCH 37/58] fixing ci --- src/runtime/contrib/tensorrt/tensorrt_calibrator.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h index fdb49f71729a..3e2020bfdf06 100755 --- a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h +++ b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h @@ -34,10 +34,8 @@ namespace runtime { class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { public: - TensorRTCalibrator(int batch_size, - const std::vector &input_names) - : batch_size_(batch_size), num_batches_calibrated_(0), - input_names_(input_names) {} + TensorRTCalibrator(int batch_size, const std::vector &input_names) + : batch_size_(batch_size), num_batches_calibrated_(0), input_names_(input_names) {} ~TensorRTCalibrator() { // Free calibration data From 947d22dfff2bb8719a059ac5871194795708a267 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 12:16:26 +0800 Subject: [PATCH 38/58] fixing ci --- .../contrib/tensorrt/tensorrt_calibrator.h | 27 ++++++++----------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h index 3e2020bfdf06..e466f76b6f46 100755 --- a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h +++ b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h @@ -34,12 +34,12 @@ namespace runtime { class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { public: - TensorRTCalibrator(int batch_size, const std::vector &input_names) + 
TensorRTCalibrator(int batch_size, const std::vector& input_names) : batch_size_(batch_size), num_batches_calibrated_(0), input_names_(input_names) {} ~TensorRTCalibrator() { // Free calibration data - for (auto &inputs : data_) { + for (auto& inputs : data_) { for (size_t i = 0; i < inputs.size(); ++i) { delete[] inputs[i]; } @@ -50,15 +50,13 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { } } - void AddBatchData(const std::vector &bindings, - const std::vector &binding_sizes) { + void AddBatchData(const std::vector& bindings, const std::vector& binding_sizes) { // Copy data from GPU std::vector data_host(bindings.size(), nullptr); for (size_t i = 0; i < bindings.size(); ++i) { data_host[i] = new float[batch_size_ * binding_sizes[i]]; - CUDA_CALL(cudaMemcpy(static_cast(data_host[i]), bindings[i], - batch_size_ * binding_sizes[i] * sizeof(float), - cudaMemcpyDeviceToHost)); + CUDA_CALL(cudaMemcpy(static_cast(data_host[i]), bindings[i], + batch_size_ * binding_sizes[i] * sizeof(float), cudaMemcpyDeviceToHost)); } data_.push_back(data_host); data_sizes_.push_back(binding_sizes); @@ -70,14 +68,12 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { * \brief TensorRT will call this method to get next batch of data to * calibrate with. */ - bool getBatch(void *bindings[], const char *names[], - int nbBindings) override { + bool getBatch(void* bindings[], const char* names[], int nbBindings) override { AllocateBuffersIfNotAllocated(); CHECK_EQ(input_names_.size(), nbBindings); for (size_t i = 0; i < input_names_.size(); ++i) { CHECK_EQ(input_names_[i], names[i]); - CUDA_CALL(cudaMemcpy( - buffers_[i], data_[num_batches_calibrated_][i], + CUDA_CALL(cudaMemcpy(buffers_[i], data_[num_batches_calibrated_][i], batch_size_ * data_sizes_[num_batches_calibrated_][i] * sizeof(float), cudaMemcpyHostToDevice)); bindings[i] = buffers_[i]; @@ -87,15 +83,14 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { return (num_batches_calibrated_ < data_.size()); } - const void *readCalibrationCache(size_t &length) override { - if (calibration_cache_.empty()) - return nullptr; + const void *readCalibrationCache(size_t& length) override { + if (calibration_cache_.empty()) return nullptr; length = calibration_cache_.size(); return calibration_cache_.data(); } - void writeCalibrationCache(const void *cache, size_t length) override { - calibration_cache_.assign(static_cast(cache), length); + void writeCalibrationCache(const void* cache, size_t length) override { + calibration_cache_.assign(static_cast(cache), length); } private: From d77dda0021f85278c422835353bba26f97465d8c Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 12:24:14 +0800 Subject: [PATCH 39/58] fixing ci issue --- .../contrib/tensorrt/tensorrt_calibrator.h | 17 +++++----- .../contrib/tensorrt/tensorrt_runtime.cc | 32 +++++++++---------- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h index e466f76b6f46..1e340d287629 100755 --- a/src/runtime/contrib/tensorrt/tensorrt_calibrator.h +++ b/src/runtime/contrib/tensorrt/tensorrt_calibrator.h @@ -50,9 +50,9 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { } } - void AddBatchData(const std::vector& bindings, const std::vector& binding_sizes) { + void AddBatchData(const std::vector& bindings, const std::vector& binding_sizes) { // Copy data from GPU - std::vector data_host(bindings.size(), 
nullptr); + std::vector data_host(bindings.size(), nullptr); for (size_t i = 0; i < bindings.size(); ++i) { data_host[i] = new float[batch_size_ * binding_sizes[i]]; CUDA_CALL(cudaMemcpy(static_cast(data_host[i]), bindings[i], @@ -74,8 +74,8 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { for (size_t i = 0; i < input_names_.size(); ++i) { CHECK_EQ(input_names_[i], names[i]); CUDA_CALL(cudaMemcpy(buffers_[i], data_[num_batches_calibrated_][i], - batch_size_ * data_sizes_[num_batches_calibrated_][i] * sizeof(float), - cudaMemcpyHostToDevice)); + batch_size_ * data_sizes_[num_batches_calibrated_][i] * sizeof(float), + cudaMemcpyHostToDevice)); bindings[i] = buffers_[i]; } num_batches_calibrated_++; @@ -83,7 +83,7 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { return (num_batches_calibrated_ < data_.size()); } - const void *readCalibrationCache(size_t& length) override { + const void* readCalibrationCache(size_t& length) override { if (calibration_cache_.empty()) return nullptr; length = calibration_cache_.size(); return calibration_cache_.data(); @@ -102,12 +102,12 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { std::string calibration_cache_; /*! \brief Data to be used for calibration. */ - std::vector> data_; + std::vector> data_; /*! \brief Number of elements for data to be used for calibration. */ std::vector> data_sizes_; /*! \brief Device buffers to be used for calibration. */ - std::vector buffers_; + std::vector buffers_; /*! \brief Names of inputs */ const std::vector input_names_; @@ -115,8 +115,7 @@ class TensorRTCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { /*! \brief Allocate device memory buffers. data_sizes_ must already have one * entry. */ void AllocateBuffersIfNotAllocated() { - if (!buffers_.empty()) - return; + if (!buffers_.empty()) return; CHECK_GE(data_sizes_.size(), 1); const int num_inputs = data_sizes_[0].size(); buffers_.assign(num_inputs, nullptr); diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 50486360c784..7ed1921bc878 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -74,22 +74,22 @@ class TensorRTRuntime : public JSONRuntimeBase { max_workspace_size_(size_t(1) << 30), max_batch_size_(-1), multi_engine_mode_(false) { - const bool use_int8 = dmlc::GetEnv("TVM_TENSORRT_USE_INT8", false); - multi_engine_mode_ = dmlc::GetEnv("TVM_TENSORRT_MULTI_ENGINE", false); - if (use_int8) { - const int extract_cali_num = dmlc::GetEnv("TENSORRT_NUM_CALI_INT8", 0); - ICHECK(extract_cali_num != 0) << "When using INT8 mode, " - << "environment variable TENSORRT_NUM_CALI_INT8" - << "must also be set to specify the number of " - << "calibration times"; - num_calibration_batches_remaining_ = extract_cali_num; - LOG(INFO) << "settiing up " << - num_calibration_batches_remaining_ << - " sample data to calibrate data ... 
"; - ICHECK(multi_engine_mode_ == false) << "When using int8 mode, " - << "multi-engine is not allowed"; - } - } + const bool use_int8 = dmlc::GetEnv("TVM_TENSORRT_USE_INT8", false); + multi_engine_mode_ = dmlc::GetEnv("TVM_TENSORRT_MULTI_ENGINE", false); + if (use_int8) { + const int extract_cali_num = dmlc::GetEnv("TENSORRT_NUM_CALI_INT8", 0); + ICHECK(extract_cali_num != 0) << "When using INT8 mode, " + << "environment variable TENSORRT_NUM_CALI_INT8" + << "must also be set to specify the number of " + << "calibration times"; + num_calibration_batches_remaining_ = extract_cali_num; + LOG(INFO) << "settiing up " << + num_calibration_batches_remaining_ << + " sample data to calibrate data ... "; + ICHECK(multi_engine_mode_ == false) << "When using int8 mode, " + << "multi-engine is not allowed"; + } + } /*! * \brief The type key of the module. From 9a515f86b6a519364850befbab950ea725c5592c Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 12:32:45 +0800 Subject: [PATCH 40/58] fixing ci issue --- .../contrib/tensorrt/tensorrt_runtime.cc | 68 +++++++++---------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 7ed1921bc878..791d06b48f20 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -83,13 +83,12 @@ class TensorRTRuntime : public JSONRuntimeBase { << "must also be set to specify the number of " << "calibration times"; num_calibration_batches_remaining_ = extract_cali_num; - LOG(INFO) << "settiing up " << - num_calibration_batches_remaining_ << + LOG(INFO) << "settiing up " << num_calibration_batches_remaining_ << " sample data to calibrate data ... "; ICHECK(multi_engine_mode_ == false) << "When using int8 mode, " << "multi-engine is not allowed"; - } } + } /*! * \brief The type key of the module. @@ -189,9 +188,8 @@ class TensorRTRuntime : public JSONRuntimeBase { // add batch data to calibrator if (num_calibration_batches_remaining_ > 0) { if (calibrator_ != nullptr) { - LOG(INFO) << "Starting adding last " << - num_calibration_batches_remaining_ << - "-th batch data to the calibrator"; + LOG(INFO) << "Starting adding last " << num_calibration_batches_remaining_ + << "-th batch data to the calibrator"; calibrator_->AddBatchData(bindings, binding_sizes); num_calibration_batches_remaining_--; } @@ -273,8 +271,8 @@ class TensorRTRuntime : public JSONRuntimeBase { const bool use_int8 = (dmlc::GetEnv("TVM_TENSORRT_USE_INT8", 0) != 0); const bool int8_calibration_not_used_or_not_complete = (calibrator_ != nullptr && num_calibration_batches_remaining_ != 0); - if (find_engine_flag && (!use_int8 || calibrator_ == nullptr - || int8_calibration_not_used_or_not_complete)) { + if (find_engine_flag && + (!use_int8 || calibrator_ == nullptr || int8_calibration_not_used_or_not_complete)) { // A compatible engine already exists. 
From 53802b569e051f0a7499cd067bd823251ad577ab Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Wed, 8 Sep 2021 12:36:11 +0800
Subject: [PATCH 41/58] fixing ci

---
 src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index 791d06b48f20..6be9e527fbdf 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -88,7 +88,7 @@ class TensorRTRuntime : public JSONRuntimeBase {
      ICHECK(multi_engine_mode_ == false) << "When using int8 mode, "
                                          << "multi-engine is not allowed";
    }
-   }
+  }

  /*!
   * \brief The type key of the module.
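The CacheEngineToDisk call and the TVM_TENSORRT_CACHE_DIR comment that close the engine-building code above point at a related convenience: built engines can be serialized to a directory and reloaded on later runs instead of being rebuilt from the JSON graph. Assuming only the behavior the comment describes, enabling it is one more environment variable; the path below is an example, not a required location:

    import os

    # When set before the module runs, built TensorRT engines are written to
    # this directory and picked up again by later processes.
    os.environ["TVM_TENSORRT_CACHE_DIR"] = "/tmp/trt_engine_cache"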
From ccb74c7fc593d54c6254d87543e70d1d350d6c55 Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Wed, 8 Sep 2021 12:42:08 +0800
Subject: [PATCH 42/58] fixing ci issue

---
 .../contrib/tensorrt/tensorrt_runtime.cc      | 30 +++++++++----------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index 6be9e527fbdf..367c8820ee88 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -83,8 +83,8 @@ class TensorRTRuntime : public JSONRuntimeBase {
                                    << "must also be set to specify the number of "
                                    << "calibration times";
      num_calibration_batches_remaining_ = extract_cali_num;
-      LOG(INFO) << "settiing up " << num_calibration_batches_remaining_ <<
-          " sample data to calibrate data ... ";
+      LOG(INFO) << "settiing up " << num_calibration_batches_remaining_
+                << " sample data to calibrate data ... ";
      ICHECK(multi_engine_mode_ == false) << "When using int8 mode, "
                                          << "multi-engine is not allowed";
    }
@@ -196,7 +196,6 @@ class TensorRTRuntime : public JSONRuntimeBase {
      return;
    }
-
    // Setup output bindings.
    for (size_t i = 0; i < outputs_.size(); ++i) {
      uint32_t eid = EntryID(outputs_[i]);
@@ -211,7 +210,6 @@ class TensorRTRuntime : public JSONRuntimeBase {
      }
    }
-
#if TRT_VERSION_GE(6, 0, 1)
    if (use_implicit_batch_) {
      ICHECK(context->execute(batch_size, bindings.data())) << "Running TensorRT failed.";
@@ -270,7 +268,7 @@ class TensorRTRuntime : public JSONRuntimeBase {
    bool find_engine_flag = FindCompatibleEngine(batch_size, &compatible_engine_batch_size);
    const bool use_int8 = (dmlc::GetEnv("TVM_TENSORRT_USE_INT8", 0) != 0);
    const bool int8_calibration_not_used_or_not_complete =
-            (calibrator_ != nullptr && num_calibration_batches_remaining_ != 0);
+        (calibrator_ != nullptr && num_calibration_batches_remaining_ != 0);
    if (find_engine_flag &&
        (!use_int8 || calibrator_ == nullptr || int8_calibration_not_used_or_not_complete)) {
      // A compatible engine already exists.
@@ -311,17 +309,17 @@ class TensorRTRuntime : public JSONRuntimeBase {
    TensorRTBuilder builder(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_,
                            use_fp16, batch_size, calibrator_.get());
    for (size_t i = 0; i < input_nodes_.size(); ++i) {
-        auto nid = input_nodes_[i];
-        const auto& node = nodes_[nid];
-        std::string name = node.GetOpName();
-        if (node.GetOpType() == "input") {
-          builder.AddInput(nid, EntryID(nid, 0), node);
-        } else {
-          ICHECK_EQ(node.GetOpType(), "const");
-          uint32_t eid = EntryID(nid, 0);
-          builder.AddConstant(nid, data_entry_[eid]);
-        }
-    }
+      auto nid = input_nodes_[i];
+      const auto& node = nodes_[nid];
+      std::string name = node.GetOpName();
+      if (node.GetOpType() == "input") {
+        builder.AddInput(nid, EntryID(nid, 0), node);
+      } else {
+        ICHECK_EQ(node.GetOpType(), "const");
+        uint32_t eid = EntryID(nid, 0);
+        builder.AddConstant(nid, data_entry_[eid]);
+      }
+    }

    // Add layers.
    for (size_t nid = 0; nid < nodes_.size(); ++nid) {

From c1f0fafb0f05e30de4985adfdaa27f92f5c08b12 Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Wed, 8 Sep 2021 12:45:05 +0800
Subject: [PATCH 43/58] fixing ci

---
 src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index 367c8820ee88..67281363937b 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -83,7 +83,7 @@ class TensorRTRuntime : public JSONRuntimeBase {
                                    << "must also be set to specify the number of "
                                    << "calibration times";
      num_calibration_batches_remaining_ = extract_cali_num;
-      LOG(INFO) << "settiing up " << num_calibration_batches_remaining_
+      LOG(INFO) << "settiing up " << num_calibration_batches_remaining_
                << " sample data to calibrate data ... ";
      ICHECK(multi_engine_mode_ == false) << "When using int8 mode, "
                                          << "multi-engine is not allowed";
"; ICHECK(multi_engine_mode_ == false) << "When using int8 mode, " << "multi-engine is not allowed"; From 81761b462bcfc3a4e5de5ecb281d8921a11f2020 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 12:47:59 +0800 Subject: [PATCH 44/58] fixing ci problem --- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 67281363937b..c24f88ebabf1 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -313,11 +313,11 @@ class TensorRTRuntime : public JSONRuntimeBase { const auto& node = nodes_[nid]; std::string name = node.GetOpName(); if (node.GetOpType() == "input") { - builder.AddInput(nid, EntryID(nid, 0), node); + builder.AddInput(nid, EntryID(nid, 0), node); } else { - ICHECK_EQ(node.GetOpType(), "const"); - uint32_t eid = EntryID(nid, 0); - builder.AddConstant(nid, data_entry_[eid]); + ICHECK_EQ(node.GetOpType(), "const"); + uint32_t eid = EntryID(nid, 0); + builder.AddConstant(nid, data_entry_[eid]); } } From bf30e8e81f89efd426499f4075557c8a867955b3 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 12:53:46 +0800 Subject: [PATCH 45/58] fixing ci --- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index c24f88ebabf1..11cafcd21f09 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -54,7 +54,6 @@ struct PairHash { } }; - using namespace tvm::runtime::json; class TensorRTRuntime : public JSONRuntimeBase { From 63512ad3eaa339774ae2c641fe4153abde520147 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 13:00:08 +0800 Subject: [PATCH 46/58] upate trt python int8 test file --- tests/python/contrib/test_tensorrt_int8_exp.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py index 67067494488f..7863c908f424 100644 --- a/tests/python/contrib/test_tensorrt_int8_exp.py +++ b/tests/python/contrib/test_tensorrt_int8_exp.py @@ -37,6 +37,7 @@ def cosine_distance(a, b): res = distance.cosine(a, b) return res + def test_trt_int8(): """ This Function is used to use tensorrt int8 to compile a resnet34 model, @@ -79,7 +80,6 @@ def test_trt_int8(): with tvm.transform.PassContext(opt_level=3, config={'relay.ext.tensorrt.options': config}): lib = relay.build(mod, target=target, params=params) - dtype = "float32" gen_module = tvm.contrib.graph_executor.GraphModule(lib['default'](dev)) @@ -92,7 +92,6 @@ def test_trt_int8(): gen_module.run(data=tvm_data) print("finished calibrating data ... ") - # get output of tvm model print("rebuild engine and test to run ... 
") tvm_data = tvm.nd.array(img) @@ -100,7 +99,6 @@ def test_trt_int8(): gen_module.run(data=tvm_data) out = gen_module.get_output(0) - # check output of tvm and output of pytorch model are equal torch_data = torch.from_numpy(img) model = scripted_model.eval() @@ -113,8 +111,12 @@ def test_trt_int8(): print("Evaluate inference time cost...") ftimer = gen_module.module.time_evaluator("run", dev, repeat=10, min_repeat_ms=500) prof_res = np.array(ftimer().results) * 1e3 # convert to millisecond - message = "Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)) + message = "Mean inference time (std dev): %.2f ms (%.2f ms)" % ( + np.mean(prof_res), + np.std(prof_res) + ) print(message) + if __name__ == "__main__": pytest.main([__file__]) From d3ac8c9c6f734531649ebf5333a4c9cced203c89 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 13:04:47 +0800 Subject: [PATCH 47/58] fixed ci --- tests/python/contrib/test_tensorrt_int8_exp.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py index 7863c908f424..750b9ffde725 100644 --- a/tests/python/contrib/test_tensorrt_int8_exp.py +++ b/tests/python/contrib/test_tensorrt_int8_exp.py @@ -77,11 +77,11 @@ def test_trt_int8(): target = "cuda" dev = tvm.cuda(1) mod, config = partition_for_tensorrt(mod, params) - with tvm.transform.PassContext(opt_level=3, config={'relay.ext.tensorrt.options': config}): + with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}): lib = relay.build(mod, target=target, params=params) dtype = "float32" - gen_module = tvm.contrib.graph_executor.GraphModule(lib['default'](dev)) + gen_module = tvm.contrib.graph_executor.GraphModule(lib["default"](dev)) num_cali_int8 = int(os.environ["TENSORRT_NUM_CALI_INT8"]) if num_cali_int8 != 0: @@ -92,7 +92,7 @@ def test_trt_int8(): gen_module.run(data=tvm_data) print("finished calibrating data ... ") - # get output of tvm model + # get output of tvm model print("rebuild engine and test to run ... 
") tvm_data = tvm.nd.array(img) gen_module.set_input(input_name, tvm_data) @@ -113,7 +113,7 @@ def test_trt_int8(): prof_res = np.array(ftimer().results) * 1e3 # convert to millisecond message = "Mean inference time (std dev): %.2f ms (%.2f ms)" % ( np.mean(prof_res), - np.std(prof_res) + np.std(prof_res), ) print(message) From ac589799c1c1ef7082ad0db4264feeb7d6671f8c Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 13:07:24 +0800 Subject: [PATCH 48/58] fixed ci --- tests/python/contrib/test_tensorrt_int8_exp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py index 750b9ffde725..a732854a1b55 100644 --- a/tests/python/contrib/test_tensorrt_int8_exp.py +++ b/tests/python/contrib/test_tensorrt_int8_exp.py @@ -112,7 +112,7 @@ def test_trt_int8(): ftimer = gen_module.module.time_evaluator("run", dev, repeat=10, min_repeat_ms=500) prof_res = np.array(ftimer().results) * 1e3 # convert to millisecond message = "Mean inference time (std dev): %.2f ms (%.2f ms)" % ( - np.mean(prof_res), + np.mean(prof_res), np.std(prof_res), ) print(message) From 90eabe1cbf1027b163c8aba3aa0410dd2e135ba6 Mon Sep 17 00:00:00 2001 From: "cuiqing.li" Date: Wed, 8 Sep 2021 14:09:50 +0800 Subject: [PATCH 49/58] fix gpu build --- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 11cafcd21f09..23b0adf0670c 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -75,13 +75,12 @@ class TensorRTRuntime : public JSONRuntimeBase { multi_engine_mode_(false) { const bool use_int8 = dmlc::GetEnv("TVM_TENSORRT_USE_INT8", false); multi_engine_mode_ = dmlc::GetEnv("TVM_TENSORRT_MULTI_ENGINE", false); + num_calibration_batches_remaining_ = dmlc::GetEnv("TENSORRT_NUM_CALI_INT8", 0); if (use_int8) { - const int extract_cali_num = dmlc::GetEnv("TENSORRT_NUM_CALI_INT8", 0); - ICHECK(extract_cali_num != 0) << "When using INT8 mode, " + ICHECK(num_calibration_batches_remaining_ != 0) << "When using INT8 mode, " << "environment variable TENSORRT_NUM_CALI_INT8" << "must also be set to specify the number of " << "calibration times"; - num_calibration_batches_remaining_ = extract_cali_num; LOG(INFO) << "settiing up " << num_calibration_batches_remaining_ << " sample data to calibrate data ... "; ICHECK(multi_engine_mode_ == false) << "When using int8 mode, " @@ -449,8 +448,6 @@ class TensorRTRuntime : public JSONRuntimeBase { /*! \brief Calibrator for INT8 mode. */ std::unique_ptr calibrator_; - /*! \brief Number of calibration batches until we are done. */ - int num_calibration_batches_remaining_; /*! \brief Map of inding index to GPU buffers for inputs and outputs. Only used when target device * is not "cuda". Since TensorRT execution can only read data from GPU, we need to copy data from @@ -481,6 +478,9 @@ class TensorRTRuntime : public JSONRuntimeBase { size_t max_workspace_size_; + /*! \brief Number of calibration batches until we are done. */ + int num_calibration_batches_remaining_; + /*! \brief Highest batch size that an engine has been built for, used in single-engine mode only * (multi_engine_mode=false). 
From 90eabe1cbf1027b163c8aba3aa0410dd2e135ba6 Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Wed, 8 Sep 2021 14:09:50 +0800
Subject: [PATCH 49/58] fix gpu build

---
 src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index 11cafcd21f09..23b0adf0670c 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -75,13 +75,12 @@ class TensorRTRuntime : public JSONRuntimeBase {
        multi_engine_mode_(false) {
    const bool use_int8 = dmlc::GetEnv("TVM_TENSORRT_USE_INT8", false);
    multi_engine_mode_ = dmlc::GetEnv("TVM_TENSORRT_MULTI_ENGINE", false);
+    num_calibration_batches_remaining_ = dmlc::GetEnv("TENSORRT_NUM_CALI_INT8", 0);
    if (use_int8) {
-      const int extract_cali_num = dmlc::GetEnv("TENSORRT_NUM_CALI_INT8", 0);
-      ICHECK(extract_cali_num != 0) << "When using INT8 mode, "
+      ICHECK(num_calibration_batches_remaining_ != 0) << "When using INT8 mode, "
                                    << "environment variable TENSORRT_NUM_CALI_INT8"
                                    << "must also be set to specify the number of "
                                    << "calibration times";
-      num_calibration_batches_remaining_ = extract_cali_num;
      LOG(INFO) << "settiing up " << num_calibration_batches_remaining_
                << " sample data to calibrate data ... ";
      ICHECK(multi_engine_mode_ == false) << "When using int8 mode, "
                                          << "multi-engine is not allowed";
@@ -449,8 +448,6 @@ class TensorRTRuntime : public JSONRuntimeBase {
  /*! \brief Calibrator for INT8 mode. */
  std::unique_ptr<TensorRTCalibrator> calibrator_;
-  /*! \brief Number of calibration batches until we are done. */
-  int num_calibration_batches_remaining_;

  /*! \brief Map of inding index to GPU buffers for inputs and outputs. Only used when target
   * device is not "cuda". Since TensorRT execution can only read data from GPU, we need to copy
   * data from
@@ -481,6 +478,9 @@ class TensorRTRuntime : public JSONRuntimeBase {
  size_t max_workspace_size_;

+  /*! \brief Number of calibration batches until we are done. */
+  int num_calibration_batches_remaining_;
+
  /*! \brief Highest batch size that an engine has been built for, used in single-engine mode only
   * (multi_engine_mode=false). */
  int max_batch_size_;

From 89c8eeb687155e2391f7c43d10024c7351eb067c Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Wed, 8 Sep 2021 14:13:43 +0800
Subject: [PATCH 50/58] fixed ci

---
 src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index 23b0adf0670c..a5779f739dac 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -77,10 +77,11 @@ class TensorRTRuntime : public JSONRuntimeBase {
    multi_engine_mode_ = dmlc::GetEnv("TVM_TENSORRT_MULTI_ENGINE", false);
    num_calibration_batches_remaining_ = dmlc::GetEnv("TENSORRT_NUM_CALI_INT8", 0);
    if (use_int8) {
-      ICHECK(num_calibration_batches_remaining_ != 0) << "When using INT8 mode, "
-                                    << "environment variable TENSORRT_NUM_CALI_INT8"
-                                    << "must also be set to specify the number of "
-                                    << "calibration times";
+      ICHECK(num_calibration_batches_remaining_ != 0)
+          << "When using INT8 mode, "
+          << "environment variable TENSORRT_NUM_CALI_INT8"
+          << "must also be set to specify the number of "
+          << "calibration times";
      LOG(INFO) << "settiing up " << num_calibration_batches_remaining_
                << " sample data to calibrate data ... ";
      ICHECK(multi_engine_mode_ == false) << "When using int8 mode, "
                                          << "multi-engine is not allowed";
From 30648d6c87fe7f5e8e6af2f948eb6d983108ffbc Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Wed, 8 Sep 2021 15:39:44 +0800
Subject: [PATCH 51/58] update trt int8 test file

---
 .../python/contrib/test_tensorrt_int8_exp.py  | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py
index a732854a1b55..dbbf37a0fe86 100644
--- a/tests/python/contrib/test_tensorrt_int8_exp.py
+++ b/tests/python/contrib/test_tensorrt_int8_exp.py
@@ -31,8 +31,29 @@
 import torch
 import torchvision
 from torchvision import transforms
+from test_tensorrt import
 
+
+def skip_codegen_test():
+    """Skip test if TensorRT and CUDA codegen are not present"""
+    if not tvm.runtime.enabled("cuda") or not tvm.cuda(0).exist:
+        print("Skip because CUDA is not enabled.")
+        return True
+    if not tvm.get_global_func("relay.ext.tensorrt", True):
+        print("Skip because TensorRT codegen is not available.")
+        return True
+    return False
+
+
+def skip_runtime_test():
+    if not tvm.runtime.enabled("cuda") or not tvm.cuda(0).exist:
+        print("Skip because CUDA is not enabled.")
+        return True
+    if not tensorrt.is_tensorrt_runtime_enabled():
+        print("Skip because TensorRT runtime is not available.")
+        return True
+    return False
+
 def cosine_distance(a, b):
     res = distance.cosine(a, b)
     return res
@@ -44,6 +65,9 @@ def test_trt_int8():
     and compare cosine distance between the output of the original model
     and trt int8 tvm ouput
     """
+    if skip_codegen_test() or skip_runtime_test():
+        return
+
     os.environ["TVM_TENSORRT_USE_INT8"] = "1"
     os.environ["TENSORRT_NUM_CALI_INT8"] = "10"
     model_name = "resnet34"

From 3c85b9a32096fc31210a46c87d4f4b5c564d3b97 Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Wed, 8 Sep 2021 15:48:02 +0800
Subject: [PATCH 52/58] fix bug

---
 tests/python/contrib/test_tensorrt_int8_exp.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py
index dbbf37a0fe86..58f9ae29624b 100644
--- a/tests/python/contrib/test_tensorrt_int8_exp.py
+++ b/tests/python/contrib/test_tensorrt_int8_exp.py
@@ -31,7 +31,6 @@
 import torch
 import torchvision
 from torchvision import transforms
-from test_tensorrt import
 
 
 def skip_codegen_test():
@@ -67,7 +66,7 @@ def test_trt_int8():
     """
     if skip_codegen_test() or skip_runtime_test():
         return
-    
+
     os.environ["TVM_TENSORRT_USE_INT8"] = "1"
     os.environ["TENSORRT_NUM_CALI_INT8"] = "10"
     model_name = "resnet34"

From 41be39a4333c1d4aea73752ac044c9cf6755e520 Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Wed, 8 Sep 2021 15:50:36 +0800
Subject: [PATCH 53/58] fix bug

---
 tests/python/contrib/test_tensorrt_int8_exp.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py
index 58f9ae29624b..50de0d2f2a57 100644
--- a/tests/python/contrib/test_tensorrt_int8_exp.py
+++ b/tests/python/contrib/test_tensorrt_int8_exp.py
@@ -26,6 +26,7 @@
 from tvm import relay
 from tvm.contrib.download import download_testdata
 from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt
+from tvm.relay.op.contrib import tensorrt
 
 # PyTorch imports
 import torch
@@ -142,4 +143,5 @@ def test_trt_int8():
 
 if __name__ == "__main__":
-    pytest.main([__file__])
+    test_trt_int8()
+    # pytest.main([__file__])

From c7d2bcc199e37736e0def082e5cbd6a265db0686 Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Wed, 8 Sep 2021 15:53:30 +0800
Subject: [PATCH 54/58] update trtint8 file

---
 tests/python/contrib/test_tensorrt_int8_exp.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py
index 50de0d2f2a57..eeb219da3a79 100644
--- a/tests/python/contrib/test_tensorrt_int8_exp.py
+++ b/tests/python/contrib/test_tensorrt_int8_exp.py
@@ -143,5 +143,4 @@ def test_trt_int8():
 
 if __name__ == "__main__":
-    test_trt_int8()
-    # pytest.main([__file__])
+    pytest.main([__file__])

From 2d8ce428b6a9f7d967edf7c13bc4abf539f4e349 Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Wed, 8 Sep 2021 15:57:38 +0800
Subject: [PATCH 55/58] reformat

---
 tests/python/contrib/test_tensorrt_int8_exp.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py
index eeb219da3a79..5b18995d331d 100644
--- a/tests/python/contrib/test_tensorrt_int8_exp.py
+++ b/tests/python/contrib/test_tensorrt_int8_exp.py
@@ -54,6 +54,7 @@ def skip_runtime_test():
         return True
     return False
 
+
 def cosine_distance(a, b):
     res = distance.cosine(a, b)
     return res
From 63cf965cd3154820b634b2caa5fc76c5fac69cd7 Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Wed, 8 Sep 2021 17:36:38 +0800
Subject: [PATCH 56/58] update trt int8 file

---
 .../python/contrib/test_tensorrt_int8_exp.py  | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py
index 5b18995d331d..ed25d1533647 100644
--- a/tests/python/contrib/test_tensorrt_int8_exp.py
+++ b/tests/python/contrib/test_tensorrt_int8_exp.py
@@ -17,9 +17,6 @@
 import pytest
 import os
 import numpy as np
-import cv2
-from PIL import Image
-from scipy.spatial import distance
 
 import tvm
 import tvm.relay.testing
@@ -55,11 +52,6 @@ def skip_runtime_test():
     return False
 
 
-def cosine_distance(a, b):
-    res = distance.cosine(a, b)
-    return res
-
-
 def test_trt_int8():
     """
     This Function is used to use tensorrt int8 to compile a resnet34 model,
@@ -68,7 +60,14 @@ def test_trt_int8():
     """
     if skip_codegen_test() or skip_runtime_test():
         return
-    
+
+    try:
+        from PIL import Image
+        from scipy.spatial import distance
+    except:
+        print("install scipy and Image python package")
+        return
+
     os.environ["TVM_TENSORRT_USE_INT8"] = "1"
     os.environ["TENSORRT_NUM_CALI_INT8"] = "10"
     model_name = "resnet34"
@@ -129,7 +128,7 @@ def test_trt_int8():
     model = scripted_model.eval()
     torch_output = model(torch_data)
 
-    cosine_distance_res = cosine_distance(out.numpy(), torch_output.detach().cpu().numpy())
+    cosine_distance_res = distance.cosine(out.numpy(), torch_output.detach().cpu().numpy())
     assert cosine_distance_res <= 0.01
 
     # Evaluate

From 5685800dcb104e45714b2f567f5bffd49b9da29d Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Wed, 8 Sep 2021 17:40:11 +0800
Subject: [PATCH 57/58] update

---
 tests/python/contrib/test_tensorrt_int8_exp.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py
index ed25d1533647..ed79b2e5e2e7 100644
--- a/tests/python/contrib/test_tensorrt_int8_exp.py
+++ b/tests/python/contrib/test_tensorrt_int8_exp.py
@@ -60,14 +60,14 @@ def test_trt_int8():
     """
     if skip_codegen_test() or skip_runtime_test():
         return
-    
+
     try:
         from PIL import Image
         from scipy.spatial import distance
     except:
         print("install scipy and Image python package")
         return
-    
+
     os.environ["TVM_TENSORRT_USE_INT8"] = "1"
     os.environ["TENSORRT_NUM_CALI_INT8"] = "10"
     model_name = "resnet34"

From c0e931d1b50f70b9da6c1e2c7b92ac170d79a090 Mon Sep 17 00:00:00 2001
From: "cuiqing.li"
Date: Thu, 9 Sep 2021 01:33:07 +0800
Subject: [PATCH 58/58] modify

---
 tests/python/contrib/test_tensorrt_int8_exp.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/tests/python/contrib/test_tensorrt_int8_exp.py b/tests/python/contrib/test_tensorrt_int8_exp.py
index ed79b2e5e2e7..84360e92d33b 100644
--- a/tests/python/contrib/test_tensorrt_int8_exp.py
+++ b/tests/python/contrib/test_tensorrt_int8_exp.py
@@ -25,11 +25,6 @@
 from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt
 from tvm.relay.op.contrib import tensorrt
 
-# PyTorch imports
-import torch
-import torchvision
-from torchvision import transforms
-
 
 def skip_codegen_test():
     """Skip test if TensorRT and CUDA codegen are not present"""
@@ -65,7 +60,15 @@ def test_trt_int8():
         from PIL import Image
         from scipy.spatial import distance
     except:
-        print("install scipy and Image python package")
+        print("please install scipy and Image python packages")
         return
+
+    try:
+        import torch
+        import torchvision
+        from torchvision import transforms
+    except:
+        print("please install pytorch python package")
+        return
 
     os.environ["TVM_TENSORRT_USE_INT8"] = "1"
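Taken together, the series leaves tests/python/contrib/test_tensorrt_int8_exp.py exercising the full INT8 path: set the environment variables, build with TensorRT offload, run the calibration passes, then rerun to get the INT8 engine. The condensed sketch below replays that flow with a toy one-operator Relay model so it stays self-contained; the real test uses the imported resnet34 instead:

    import os
    import numpy as np
    import tvm
    from tvm import relay
    from tvm.contrib import graph_executor
    from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt

    os.environ["TVM_TENSORRT_USE_INT8"] = "1"
    os.environ["TENSORRT_NUM_CALI_INT8"] = "10"

    # Toy stand-in for the test's resnet34 import.
    x = relay.var("data", shape=(1, 3, 224, 224), dtype="float32")
    mod = tvm.IRModule.from_expr(relay.Function([x], relay.nn.relu(x)))
    params = {}

    mod, config = partition_for_tensorrt(mod, params)
    with tvm.transform.PassContext(opt_level=3, config={"relay.ext.tensorrt.options": config}):
        lib = relay.build(mod, target="cuda", params=params)

    dev = tvm.cuda(0)
    gen_module = graph_executor.GraphModule(lib["default"](dev))
    img = np.random.uniform(size=(1, 3, 224, 224)).astype("float32")

    # Calibration passes, then one more run that builds and uses the INT8 engine.
    for _ in range(int(os.environ["TENSORRT_NUM_CALI_INT8"])):
        gen_module.run(data=tvm.nd.array(img))
    gen_module.run(data=tvm.nd.array(img))
    out = gen_module.get_output(0)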