From 2a3e8357a1801b486d3f0f3d49a9f2bc8c7e00f5 Mon Sep 17 00:00:00 2001 From: caenorst Date: Mon, 6 May 2019 16:36:59 -0400 Subject: [PATCH 1/3] prevent TRT_Logger to be destroyed before TRT engine --- .../subgraph/tensorrt/onnx_to_tensorrt.cc | 10 ++--- .../subgraph/tensorrt/onnx_to_tensorrt.h | 43 ++++++++----------- src/operator/subgraph/tensorrt/tensorrt-inl.h | 4 ++ src/operator/subgraph/tensorrt/tensorrt.cc | 4 +- 4 files changed, 31 insertions(+), 30 deletions(-) diff --git a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc index 7dbc54bc1a63..82ab52cc9f23 100644 --- a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc +++ b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc @@ -83,7 +83,7 @@ void PrintVersion() { << NV_TENSORRT_PATCH << endl; } -std::tuple onnxToTrtCtx( +std::tuple onnxToTrtCtx( const std::string& onnx_model, int32_t max_batch_size, size_t max_workspace_size, @@ -91,10 +91,10 @@ std::tuple onnxToTrtCtx( bool debug_builder) { GOOGLE_PROTOBUF_VERIFY_VERSION; - TRT_Logger trt_logger(verbosity); - auto trt_builder = InferObject(nvinfer1::createInferBuilder(trt_logger)); + TRT_Logger* trt_logger = new TRT_Logger(verbosity); + auto trt_builder = InferObject(nvinfer1::createInferBuilder(*trt_logger)); auto trt_network = InferObject(trt_builder->createNetwork()); - auto trt_parser = nvonnxparser::createParser(trt_network.get(), trt_logger); + auto trt_parser = nvonnxparser::createParser(trt_network.get(), *trt_logger); ::ONNX_NAMESPACE::ModelProto parsed_model; // We check for a valid parse, but the main effect is the side effect // of populating parsed_model @@ -140,7 +140,7 @@ std::tuple onnxToTrtCtx( trt_builder->setMaxWorkspaceSize(max_workspace_size); trt_builder->setDebugSync(debug_builder); nvinfer1::ICudaEngine* trt_engine = trt_builder->buildCudaEngine(*trt_network.get()); - return std::make_tuple(trt_engine, trt_parser); + return std::make_tuple(trt_engine, trt_parser, trt_logger); } } // namespace onnx_to_tensorrt diff --git a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h index 3e8ea1bf9ee1..73add5f23980 100644 --- a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h +++ b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h @@ -41,32 +41,28 @@ namespace onnx_to_tensorrt { class TRT_Logger : public nvinfer1::ILogger { - nvinfer1::ILogger::Severity _verbosity; - std::ostream* _ostream; + nvinfer1::ILogger::Severity _verbosity; + std::ostream* _ostream; public: - TRT_Logger(Severity verbosity = Severity::kWARNING, - std::ostream& ostream = std::cout) - : _verbosity(verbosity), _ostream(&ostream) {} - void log(Severity severity, const char* msg) override { - if ( severity <= _verbosity ) { - time_t rawtime = std::time(0); - char buf[256]; - strftime(&buf[0], 256, - "%Y-%m-%d %H:%M:%S", - std::gmtime(&rawtime)); - const char* sevstr = (severity == Severity::kINTERNAL_ERROR ? " BUG" : - severity == Severity::kERROR ? " ERROR" : - severity == Severity::kWARNING ? "WARNING" : - severity == Severity::kINFO ? " INFO" : - "UNKNOWN"); - (*_ostream) << "[" << buf << " " << sevstr << "] " - << msg - << std::endl; - } - } + TRT_Logger(Severity verbosity = Severity::kWARNING, + std::ostream& ostream = std::cout) : + _verbosity(verbosity), _ostream(&ostream) {} + void log(Severity severity, const char* msg) override { + if (severity <= _verbosity) { + time_t rawtime = std::time(0); + char buf[256]; + strftime(&buf[0], 256, "%Y-%m-%d %H:%M:%S", std::gmtime(&rawtime)); + const char* sevstr = (severity == Severity::kINTERNAL_ERROR ? " BUG" : + severity == Severity::kERROR ? " ERROR" : + severity == Severity::kWARNING ? "WARNING" : + severity == Severity::kINFO ? " INFO" : + "UNKNOWN"); + (*_ostream) << "[" << buf << " " << sevstr << "] " << msg << std::endl; + } + } }; -std::tuple onnxToTrtCtx( +std::tuple onnxToTrtCtx( const std::string& onnx_model, int32_t max_batch_size = 32, size_t max_workspace_size = 1L << 30, @@ -75,5 +71,4 @@ std::tuple onnxToTrtCtx( } // namespace onnx_to_tensorrt #endif // MXNET_USE_TENSORRT - #endif // MXNET_OPERATOR_SUBGRAPH_TENSORRT_ONNX_TO_TENSORRT_H_ diff --git a/src/operator/subgraph/tensorrt/tensorrt-inl.h b/src/operator/subgraph/tensorrt/tensorrt-inl.h index e258d892aaba..c24d94dc01cb 100644 --- a/src/operator/subgraph/tensorrt/tensorrt-inl.h +++ b/src/operator/subgraph/tensorrt/tensorrt-inl.h @@ -53,6 +53,7 @@ struct TRTParam { struct TRTEngineParam { TRTEngineParam(nvinfer1::ICudaEngine* trt_engine, nvonnxparser::IParser* _parser, + onnx_to_tensorrt::TRT_Logger* _logger, const std::unordered_map input_map, const std::unordered_map output_map) { binding_order = std::make_shared > >(); @@ -67,6 +68,7 @@ struct TRTEngineParam { binding_order->emplace_back(output_map.at(binding_name), false); } } + trt_logger = _logger; trt_executor = trt_engine->createExecutionContext(); trt_parser = _parser; } @@ -74,9 +76,11 @@ struct TRTEngineParam { ~TRTEngineParam() { trt_parser->destroy(); trt_executor->destroy(); + delete trt_logger; } nvinfer1::IExecutionContext* trt_executor; nvonnxparser::IParser* trt_parser; + onnx_to_tensorrt::TRT_Logger* trt_logger; std::shared_ptr > > binding_order; std::shared_ptr > bindings; }; diff --git a/src/operator/subgraph/tensorrt/tensorrt.cc b/src/operator/subgraph/tensorrt/tensorrt.cc index 30fcee007cfc..85d16b7e5169 100644 --- a/src/operator/subgraph/tensorrt/tensorrt.cc +++ b/src/operator/subgraph/tensorrt/tensorrt.cc @@ -312,7 +312,9 @@ OpStatePtr TRTCreateState(const nnvm::NodeAttrs& attrs, Context ctx, graph.attrs["shape"] = std::make_shared(std::move(shapes)); auto onnx_graph = op::nnvm_to_onnx::ConvertNnvmGraphToOnnx(graph, ¶ms_map); auto trt_tuple = ::onnx_to_tensorrt::onnxToTrtCtx(onnx_graph, max_batch_size, 1 << 30); - return OpStatePtr::Create(std::get<0>(trt_tuple), std::get<1>(trt_tuple), + return OpStatePtr::Create(std::get<0>(trt_tuple), + std::get<1>(trt_tuple), + std::get<2>(trt_tuple), inputs_to_idx, outputs_to_idx); } From a6d099d4a0794aa03fdb262b32b2fa8ad352991e Mon Sep 17 00:00:00 2001 From: cfujitsang Date: Thu, 27 Jun 2019 23:51:52 -0400 Subject: [PATCH 2/3] use unique_ptr for trt_logger/parser/engine/executor ownership --- .../subgraph/tensorrt/onnx_to_tensorrt.cc | 35 ++++++------------- .../subgraph/tensorrt/onnx_to_tensorrt.h | 25 ++++++++++++- src/operator/subgraph/tensorrt/tensorrt-inl.h | 29 +++++++-------- src/operator/subgraph/tensorrt/tensorrt.cc | 6 ++-- 4 files changed, 51 insertions(+), 44 deletions(-) diff --git a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc index 82ab52cc9f23..7f087b1e140e 100644 --- a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc +++ b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc @@ -48,23 +48,6 @@ using std::endl; namespace onnx_to_tensorrt { -struct InferDeleter { - template - void operator()(T* obj) const { - if ( obj ) { - obj->destroy(); - } - } -}; - -template -inline std::shared_ptr InferObject(T* obj) { - if ( !obj ) { - throw std::runtime_error("Failed to create object"); - } - return std::shared_ptr(obj, InferDeleter()); -} - std::string onnx_ir_version_string(int64_t ir_version = onnx::IR_VERSION) { int onnx_ir_major = ir_version / 1000000; int onnx_ir_minor = ir_version % 1000000 / 10000; @@ -83,7 +66,9 @@ void PrintVersion() { << NV_TENSORRT_PATCH << endl; } -std::tuple onnxToTrtCtx( +std::tuple, + unique_ptr, + std::unique_ptr > onnxToTrtCtx( const std::string& onnx_model, int32_t max_batch_size, size_t max_workspace_size, @@ -91,10 +76,10 @@ std::tuple onnxToTr bool debug_builder) { GOOGLE_PROTOBUF_VERIFY_VERSION; - TRT_Logger* trt_logger = new TRT_Logger(verbosity); - auto trt_builder = InferObject(nvinfer1::createInferBuilder(*trt_logger)); - auto trt_network = InferObject(trt_builder->createNetwork()); - auto trt_parser = nvonnxparser::createParser(trt_network.get(), *trt_logger); + auto trt_logger = std::unique_ptr(new TRT_Logger(verbosity)); + auto trt_builder = nvinfer1::createInferBuilder(*trt_logger); + auto trt_network = trt_builder->createNetwork(); + auto trt_parser = InferObject(nvonnxparser::createParser(trt_network, *trt_logger)); ::ONNX_NAMESPACE::ModelProto parsed_model; // We check for a valid parse, but the main effect is the side effect // of populating parsed_model @@ -139,8 +124,10 @@ std::tuple onnxToTr trt_builder->setMaxBatchSize(max_batch_size); trt_builder->setMaxWorkspaceSize(max_workspace_size); trt_builder->setDebugSync(debug_builder); - nvinfer1::ICudaEngine* trt_engine = trt_builder->buildCudaEngine(*trt_network.get()); - return std::make_tuple(trt_engine, trt_parser, trt_logger); + unique_ptr trt_engine = InferObject(trt_builder->buildCudaEngine(*trt_network)); + trt_builder->destroy(); + trt_network->destroy(); + return std::make_tuple(std::move(trt_engine), std::move(trt_parser), std::move(trt_logger)); } } // namespace onnx_to_tensorrt diff --git a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h index 73add5f23980..b89422f59069 100644 --- a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h +++ b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.h @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -40,6 +41,26 @@ namespace onnx_to_tensorrt { +struct InferDeleter { + template + void operator()(T* obj) const { + if ( obj ) { + obj->destroy(); + } + } +}; + +template +using unique_ptr = std::unique_ptr; + +template +inline unique_ptr InferObject(T* obj) { + if ( !obj ) { + throw std::runtime_error("Failed to create object"); + } + return unique_ptr(obj, InferDeleter()); +} + class TRT_Logger : public nvinfer1::ILogger { nvinfer1::ILogger::Severity _verbosity; std::ostream* _ostream; @@ -62,7 +83,9 @@ class TRT_Logger : public nvinfer1::ILogger { } }; -std::tuple onnxToTrtCtx( +std::tuple, + unique_ptr, + std::unique_ptr > onnxToTrtCtx( const std::string& onnx_model, int32_t max_batch_size = 32, size_t max_workspace_size = 1L << 30, diff --git a/src/operator/subgraph/tensorrt/tensorrt-inl.h b/src/operator/subgraph/tensorrt/tensorrt-inl.h index c24d94dc01cb..c175ac4d2aa3 100644 --- a/src/operator/subgraph/tensorrt/tensorrt-inl.h +++ b/src/operator/subgraph/tensorrt/tensorrt-inl.h @@ -51,11 +51,14 @@ struct TRTParam { }; struct TRTEngineParam { - TRTEngineParam(nvinfer1::ICudaEngine* trt_engine, - nvonnxparser::IParser* _parser, - onnx_to_tensorrt::TRT_Logger* _logger, - const std::unordered_map input_map, - const std::unordered_map output_map) { + TRTEngineParam(onnx_to_tensorrt::unique_ptr _trt_engine, + onnx_to_tensorrt::unique_ptr _trt_parser, + std::unique_ptr _trt_logger, + const std::unordered_map& input_map, + const std::unordered_map& output_map) { + trt_engine = std::move(_trt_engine); + trt_logger = std::move(_trt_logger); + trt_parser = std::move(_trt_parser); binding_order = std::make_shared > >(); bindings = std::make_shared >(); binding_order->reserve(trt_engine->getNbBindings()); @@ -68,19 +71,13 @@ struct TRTEngineParam { binding_order->emplace_back(output_map.at(binding_name), false); } } - trt_logger = _logger; - trt_executor = trt_engine->createExecutionContext(); - trt_parser = _parser; + trt_executor = onnx_to_tensorrt::InferObject(trt_engine->createExecutionContext()); } - ~TRTEngineParam() { - trt_parser->destroy(); - trt_executor->destroy(); - delete trt_logger; - } - nvinfer1::IExecutionContext* trt_executor; - nvonnxparser::IParser* trt_parser; - onnx_to_tensorrt::TRT_Logger* trt_logger; + onnx_to_tensorrt::unique_ptr trt_engine; + onnx_to_tensorrt::unique_ptr trt_executor; + onnx_to_tensorrt::unique_ptr trt_parser; + std::unique_ptr trt_logger; std::shared_ptr > > binding_order; std::shared_ptr > bindings; }; diff --git a/src/operator/subgraph/tensorrt/tensorrt.cc b/src/operator/subgraph/tensorrt/tensorrt.cc index 85d16b7e5169..77838c3f995a 100644 --- a/src/operator/subgraph/tensorrt/tensorrt.cc +++ b/src/operator/subgraph/tensorrt/tensorrt.cc @@ -312,9 +312,9 @@ OpStatePtr TRTCreateState(const nnvm::NodeAttrs& attrs, Context ctx, graph.attrs["shape"] = std::make_shared(std::move(shapes)); auto onnx_graph = op::nnvm_to_onnx::ConvertNnvmGraphToOnnx(graph, ¶ms_map); auto trt_tuple = ::onnx_to_tensorrt::onnxToTrtCtx(onnx_graph, max_batch_size, 1 << 30); - return OpStatePtr::Create(std::get<0>(trt_tuple), - std::get<1>(trt_tuple), - std::get<2>(trt_tuple), + return OpStatePtr::Create(std::move(std::get<0>(trt_tuple)), + std::move(std::get<1>(trt_tuple)), + std::move(std::get<2>(trt_tuple)), inputs_to_idx, outputs_to_idx); } From 2aa64232e9f7fc992f9a5ad95cacb23364c6b99d Mon Sep 17 00:00:00 2001 From: cfujitsang Date: Fri, 28 Jun 2019 01:28:24 -0400 Subject: [PATCH 3/3] reduce line length for lint --- src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc index 7f087b1e140e..27f6da436a8f 100644 --- a/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc +++ b/src/operator/subgraph/tensorrt/onnx_to_tensorrt.cc @@ -124,7 +124,7 @@ std::tuple, trt_builder->setMaxBatchSize(max_batch_size); trt_builder->setMaxWorkspaceSize(max_workspace_size); trt_builder->setDebugSync(debug_builder); - unique_ptr trt_engine = InferObject(trt_builder->buildCudaEngine(*trt_network)); + auto trt_engine = InferObject(trt_builder->buildCudaEngine(*trt_network)); trt_builder->destroy(); trt_network->destroy(); return std::make_tuple(std::move(trt_engine), std::move(trt_parser), std::move(trt_logger));