2 changes: 1 addition & 1 deletion src/runtime/contrib/json/json_node.h
@@ -256,7 +256,7 @@ class JSONGraphNode {
*/
template <typename T>
T GetAttr(const std::string& key) const {
- ICHECK_GT(attrs_.count(key), 0U) << "Key: " << key << "is not found";
+ ICHECK_GT(attrs_.count(key), 0U) << "Key: " << key << " is not found";
return dmlc::get<T>(attrs_.at(key));
}

7 changes: 6 additions & 1 deletion src/runtime/contrib/tensorrt/tensorrt_builder.cc
@@ -171,14 +171,19 @@ TensorRTEngineAndContext TensorRTBuilder::BuildEngine() {
CleanUp();

// Allocate I/O buffers on GPU for TVM inputs which are on a different context.
+ std::vector<runtime::NDArray> device_buffers = CreateDeviceBuffers(engine);
+ return {engine, context, network_input_names_, network_output_names_, device_buffers};
+ }
+
+ std::vector<runtime::NDArray> TensorRTBuilder::CreateDeviceBuffers(nvinfer1::ICudaEngine* engine) {
std::vector<runtime::NDArray> device_buffers(engine->getNbBindings());
for (size_t i = 0; i < network_input_names_.size(); ++i) {
AllocateDeviceBuffer(engine, network_input_names_[i], &device_buffers);
}
for (size_t i = 0; i < network_output_names_.size(); ++i) {
AllocateDeviceBuffer(engine, network_output_names_[i], &device_buffers);
}
- return {engine, context, network_input_names_, network_output_names_, device_buffers};
+ return device_buffers;
}

nvinfer1::Weights TensorRTBuilder::GetDLTensorAsWeights(const DLTensor* dptr,
6 changes: 6 additions & 0 deletions src/runtime/contrib/tensorrt/tensorrt_builder.h
@@ -113,6 +113,12 @@ class TensorRTBuilder {
*/
TensorRTEngineAndContext BuildEngine();

+ /*!
+ * \brief Create device buffers.
+ * \param engine Pointer to the built ICudaEngine for which the I/O device buffers are allocated.
+ */
+ std::vector<runtime::NDArray> CreateDeviceBuffers(nvinfer1::ICudaEngine* engine);
+
private:
/*! \brief Convert a DLTensor to a TensorRT weight. */
nvinfer1::Weights GetDLTensorAsWeights(const DLTensor* dptr, DLDeviceType src_device);
20 changes: 18 additions & 2 deletions src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -83,8 +83,8 @@ class TensorRTRuntime : public JSONRuntimeBase {
ICHECK_EQ(consts.size(), const_idx_.size())
<< "The number of input constants must match the number of required.";
LoadGlobalAttributes();
- if (GetCachedEnginesFromDisk()) return;
SetupConstants(consts);
+ GetCachedEnginesFromDisk();
}

void LoadGlobalAttributes() {
@@ -178,7 +178,13 @@ class TensorRTRuntime : public JSONRuntimeBase {
*/
void BuildEngine() {
batch_size_ = data_entry_[input_var_eid_[0]]->shape[0];
- if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) return;
+ if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) {
+ TensorRTEngineAndContext& engine_and_context =
+ trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_));
+ if (!engine_and_context.device_buffers.empty()) {
+ return;
+ }
+ }
DLOG(INFO) << "Building new TensorRT engine for subgraph " << symbol_name_
<< " with batch size " << batch_size_;
const bool use_fp16 = dmlc::GetEnv("TVM_TENSORRT_USE_FP16", false);
@@ -211,6 +217,16 @@
builder.AddOutput(outputs_[i], EntryID(outputs_[i]));
}

+ // Allocate Device Buffers
+ if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) {
+ TensorRTEngineAndContext& engine_and_context =
+ trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_));
+ if (engine_and_context.device_buffers.empty()) {
+ engine_and_context.device_buffers = builder.CreateDeviceBuffers(engine_and_context.engine);
+ return;
Contributor:
We are building the TRT network in the TensorRTBuilder, but exiting before BuildEngine is called. This means the resources used by the builder won't ever be freed; TensorRTBuilder::CleanUp() needs to be called.

Contributor:
We also shouldn't have to rebuild the whole network just to allocate the buffers.

Contributor Author (@lsy643, Feb 1, 2021):
If the CleanUp is added, there will be a segmentation fault, so I don't call CleanUp.

In the BuildEngine function in tensorrt_runtime.cc, the whole network is not actually rebuilt, since the function returns before builder.BuildEngine gets called.

Contributor:
I think the best solution is to move CreateDeviceBuffers out of TensorRTBuilder and into the runtime module. That way we can call it without the unnecessary allocations and creations done by TensorRTBuilder.
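
A rough sketch of that direction, purely for illustration: a helper living in the runtime module that fills device_buffers using only the cached engine. The TensorRTEngineAndContext members inputs/outputs, the fixed float32 dtype, and the shape lookup via binding dimensions are assumptions for the sketch, not code from this PR.

void CreateDeviceBuffers(TensorRTEngineAndContext* engine_and_context) {
  // Allocate one GPU buffer per engine binding without constructing a TensorRTBuilder.
  nvinfer1::ICudaEngine* engine = engine_and_context->engine;
  engine_and_context->device_buffers.resize(engine->getNbBindings());
  // Assumed: the struct keeps the binding names recorded when the engine was built.
  std::vector<std::string> names = engine_and_context->inputs;
  names.insert(names.end(), engine_and_context->outputs.begin(),
               engine_and_context->outputs.end());
  for (const std::string& name : names) {
    const int binding_index = engine->getBindingIndex(name.c_str());
    ICHECK_NE(binding_index, -1) << "Unknown TensorRT binding: " << name;
    // Simplified: shape comes from the binding dims and dtype is fixed to float32;
    // the real code would derive both from the graph's data_entry_.
    nvinfer1::Dims dims = engine->getBindingDimensions(binding_index);
    std::vector<int64_t> shape(dims.d, dims.d + dims.nbDims);
    engine_and_context->device_buffers[binding_index] =
        runtime::NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, {kDLGPU, 0});
  }
}

With a helper along these lines, the cached-engine path could populate device_buffers directly instead of re-running the network construction done by TensorRTBuilder.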

+ }
+ }
+
// Build engine.
trt_engine_cache_[std::make_pair(symbol_name_, batch_size_)] = builder.BuildEngine();
DLOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_