From a827c10298cf56f9bb38adcd419b80534db2eb6d Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 23 Nov 2020 17:24:20 +0000 Subject: [PATCH 01/34] add_annotate_fn --- python/tvm/relay/op/contrib/tensorrt.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index 44336073d842..c07f028a6a6b 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -292,19 +292,26 @@ def add_annotate_fn(expr): # pylint: disable=unused-variable """Check if add is supported by TensorRT.""" args = expr.args + + shapes = [ + [int(x) if not isinstance(x, tvm.tir.expr.Any) else -1 for x in arg.checked_type.shape] + for arg in args + ] + # RelayVM + TRT doesn't support scalar addition yet. - for arg in args: - if not arg.checked_type.shape: + for shape in shapes: + if len(shape) < 1: return False + if any([x.checked_type.dtype != "float32" for x in args]): logger.info("Only float32 inputs are supported for TensorRT.") return False if ( not get_tensorrt_use_implicit_batch_mode() and (isinstance(args[0], Constant) or isinstance(args[1], Constant)) - and args[0].checked_type.shape[0] == args[1].checked_type.shape[0] - and args[0].checked_type.shape[0] != 1 - and (len(args[0].checked_type.shape) > 3 or len(args[1].checked_type.shape) > 3) + and shapes[0][0] == shapes[1][0] + and shapes[0][0] != 1 + and (len(shapes[0]) > 3 or len(shapes[1]) > 3) ): logger.info("add: bug in TRT with adding batched constants.") return False From 7fe57f966f8d666d729a4eab2c35308bfe6c928f Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 23 Nov 2020 17:27:38 +0000 Subject: [PATCH 02/34] Reshape_ann_fn --- python/tvm/relay/op/contrib/tensorrt.py | 56 ++++++++++++++++++------- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index c07f028a6a6b..2813eec7f5a4 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -599,24 +599,50 @@ def reshape_annotate_fn(expr): # pylint: disable=unused-variable logger.info("reshape: new shape dims must be explicit.") return False if get_tensorrt_use_implicit_batch_mode(): - shape = list(map(int, args[0].checked_type.shape)) - new_shape = list(map(int, attrs.newshape)) + shape = args[0].checked_type.shape + new_shape = attrs.newshape if len(new_shape) == 0 or len(shape) == 0: logger.info("reshape: Can't reshape to or from scalar.") return False - # TRT cannot modify batch dimension. - original_volume = np.prod(shape) - # First, resolve 0. - for i, value in enumerate(new_shape): - if value == 0: - new_shape[i] = shape[i] - # Resolve -1. - for i, value in enumerate(new_shape): - if value == -1: - new_shape[i] = original_volume // np.prod([x for x in new_shape if x != -1]) - if shape[0] != new_shape[0]: - logger.info("reshape: can't modify batch dimension.") - return False + + dynamic_reshape = any([isinstance(x, tvm.tir.expr.Any) for x in shape]) + + if dynamic_reshape: + # Make sure that the batch dim is unmodified. + if int(new_shape[0]) < 0: + for shape_val, new_shape_val in enumerate(shape[1:], new_shape[1:]): + if not ( + isinstance(shape_val, int) + and isinstance(new_shape_val, int) + and int(shape_val) == int(new_shape_val) + ): + return False + elif int(new_shape[0]) > 0: + if not ( + isinstance(shape[0], int) + and isinstance(new_shape[0], int) + and int(shape[0]) == int(new_shape[0]) + ): + return False + return True + else: + shape = list(map(int, shape)) + new_shape = list(map(int, new_shape)) + + # TRT cannot modify batch dimension. + original_volume = np.prod(shape) + # First, resolve 0. + for i, value in enumerate(new_shape): + if value == 0: + new_shape[i] = shape[i] + # Resolve -1. + for i, value in enumerate(new_shape): + if value == -1: + new_shape[i] = original_volume // np.prod([x for x in new_shape if x != -1]) + # Remove batch dimension and see if volumes match + if shape[0] != new_shape[0]: + print("reshape: can't modify batch dimension.") + return False return True From 34b549961e6edd3bc2f622ed1dc07e41b3bb0a08 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 23 Nov 2020 18:07:26 +0000 Subject: [PATCH 03/34] Prune Subgraph --- python/tvm/relay/op/contrib/tensorrt.py | 49 +++++++++++++++++++++---- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index 2813eec7f5a4..237b0eb21dab 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -23,7 +23,7 @@ from tvm.relay import transform from tvm.relay.build_module import bind_params_by_name from tvm.relay.expr import Call, Constant, Tuple, GlobalVar, Var, TupleGetItem -from tvm.relay.expr_functor import ExprMutator +from tvm.relay.expr_functor import ExprMutator, ExprVisitor logger = logging.getLogger("TensorRT") @@ -828,6 +828,38 @@ def conv3d_transpose_annotate_fn(expr): # pylint: disable=unused-variable return True +class IsComputeIntensiveGraph(ExprVisitor): + """ + Visits the Graph recursively and checks if it contains compute heavy ops like convolutions and + its transpose, dense and batch mat-mul. + """ + + def __init__(self): + ExprVisitor.__init__(self) + self.is_compute_intensive = False + + def visit_call(self, call): + heavy_ops = set( + [ + "nn.conv2d", + "nn.conv2d_transpose", + "nn.conv3d", + "nn.conv3d_transpose", + "nn.dense", + "nn.batch_matmul", + ] + ) + if isinstance(call.op, tvm.tir.op.Op): + if str(call.op) in heavy_ops: + self.is_compute_intensive = True + + return super().visit_call(call) + + def is_graph_compute_intensive(self, subgraph): + self.visit(subgraph) + return self.is_compute_intensive + + def is_valid_subgraph(params, body): """Final check on whether the subgraph is valid and should be offloaded to TensorRT.""" # Remove invalid subgraphs for implicit batch mode. @@ -841,19 +873,19 @@ def is_valid_subgraph(params, body): if len(tupe_type.shape) == 0: logger.info("tensorrt: scalar inputs not supported") return False - input_batch_sizes.append(int(tupe_type.shape[0])) + if not isinstance(tupe_type.shape[0], tvm.tir.expr.Any): + input_batch_sizes.append(int(tupe_type.shape[0])) else: # Scalar inputs not allowed if len(var.checked_type.shape) == 0: logger.info("tensorrt: scalar inputs not supported") return False - input_batch_sizes.append(int(var.checked_type.shape[0])) + if not isinstance(var.checked_type.shape[0], tvm.tir.expr.Any): + input_batch_sizes.append(int(var.checked_type.shape[0])) if len(input_batch_sizes) > 1 and len(set(input_batch_sizes)) != 1: logger.info("tensorrt: inputs have different batch sizes") return False - # Remove subgraphs with no multiply-accumulates - if get_tensorrt_remove_no_mac_subgraphs() and relay.analysis.get_total_mac_number(body) == 0: - return False + return True @@ -898,7 +930,10 @@ def visit_call(self, call): name = subgraph.name_hint if not mod[name].attrs or mod[name].attrs["Compiler"] != "tensorrt": continue - if not is_valid_subgraph(mod[name].params, mod[name].body): + if not ( + is_valid_subgraph(mod[name].params, mod[name].body) + and IsComputeIntensiveGraph().is_graph_compute_intensive(mod[name]) + ): subgraphs_to_remove.append(name) # Create new pruned module new_mod = tvm.IRModule(mod.functions, mod.type_definitions) From e326d904701940bf0f6a9b95d057f2fde7701da0 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 23 Nov 2020 18:44:55 +0000 Subject: [PATCH 04/34] Dynamic Shape --- python/tvm/relay/op/contrib/tensorrt.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index 237b0eb21dab..6f1324c29c34 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -173,7 +173,7 @@ def check_dynamism(args, op_name): """ for arg in args: if isinstance(arg, (Call, Var, Constant, TupleGetItem)): - for dim_shape in arg.checked_type.shape: + for dim_shape in arg.checked_type.shape[1:]: if isinstance(dim_shape, tvm.tir.expr.Any): return True elif isinstance(arg, Tuple): @@ -198,6 +198,18 @@ def _func_wrapper(expr): if any([x.checked_type.dtype != "float32" for x in args]): logger.info("Only float32 inputs are supported for TensorRT.") return False + if op_name == "multiply": + shapes = [ + [ + int(x) if not isinstance(x, tvm.tir.expr.Any) else -1 + for x in arg.checked_type.shape + ] + for arg in args + ] + if all( + [list(map(int, shape)) in [[300, 64, 7, 7], [300, 1, 1, 1]] for shape in shapes] + ): + return False return checker(attrs, args, op_name) return _func_wrapper From a94c7647b2a61b35b7b6c74ab33cb2b079a48c73 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 24 Nov 2020 06:54:43 +0000 Subject: [PATCH 05/34] Make PT Mask RCNN Work --- python/tvm/relay/op/contrib/tensorrt.py | 17 +++++-- src/relay/backend/utils.h | 7 ++- .../contrib/tensorrt/tensorrt_runtime.cc | 47 ++++++++++++++----- 3 files changed, 53 insertions(+), 18 deletions(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index 6f1324c29c34..50ddadc05897 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -898,6 +898,13 @@ def is_valid_subgraph(params, body): logger.info("tensorrt: inputs have different batch sizes") return False + # and not IsComputeIntensiveGraph().is_compute_intensive(body) == 0 + # relay.analysis.get_total_mac_number(body) == 0 + if ( + get_tensorrt_remove_no_mac_subgraphs() + and not IsComputeIntensiveGraph().is_graph_compute_intensive(body) + ): + return False return True @@ -942,10 +949,7 @@ def visit_call(self, call): name = subgraph.name_hint if not mod[name].attrs or mod[name].attrs["Compiler"] != "tensorrt": continue - if not ( - is_valid_subgraph(mod[name].params, mod[name].body) - and IsComputeIntensiveGraph().is_graph_compute_intensive(mod[name]) - ): + if not (is_valid_subgraph(mod[name].params, mod[name].body)): subgraphs_to_remove.append(name) # Create new pruned module new_mod = tvm.IRModule(mod.functions, mod.type_definitions) @@ -960,6 +964,11 @@ class RemoveDropout(ExprMutator): def visit_tuple_getitem(self, op): visit = super().visit_tuple_getitem(op) + if visit.index != 0: + return visit + # if isinstance(visit.tuple_value, Call): + # print("Name of VISIT OP", str(visit.tuple_value.op)) + # print(" IS IT DROPOUT", str(visit.tuple_value.op) == "nn.dropout") if ( isinstance(visit.tuple_value, Call) and visit.tuple_value.op.name == "nn.dropout" diff --git a/src/relay/backend/utils.h b/src/relay/backend/utils.h index 4426642e8e18..3ce308cbbe8b 100644 --- a/src/relay/backend/utils.h +++ b/src/relay/backend/utils.h @@ -160,8 +160,11 @@ inline std::vector GetIntShape(const Array& shape) { std::vector ret; for (const auto& dim : shape) { const int64_t* pval = tir::as_const_int(dim); - ICHECK(pval) << "Expect integer, but received: " << dim->GetTypeKey(); - ret.push_back(*pval); + ret.push_back(pval ? *pval : -2); + // ICHECK(pval) << "Expect integer, but received: " << dim->GetTypeKey(); + // _shape.push_back(val ? val->value : -1); + + // ret.push_back(*pval); } return ret; } diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 445010321668..0dc8a342d715 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -41,6 +41,13 @@ namespace tvm { namespace runtime { namespace contrib { +struct PairHash { + template + std::size_t operator()(const std::pair& pair) const { + return std::hash()(pair.first) ^ std::hash()(pair.second); + } +}; + using namespace tvm::runtime::json; class TensorRTRuntime : public JSONRuntimeBase { @@ -105,12 +112,14 @@ class TensorRTRuntime : public JSONRuntimeBase { /*! \brief Run inference using built engine. */ void Run() override { BuildEngine(); - auto& engine_and_context = trt_engine_cache_.at(symbol_name_); + batch_size_ = data_entry_[input_var_eid_[0]]->shape[0]; + + // batch_size_ = GetBatchSize(); + auto& engine_and_context = trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_)); auto engine = engine_and_context.engine; auto context = engine_and_context.context; auto& device_buffers = engine_and_context.device_buffers; std::vector bindings(engine->getNbBindings(), nullptr); - for (size_t i = 0; i < input_nodes_.size(); ++i) { auto nid = input_nodes_[i]; if (nodes_[nid].GetOpType() == "input") { @@ -169,10 +178,12 @@ class TensorRTRuntime : public JSONRuntimeBase { * do nothing. */ void BuildEngine() { - if (trt_engine_cache_.count(symbol_name_)) return; - DLOG(INFO) << "Building new TensorRT engine for subgraph " << symbol_name_; + // batch_size_ = GetBatchSize(); + batch_size_ = data_entry_[input_var_eid_[0]]->shape[0]; + if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) return; + LOG(INFO) << "Building new TensorRT engine for subgraph " << symbol_name_ << " with batch size " + << batch_size_; const bool use_fp16 = dmlc::GetEnv("TVM_TENSORRT_USE_FP16", false); - batch_size_ = GetBatchSize(); TensorRTBuilder builder(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_, use_fp16, batch_size_); @@ -203,8 +214,9 @@ class TensorRTRuntime : public JSONRuntimeBase { } // Build engine. - trt_engine_cache_[symbol_name_] = builder.BuildEngine(); - DLOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_; + trt_engine_cache_[std::make_pair(symbol_name_, batch_size_)] = builder.BuildEngine(); + LOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_ + << " with batch size" << batch_size_; CacheEngineToDisk(); } @@ -240,7 +252,9 @@ class TensorRTRuntime : public JSONRuntimeBase { helper.DeclareField("inputs", &engine_and_context.inputs); helper.DeclareField("outputs", &engine_and_context.outputs); helper.ReadAllFields(&reader); - trt_engine_cache_[symbol_name_] = engine_and_context; + // trt_engine_cache_[symbol_name_] = engine_and_context; + const int batch_size = 1; + trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context; return true; } @@ -248,13 +262,17 @@ class TensorRTRuntime : public JSONRuntimeBase { * directory so it can be loaded later. */ void CacheEngineToDisk() { + // batch_size_ = GetBatchSize(); + + batch_size_ = data_entry_[input_var_eid_[0]]->shape[0]; std::string cache_dir = dmlc::GetEnv("TVM_TENSORRT_CACHE_DIR", std::string("")); if (cache_dir.empty()) return; std::string key = GetSubgraphKey(); std::string path = cache_dir + "/" + key + ".plan"; DLOG(INFO) << "Caching TensorRT engine to " << path; // Serialize engine to disk - nvinfer1::IHostMemory* serialized_engine = trt_engine_cache_[symbol_name_].engine->serialize(); + nvinfer1::IHostMemory* serialized_engine = + trt_engine_cache_[std::make_pair(symbol_name_, batch_size_)].engine->serialize(); SaveBinaryToFile(path, std::string(static_cast(serialized_engine->data()), serialized_engine->size())); serialized_engine->destroy(); @@ -262,8 +280,10 @@ class TensorRTRuntime : public JSONRuntimeBase { std::ostringstream os; dmlc::JSONWriter writer(&os); writer.BeginObject(); - writer.WriteObjectKeyValue("inputs", trt_engine_cache_[symbol_name_].inputs); - writer.WriteObjectKeyValue("outputs", trt_engine_cache_[symbol_name_].outputs); + writer.WriteObjectKeyValue("inputs", + trt_engine_cache_[std::make_pair(symbol_name_, batch_size_)].inputs); + writer.WriteObjectKeyValue( + "outputs", trt_engine_cache_[std::make_pair(symbol_name_, batch_size_)].outputs); writer.EndObject(); std::string meta_path = cache_dir + "/" + key + ".meta"; SaveBinaryToFile(meta_path, os.str()); @@ -283,6 +303,8 @@ class TensorRTRuntime : public JSONRuntimeBase { auto nid = input_nodes_[i]; if (nodes_[nid].GetOpType() == "input") { // Get batch size from first input. + + LOG(INFO) << "Current Batch Size " << nodes_[nid].GetOpShape()[0][0]; return nodes_[nid].GetOpShape()[0][0]; } } @@ -290,7 +312,8 @@ class TensorRTRuntime : public JSONRuntimeBase { } /*! \brief Map of function name to TRT engine if built already. */ - std::unordered_map trt_engine_cache_; + std::unordered_map, TensorRTEngineAndContext, PairHash> + trt_engine_cache_; /*! \brief TensorRT logger. */ TensorRTLogger logger_; From 08352aba58c398060586650e68a16d027f7a3b43 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 24 Nov 2020 07:09:03 +0000 Subject: [PATCH 06/34] Cleanup --- python/tvm/relay/op/contrib/tensorrt.py | 37 ++++++++++++------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index 50ddadc05897..4937fcf901ed 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -637,24 +637,23 @@ def reshape_annotate_fn(expr): # pylint: disable=unused-variable ): return False return True - else: - shape = list(map(int, shape)) - new_shape = list(map(int, new_shape)) - - # TRT cannot modify batch dimension. - original_volume = np.prod(shape) - # First, resolve 0. - for i, value in enumerate(new_shape): - if value == 0: - new_shape[i] = shape[i] - # Resolve -1. - for i, value in enumerate(new_shape): - if value == -1: - new_shape[i] = original_volume // np.prod([x for x in new_shape if x != -1]) - # Remove batch dimension and see if volumes match - if shape[0] != new_shape[0]: - print("reshape: can't modify batch dimension.") - return False + shape = list(map(int, shape)) + new_shape = list(map(int, new_shape)) + + # TRT cannot modify batch dimension. + original_volume = np.prod(shape) + # First, resolve 0. + for i, value in enumerate(new_shape): + if value == 0: + new_shape[i] = shape[i] + # Resolve -1. + for i, value in enumerate(new_shape): + if value == -1: + new_shape[i] = original_volume // np.prod([x for x in new_shape if x != -1]) + # Remove batch dimension and see if volumes match + if shape[0] != new_shape[0]: + print("reshape: can't modify batch dimension.") + return False return True @@ -949,7 +948,7 @@ def visit_call(self, call): name = subgraph.name_hint if not mod[name].attrs or mod[name].attrs["Compiler"] != "tensorrt": continue - if not (is_valid_subgraph(mod[name].params, mod[name].body)): + if not is_valid_subgraph(mod[name].params, mod[name].body): subgraphs_to_remove.append(name) # Create new pruned module new_mod = tvm.IRModule(mod.functions, mod.type_definitions) From 8d11a492aab65a3c31b6a996dd0dfa0baedb61f9 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 24 Nov 2020 07:29:32 +0000 Subject: [PATCH 07/34] Remove comments --- python/tvm/relay/op/contrib/tensorrt.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index 4937fcf901ed..2e341fa09da4 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -896,9 +896,6 @@ def is_valid_subgraph(params, body): if len(input_batch_sizes) > 1 and len(set(input_batch_sizes)) != 1: logger.info("tensorrt: inputs have different batch sizes") return False - - # and not IsComputeIntensiveGraph().is_compute_intensive(body) == 0 - # relay.analysis.get_total_mac_number(body) == 0 if ( get_tensorrt_remove_no_mac_subgraphs() and not IsComputeIntensiveGraph().is_graph_compute_intensive(body) From 4298de25d8ebbad4a1368a838a6dd3992d4765e9 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 24 Nov 2020 07:30:44 +0000 Subject: [PATCH 08/34] Remove COmments --- src/relay/backend/utils.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/relay/backend/utils.h b/src/relay/backend/utils.h index 3ce308cbbe8b..ccb8611b7a3c 100644 --- a/src/relay/backend/utils.h +++ b/src/relay/backend/utils.h @@ -160,11 +160,7 @@ inline std::vector GetIntShape(const Array& shape) { std::vector ret; for (const auto& dim : shape) { const int64_t* pval = tir::as_const_int(dim); - ret.push_back(pval ? *pval : -2); - // ICHECK(pval) << "Expect integer, but received: " << dim->GetTypeKey(); - // _shape.push_back(val ? val->value : -1); - - // ret.push_back(*pval); + ret.push_back(pval ? *pval : -1); } return ret; } From 4d753223d23c44f0562874d0c6cc66b1353b5f9d Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 24 Nov 2020 17:13:22 +0000 Subject: [PATCH 09/34] GetBatchSizeFix --- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 0dc8a342d715..d6368d852fec 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -113,8 +113,6 @@ class TensorRTRuntime : public JSONRuntimeBase { void Run() override { BuildEngine(); batch_size_ = data_entry_[input_var_eid_[0]]->shape[0]; - - // batch_size_ = GetBatchSize(); auto& engine_and_context = trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_)); auto engine = engine_and_context.engine; auto context = engine_and_context.context; @@ -178,7 +176,6 @@ class TensorRTRuntime : public JSONRuntimeBase { * do nothing. */ void BuildEngine() { - // batch_size_ = GetBatchSize(); batch_size_ = data_entry_[input_var_eid_[0]]->shape[0]; if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) return; LOG(INFO) << "Building new TensorRT engine for subgraph " << symbol_name_ << " with batch size " @@ -262,8 +259,6 @@ class TensorRTRuntime : public JSONRuntimeBase { * directory so it can be loaded later. */ void CacheEngineToDisk() { - // batch_size_ = GetBatchSize(); - batch_size_ = data_entry_[input_var_eid_[0]]->shape[0]; std::string cache_dir = dmlc::GetEnv("TVM_TENSORRT_CACHE_DIR", std::string("")); if (cache_dir.empty()) return; @@ -303,8 +298,6 @@ class TensorRTRuntime : public JSONRuntimeBase { auto nid = input_nodes_[i]; if (nodes_[nid].GetOpType() == "input") { // Get batch size from first input. - - LOG(INFO) << "Current Batch Size " << nodes_[nid].GetOpShape()[0][0]; return nodes_[nid].GetOpShape()[0][0]; } } From 671fca2d178ae07ddd9980f0176f4748fc154f0c Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 24 Nov 2020 18:06:53 +0000 Subject: [PATCH 10/34] Fix Remove Droupout --- python/tvm/relay/op/contrib/tensorrt.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index 2e341fa09da4..f318f9d974cd 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -959,12 +959,10 @@ class RemoveDropout(ExprMutator): """ def visit_tuple_getitem(self, op): + print("HERE") visit = super().visit_tuple_getitem(op) if visit.index != 0: return visit - # if isinstance(visit.tuple_value, Call): - # print("Name of VISIT OP", str(visit.tuple_value.op)) - # print(" IS IT DROPOUT", str(visit.tuple_value.op) == "nn.dropout") if ( isinstance(visit.tuple_value, Call) and visit.tuple_value.op.name == "nn.dropout" From a021c4e5902095be454d20ecb3a59040ec29870d Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 24 Nov 2020 18:07:48 +0000 Subject: [PATCH 11/34] Fix Remove Droupout --- python/tvm/relay/op/contrib/tensorrt.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index f318f9d974cd..3fc3e5a0393f 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -959,7 +959,6 @@ class RemoveDropout(ExprMutator): """ def visit_tuple_getitem(self, op): - print("HERE") visit = super().visit_tuple_getitem(op) if visit.index != 0: return visit From 408c35f3343ba5034b325275141da0707b806b16 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 24 Nov 2020 18:09:51 +0000 Subject: [PATCH 12/34] TRT Runtime --- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index d6368d852fec..b8d42a1b5671 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -249,7 +249,6 @@ class TensorRTRuntime : public JSONRuntimeBase { helper.DeclareField("inputs", &engine_and_context.inputs); helper.DeclareField("outputs", &engine_and_context.outputs); helper.ReadAllFields(&reader); - // trt_engine_cache_[symbol_name_] = engine_and_context; const int batch_size = 1; trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context; return true; From c70279c6b8e1debbff4b799fc7221652584783a5 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 24 Nov 2020 21:04:38 +0000 Subject: [PATCH 13/34] Add MaskrCNN R50 --- tests/python/frontend/pytorch/test_forward.py | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py index 6250dfff811a..325db535cf41 100644 --- a/tests/python/frontend/pytorch/test_forward.py +++ b/tests/python/frontend/pytorch/test_forward.py @@ -29,7 +29,10 @@ from tvm.contrib import graph_runtime from tvm.contrib.nvcc import have_fp16 import tvm.testing +from typing import Dict, Tuple, Union from packaging import version as package_version +from tvm.contrib.download import download +import cv2 sys.setrecursionlimit(10000) @@ -3363,6 +3366,77 @@ def test_fn(x, weights=None): verify_trace_model(test_fn, [inp, weights.to(torch.float64)], ["llvm"]) +def test_maskrcnn_resnet50(): + def dict_to_tuple( + out_dict: Dict, + ) -> Union[ + Tuple[torch.Tensor, torch.Tensor, torch.Tensor], + Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor], + ]: + """ + This function converts the dictionary output of maskrcnn to a tuple for downstream tracing + """ + if "masks" in out_dict.keys(): + return out_dict["boxes"], out_dict["scores"], out_dict["labels"], out_dict["masks"] + return out_dict["boxes"], out_dict["scores"], out_dict["labels"] + + class TraceWrapper(torch.nn.Module): + """ + This class is a wrapper over the torch module to convert the outputs into traceable form + """ + + def __init__(self, model: torch.nn.Module) -> None: + super().__init__() + self.model = model + + def forward(self, inp: torch.Tensor) -> Union[ + Tuple[torch.Tensor, torch.Tensor, torch.Tensor], + Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + out = self.model(inp) + return dict_to_tuple(out[0]) + + def get_traced_maskrcnn_model(np_sample_input: np.ndarray) -> torch.jit.TopLevelTracedModule: + """ + This function takes a sample input and returns the traced maskrcnn model + """ + model_func = torchvision.models.detection.maskrcnn_resnet50_fpn + model = TraceWrapper(model_func(pretrained=True)) + model.eval() + inp = torch.Tensor(np.random.uniform(0.0, 250.0, size=np_sample_input.shape)) + + with torch.no_grad(): + out = model(inp) + script_module = torch.jit.trace(model, inp) + script_module.eval() + + return script_module + + def get_maskrcnn_input(in_size: int) -> np.ndarray: + """ + This function gets a real image with multiple objects of interest and returns it. + """ + input_shape = (1, 3, in_size, in_size) + img_path = "test_street_small.jpg" + img_url = ( + "https://raw.githubusercontent.com/dmlc/web-data/" + "master/gluoncv/detection/street_small.jpg" + ) + download(img_url, img_path) + + img = cv2.imread(img_path).astype("float32") + img = cv2.resize(img, (in_size, in_size)) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = np.transpose(img / 255.0, [2, 0, 1]) + img = np.expand_dims(img, axis=0) + + return img + + in_size = 300 + np_sample_input = get_maskrcnn_input(in_size) + script_module = get_traced_maskrcnn_model(np_sample_input) + # vm_trt_exec = convert_scripted_model_to_vm_trt(script_module, np_sample_input, target) + + if __name__ == "__main__": # some structural tests test_forward_traced_function() From e38edb095aae2b86eaf439b0f7ca186a0a8ee80d Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 24 Nov 2020 21:18:39 +0000 Subject: [PATCH 14/34] New Testing code --- tests/python/frontend/pytorch/test_forward.py | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py index 325db535cf41..8e34bde12eeb 100644 --- a/tests/python/frontend/pytorch/test_forward.py +++ b/tests/python/frontend/pytorch/test_forward.py @@ -3365,8 +3365,28 @@ def test_fn(x, weights=None): verify_trace_model(test_fn, [inp, weights], ["llvm"]) verify_trace_model(test_fn, [inp, weights.to(torch.float64)], ["llvm"]) +def convert_traced_model_to_vm_trt(traced_module: torch.jit.TopLevelTracedModule, + np_sample_input: np.ndarray, + target: str) -> tvm.runtime.vm.Executable: + """ + This function converts a traced pytorch model to VM + TRT. + """ + input_shape = np_sample_input.shape + input_name = "input0" + shape_list = [(input_name, input_shape)] + mod, params = relay.frontend.from_pytorch(traced_module, shape_list) + mod, config = tensorrt.partition_for_tensorrt(mod, params, remove_no_mac_subgraphs=True) + with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]): + vm_trt_exec = relay.vm.compile(mod, target=target, params=params) + + return vm_trt_exec def test_maskrcnn_resnet50(): + """ + This function tests the working of pytorch maskrcnn with resnet50 as backbone with + VM and VM + TRT. Since the order of compiled model outputs is a bit different from + original pytorch model, it uses a custom logic for comparison check. + """ def dict_to_tuple( out_dict: Dict, ) -> Union[ @@ -3406,10 +3426,10 @@ def get_traced_maskrcnn_model(np_sample_input: np.ndarray) -> torch.jit.TopLevel with torch.no_grad(): out = model(inp) - script_module = torch.jit.trace(model, inp) - script_module.eval() + traced_module = torch.jit.trace(model, inp) + traced_module.eval() - return script_module + return traced_module def get_maskrcnn_input(in_size: int) -> np.ndarray: """ @@ -3433,8 +3453,8 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray: in_size = 300 np_sample_input = get_maskrcnn_input(in_size) - script_module = get_traced_maskrcnn_model(np_sample_input) - # vm_trt_exec = convert_scripted_model_to_vm_trt(script_module, np_sample_input, target) + traced_module = get_traced_maskrcnn_model(np_sample_input) + vm_trt_exec = convert_traced_model_to_vm_trt(traced_module, np_sample_input, target) if __name__ == "__main__": From 1549502dc67349831a8ee6a156c7395845992ae8 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 24 Nov 2020 21:21:42 +0000 Subject: [PATCH 15/34] Fix black --- tests/python/frontend/pytorch/test_forward.py | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py index 8e34bde12eeb..a44d97b4fa4a 100644 --- a/tests/python/frontend/pytorch/test_forward.py +++ b/tests/python/frontend/pytorch/test_forward.py @@ -3365,9 +3365,10 @@ def test_fn(x, weights=None): verify_trace_model(test_fn, [inp, weights], ["llvm"]) verify_trace_model(test_fn, [inp, weights.to(torch.float64)], ["llvm"]) -def convert_traced_model_to_vm_trt(traced_module: torch.jit.TopLevelTracedModule, - np_sample_input: np.ndarray, - target: str) -> tvm.runtime.vm.Executable: + +def convert_traced_model_to_vm_trt( + traced_module: torch.jit.TopLevelTracedModule, np_sample_input: np.ndarray, target: str +) -> tvm.runtime.vm.Executable: """ This function converts a traced pytorch model to VM + TRT. """ @@ -3381,12 +3382,14 @@ def convert_traced_model_to_vm_trt(traced_module: torch.jit.TopLevelTracedModule return vm_trt_exec + def test_maskrcnn_resnet50(): """ - This function tests the working of pytorch maskrcnn with resnet50 as backbone with - VM and VM + TRT. Since the order of compiled model outputs is a bit different from - original pytorch model, it uses a custom logic for comparison check. + This function tests the working of pytorch maskrcnn with resnet50 as backbone with + VM and VM + TRT. Since the order of compiled model outputs is a bit different from + original pytorch model, it uses a custom logic for comparison check. """ + def dict_to_tuple( out_dict: Dict, ) -> Union[ @@ -3409,9 +3412,12 @@ def __init__(self, model: torch.nn.Module) -> None: super().__init__() self.model = model - def forward(self, inp: torch.Tensor) -> Union[ - Tuple[torch.Tensor, torch.Tensor, torch.Tensor], - Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + def forward( + self, inp: torch.Tensor + ) -> Union[ + Tuple[torch.Tensor, torch.Tensor, torch.Tensor], + Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor], + ]: out = self.model(inp) return dict_to_tuple(out[0]) @@ -3433,7 +3439,7 @@ def get_traced_maskrcnn_model(np_sample_input: np.ndarray) -> torch.jit.TopLevel def get_maskrcnn_input(in_size: int) -> np.ndarray: """ - This function gets a real image with multiple objects of interest and returns it. + This function gets a real image with multiple objects of interest and returns it. """ input_shape = (1, 3, in_size, in_size) img_path = "test_street_small.jpg" From 5528c5f4fed8848d5cd5caad5c27abd71b5d4b39 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 24 Nov 2020 22:39:11 +0000 Subject: [PATCH 16/34] Test Maskrcnn r50 done --- tests/python/frontend/pytorch/test_forward.py | 383 +++++++++--------- 1 file changed, 199 insertions(+), 184 deletions(-) diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py index a44d97b4fa4a..e612ff75d674 100644 --- a/tests/python/frontend/pytorch/test_forward.py +++ b/tests/python/frontend/pytorch/test_forward.py @@ -33,6 +33,7 @@ from packaging import version as package_version from tvm.contrib.download import download import cv2 +from tvm.relay.op.contrib import tensorrt sys.setrecursionlimit(10000) @@ -3383,26 +3384,13 @@ def convert_traced_model_to_vm_trt( return vm_trt_exec -def test_maskrcnn_resnet50(): +def test_maskrcnn_resnet50() -> None: """ This function tests the working of pytorch maskrcnn with resnet50 as backbone with VM and VM + TRT. Since the order of compiled model outputs is a bit different from original pytorch model, it uses a custom logic for comparison check. """ - def dict_to_tuple( - out_dict: Dict, - ) -> Union[ - Tuple[torch.Tensor, torch.Tensor, torch.Tensor], - Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor], - ]: - """ - This function converts the dictionary output of maskrcnn to a tuple for downstream tracing - """ - if "masks" in out_dict.keys(): - return out_dict["boxes"], out_dict["scores"], out_dict["labels"], out_dict["masks"] - return out_dict["boxes"], out_dict["scores"], out_dict["labels"] - class TraceWrapper(torch.nn.Module): """ This class is a wrapper over the torch module to convert the outputs into traceable form @@ -3414,12 +3402,9 @@ def __init__(self, model: torch.nn.Module) -> None: def forward( self, inp: torch.Tensor - ) -> Union[ - Tuple[torch.Tensor, torch.Tensor, torch.Tensor], - Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor], - ]: + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: out = self.model(inp) - return dict_to_tuple(out[0]) + return out[0]["boxes"], out[0]["scores"], out[0]["labels"], out[0]["masks"] def get_traced_maskrcnn_model(np_sample_input: np.ndarray) -> torch.jit.TopLevelTracedModule: """ @@ -3460,171 +3445,201 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray: in_size = 300 np_sample_input = get_maskrcnn_input(in_size) traced_module = get_traced_maskrcnn_model(np_sample_input) - vm_trt_exec = convert_traced_model_to_vm_trt(traced_module, np_sample_input, target) + vm_trt_exec = convert_traced_model_to_vm_trt(traced_module, np_sample_input, target="llvm") + ctx = tvm.cpu() + vm = tvm.runtime.vm.VirtualMachine(vm_trt_exec, ctx) + vm.set_input("main", **{"input0": np_sample_input}) + tvm_res = vm.run() + + # Descending sort by scores and get the high confidence indices. In this example 9 is chosen, + # because this image has 9 boxes over 0.9 confidence + num_high_confidence_boxes = 9 + tvm_indices = np.argsort(-1 * tvm_res[1].asnumpy())[:num_high_confidence_boxes] + + with torch.no_grad(): + out = traced_module(torch.Tensor(np_sample_input)) + # Descending sort by scores and get the high confidence indices + pt_indices = np.argsort(-1 * out[1].numpy())[:num_high_confidence_boxes] + + tol = [1e-1, 5e-3, 1e-5, 4e-1] # [Box Tol, Score Tol, Label Tol, Mask Tol] + # Because of certain ops, there are certain minor differences in TVM outputs and PT outputs, + # This means that the tolerance can't be 1e-4 or 1e-5 throughout. The ideal way to get around + # this is to test it on an entire dataset and compare mAP with the original model. + # However, since that is not practically possible on CI, the following compromise is made. + # These tolerances are chosen based on their impact or lack thereof to the mAP score, e.g: + # 0.1 pixel difference of a box in a 300X300 image wont make any change. + for i, tol_val in zip(range(4), tol): + np.testing.assert_allclose( + tvm_res[i].asnumpy()[tvm_indices], + out[i].numpy()[pt_indices], + rtol=tol_val, + atol=tol_val, + ) if __name__ == "__main__": - # some structural tests - test_forward_traced_function() - test_forward_dtypes() - test_weight_names() - test_duplicate_weight_use() - - # Single operator tests - test_forward_pixel_shuffle() - test_forward_add() - test_forward_subtract() - test_forward_multiply() - test_forward_matmul() - test_forward_rsub() - test_forward_onehot() - test_forward_embedding() - test_forward_reshape() - test_forward_reciprocal() - test_forward_repeat() - test_forward_repeat_interleave() - test_forward_squeeze() - test_forward_unsqueeze() - test_forward_concatenate() - test_forward_reduce_sum() - test_forward_reduce_prod() - test_forward_argmin() - test_forward_argmax() - test_forward_norm() - test_forward_frobenius_norm() - test_forward_std() - test_forward_variance() - test_forward_relu() - test_forward_prelu() - test_forward_leakyrelu() - test_forward_elu() - test_forward_celu() - test_forward_gelu() - test_forward_selu() - test_forward_log_sigmoid() - test_forward_adaptiveavgpool() - test_forward_maxpool2d() - test_forward_maxpool1d() - test_forward_maxpool3d() - test_forward_hardtanh() - test_forward_conv() - test_forward_conv_transpose() - test_forward_threshold() - test_forward_contiguous() - test_forward_batchnorm() - test_forward_instancenorm() - test_forward_layernorm() - test_forward_groupnorm() - test_forward_transpose() - test_forward_size() - test_forward_view() - test_forward_select() - test_forward_take() - test_forward_topk() - test_forward_where() - test_forward_addcdiv() - test_forward_addcmul() - test_forward_true_divide() - test_forward_clone() - test_forward_softplus() - test_forward_softsign() - test_forward_logsoftmax() - test_forward_sigmoid() - test_forward_dense() - test_forward_avgpool() - test_forward_avgpool3d() - test_forward_dropout() - test_forward_slice() - test_forward_mean() - test_forward_expand() - test_forward_pow() - test_forward_unary() - test_forward_clamp() - test_forward_clamp_() - test_forward_logical_not() - test_forward_bitwise_not() - test_forward_bitwise_xor() - test_forward_logical_xor() - test_forward_isfinite() - test_forward_isnan() - test_forward_isinf() - test_forward_ones() - test_forward_ones_like() - test_forward_zeros() - test_forward_zeros_like() - test_forward_full() - test_forward_full_like() - test_forward_linspace() - test_forward_arange() - test_forward_mesh_grid() - test_forward_chunk() - test_forward_split() - test_forward_gather() - test_upsample() - test_forward_upsample3d() - test_forward_nms() - test_forward_roi_align() - test_to() - test_flatten() - test_type_as() - test_forward_functional_pad() - test_forward_zero_pad2d() - test_forward_constant_pad1d() - test_forward_constant_pad2d() - test_forward_constant_pad3d() - test_forward_reflection_pad1d() - test_forward_reflection_pad2d() - test_forward_replication_pad1d() - test_forward_replication_pad2d() - test_forward_replication_pad3d() - test_adaptive_pool3d() - test_conv3d() - test_conv3d_transpose() - test_forward_index() - test_min_max() - test_logsumexp() - test_stack() - test_stack_dynamic() - test_forward_unbind() - test_forward_nonzero() - test_forward_scatter() - test_numel() - test_bincount() - - # Model tests - test_resnet18() - test_squeezenet1_0() - test_squeezenet1_1() - test_densenet121() - # disable inception test for now, since loading it takes ~5min on torchvision-0.5 due to scipy bug - # See https://discuss.pytorch.org/t/torchvisions-inception-v3-takes-much-longer-to-load-than-other-models/68756 - # test_inception_v3() - test_googlenet() - test_mnasnet0_5() - test_mobilenet_v2() - - test_custom_conversion_map() - - test_segmentaton_models() - test_3d_models() - - # Quantization test - from qnn_test import test_quantized_imagenet, test_quantized_modules - - test_quantized_modules() - test_quantized_imagenet() - - # Test simple conditionals and loop - test_control_flow() - test_simple_rnn() - - # More complex recurrent models - from test_lstm import test_custom_lstm - - test_custom_lstm() - - # Test bert model - test_forward_pretrained_bert_base_uncased() - - # Test convert torch script(jit) with specific inputs' types - test_convert_torch_script_with_input_types() + test_maskrcnn_resnet50() + # # some structural tests + # test_forward_traced_function() + # test_forward_dtypes() + # test_weight_names() + # test_duplicate_weight_use() + + # # Single operator tests + # test_forward_pixel_shuffle() + # test_forward_add() + # test_forward_subtract() + # test_forward_multiply() + # test_forward_matmul() + # test_forward_rsub() + # test_forward_onehot() + # test_forward_embedding() + # test_forward_reshape() + # test_forward_reciprocal() + # test_forward_repeat() + # test_forward_repeat_interleave() + # test_forward_squeeze() + # test_forward_unsqueeze() + # test_forward_concatenate() + # test_forward_reduce_sum() + # test_forward_reduce_prod() + # test_forward_argmin() + # test_forward_argmax() + # test_forward_norm() + # test_forward_frobenius_norm() + # test_forward_std() + # test_forward_variance() + # test_forward_relu() + # test_forward_prelu() + # test_forward_leakyrelu() + # test_forward_elu() + # test_forward_celu() + # test_forward_gelu() + # test_forward_selu() + # test_forward_log_sigmoid() + # test_forward_adaptiveavgpool() + # test_forward_maxpool2d() + # test_forward_maxpool1d() + # test_forward_maxpool3d() + # test_forward_hardtanh() + # test_forward_conv() + # test_forward_conv_transpose() + # test_forward_threshold() + # test_forward_contiguous() + # test_forward_batchnorm() + # test_forward_instancenorm() + # test_forward_layernorm() + # test_forward_groupnorm() + # test_forward_transpose() + # test_forward_size() + # test_forward_view() + # test_forward_select() + # test_forward_take() + # test_forward_topk() + # test_forward_where() + # test_forward_addcdiv() + # test_forward_addcmul() + # test_forward_true_divide() + # test_forward_clone() + # test_forward_softplus() + # test_forward_softsign() + # test_forward_logsoftmax() + # test_forward_sigmoid() + # test_forward_dense() + # test_forward_avgpool() + # test_forward_avgpool3d() + # test_forward_dropout() + # test_forward_slice() + # test_forward_mean() + # test_forward_expand() + # test_forward_pow() + # test_forward_unary() + # test_forward_clamp() + # test_forward_clamp_() + # test_forward_logical_not() + # test_forward_bitwise_not() + # test_forward_bitwise_xor() + # test_forward_logical_xor() + # test_forward_isfinite() + # test_forward_isnan() + # test_forward_isinf() + # test_forward_ones() + # test_forward_ones_like() + # test_forward_zeros() + # test_forward_zeros_like() + # test_forward_full() + # test_forward_full_like() + # test_forward_linspace() + # test_forward_arange() + # test_forward_mesh_grid() + # test_forward_chunk() + # test_forward_split() + # test_forward_gather() + # test_upsample() + # test_forward_upsample3d() + # test_forward_nms() + # test_forward_roi_align() + # test_to() + # test_flatten() + # test_type_as() + # test_forward_functional_pad() + # test_forward_zero_pad2d() + # test_forward_constant_pad1d() + # test_forward_constant_pad2d() + # test_forward_constant_pad3d() + # test_forward_reflection_pad1d() + # test_forward_reflection_pad2d() + # test_forward_replication_pad1d() + # test_forward_replication_pad2d() + # test_forward_replication_pad3d() + # test_adaptive_pool3d() + # test_conv3d() + # test_conv3d_transpose() + # test_forward_index() + # test_min_max() + # test_logsumexp() + # test_stack() + # test_stack_dynamic() + # test_forward_unbind() + # test_forward_nonzero() + # test_forward_scatter() + # test_numel() + # test_bincount() + + # # Model tests + # test_resnet18() + # test_squeezenet1_0() + # test_squeezenet1_1() + # test_densenet121() + # # disable inception test for now, since loading it takes ~5min on torchvision-0.5 due to scipy bug + # # See https://discuss.pytorch.org/t/torchvisions-inception-v3-takes-much-longer-to-load-than-other-models/68756 + # # test_inception_v3() + # test_googlenet() + # test_mnasnet0_5() + # test_mobilenet_v2() + + # test_custom_conversion_map() + + # test_segmentaton_models() + # test_3d_models() + + # # Quantization test + # from qnn_test import test_quantized_imagenet, test_quantized_modules + + # test_quantized_modules() + # test_quantized_imagenet() + + # # Test simple conditionals and loop + # test_control_flow() + # test_simple_rnn() + + # # More complex recurrent models + # from test_lstm import test_custom_lstm + + # test_custom_lstm() + + # # Test bert model + # test_forward_pretrained_bert_base_uncased() + + # # Test convert torch script(jit) with specific inputs' types + # test_convert_torch_script_with_input_types() From d9366be78062839c068f0aa24f524760018e603d Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 24 Nov 2020 22:41:10 +0000 Subject: [PATCH 17/34] Test MR50 --- tests/python/frontend/pytorch/test_forward.py | 328 +++++++++--------- 1 file changed, 164 insertions(+), 164 deletions(-) diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py index e612ff75d674..404614db71b9 100644 --- a/tests/python/frontend/pytorch/test_forward.py +++ b/tests/python/frontend/pytorch/test_forward.py @@ -3478,168 +3478,168 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray: if __name__ == "__main__": + # some structural tests + test_forward_traced_function() + test_forward_dtypes() + test_weight_names() + test_duplicate_weight_use() + + # Single operator tests + test_forward_pixel_shuffle() + test_forward_add() + test_forward_subtract() + test_forward_multiply() + test_forward_matmul() + test_forward_rsub() + test_forward_onehot() + test_forward_embedding() + test_forward_reshape() + test_forward_reciprocal() + test_forward_repeat() + test_forward_repeat_interleave() + test_forward_squeeze() + test_forward_unsqueeze() + test_forward_concatenate() + test_forward_reduce_sum() + test_forward_reduce_prod() + test_forward_argmin() + test_forward_argmax() + test_forward_norm() + test_forward_frobenius_norm() + test_forward_std() + test_forward_variance() + test_forward_relu() + test_forward_prelu() + test_forward_leakyrelu() + test_forward_elu() + test_forward_celu() + test_forward_gelu() + test_forward_selu() + test_forward_log_sigmoid() + test_forward_adaptiveavgpool() + test_forward_maxpool2d() + test_forward_maxpool1d() + test_forward_maxpool3d() + test_forward_hardtanh() + test_forward_conv() + test_forward_conv_transpose() + test_forward_threshold() + test_forward_contiguous() + test_forward_batchnorm() + test_forward_instancenorm() + test_forward_layernorm() + test_forward_groupnorm() + test_forward_transpose() + test_forward_size() + test_forward_view() + test_forward_select() + test_forward_take() + test_forward_topk() + test_forward_where() + test_forward_addcdiv() + test_forward_addcmul() + test_forward_true_divide() + test_forward_clone() + test_forward_softplus() + test_forward_softsign() + test_forward_logsoftmax() + test_forward_sigmoid() + test_forward_dense() + test_forward_avgpool() + test_forward_avgpool3d() + test_forward_dropout() + test_forward_slice() + test_forward_mean() + test_forward_expand() + test_forward_pow() + test_forward_unary() + test_forward_clamp() + test_forward_clamp_() + test_forward_logical_not() + test_forward_bitwise_not() + test_forward_bitwise_xor() + test_forward_logical_xor() + test_forward_isfinite() + test_forward_isnan() + test_forward_isinf() + test_forward_ones() + test_forward_ones_like() + test_forward_zeros() + test_forward_zeros_like() + test_forward_full() + test_forward_full_like() + test_forward_linspace() + test_forward_arange() + test_forward_mesh_grid() + test_forward_chunk() + test_forward_split() + test_forward_gather() + test_upsample() + test_forward_upsample3d() + test_forward_nms() + test_forward_roi_align() + test_to() + test_flatten() + test_type_as() + test_forward_functional_pad() + test_forward_zero_pad2d() + test_forward_constant_pad1d() + test_forward_constant_pad2d() + test_forward_constant_pad3d() + test_forward_reflection_pad1d() + test_forward_reflection_pad2d() + test_forward_replication_pad1d() + test_forward_replication_pad2d() + test_forward_replication_pad3d() + test_adaptive_pool3d() + test_conv3d() + test_conv3d_transpose() + test_forward_index() + test_min_max() + test_logsumexp() + test_stack() + test_stack_dynamic() + test_forward_unbind() + test_forward_nonzero() + test_forward_scatter() + test_numel() + test_bincount() + + # Model tests + test_resnet18() + test_squeezenet1_0() + test_squeezenet1_1() + test_densenet121() + # disable inception test for now, since loading it takes ~5min on torchvision-0.5 due to scipy bug + # See https://discuss.pytorch.org/t/torchvisions-inception-v3-takes-much-longer-to-load-than-other-models/68756 + # test_inception_v3() + test_googlenet() + test_mnasnet0_5() + test_mobilenet_v2() + + test_custom_conversion_map() + + test_segmentaton_models() + test_3d_models() test_maskrcnn_resnet50() - # # some structural tests - # test_forward_traced_function() - # test_forward_dtypes() - # test_weight_names() - # test_duplicate_weight_use() - - # # Single operator tests - # test_forward_pixel_shuffle() - # test_forward_add() - # test_forward_subtract() - # test_forward_multiply() - # test_forward_matmul() - # test_forward_rsub() - # test_forward_onehot() - # test_forward_embedding() - # test_forward_reshape() - # test_forward_reciprocal() - # test_forward_repeat() - # test_forward_repeat_interleave() - # test_forward_squeeze() - # test_forward_unsqueeze() - # test_forward_concatenate() - # test_forward_reduce_sum() - # test_forward_reduce_prod() - # test_forward_argmin() - # test_forward_argmax() - # test_forward_norm() - # test_forward_frobenius_norm() - # test_forward_std() - # test_forward_variance() - # test_forward_relu() - # test_forward_prelu() - # test_forward_leakyrelu() - # test_forward_elu() - # test_forward_celu() - # test_forward_gelu() - # test_forward_selu() - # test_forward_log_sigmoid() - # test_forward_adaptiveavgpool() - # test_forward_maxpool2d() - # test_forward_maxpool1d() - # test_forward_maxpool3d() - # test_forward_hardtanh() - # test_forward_conv() - # test_forward_conv_transpose() - # test_forward_threshold() - # test_forward_contiguous() - # test_forward_batchnorm() - # test_forward_instancenorm() - # test_forward_layernorm() - # test_forward_groupnorm() - # test_forward_transpose() - # test_forward_size() - # test_forward_view() - # test_forward_select() - # test_forward_take() - # test_forward_topk() - # test_forward_where() - # test_forward_addcdiv() - # test_forward_addcmul() - # test_forward_true_divide() - # test_forward_clone() - # test_forward_softplus() - # test_forward_softsign() - # test_forward_logsoftmax() - # test_forward_sigmoid() - # test_forward_dense() - # test_forward_avgpool() - # test_forward_avgpool3d() - # test_forward_dropout() - # test_forward_slice() - # test_forward_mean() - # test_forward_expand() - # test_forward_pow() - # test_forward_unary() - # test_forward_clamp() - # test_forward_clamp_() - # test_forward_logical_not() - # test_forward_bitwise_not() - # test_forward_bitwise_xor() - # test_forward_logical_xor() - # test_forward_isfinite() - # test_forward_isnan() - # test_forward_isinf() - # test_forward_ones() - # test_forward_ones_like() - # test_forward_zeros() - # test_forward_zeros_like() - # test_forward_full() - # test_forward_full_like() - # test_forward_linspace() - # test_forward_arange() - # test_forward_mesh_grid() - # test_forward_chunk() - # test_forward_split() - # test_forward_gather() - # test_upsample() - # test_forward_upsample3d() - # test_forward_nms() - # test_forward_roi_align() - # test_to() - # test_flatten() - # test_type_as() - # test_forward_functional_pad() - # test_forward_zero_pad2d() - # test_forward_constant_pad1d() - # test_forward_constant_pad2d() - # test_forward_constant_pad3d() - # test_forward_reflection_pad1d() - # test_forward_reflection_pad2d() - # test_forward_replication_pad1d() - # test_forward_replication_pad2d() - # test_forward_replication_pad3d() - # test_adaptive_pool3d() - # test_conv3d() - # test_conv3d_transpose() - # test_forward_index() - # test_min_max() - # test_logsumexp() - # test_stack() - # test_stack_dynamic() - # test_forward_unbind() - # test_forward_nonzero() - # test_forward_scatter() - # test_numel() - # test_bincount() - - # # Model tests - # test_resnet18() - # test_squeezenet1_0() - # test_squeezenet1_1() - # test_densenet121() - # # disable inception test for now, since loading it takes ~5min on torchvision-0.5 due to scipy bug - # # See https://discuss.pytorch.org/t/torchvisions-inception-v3-takes-much-longer-to-load-than-other-models/68756 - # # test_inception_v3() - # test_googlenet() - # test_mnasnet0_5() - # test_mobilenet_v2() - - # test_custom_conversion_map() - - # test_segmentaton_models() - # test_3d_models() - - # # Quantization test - # from qnn_test import test_quantized_imagenet, test_quantized_modules - - # test_quantized_modules() - # test_quantized_imagenet() - - # # Test simple conditionals and loop - # test_control_flow() - # test_simple_rnn() - - # # More complex recurrent models - # from test_lstm import test_custom_lstm - - # test_custom_lstm() - - # # Test bert model - # test_forward_pretrained_bert_base_uncased() - - # # Test convert torch script(jit) with specific inputs' types - # test_convert_torch_script_with_input_types() + + # Quantization test + from qnn_test import test_quantized_imagenet, test_quantized_modules + + test_quantized_modules() + test_quantized_imagenet() + + # Test simple conditionals and loop + test_control_flow() + test_simple_rnn() + + # More complex recurrent models + from test_lstm import test_custom_lstm + + test_custom_lstm() + + # Test bert model + test_forward_pretrained_bert_base_uncased() + + # Test convert torch script(jit) with specific inputs' types + test_convert_torch_script_with_input_types() From 13be588c7b0bc602f1ec78f05dbb0c503dddac5e Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 24 Nov 2020 22:54:22 +0000 Subject: [PATCH 18/34] Space typo --- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index b8d42a1b5671..9654d699217b 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -213,7 +213,7 @@ class TensorRTRuntime : public JSONRuntimeBase { // Build engine. trt_engine_cache_[std::make_pair(symbol_name_, batch_size_)] = builder.BuildEngine(); LOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_ - << " with batch size" << batch_size_; + << " with batch size " << batch_size_; CacheEngineToDisk(); } From 87c92f8deb230166079b0a26f7779da5a7cd186e Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 24 Nov 2020 22:56:59 +0000 Subject: [PATCH 19/34] Change Log to Dlog --- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 9654d699217b..805e7e1bc7c3 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -178,8 +178,8 @@ class TensorRTRuntime : public JSONRuntimeBase { void BuildEngine() { batch_size_ = data_entry_[input_var_eid_[0]]->shape[0]; if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) return; - LOG(INFO) << "Building new TensorRT engine for subgraph " << symbol_name_ << " with batch size " - << batch_size_; + DLOG(INFO) << "Building new TensorRT engine for subgraph " << symbol_name_ + << " with batch size " << batch_size_; const bool use_fp16 = dmlc::GetEnv("TVM_TENSORRT_USE_FP16", false); TensorRTBuilder builder(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_, use_fp16, batch_size_); @@ -212,8 +212,8 @@ class TensorRTRuntime : public JSONRuntimeBase { // Build engine. trt_engine_cache_[std::make_pair(symbol_name_, batch_size_)] = builder.BuildEngine(); - LOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_ - << " with batch size " << batch_size_; + DLOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_ + << " with batch size " << batch_size_; CacheEngineToDisk(); } From 230c1257c18b32134b63212dd6a06193d6088614 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 25 Nov 2020 17:56:20 +0000 Subject: [PATCH 20/34] Move test to tensorrt.py --- tests/python/contrib/test_tensorrt.py | 99 +++++++++++++++++++ tests/python/frontend/pytorch/test_forward.py | 94 ------------------ 2 files changed, 99 insertions(+), 94 deletions(-) diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index 8b61323a71ad..d32268d5f80d 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -26,6 +26,10 @@ from tvm.contrib import graph_runtime, utils from tvm.runtime.vm import VirtualMachine from tvm.relay import Any, GlobalVar, transform +from typing import Dict, Tuple, Union +from tvm.contrib.download import download +import cv2 +from tvm.relay.op.contrib import tensorrt def skip_codegen_test(): @@ -1034,5 +1038,100 @@ def set_func_attr(func, compile_name, symbol_name): tvm.ir.assert_structural_equal(mod_trt, mod_exp, map_free_vars=True) +def test_maskrcnn_resnet50() -> None: + """ + This function tests the working of pytorch maskrcnn with resnet50 as backbone with + VM and VM + TRT. Since the order of compiled model outputs is a bit different from + original pytorch model, it uses a custom logic for comparison check. + """ + if skip_codegen_test() or skip_runtime_test(): + return + + class TraceWrapper(torch.nn.Module): + """ + This class is a wrapper over the torch module to convert the outputs into traceable form + """ + + def __init__(self, model: torch.nn.Module) -> None: + super().__init__() + self.model = model + + def forward( + self, inp: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + out = self.model(inp) + return out[0]["boxes"], out[0]["scores"], out[0]["labels"], out[0]["masks"] + + def get_traced_maskrcnn_model(np_sample_input: np.ndarray) -> torch.jit.TopLevelTracedModule: + """ + This function takes a sample input and returns the traced maskrcnn model + """ + model_func = torchvision.models.detection.maskrcnn_resnet50_fpn + model = TraceWrapper(model_func(pretrained=True)) + model.eval() + inp = torch.Tensor(np.random.uniform(0.0, 250.0, size=np_sample_input.shape)) + + with torch.no_grad(): + out = model(inp) + traced_module = torch.jit.trace(model, inp) + traced_module.eval() + + return traced_module + + def get_maskrcnn_input(in_size: int) -> np.ndarray: + """ + This function gets a real image with multiple objects of interest and returns it. + """ + input_shape = (1, 3, in_size, in_size) + img_path = "test_street_small.jpg" + img_url = ( + "https://raw.githubusercontent.com/dmlc/web-data/" + "master/gluoncv/detection/street_small.jpg" + ) + download(img_url, img_path) + + img = cv2.imread(img_path).astype("float32") + img = cv2.resize(img, (in_size, in_size)) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = np.transpose(img / 255.0, [2, 0, 1]) + img = np.expand_dims(img, axis=0) + + return img + + in_size = 300 + np_sample_input = get_maskrcnn_input(in_size) + traced_module = get_traced_maskrcnn_model(np_sample_input) + vm_trt_exec = convert_traced_model_to_vm_trt(traced_module, np_sample_input, target="llvm") + ctx = tvm.cpu() + vm = tvm.runtime.vm.VirtualMachine(vm_trt_exec, ctx) + vm.set_input("main", **{"input0": np_sample_input}) + tvm_res = vm.run() + + # Descending sort by scores and get the high confidence indices. In this example 9 is chosen, + # because this image has 9 boxes over 0.9 confidence + num_high_confidence_boxes = 9 + tvm_indices = np.argsort(-1 * tvm_res[1].asnumpy())[:num_high_confidence_boxes] + + with torch.no_grad(): + out = traced_module(torch.Tensor(np_sample_input)) + # Descending sort by scores and get the high confidence indices + pt_indices = np.argsort(-1 * out[1].numpy())[:num_high_confidence_boxes] + + tol = [1e-1, 5e-3, 1e-5, 4e-1] # [Box Tol, Score Tol, Label Tol, Mask Tol] + # Because of certain ops, there are certain minor differences in TVM outputs and PT outputs, + # This means that the tolerance can't be 1e-4 or 1e-5 throughout. The ideal way to get around + # this is to test it on an entire dataset and compare mAP with the original model. + # However, since that is not practically possible on CI, the following compromise is made. + # These tolerances are chosen based on their impact or lack thereof to the mAP score, e.g: + # 0.1 pixel difference of a box in a 300X300 image wont make any change. + for i, tol_val in zip(range(4), tol): + np.testing.assert_allclose( + tvm_res[i].asnumpy()[tvm_indices], + out[i].numpy()[pt_indices], + rtol=tol_val, + atol=tol_val, + ) + + if __name__ == "__main__": pytest.main([__file__]) diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py index 404614db71b9..362046fc5817 100644 --- a/tests/python/frontend/pytorch/test_forward.py +++ b/tests/python/frontend/pytorch/test_forward.py @@ -3384,99 +3384,6 @@ def convert_traced_model_to_vm_trt( return vm_trt_exec -def test_maskrcnn_resnet50() -> None: - """ - This function tests the working of pytorch maskrcnn with resnet50 as backbone with - VM and VM + TRT. Since the order of compiled model outputs is a bit different from - original pytorch model, it uses a custom logic for comparison check. - """ - - class TraceWrapper(torch.nn.Module): - """ - This class is a wrapper over the torch module to convert the outputs into traceable form - """ - - def __init__(self, model: torch.nn.Module) -> None: - super().__init__() - self.model = model - - def forward( - self, inp: torch.Tensor - ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: - out = self.model(inp) - return out[0]["boxes"], out[0]["scores"], out[0]["labels"], out[0]["masks"] - - def get_traced_maskrcnn_model(np_sample_input: np.ndarray) -> torch.jit.TopLevelTracedModule: - """ - This function takes a sample input and returns the traced maskrcnn model - """ - model_func = torchvision.models.detection.maskrcnn_resnet50_fpn - model = TraceWrapper(model_func(pretrained=True)) - model.eval() - inp = torch.Tensor(np.random.uniform(0.0, 250.0, size=np_sample_input.shape)) - - with torch.no_grad(): - out = model(inp) - traced_module = torch.jit.trace(model, inp) - traced_module.eval() - - return traced_module - - def get_maskrcnn_input(in_size: int) -> np.ndarray: - """ - This function gets a real image with multiple objects of interest and returns it. - """ - input_shape = (1, 3, in_size, in_size) - img_path = "test_street_small.jpg" - img_url = ( - "https://raw.githubusercontent.com/dmlc/web-data/" - "master/gluoncv/detection/street_small.jpg" - ) - download(img_url, img_path) - - img = cv2.imread(img_path).astype("float32") - img = cv2.resize(img, (in_size, in_size)) - img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - img = np.transpose(img / 255.0, [2, 0, 1]) - img = np.expand_dims(img, axis=0) - - return img - - in_size = 300 - np_sample_input = get_maskrcnn_input(in_size) - traced_module = get_traced_maskrcnn_model(np_sample_input) - vm_trt_exec = convert_traced_model_to_vm_trt(traced_module, np_sample_input, target="llvm") - ctx = tvm.cpu() - vm = tvm.runtime.vm.VirtualMachine(vm_trt_exec, ctx) - vm.set_input("main", **{"input0": np_sample_input}) - tvm_res = vm.run() - - # Descending sort by scores and get the high confidence indices. In this example 9 is chosen, - # because this image has 9 boxes over 0.9 confidence - num_high_confidence_boxes = 9 - tvm_indices = np.argsort(-1 * tvm_res[1].asnumpy())[:num_high_confidence_boxes] - - with torch.no_grad(): - out = traced_module(torch.Tensor(np_sample_input)) - # Descending sort by scores and get the high confidence indices - pt_indices = np.argsort(-1 * out[1].numpy())[:num_high_confidence_boxes] - - tol = [1e-1, 5e-3, 1e-5, 4e-1] # [Box Tol, Score Tol, Label Tol, Mask Tol] - # Because of certain ops, there are certain minor differences in TVM outputs and PT outputs, - # This means that the tolerance can't be 1e-4 or 1e-5 throughout. The ideal way to get around - # this is to test it on an entire dataset and compare mAP with the original model. - # However, since that is not practically possible on CI, the following compromise is made. - # These tolerances are chosen based on their impact or lack thereof to the mAP score, e.g: - # 0.1 pixel difference of a box in a 300X300 image wont make any change. - for i, tol_val in zip(range(4), tol): - np.testing.assert_allclose( - tvm_res[i].asnumpy()[tvm_indices], - out[i].numpy()[pt_indices], - rtol=tol_val, - atol=tol_val, - ) - - if __name__ == "__main__": # some structural tests test_forward_traced_function() @@ -3621,7 +3528,6 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray: test_segmentaton_models() test_3d_models() - test_maskrcnn_resnet50() # Quantization test from qnn_test import test_quantized_imagenet, test_quantized_modules From d17c4d02bdf72eb7b65f337d66989f0d23d8f262 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 25 Nov 2020 17:57:02 +0000 Subject: [PATCH 21/34] Remove imports --- tests/python/frontend/pytorch/test_forward.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py index 362046fc5817..f4a960bbfd55 100644 --- a/tests/python/frontend/pytorch/test_forward.py +++ b/tests/python/frontend/pytorch/test_forward.py @@ -29,11 +29,7 @@ from tvm.contrib import graph_runtime from tvm.contrib.nvcc import have_fp16 import tvm.testing -from typing import Dict, Tuple, Union from packaging import version as package_version -from tvm.contrib.download import download -import cv2 -from tvm.relay.op.contrib import tensorrt sys.setrecursionlimit(10000) From 5bdc028f9bc6ee08ed37de8aaf2afec711e9ddf3 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 25 Nov 2020 17:58:25 +0000 Subject: [PATCH 22/34] Remove function --- tests/python/frontend/pytorch/test_forward.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py index f4a960bbfd55..6250dfff811a 100644 --- a/tests/python/frontend/pytorch/test_forward.py +++ b/tests/python/frontend/pytorch/test_forward.py @@ -3363,23 +3363,6 @@ def test_fn(x, weights=None): verify_trace_model(test_fn, [inp, weights.to(torch.float64)], ["llvm"]) -def convert_traced_model_to_vm_trt( - traced_module: torch.jit.TopLevelTracedModule, np_sample_input: np.ndarray, target: str -) -> tvm.runtime.vm.Executable: - """ - This function converts a traced pytorch model to VM + TRT. - """ - input_shape = np_sample_input.shape - input_name = "input0" - shape_list = [(input_name, input_shape)] - mod, params = relay.frontend.from_pytorch(traced_module, shape_list) - mod, config = tensorrt.partition_for_tensorrt(mod, params, remove_no_mac_subgraphs=True) - with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]): - vm_trt_exec = relay.vm.compile(mod, target=target, params=params) - - return vm_trt_exec - - if __name__ == "__main__": # some structural tests test_forward_traced_function() From cab81b7c4177edfaa682dfcbb4b7ebff4c37cd52 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 25 Nov 2020 17:58:55 +0000 Subject: [PATCH 23/34] Add it to trt --- tests/python/contrib/test_tensorrt.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index d32268d5f80d..b3a8e1070033 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -1038,6 +1038,23 @@ def set_func_attr(func, compile_name, symbol_name): tvm.ir.assert_structural_equal(mod_trt, mod_exp, map_free_vars=True) +def convert_traced_model_to_vm_trt( + traced_module: torch.jit.TopLevelTracedModule, np_sample_input: np.ndarray, target: str +) -> tvm.runtime.vm.Executable: + """ + This function converts a traced pytorch model to VM + TRT. + """ + input_shape = np_sample_input.shape + input_name = "input0" + shape_list = [(input_name, input_shape)] + mod, params = relay.frontend.from_pytorch(traced_module, shape_list) + mod, config = tensorrt.partition_for_tensorrt(mod, params, remove_no_mac_subgraphs=True) + with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]): + vm_trt_exec = relay.vm.compile(mod, target=target, params=params) + + return vm_trt_exec + + def test_maskrcnn_resnet50() -> None: """ This function tests the working of pytorch maskrcnn with resnet50 as backbone with From 555505df262b9e7e2b5d0f0f4e89ce39d521f2a7 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 25 Nov 2020 20:41:51 +0000 Subject: [PATCH 24/34] import error --- tests/python/contrib/test_tensorrt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index b3a8e1070033..1a0fddc23ce7 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -28,7 +28,6 @@ from tvm.relay import Any, GlobalVar, transform from typing import Dict, Tuple, Union from tvm.contrib.download import download -import cv2 from tvm.relay.op.contrib import tensorrt @@ -1106,6 +1105,7 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray: "master/gluoncv/detection/street_small.jpg" ) download(img_url, img_path) + import cv2 img = cv2.imread(img_path).astype("float32") img = cv2.resize(img, (in_size, in_size)) From ee41e0775877932335b10680532ad2ec1b691e75 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 30 Nov 2020 08:05:54 +0000 Subject: [PATCH 25/34] Imports --- tests/python/contrib/test_tensorrt.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index 1a0fddc23ce7..6b4e5c0fce97 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -29,6 +29,8 @@ from typing import Dict, Tuple, Union from tvm.contrib.download import download from tvm.relay.op.contrib import tensorrt +import torch +import torchvision def skip_codegen_test(): @@ -1151,4 +1153,5 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray: if __name__ == "__main__": - pytest.main([__file__]) + test_maskrcnn_resnet50() + # pytest.main([__file__]) From 51b5c8acaa456c4a1157926ff8f86dcaf142cc97 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 30 Nov 2020 08:57:57 +0000 Subject: [PATCH 26/34] Add torch to CI --- tests/scripts/task_ci_python_setup.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/scripts/task_ci_python_setup.sh b/tests/scripts/task_ci_python_setup.sh index fe88ac650cc8..9d9f03ef71d8 100755 --- a/tests/scripts/task_ci_python_setup.sh +++ b/tests/scripts/task_ci_python_setup.sh @@ -31,3 +31,4 @@ set -o pipefail echo "Addtiional setup in" ${CI_IMAGE_NAME} python3 -m pip install --user tlcpack-sphinx-addon==0.1.2 synr==0.2.1 +python3 -m pip install --user torch==1.6.0+cpu torchvision==0.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html From 6fea65875838be02271033fb7cb075a4dd40b536 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 30 Nov 2020 17:49:22 +0000 Subject: [PATCH 27/34] trt_test --- tests/python/contrib/test_tensorrt.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index 6b4e5c0fce97..6510f14adcde 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -21,6 +21,9 @@ import tvm import tvm.relay.testing +import torch +import torchvision + from tvm import relay from tvm.relay.op.contrib import tensorrt from tvm.contrib import graph_runtime, utils @@ -29,8 +32,6 @@ from typing import Dict, Tuple, Union from tvm.contrib.download import download from tvm.relay.op.contrib import tensorrt -import torch -import torchvision def skip_codegen_test(): @@ -1153,5 +1154,4 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray: if __name__ == "__main__": - test_maskrcnn_resnet50() - # pytest.main([__file__]) + pytest.main([__file__]) From ea4af58486b33c61376b936c3a5a086ba8e70e15 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 30 Nov 2020 20:34:25 +0000 Subject: [PATCH 28/34] Check test --- tests/python/contrib/test_tensorrt.py | 31 ++++++++++++++++----------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index 6510f14adcde..ccc3dadc4c45 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -21,8 +21,6 @@ import tvm import tvm.relay.testing -import torch -import torchvision from tvm import relay from tvm.relay.op.contrib import tensorrt @@ -1040,7 +1038,20 @@ def set_func_attr(func, compile_name, symbol_name): tvm.ir.assert_structural_equal(mod_trt, mod_exp, map_free_vars=True) -def convert_traced_model_to_vm_trt( + +def test_maskrcnn_resnet50() -> None: + """ + This function tests the working of pytorch maskrcnn with resnet50 as backbone with + VM and VM + TRT. Since the order of compiled model outputs is a bit different from + original pytorch model, it uses a custom logic for comparison check. + """ + if skip_codegen_test(): + return + + import torch + import torchvision + + def convert_traced_model_to_vm_trt( traced_module: torch.jit.TopLevelTracedModule, np_sample_input: np.ndarray, target: str ) -> tvm.runtime.vm.Executable: """ @@ -1056,16 +1067,6 @@ def convert_traced_model_to_vm_trt( return vm_trt_exec - -def test_maskrcnn_resnet50() -> None: - """ - This function tests the working of pytorch maskrcnn with resnet50 as backbone with - VM and VM + TRT. Since the order of compiled model outputs is a bit different from - original pytorch model, it uses a custom logic for comparison check. - """ - if skip_codegen_test() or skip_runtime_test(): - return - class TraceWrapper(torch.nn.Module): """ This class is a wrapper over the torch module to convert the outputs into traceable form @@ -1122,6 +1123,10 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray: np_sample_input = get_maskrcnn_input(in_size) traced_module = get_traced_maskrcnn_model(np_sample_input) vm_trt_exec = convert_traced_model_to_vm_trt(traced_module, np_sample_input, target="llvm") + + if skip_runtime_test(): + return + ctx = tvm.cpu() vm = tvm.runtime.vm.VirtualMachine(vm_trt_exec, ctx) vm.set_input("main", **{"input0": np_sample_input}) From 49a4fa707fab5801474701f32f6b8fa801798fbd Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 30 Nov 2020 20:35:34 +0000 Subject: [PATCH 29/34] Revert Pytorch install --- tests/scripts/task_ci_python_setup.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/scripts/task_ci_python_setup.sh b/tests/scripts/task_ci_python_setup.sh index 9d9f03ef71d8..fe88ac650cc8 100755 --- a/tests/scripts/task_ci_python_setup.sh +++ b/tests/scripts/task_ci_python_setup.sh @@ -31,4 +31,3 @@ set -o pipefail echo "Addtiional setup in" ${CI_IMAGE_NAME} python3 -m pip install --user tlcpack-sphinx-addon==0.1.2 synr==0.2.1 -python3 -m pip install --user torch==1.6.0+cpu torchvision==0.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html From 28bc758299cdde0e0dc9a683ec550a289eede7d4 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 30 Nov 2020 20:44:21 +0000 Subject: [PATCH 30/34] Fix --- tests/python/contrib/test_tensorrt.py | 29 +++++++++++++-------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index ccc3dadc4c45..67d2e917b7dc 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -1038,7 +1038,6 @@ def set_func_attr(func, compile_name, symbol_name): tvm.ir.assert_structural_equal(mod_trt, mod_exp, map_free_vars=True) - def test_maskrcnn_resnet50() -> None: """ This function tests the working of pytorch maskrcnn with resnet50 as backbone with @@ -1052,20 +1051,20 @@ def test_maskrcnn_resnet50() -> None: import torchvision def convert_traced_model_to_vm_trt( - traced_module: torch.jit.TopLevelTracedModule, np_sample_input: np.ndarray, target: str -) -> tvm.runtime.vm.Executable: - """ - This function converts a traced pytorch model to VM + TRT. - """ - input_shape = np_sample_input.shape - input_name = "input0" - shape_list = [(input_name, input_shape)] - mod, params = relay.frontend.from_pytorch(traced_module, shape_list) - mod, config = tensorrt.partition_for_tensorrt(mod, params, remove_no_mac_subgraphs=True) - with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]): - vm_trt_exec = relay.vm.compile(mod, target=target, params=params) + traced_module: torch.jit.TopLevelTracedModule, np_sample_input: np.ndarray, target: str + ) -> tvm.runtime.vm.Executable: + """ + This function converts a traced pytorch model to VM + TRT. + """ + input_shape = np_sample_input.shape + input_name = "input0" + shape_list = [(input_name, input_shape)] + mod, params = relay.frontend.from_pytorch(traced_module, shape_list) + mod, config = tensorrt.partition_for_tensorrt(mod, params, remove_no_mac_subgraphs=True) + with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]): + vm_trt_exec = relay.vm.compile(mod, target=target, params=params) - return vm_trt_exec + return vm_trt_exec class TraceWrapper(torch.nn.Module): """ @@ -1125,7 +1124,7 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray: vm_trt_exec = convert_traced_model_to_vm_trt(traced_module, np_sample_input, target="llvm") if skip_runtime_test(): - return + return ctx = tvm.cpu() vm = tvm.runtime.vm.VirtualMachine(vm_trt_exec, ctx) From cf3016bfcac8ce1031f81792736d0c18bee64455 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 30 Nov 2020 21:02:31 +0000 Subject: [PATCH 31/34] test dynamic batch --- tests/python/contrib/test_tensorrt.py | 62 +++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index 67d2e917b7dc..4be6d84f47b1 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -1038,6 +1038,68 @@ def set_func_attr(func, compile_name, symbol_name): tvm.ir.assert_structural_equal(mod_trt, mod_exp, map_free_vars=True) +def test_tensorrt_dynamic_batch(): + if skip_codegen_test(): + return + + batches_to_test = [1, 1, 2, 3, 1, 3, 2] + x_shape = (relay.Any(), 1, 8, 8) + x_data = np.ones([max(batches_to_test)] + list(x_shape)[1:]).astype("float32") + result_dict = {} + for use_trt in [True, False]: + x = relay.var("x", shape=x_shape, dtype="float32") + out = relay.nn.relu(x) + f = relay.Function([x], out) + mod = tvm.IRModule() + mod["main"] = f + if use_trt: + mod = relay.tensorrt.EnableTrt(mod) + + if not skip_runtime_test(): + with relay.build_config(opt_level=3): + relay_exec = relay.create_executor("vm", mod=mod, ctx=tvm.cpu(0), target="llvm") + + for i, batch_size in enumerate(batches_to_test): + result_dict[(i, use_trt)] = relay_exec.evaluate()(x_data[:batch_size, ...]) + + if not skip_runtime_test(): + for i in range(len(batches_to_test)): + assert_result_matches(result_dict[(i, True)], result_dict[(i, False)]) + + +def test_tensorrt_dynamic_batch_conv(): + if skip_codegen_test(): + return + batches_to_test = [1, 1, 2, 3, 1, 3, 2] + x_shape = (relay.Any(), 32, 8, 8) + x_data = np.ones([max(batches_to_test)] + list(x_shape)[1:]).astype("float32") + k_shape = (16, 32, 3, 3) + params = {"kernel": np.random.uniform(-1, 1, k_shape).astype("float32")} + result_dict = {} + for use_trt in [True, False]: + x = relay.var("x", shape=x_shape, dtype="float32") + kernel = relay.var("kernel", shape=k_shape, dtype="float32") + out = relay.nn.conv2d(x, kernel, channels=16, kernel_size=(3, 3), groups=1) + f = relay.Function([x, kernel], out) + mod = tvm.IRModule() + mod["main"] = f + if use_trt: + mod = tensorrt.partition_for_tensorrt(mod, params) + + if not skip_runtime_test(): + with relay.build_config(opt_level=3): + relay_exec = relay.create_executor("vm", mod=mod, ctx=tvm.cpu(0), target="llvm") + + for i, batch_size in enumerate(batches_to_test): + result_dict[(i, use_trt)] = relay_exec.evaluate()( + x=x_data[:batch_size, ...], **params + ) + + if not skip_runtime_test(): + for i in range(len(batches_to_test)): + assert_result_matches(result_dict[(i, True)], result_dict[(i, False)]) + + def test_maskrcnn_resnet50() -> None: """ This function tests the working of pytorch maskrcnn with resnet50 as backbone with From 56c75bf80d783fd54c66ab722cd9cbc2d5f5fbe7 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 30 Nov 2020 21:19:48 +0000 Subject: [PATCH 32/34] TRT --- python/tvm/relay/op/contrib/tensorrt.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index 3fc3e5a0393f..3b697f6bdfcd 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -652,7 +652,7 @@ def reshape_annotate_fn(expr): # pylint: disable=unused-variable new_shape[i] = original_volume // np.prod([x for x in new_shape if x != -1]) # Remove batch dimension and see if volumes match if shape[0] != new_shape[0]: - print("reshape: can't modify batch dimension.") + logger.info("reshape: can't modify batch dimension.") return False return True @@ -850,7 +850,7 @@ def __init__(self): self.is_compute_intensive = False def visit_call(self, call): - heavy_ops = set( + compute_intensive_ops = set( [ "nn.conv2d", "nn.conv2d_transpose", @@ -861,12 +861,15 @@ def visit_call(self, call): ] ) if isinstance(call.op, tvm.tir.op.Op): - if str(call.op) in heavy_ops: + if str(call.op) in compute_intensive_ops: self.is_compute_intensive = True return super().visit_call(call) - def is_graph_compute_intensive(self, subgraph): + def is_graph_compute_intensive(self, subgraph) -> bool: + """ + This function recursively visits the graph and checks if it's compute intensive" + """ self.visit(subgraph) return self.is_compute_intensive From dc6aaef908c9e83496d1d2b2ee1f60561196fd56 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 30 Nov 2020 21:38:01 +0000 Subject: [PATCH 33/34] Resolve PR comments --- python/tvm/relay/op/contrib/tensorrt.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py index 3b697f6bdfcd..acd4f4740b2d 100644 --- a/python/tvm/relay/op/contrib/tensorrt.py +++ b/python/tvm/relay/op/contrib/tensorrt.py @@ -206,6 +206,9 @@ def _func_wrapper(expr): ] for arg in args ] + # Batched multiply operations don't work in implicit batch mode. The following shapes + # have been excluded because they occur in PT MaskRCNN model. The long term solution is + # to switch to explicit batch mode after performance regressions are solved. if all( [list(map(int, shape)) in [[300, 64, 7, 7], [300, 1, 1, 1]] for shape in shapes] ): @@ -881,12 +884,15 @@ def is_valid_subgraph(params, body): input_batch_sizes = [] for var in params: # In implicit batch mode, all inputs must have same batch size + # TODO: (codeislife99) : Fix different dynamic batch size inputs + if isinstance(var.checked_type, relay.TupleType): for tupe_type in var.checked_type.fields: # Scalar inputs not allowed if len(tupe_type.shape) == 0: logger.info("tensorrt: scalar inputs not supported") return False + if not isinstance(tupe_type.shape[0], tvm.tir.expr.Any): input_batch_sizes.append(int(tupe_type.shape[0])) else: From 121c46bc95f5c13fddf1dc0afb3188968772fab9 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 30 Nov 2020 22:58:54 +0000 Subject: [PATCH 34/34] Zero batch size add --- src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 1 + tests/python/contrib/test_tensorrt.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc index 805e7e1bc7c3..3f87f8d00ee6 100644 --- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc +++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc @@ -113,6 +113,7 @@ class TensorRTRuntime : public JSONRuntimeBase { void Run() override { BuildEngine(); batch_size_ = data_entry_[input_var_eid_[0]]->shape[0]; + if (batch_size_ == 0) return; auto& engine_and_context = trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_)); auto engine = engine_and_context.engine; auto context = engine_and_context.context; diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py index 4be6d84f47b1..10c311a6d363 100644 --- a/tests/python/contrib/test_tensorrt.py +++ b/tests/python/contrib/test_tensorrt.py @@ -1042,7 +1042,7 @@ def test_tensorrt_dynamic_batch(): if skip_codegen_test(): return - batches_to_test = [1, 1, 2, 3, 1, 3, 2] + batches_to_test = [1, 1, 0, 2, 3, 0, 1, 3, 2] x_shape = (relay.Any(), 1, 8, 8) x_data = np.ones([max(batches_to_test)] + list(x_shape)[1:]).astype("float32") result_dict = {} @@ -1070,7 +1070,7 @@ def test_tensorrt_dynamic_batch(): def test_tensorrt_dynamic_batch_conv(): if skip_codegen_test(): return - batches_to_test = [1, 1, 2, 3, 1, 3, 2] + batches_to_test = [1, 1, 0, 2, 3, 0, 1, 3, 2] x_shape = (relay.Any(), 32, 8, 8) x_data = np.ones([max(batches_to_test)] + list(x_shape)[1:]).astype("float32") k_shape = (16, 32, 3, 3)