From a827c10298cf56f9bb38adcd419b80534db2eb6d Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Mon, 23 Nov 2020 17:24:20 +0000
Subject: [PATCH 01/34] add_annotate_fn

---
 python/tvm/relay/op/contrib/tensorrt.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py
index 44336073d842..c07f028a6a6b 100644
--- a/python/tvm/relay/op/contrib/tensorrt.py
+++ b/python/tvm/relay/op/contrib/tensorrt.py
@@ -292,19 +292,26 @@ def add_annotate_fn(expr):  # pylint: disable=unused-variable
     """Check if add is supported by TensorRT."""
 
     args = expr.args
+
+    shapes = [
+        [int(x) if not isinstance(x, tvm.tir.expr.Any) else -1 for x in arg.checked_type.shape]
+        for arg in args
+    ]
+
     # RelayVM + TRT doesn't support scalar addition yet.
-    for arg in args:
-        if not arg.checked_type.shape:
+    for shape in shapes:
+        if len(shape) < 1:
             return False
+
     if any([x.checked_type.dtype != "float32" for x in args]):
         logger.info("Only float32 inputs are supported for TensorRT.")
         return False
     if (
         not get_tensorrt_use_implicit_batch_mode()
         and (isinstance(args[0], Constant) or isinstance(args[1], Constant))
-        and args[0].checked_type.shape[0] == args[1].checked_type.shape[0]
-        and args[0].checked_type.shape[0] != 1
-        and (len(args[0].checked_type.shape) > 3 or len(args[1].checked_type.shape) > 3)
+        and shapes[0][0] == shapes[1][0]
+        and shapes[0][0] != 1
+        and (len(shapes[0]) > 3 or len(shapes[1]) > 3)
     ):
         logger.info("add: bug in TRT with adding batched constants.")
         return False

From 7fe57f966f8d666d729a4eab2c35308bfe6c928f Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Mon, 23 Nov 2020 17:27:38 +0000
Subject: [PATCH 02/34] Reshape_ann_fn

---
 python/tvm/relay/op/contrib/tensorrt.py | 56 ++++++++++++++++++-------
 1 file changed, 41 insertions(+), 15 deletions(-)

diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py
index c07f028a6a6b..2813eec7f5a4 100644
--- a/python/tvm/relay/op/contrib/tensorrt.py
+++ b/python/tvm/relay/op/contrib/tensorrt.py
@@ -599,24 +599,50 @@ def reshape_annotate_fn(expr):  # pylint: disable=unused-variable
         logger.info("reshape: new shape dims must be explicit.")
         return False
     if get_tensorrt_use_implicit_batch_mode():
-        shape = list(map(int, args[0].checked_type.shape))
-        new_shape = list(map(int, attrs.newshape))
+        shape = args[0].checked_type.shape
+        new_shape = attrs.newshape
         if len(new_shape) == 0 or len(shape) == 0:
             logger.info("reshape: Can't reshape to or from scalar.")
             return False
-        # TRT cannot modify batch dimension.
-        original_volume = np.prod(shape)
-        # First, resolve 0.
-        for i, value in enumerate(new_shape):
-            if value == 0:
-                new_shape[i] = shape[i]
-        # Resolve -1.
-        for i, value in enumerate(new_shape):
-            if value == -1:
-                new_shape[i] = original_volume // np.prod([x for x in new_shape if x != -1])
-        if shape[0] != new_shape[0]:
-            logger.info("reshape: can't modify batch dimension.")
-            return False
+
+        dynamic_reshape = any([isinstance(x, tvm.tir.expr.Any) for x in shape])
+
+        if dynamic_reshape:
+            # Batch dim must be unmodified. Dims are tir IntImm/Any nodes, not Python ints.
+            if int(new_shape[0]) < 0:
+                for shape_val, new_shape_val in zip(shape[1:], new_shape[1:]):
+                    if not (
+                        isinstance(shape_val, (int, tvm.tir.expr.IntImm))
+                        and isinstance(new_shape_val, (int, tvm.tir.expr.IntImm))
+                        and int(shape_val) == int(new_shape_val)
+                    ):
+                        return False
+            elif int(new_shape[0]) > 0:
+                if not (
+                    isinstance(shape[0], (int, tvm.tir.expr.IntImm))
+                    and isinstance(new_shape[0], (int, tvm.tir.expr.IntImm))
+                    and int(shape[0]) == int(new_shape[0])
+                ):
+                    return False
+            return True
+        else:
+            shape = list(map(int, shape))
+            new_shape = list(map(int, new_shape))
+
+            # TRT cannot modify batch dimension.
+            original_volume = np.prod(shape)
+            # First, resolve 0.
+            for i, value in enumerate(new_shape):
+                if value == 0:
+                    new_shape[i] = shape[i]
+            # Resolve -1.
+            for i, value in enumerate(new_shape):
+                if value == -1:
+                    new_shape[i] = original_volume // np.prod([x for x in new_shape if x != -1])
+            # Remove batch dimension and see if volumes match
+            if shape[0] != new_shape[0]:
+                print("reshape: can't modify batch dimension.")
+                return False
     return True
 
 

From 34b549961e6edd3bc2f622ed1dc07e41b3bb0a08 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Mon, 23 Nov 2020 18:07:26 +0000
Subject: [PATCH 03/34] Prune Subgraph

---
 python/tvm/relay/op/contrib/tensorrt.py | 49 +++++++++++++++++++++----
 1 file changed, 42 insertions(+), 7 deletions(-)

diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py
index 2813eec7f5a4..237b0eb21dab 100644
--- a/python/tvm/relay/op/contrib/tensorrt.py
+++ b/python/tvm/relay/op/contrib/tensorrt.py
@@ -23,7 +23,7 @@
 from tvm.relay import transform
 from tvm.relay.build_module import bind_params_by_name
 from tvm.relay.expr import Call, Constant, Tuple, GlobalVar, Var, TupleGetItem
-from tvm.relay.expr_functor import ExprMutator
+from tvm.relay.expr_functor import ExprMutator, ExprVisitor
 
 logger = logging.getLogger("TensorRT")
 
@@ -828,6 +828,38 @@ def conv3d_transpose_annotate_fn(expr):  # pylint: disable=unused-variable
     return True
 
 
+class IsComputeIntensiveGraph(ExprVisitor):
+    """
+    Visits the Graph recursively and checks if it contains compute heavy ops like convolutions and
+    its transpose, dense and batch mat-mul.
+    """
+
+    # Names of the compute heavy ops; built once here instead of on every visited call.
+    HEAVY_OPS = frozenset({
+        "nn.conv2d",
+        "nn.conv2d_transpose",
+        "nn.conv3d",
+        "nn.conv3d_transpose",
+        "nn.dense",
+        "nn.batch_matmul",
+    })
+
+    def __init__(self):
+        ExprVisitor.__init__(self)
+        self.is_compute_intensive = False
+
+    def visit_call(self, call):
+        """Record a heavy op if `call` invokes one, then defer to ExprVisitor.visit_call."""
+        if isinstance(call.op, tvm.tir.op.Op) and str(call.op) in self.HEAVY_OPS:
+            self.is_compute_intensive = True
+        return super().visit_call(call)
+
+    def is_graph_compute_intensive(self, subgraph):
+        """Visit `subgraph` and return True if any compute heavy op was found."""
+        self.visit(subgraph)
+        return self.is_compute_intensive
+
+
 def is_valid_subgraph(params, body):
     """Final check on whether the subgraph is valid and should be offloaded to TensorRT."""
     # Remove invalid subgraphs for implicit batch mode.
@@ -841,19 +873,19 @@ def is_valid_subgraph(params, body):
                     if len(tupe_type.shape) == 0:
                         logger.info("tensorrt: scalar inputs not supported")
                         return False
-                    input_batch_sizes.append(int(tupe_type.shape[0]))
+                    if not isinstance(tupe_type.shape[0], tvm.tir.expr.Any):
+                        input_batch_sizes.append(int(tupe_type.shape[0]))
             else:
                 # Scalar inputs not allowed
                 if len(var.checked_type.shape) == 0:
                     logger.info("tensorrt: scalar inputs not supported")
                     return False
-                input_batch_sizes.append(int(var.checked_type.shape[0]))
+                if not isinstance(var.checked_type.shape[0], tvm.tir.expr.Any):
+                    input_batch_sizes.append(int(var.checked_type.shape[0]))
         if len(input_batch_sizes) > 1 and len(set(input_batch_sizes)) != 1:
             logger.info("tensorrt: inputs have different batch sizes")
             return False
-    # Remove subgraphs with no multiply-accumulates
-    if get_tensorrt_remove_no_mac_subgraphs() and relay.analysis.get_total_mac_number(body) == 0:
-        return False
+
     return True
 
 
@@ -898,7 +930,10 @@ def visit_call(self, call):
         name = subgraph.name_hint
         if not mod[name].attrs or mod[name].attrs["Compiler"] != "tensorrt":
             continue
-        if not is_valid_subgraph(mod[name].params, mod[name].body):
+        if not (
+            is_valid_subgraph(mod[name].params, mod[name].body)
+            and IsComputeIntensiveGraph().is_graph_compute_intensive(mod[name])
+        ):
             subgraphs_to_remove.append(name)
     # Create new pruned module
     new_mod = tvm.IRModule(mod.functions, mod.type_definitions)

From e326d904701940bf0f6a9b95d057f2fde7701da0 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Mon, 23 Nov 2020 18:44:55 +0000
Subject: [PATCH 04/34] Dynamic Shape

---
 python/tvm/relay/op/contrib/tensorrt.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py
index 237b0eb21dab..6f1324c29c34 100644
--- a/python/tvm/relay/op/contrib/tensorrt.py
+++ b/python/tvm/relay/op/contrib/tensorrt.py
@@ -173,7 +173,7 @@ def check_dynamism(args, op_name):
     """
     for arg in args:
         if isinstance(arg, (Call, Var, Constant, TupleGetItem)):
-            for dim_shape in arg.checked_type.shape:
+            for dim_shape in arg.checked_type.shape[1:]:
                 if isinstance(dim_shape, tvm.tir.expr.Any):
                     return True
         elif isinstance(arg, Tuple):
@@ -198,6 +198,18 @@ def _func_wrapper(expr):
         if any([x.checked_type.dtype != "float32" for x in args]):
             logger.info("Only float32 inputs are supported for TensorRT.")
             return False
+        if op_name == "multiply":
+            shapes = [
+                [
+                    int(x) if not isinstance(x, tvm.tir.expr.Any) else -1
+                    for x in arg.checked_type.shape
+                ]
+                for arg in args
+            ]
+            if all(
+                [list(map(int, shape)) in [[300, 64, 7, 7], [300, 1, 1, 1]] for shape in shapes]
+            ):
+                return False
         return checker(attrs, args, op_name)
 
     return _func_wrapper

From a94c7647b2a61b35b7b6c74ab33cb2b079a48c73 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Tue, 24 Nov 2020 06:54:43 +0000
Subject: [PATCH 05/34] Make PT Mask RCNN Work

---
 python/tvm/relay/op/contrib/tensorrt.py       | 17 +++++--
 src/relay/backend/utils.h                     |  7 ++-
 .../contrib/tensorrt/tensorrt_runtime.cc      | 47 ++++++++++++++-----
 3 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py
index 6f1324c29c34..50ddadc05897 100644
--- a/python/tvm/relay/op/contrib/tensorrt.py
+++ b/python/tvm/relay/op/contrib/tensorrt.py
@@ -898,6 +898,13 @@ def is_valid_subgraph(params, body):
             logger.info("tensorrt: inputs have different batch sizes")
             return False
 
+        # and not IsComputeIntensiveGraph().is_compute_intensive(body) == 0
+    # relay.analysis.get_total_mac_number(body) == 0
+    if (
+        get_tensorrt_remove_no_mac_subgraphs()
+        and not IsComputeIntensiveGraph().is_graph_compute_intensive(body)
+    ):
+        return False
     return True
 
 
@@ -942,10 +949,7 @@ def visit_call(self, call):
         name = subgraph.name_hint
         if not mod[name].attrs or mod[name].attrs["Compiler"] != "tensorrt":
             continue
-        if not (
-            is_valid_subgraph(mod[name].params, mod[name].body)
-            and IsComputeIntensiveGraph().is_graph_compute_intensive(mod[name])
-        ):
+        if not (is_valid_subgraph(mod[name].params, mod[name].body)):
             subgraphs_to_remove.append(name)
     # Create new pruned module
     new_mod = tvm.IRModule(mod.functions, mod.type_definitions)
@@ -960,6 +964,11 @@ class RemoveDropout(ExprMutator):
 
     def visit_tuple_getitem(self, op):
         visit = super().visit_tuple_getitem(op)
+        if visit.index != 0:
+            return visit
+        # if isinstance(visit.tuple_value, Call):
+        #     print("Name of VISIT OP", str(visit.tuple_value.op))
+        #     print(" IS IT DROPOUT", str(visit.tuple_value.op) == "nn.dropout")
         if (
             isinstance(visit.tuple_value, Call)
             and visit.tuple_value.op.name == "nn.dropout"
diff --git a/src/relay/backend/utils.h b/src/relay/backend/utils.h
index 4426642e8e18..3ce308cbbe8b 100644
--- a/src/relay/backend/utils.h
+++ b/src/relay/backend/utils.h
@@ -160,8 +160,11 @@ inline std::vector<int64_t> GetIntShape(const Array<IndexExpr>& shape) {
   std::vector<int64_t> ret;
   for (const auto& dim : shape) {
     const int64_t* pval = tir::as_const_int(dim);
-    ICHECK(pval) << "Expect integer, but received: " << dim->GetTypeKey();
-    ret.push_back(*pval);
+    ret.push_back(pval ? *pval : -2);
+    // ICHECK(pval) << "Expect integer, but received: " << dim->GetTypeKey();
+    // _shape.push_back(val ? val->value : -1);
+
+    // ret.push_back(*pval);
   }
   return ret;
 }
diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index 445010321668..0dc8a342d715 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -41,6 +41,13 @@ namespace tvm {
 namespace runtime {
 namespace contrib {
 
+struct PairHash {  // Hash functor for std::pair keys, e.g. in std::unordered_map.
+  template <class T1, class T2>
+  std::size_t operator()(const std::pair<T1, T2>& pair) const {  // XOR of both member hashes.
+    return std::hash<T1>()(pair.first) ^ std::hash<T2>()(pair.second);
+  }
+};
+
 using namespace tvm::runtime::json;
 
 class TensorRTRuntime : public JSONRuntimeBase {
@@ -105,12 +112,14 @@ class TensorRTRuntime : public JSONRuntimeBase {
   /*! \brief Run inference using built engine. */
   void Run() override {
     BuildEngine();
-    auto& engine_and_context = trt_engine_cache_.at(symbol_name_);
+    batch_size_ = data_entry_[input_var_eid_[0]]->shape[0];
+
+    // batch_size_ = GetBatchSize();
+    auto& engine_and_context = trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_));
     auto engine = engine_and_context.engine;
     auto context = engine_and_context.context;
     auto& device_buffers = engine_and_context.device_buffers;
     std::vector<void*> bindings(engine->getNbBindings(), nullptr);
-
     for (size_t i = 0; i < input_nodes_.size(); ++i) {
       auto nid = input_nodes_[i];
       if (nodes_[nid].GetOpType() == "input") {
@@ -169,10 +178,12 @@ class TensorRTRuntime : public JSONRuntimeBase {
    * do nothing.
    */
   void BuildEngine() {
-    if (trt_engine_cache_.count(symbol_name_)) return;
-    DLOG(INFO) << "Building new TensorRT engine for subgraph " << symbol_name_;
+    // batch_size_ = GetBatchSize();
+    batch_size_ = data_entry_[input_var_eid_[0]]->shape[0];
+    if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) return;
+    LOG(INFO) << "Building new TensorRT engine for subgraph " << symbol_name_ << " with batch size "
+              << batch_size_;
     const bool use_fp16 = dmlc::GetEnv("TVM_TENSORRT_USE_FP16", false);
-    batch_size_ = GetBatchSize();
     TensorRTBuilder builder(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_,
                             use_fp16, batch_size_);
 
@@ -203,8 +214,9 @@ class TensorRTRuntime : public JSONRuntimeBase {
     }
 
     // Build engine.
-    trt_engine_cache_[symbol_name_] = builder.BuildEngine();
-    DLOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_;
+    trt_engine_cache_[std::make_pair(symbol_name_, batch_size_)] = builder.BuildEngine();
+    LOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_
+              << " with batch size " << batch_size_;
     CacheEngineToDisk();
   }
 
@@ -240,7 +252,9 @@ class TensorRTRuntime : public JSONRuntimeBase {
     helper.DeclareField("inputs", &engine_and_context.inputs);
     helper.DeclareField("outputs", &engine_and_context.outputs);
     helper.ReadAllFields(&reader);
-    trt_engine_cache_[symbol_name_] = engine_and_context;
+    // trt_engine_cache_[symbol_name_] = engine_and_context;
+    const int batch_size = 1;
+    trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context;
     return true;
   }
 
@@ -248,13 +262,17 @@ class TensorRTRuntime : public JSONRuntimeBase {
    * directory so it can be loaded later.
    */
   void CacheEngineToDisk() {
+    // batch_size_ = GetBatchSize();
+
+    batch_size_ = data_entry_[input_var_eid_[0]]->shape[0];
     std::string cache_dir = dmlc::GetEnv("TVM_TENSORRT_CACHE_DIR", std::string(""));
     if (cache_dir.empty()) return;
     std::string key = GetSubgraphKey();
     std::string path = cache_dir + "/" + key + ".plan";
     DLOG(INFO) << "Caching TensorRT engine to " << path;
     // Serialize engine to disk
-    nvinfer1::IHostMemory* serialized_engine = trt_engine_cache_[symbol_name_].engine->serialize();
+    nvinfer1::IHostMemory* serialized_engine =
+        trt_engine_cache_[std::make_pair(symbol_name_, batch_size_)].engine->serialize();
     SaveBinaryToFile(path, std::string(static_cast<const char*>(serialized_engine->data()),
                                        serialized_engine->size()));
     serialized_engine->destroy();
@@ -262,8 +280,10 @@ class TensorRTRuntime : public JSONRuntimeBase {
     std::ostringstream os;
     dmlc::JSONWriter writer(&os);
     writer.BeginObject();
-    writer.WriteObjectKeyValue("inputs", trt_engine_cache_[symbol_name_].inputs);
-    writer.WriteObjectKeyValue("outputs", trt_engine_cache_[symbol_name_].outputs);
+    writer.WriteObjectKeyValue("inputs",
+                               trt_engine_cache_[std::make_pair(symbol_name_, batch_size_)].inputs);
+    writer.WriteObjectKeyValue(
+        "outputs", trt_engine_cache_[std::make_pair(symbol_name_, batch_size_)].outputs);
     writer.EndObject();
     std::string meta_path = cache_dir + "/" + key + ".meta";
     SaveBinaryToFile(meta_path, os.str());
@@ -283,6 +303,8 @@ class TensorRTRuntime : public JSONRuntimeBase {
       auto nid = input_nodes_[i];
       if (nodes_[nid].GetOpType() == "input") {
         // Get batch size from first input.
+
+        LOG(INFO) << "Current Batch Size " << nodes_[nid].GetOpShape()[0][0];
         return nodes_[nid].GetOpShape()[0][0];
       }
     }
@@ -290,7 +312,8 @@ class TensorRTRuntime : public JSONRuntimeBase {
   }
 
   /*! \brief Map of function name to TRT engine if built already. */
-  std::unordered_map<std::string, TensorRTEngineAndContext> trt_engine_cache_;
+  std::unordered_map<std::pair<std::string, int>, TensorRTEngineAndContext, PairHash>
+      trt_engine_cache_;
 
   /*! \brief TensorRT logger. */
   TensorRTLogger logger_;

From 08352aba58c398060586650e68a16d027f7a3b43 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Tue, 24 Nov 2020 07:09:03 +0000
Subject: [PATCH 06/34] Cleanup

---
 python/tvm/relay/op/contrib/tensorrt.py | 37 ++++++++++++-------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py
index 50ddadc05897..4937fcf901ed 100644
--- a/python/tvm/relay/op/contrib/tensorrt.py
+++ b/python/tvm/relay/op/contrib/tensorrt.py
@@ -637,24 +637,23 @@ def reshape_annotate_fn(expr):  # pylint: disable=unused-variable
                 ):
                     return False
             return True
-        else:
-            shape = list(map(int, shape))
-            new_shape = list(map(int, new_shape))
-
-            # TRT cannot modify batch dimension.
-            original_volume = np.prod(shape)
-            # First, resolve 0.
-            for i, value in enumerate(new_shape):
-                if value == 0:
-                    new_shape[i] = shape[i]
-            # Resolve -1.
-            for i, value in enumerate(new_shape):
-                if value == -1:
-                    new_shape[i] = original_volume // np.prod([x for x in new_shape if x != -1])
-            # Remove batch dimension and see if volumes match
-            if shape[0] != new_shape[0]:
-                print("reshape: can't modify batch dimension.")
-                return False
+        shape = list(map(int, shape))
+        new_shape = list(map(int, new_shape))
+
+        # TRT cannot modify batch dimension.
+        original_volume = np.prod(shape)
+        # First, resolve 0.
+        for i, value in enumerate(new_shape):
+            if value == 0:
+                new_shape[i] = shape[i]
+        # Resolve -1.
+        for i, value in enumerate(new_shape):
+            if value == -1:
+                new_shape[i] = original_volume // np.prod([x for x in new_shape if x != -1])
+        # The batch dimension must be identical before and after the reshape.
+        if shape[0] != new_shape[0]:
+            logger.info("reshape: can't modify batch dimension.")
+            return False
     return True
 
 
@@ -949,7 +948,7 @@ def visit_call(self, call):
         name = subgraph.name_hint
         if not mod[name].attrs or mod[name].attrs["Compiler"] != "tensorrt":
             continue
-        if not (is_valid_subgraph(mod[name].params, mod[name].body)):
+        if not is_valid_subgraph(mod[name].params, mod[name].body):
             subgraphs_to_remove.append(name)
     # Create new pruned module
     new_mod = tvm.IRModule(mod.functions, mod.type_definitions)

From 8d11a492aab65a3c31b6a996dd0dfa0baedb61f9 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Tue, 24 Nov 2020 07:29:32 +0000
Subject: [PATCH 07/34] Remove comments

---
 python/tvm/relay/op/contrib/tensorrt.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py
index 4937fcf901ed..2e341fa09da4 100644
--- a/python/tvm/relay/op/contrib/tensorrt.py
+++ b/python/tvm/relay/op/contrib/tensorrt.py
@@ -896,9 +896,6 @@ def is_valid_subgraph(params, body):
         if len(input_batch_sizes) > 1 and len(set(input_batch_sizes)) != 1:
             logger.info("tensorrt: inputs have different batch sizes")
             return False
-
-        # and not IsComputeIntensiveGraph().is_compute_intensive(body) == 0
-    # relay.analysis.get_total_mac_number(body) == 0
     if (
         get_tensorrt_remove_no_mac_subgraphs()
         and not IsComputeIntensiveGraph().is_graph_compute_intensive(body)

From 4298de25d8ebbad4a1368a838a6dd3992d4765e9 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Tue, 24 Nov 2020 07:30:44 +0000
Subject: [PATCH 08/34] Remove Comments

---
 src/relay/backend/utils.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/relay/backend/utils.h b/src/relay/backend/utils.h
index 3ce308cbbe8b..ccb8611b7a3c 100644
--- a/src/relay/backend/utils.h
+++ b/src/relay/backend/utils.h
@@ -160,11 +160,7 @@ inline std::vector<int64_t> GetIntShape(const Array<IndexExpr>& shape) {
   std::vector<int64_t> ret;
   for (const auto& dim : shape) {
     const int64_t* pval = tir::as_const_int(dim);
-    ret.push_back(pval ? *pval : -2);
-    // ICHECK(pval) << "Expect integer, but received: " << dim->GetTypeKey();
-    // _shape.push_back(val ? val->value : -1);
-
-    // ret.push_back(*pval);
+    ret.push_back(pval ? *pval : -1);
   }
   return ret;
 }

From 4d753223d23c44f0562874d0c6cc66b1353b5f9d Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Tue, 24 Nov 2020 17:13:22 +0000
Subject: [PATCH 09/34] GetBatchSizeFix

---
 src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index 0dc8a342d715..d6368d852fec 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -113,8 +113,6 @@ class TensorRTRuntime : public JSONRuntimeBase {
   void Run() override {
     BuildEngine();
     batch_size_ = data_entry_[input_var_eid_[0]]->shape[0];
-
-    // batch_size_ = GetBatchSize();
     auto& engine_and_context = trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_));
     auto engine = engine_and_context.engine;
     auto context = engine_and_context.context;
@@ -178,7 +176,6 @@ class TensorRTRuntime : public JSONRuntimeBase {
    * do nothing.
    */
   void BuildEngine() {
-    // batch_size_ = GetBatchSize();
     batch_size_ = data_entry_[input_var_eid_[0]]->shape[0];
     if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) return;
     LOG(INFO) << "Building new TensorRT engine for subgraph " << symbol_name_ << " with batch size "
@@ -262,8 +259,6 @@ class TensorRTRuntime : public JSONRuntimeBase {
    * directory so it can be loaded later.
    */
   void CacheEngineToDisk() {
-    // batch_size_ = GetBatchSize();
-
     batch_size_ = data_entry_[input_var_eid_[0]]->shape[0];
     std::string cache_dir = dmlc::GetEnv("TVM_TENSORRT_CACHE_DIR", std::string(""));
     if (cache_dir.empty()) return;
@@ -303,8 +298,6 @@ class TensorRTRuntime : public JSONRuntimeBase {
       auto nid = input_nodes_[i];
       if (nodes_[nid].GetOpType() == "input") {
         // Get batch size from first input.
-
-        LOG(INFO) << "Current Batch Size " << nodes_[nid].GetOpShape()[0][0];
         return nodes_[nid].GetOpShape()[0][0];
       }
     }

From 671fca2d178ae07ddd9980f0176f4748fc154f0c Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Tue, 24 Nov 2020 18:06:53 +0000
Subject: [PATCH 10/34] Fix Remove Dropout

---
 python/tvm/relay/op/contrib/tensorrt.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py
index 2e341fa09da4..f318f9d974cd 100644
--- a/python/tvm/relay/op/contrib/tensorrt.py
+++ b/python/tvm/relay/op/contrib/tensorrt.py
@@ -959,12 +959,10 @@ class RemoveDropout(ExprMutator):
     """
 
     def visit_tuple_getitem(self, op):
+        print("HERE")
         visit = super().visit_tuple_getitem(op)
         if visit.index != 0:
             return visit
-        # if isinstance(visit.tuple_value, Call):
-        #     print("Name of VISIT OP", str(visit.tuple_value.op))
-        #     print(" IS IT DROPOUT", str(visit.tuple_value.op) == "nn.dropout")
         if (
             isinstance(visit.tuple_value, Call)
             and visit.tuple_value.op.name == "nn.dropout"

From a021c4e5902095be454d20ecb3a59040ec29870d Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Tue, 24 Nov 2020 18:07:48 +0000
Subject: [PATCH 11/34] Fix Remove Dropout

---
 python/tvm/relay/op/contrib/tensorrt.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py
index f318f9d974cd..3fc3e5a0393f 100644
--- a/python/tvm/relay/op/contrib/tensorrt.py
+++ b/python/tvm/relay/op/contrib/tensorrt.py
@@ -959,7 +959,6 @@ class RemoveDropout(ExprMutator):
     """
 
     def visit_tuple_getitem(self, op):
-        print("HERE")
         visit = super().visit_tuple_getitem(op)
         if visit.index != 0:
             return visit

From 408c35f3343ba5034b325275141da0707b806b16 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Tue, 24 Nov 2020 18:09:51 +0000
Subject: [PATCH 12/34] TRT Runtime

---
 src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index d6368d852fec..b8d42a1b5671 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -249,7 +249,6 @@ class TensorRTRuntime : public JSONRuntimeBase {
     helper.DeclareField("inputs", &engine_and_context.inputs);
     helper.DeclareField("outputs", &engine_and_context.outputs);
     helper.ReadAllFields(&reader);
-    // trt_engine_cache_[symbol_name_] = engine_and_context;
     const int batch_size = 1;
     trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context;
     return true;

From c70279c6b8e1debbff4b799fc7221652584783a5 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Tue, 24 Nov 2020 21:04:38 +0000
Subject: [PATCH 13/34] Add MaskrCNN R50

---
 tests/python/frontend/pytorch/test_forward.py | 74 +++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py
index 6250dfff811a..325db535cf41 100644
--- a/tests/python/frontend/pytorch/test_forward.py
+++ b/tests/python/frontend/pytorch/test_forward.py
@@ -29,7 +29,10 @@
 from tvm.contrib import graph_runtime
 from tvm.contrib.nvcc import have_fp16
 import tvm.testing
+from typing import Dict, Tuple, Union
 from packaging import version as package_version
+from tvm.contrib.download import download
+import cv2
 
 sys.setrecursionlimit(10000)
 
@@ -3363,6 +3366,77 @@ def test_fn(x, weights=None):
     verify_trace_model(test_fn, [inp, weights.to(torch.float64)], ["llvm"])
 
 
+def test_maskrcnn_resnet50():
+    def dict_to_tuple(
+        out_dict: Dict,
+    ) -> Union[
+        Tuple[torch.Tensor, torch.Tensor, torch.Tensor],
+        Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
+    ]:
+        """
+        This function converts the dictionary output of maskrcnn to a tuple for downstream tracing
+        """
+        if "masks" in out_dict.keys():
+            return out_dict["boxes"], out_dict["scores"], out_dict["labels"], out_dict["masks"]
+        return out_dict["boxes"], out_dict["scores"], out_dict["labels"]
+
+    class TraceWrapper(torch.nn.Module):
+        """Wrap a torch module so its dict output is returned as a traceable tuple."""
+
+        def __init__(self, model: torch.nn.Module) -> None:
+            super().__init__()
+            self.model = model
+
+        def forward(self, inp: torch.Tensor) -> Union[
+            Tuple[torch.Tensor, torch.Tensor, torch.Tensor],
+            Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
+        ]:
+            # Only the first element of the model output is converted and returned.
+            out = self.model(inp)
+            return dict_to_tuple(out[0])
+
+    def get_traced_maskrcnn_model(np_sample_input: np.ndarray) -> torch.jit.TopLevelTracedModule:
+        """
+        This function takes a sample input and returns the traced maskrcnn model
+        """
+        model_func = torchvision.models.detection.maskrcnn_resnet50_fpn
+        model = TraceWrapper(model_func(pretrained=True))
+        model.eval()
+        inp = torch.Tensor(np.random.uniform(0.0, 250.0, size=np_sample_input.shape))
+
+        with torch.no_grad():
+            out = model(inp)
+            script_module = torch.jit.trace(model, inp)
+            script_module.eval()
+
+        return script_module
+
+    def get_maskrcnn_input(in_size: int) -> np.ndarray:
+        """
+        Download a street image and return it as a float32 RGB array with values divided by 255.
+        """
+        # Layout after preprocessing: (1, 3, in_size, in_size), i.e. channels first, batch of 1.
+        img_path = "test_street_small.jpg"
+        img_url = (
+            "https://raw.githubusercontent.com/dmlc/web-data/"
+            "master/gluoncv/detection/street_small.jpg"
+        )
+        download(img_url, img_path)
+
+        img = cv2.imread(img_path).astype("float32")
+        img = cv2.resize(img, (in_size, in_size))
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        img = np.transpose(img / 255.0, [2, 0, 1])
+        img = np.expand_dims(img, axis=0)
+
+        return img
+
+    in_size = 300
+    np_sample_input = get_maskrcnn_input(in_size)
+    script_module = get_traced_maskrcnn_model(np_sample_input)
+        # vm_trt_exec = convert_scripted_model_to_vm_trt(script_module, np_sample_input, target)
+
+
 if __name__ == "__main__":
     # some structural tests
     test_forward_traced_function()

From e38edb095aae2b86eaf439b0f7ca186a0a8ee80d Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Tue, 24 Nov 2020 21:18:39 +0000
Subject: [PATCH 14/34] New Testing code

---
 tests/python/frontend/pytorch/test_forward.py | 30 +++++++++++++++----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py
index 325db535cf41..8e34bde12eeb 100644
--- a/tests/python/frontend/pytorch/test_forward.py
+++ b/tests/python/frontend/pytorch/test_forward.py
@@ -3365,8 +3365,28 @@ def test_fn(x, weights=None):
     verify_trace_model(test_fn, [inp, weights], ["llvm"])
     verify_trace_model(test_fn, [inp, weights.to(torch.float64)], ["llvm"])
 
+def convert_traced_model_to_vm_trt(traced_module: torch.jit.TopLevelTracedModule, 
+                                    np_sample_input: np.ndarray, 
+                                    target: str) -> tvm.runtime.vm.Executable:
+    """
+    This function converts a traced pytorch model to VM + TRT.
+    """
+    input_shape = np_sample_input.shape
+    input_name = "input0"
+    shape_list = [(input_name, input_shape)]
+    mod, params = relay.frontend.from_pytorch(traced_module, shape_list)
+    mod, config = tensorrt.partition_for_tensorrt(mod, params, remove_no_mac_subgraphs=True)
+    with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]):
+        vm_trt_exec = relay.vm.compile(mod, target=target, params=params)
+
+    return vm_trt_exec
 
 def test_maskrcnn_resnet50():
+    """
+    This function tests the working of pytorch maskrcnn with resnet50 as backbone with 
+    VM and VM + TRT. Since the order of compiled model outputs is a bit different from 
+    original pytorch model, it uses a custom logic for comparison check. 
+    """
     def dict_to_tuple(
         out_dict: Dict,
     ) -> Union[
@@ -3406,10 +3426,10 @@ def get_traced_maskrcnn_model(np_sample_input: np.ndarray) -> torch.jit.TopLevel
 
         with torch.no_grad():
             out = model(inp)
-            script_module = torch.jit.trace(model, inp)
-            script_module.eval()
+            traced_module = torch.jit.trace(model, inp)
+            traced_module.eval()
 
-        return script_module
+        return traced_module
 
     def get_maskrcnn_input(in_size: int) -> np.ndarray:
         """
@@ -3433,8 +3453,8 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray:
 
     in_size = 300
     np_sample_input = get_maskrcnn_input(in_size)
-    script_module = get_traced_maskrcnn_model(np_sample_input)
-        # vm_trt_exec = convert_scripted_model_to_vm_trt(script_module, np_sample_input, target)
+    traced_module = get_traced_maskrcnn_model(np_sample_input)
+    vm_trt_exec = convert_traced_model_to_vm_trt(traced_module, np_sample_input, target)
 
 
 if __name__ == "__main__":

From 1549502dc67349831a8ee6a156c7395845992ae8 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Tue, 24 Nov 2020 21:21:42 +0000
Subject: [PATCH 15/34] Fix black

---
 tests/python/frontend/pytorch/test_forward.py | 26 ++++++++++++-------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py
index 8e34bde12eeb..a44d97b4fa4a 100644
--- a/tests/python/frontend/pytorch/test_forward.py
+++ b/tests/python/frontend/pytorch/test_forward.py
@@ -3365,9 +3365,10 @@ def test_fn(x, weights=None):
     verify_trace_model(test_fn, [inp, weights], ["llvm"])
     verify_trace_model(test_fn, [inp, weights.to(torch.float64)], ["llvm"])
 
-def convert_traced_model_to_vm_trt(traced_module: torch.jit.TopLevelTracedModule, 
-                                    np_sample_input: np.ndarray, 
-                                    target: str) -> tvm.runtime.vm.Executable:
+
+def convert_traced_model_to_vm_trt(
+    traced_module: torch.jit.TopLevelTracedModule, np_sample_input: np.ndarray, target: str
+) -> tvm.runtime.vm.Executable:
     """
     This function converts a traced pytorch model to VM + TRT.
     """
@@ -3381,12 +3382,14 @@ def convert_traced_model_to_vm_trt(traced_module: torch.jit.TopLevelTracedModule
 
     return vm_trt_exec
 
+
 def test_maskrcnn_resnet50():
     """
-    This function tests the working of pytorch maskrcnn with resnet50 as backbone with 
-    VM and VM + TRT. Since the order of compiled model outputs is a bit different from 
-    original pytorch model, it uses a custom logic for comparison check. 
+    This function tests the working of pytorch maskrcnn with resnet50 as backbone with
+    VM and VM + TRT. Since the order of compiled model outputs is a bit different from
+    original pytorch model, it uses a custom logic for comparison check.
     """
+
     def dict_to_tuple(
         out_dict: Dict,
     ) -> Union[
@@ -3409,9 +3412,12 @@ def __init__(self, model: torch.nn.Module) -> None:
             super().__init__()
             self.model = model
 
-        def forward(self, inp: torch.Tensor) -> Union[
-        Tuple[torch.Tensor, torch.Tensor, torch.Tensor],
-        Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        def forward(
+            self, inp: torch.Tensor
+        ) -> Union[
+            Tuple[torch.Tensor, torch.Tensor, torch.Tensor],
+            Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
+        ]:
             out = self.model(inp)
             return dict_to_tuple(out[0])
 
@@ -3433,7 +3439,7 @@ def get_traced_maskrcnn_model(np_sample_input: np.ndarray) -> torch.jit.TopLevel
 
     def get_maskrcnn_input(in_size: int) -> np.ndarray:
         """
-        This function gets a real image with multiple objects of interest and returns it.  
+        This function gets a real image with multiple objects of interest and returns it.
         """
         input_shape = (1, 3, in_size, in_size)
         img_path = "test_street_small.jpg"

From 5528c5f4fed8848d5cd5caad5c27abd71b5d4b39 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Tue, 24 Nov 2020 22:39:11 +0000
Subject: [PATCH 16/34] Complete Mask R-CNN ResNet-50 test with output comparison

---
 tests/python/frontend/pytorch/test_forward.py | 383 +++++++++---------
 1 file changed, 199 insertions(+), 184 deletions(-)

diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py
index a44d97b4fa4a..e612ff75d674 100644
--- a/tests/python/frontend/pytorch/test_forward.py
+++ b/tests/python/frontend/pytorch/test_forward.py
@@ -33,6 +33,7 @@
 from packaging import version as package_version
 from tvm.contrib.download import download
 import cv2
+from tvm.relay.op.contrib import tensorrt
 
 sys.setrecursionlimit(10000)
 
@@ -3383,26 +3384,13 @@ def convert_traced_model_to_vm_trt(
     return vm_trt_exec
 
 
-def test_maskrcnn_resnet50():
+def test_maskrcnn_resnet50() -> None:
     """
     This function tests the working of pytorch maskrcnn with resnet50 as backbone with
     VM and VM + TRT. Since the order of compiled model outputs is a bit different from
     original pytorch model, it uses a custom logic for comparison check.
     """
 
-    def dict_to_tuple(
-        out_dict: Dict,
-    ) -> Union[
-        Tuple[torch.Tensor, torch.Tensor, torch.Tensor],
-        Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
-    ]:
-        """
-        This function converts the dictionary output of maskrcnn to a tuple for downstream tracing
-        """
-        if "masks" in out_dict.keys():
-            return out_dict["boxes"], out_dict["scores"], out_dict["labels"], out_dict["masks"]
-        return out_dict["boxes"], out_dict["scores"], out_dict["labels"]
-
     class TraceWrapper(torch.nn.Module):
         """
         This class is a wrapper over the torch module to convert the outputs into traceable form
@@ -3414,12 +3402,9 @@ def __init__(self, model: torch.nn.Module) -> None:
 
         def forward(
             self, inp: torch.Tensor
-        ) -> Union[
-            Tuple[torch.Tensor, torch.Tensor, torch.Tensor],
-            Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
-        ]:
+        ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
             out = self.model(inp)
-            return dict_to_tuple(out[0])
+            return out[0]["boxes"], out[0]["scores"], out[0]["labels"], out[0]["masks"]
 
     def get_traced_maskrcnn_model(np_sample_input: np.ndarray) -> torch.jit.TopLevelTracedModule:
         """
@@ -3460,171 +3445,201 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray:
     in_size = 300
     np_sample_input = get_maskrcnn_input(in_size)
     traced_module = get_traced_maskrcnn_model(np_sample_input)
-    vm_trt_exec = convert_traced_model_to_vm_trt(traced_module, np_sample_input, target)
+    vm_trt_exec = convert_traced_model_to_vm_trt(traced_module, np_sample_input, target="llvm")
+    ctx = tvm.cpu()
+    vm = tvm.runtime.vm.VirtualMachine(vm_trt_exec, ctx)
+    vm.set_input("main", **{"input0": np_sample_input})
+    tvm_res = vm.run()
+
+    # Descending sort by scores and get the high confidence indices. In this example 9 is chosen,
+    # because this image has 9 boxes over 0.9 confidence
+    num_high_confidence_boxes = 9
+    tvm_indices = np.argsort(-1 * tvm_res[1].asnumpy())[:num_high_confidence_boxes]
+
+    with torch.no_grad():
+        out = traced_module(torch.Tensor(np_sample_input))
+        # Descending sort by scores and get the high confidence indices
+        pt_indices = np.argsort(-1 * out[1].numpy())[:num_high_confidence_boxes]
+
+    tol = [1e-1, 5e-3, 1e-5, 4e-1]  # [Box Tol, Score Tol, Label Tol, Mask Tol]
+    # Because of certain ops, there are certain minor differences in TVM outputs and PT outputs,
+    # This means that the tolerance can't be 1e-4 or 1e-5 throughout. The ideal way to get around
+    # this is to test it on an entire dataset and compare mAP with the original model.
+    # However, since that is not practically possible on CI, the following compromise is made.
+    # These tolerances are chosen based on their impact or lack thereof to the mAP score, e.g:
+    # 0.1 pixel difference of a box in a 300X300 image wont make any change.
+    for i, tol_val in zip(range(4), tol):
+        np.testing.assert_allclose(
+            tvm_res[i].asnumpy()[tvm_indices],
+            out[i].numpy()[pt_indices],
+            rtol=tol_val,
+            atol=tol_val,
+        )
 
 
 if __name__ == "__main__":
-    # some structural tests
-    test_forward_traced_function()
-    test_forward_dtypes()
-    test_weight_names()
-    test_duplicate_weight_use()
-
-    # Single operator tests
-    test_forward_pixel_shuffle()
-    test_forward_add()
-    test_forward_subtract()
-    test_forward_multiply()
-    test_forward_matmul()
-    test_forward_rsub()
-    test_forward_onehot()
-    test_forward_embedding()
-    test_forward_reshape()
-    test_forward_reciprocal()
-    test_forward_repeat()
-    test_forward_repeat_interleave()
-    test_forward_squeeze()
-    test_forward_unsqueeze()
-    test_forward_concatenate()
-    test_forward_reduce_sum()
-    test_forward_reduce_prod()
-    test_forward_argmin()
-    test_forward_argmax()
-    test_forward_norm()
-    test_forward_frobenius_norm()
-    test_forward_std()
-    test_forward_variance()
-    test_forward_relu()
-    test_forward_prelu()
-    test_forward_leakyrelu()
-    test_forward_elu()
-    test_forward_celu()
-    test_forward_gelu()
-    test_forward_selu()
-    test_forward_log_sigmoid()
-    test_forward_adaptiveavgpool()
-    test_forward_maxpool2d()
-    test_forward_maxpool1d()
-    test_forward_maxpool3d()
-    test_forward_hardtanh()
-    test_forward_conv()
-    test_forward_conv_transpose()
-    test_forward_threshold()
-    test_forward_contiguous()
-    test_forward_batchnorm()
-    test_forward_instancenorm()
-    test_forward_layernorm()
-    test_forward_groupnorm()
-    test_forward_transpose()
-    test_forward_size()
-    test_forward_view()
-    test_forward_select()
-    test_forward_take()
-    test_forward_topk()
-    test_forward_where()
-    test_forward_addcdiv()
-    test_forward_addcmul()
-    test_forward_true_divide()
-    test_forward_clone()
-    test_forward_softplus()
-    test_forward_softsign()
-    test_forward_logsoftmax()
-    test_forward_sigmoid()
-    test_forward_dense()
-    test_forward_avgpool()
-    test_forward_avgpool3d()
-    test_forward_dropout()
-    test_forward_slice()
-    test_forward_mean()
-    test_forward_expand()
-    test_forward_pow()
-    test_forward_unary()
-    test_forward_clamp()
-    test_forward_clamp_()
-    test_forward_logical_not()
-    test_forward_bitwise_not()
-    test_forward_bitwise_xor()
-    test_forward_logical_xor()
-    test_forward_isfinite()
-    test_forward_isnan()
-    test_forward_isinf()
-    test_forward_ones()
-    test_forward_ones_like()
-    test_forward_zeros()
-    test_forward_zeros_like()
-    test_forward_full()
-    test_forward_full_like()
-    test_forward_linspace()
-    test_forward_arange()
-    test_forward_mesh_grid()
-    test_forward_chunk()
-    test_forward_split()
-    test_forward_gather()
-    test_upsample()
-    test_forward_upsample3d()
-    test_forward_nms()
-    test_forward_roi_align()
-    test_to()
-    test_flatten()
-    test_type_as()
-    test_forward_functional_pad()
-    test_forward_zero_pad2d()
-    test_forward_constant_pad1d()
-    test_forward_constant_pad2d()
-    test_forward_constant_pad3d()
-    test_forward_reflection_pad1d()
-    test_forward_reflection_pad2d()
-    test_forward_replication_pad1d()
-    test_forward_replication_pad2d()
-    test_forward_replication_pad3d()
-    test_adaptive_pool3d()
-    test_conv3d()
-    test_conv3d_transpose()
-    test_forward_index()
-    test_min_max()
-    test_logsumexp()
-    test_stack()
-    test_stack_dynamic()
-    test_forward_unbind()
-    test_forward_nonzero()
-    test_forward_scatter()
-    test_numel()
-    test_bincount()
-
-    # Model tests
-    test_resnet18()
-    test_squeezenet1_0()
-    test_squeezenet1_1()
-    test_densenet121()
-    # disable inception test for now, since loading it takes ~5min on torchvision-0.5 due to scipy bug
-    # See https://discuss.pytorch.org/t/torchvisions-inception-v3-takes-much-longer-to-load-than-other-models/68756
-    # test_inception_v3()
-    test_googlenet()
-    test_mnasnet0_5()
-    test_mobilenet_v2()
-
-    test_custom_conversion_map()
-
-    test_segmentaton_models()
-    test_3d_models()
-
-    # Quantization test
-    from qnn_test import test_quantized_imagenet, test_quantized_modules
-
-    test_quantized_modules()
-    test_quantized_imagenet()
-
-    # Test simple conditionals and loop
-    test_control_flow()
-    test_simple_rnn()
-
-    # More complex recurrent models
-    from test_lstm import test_custom_lstm
-
-    test_custom_lstm()
-
-    # Test bert model
-    test_forward_pretrained_bert_base_uncased()
-
-    # Test convert torch script(jit) with specific inputs' types
-    test_convert_torch_script_with_input_types()
+    test_maskrcnn_resnet50()
+    # # some structural tests
+    # test_forward_traced_function()
+    # test_forward_dtypes()
+    # test_weight_names()
+    # test_duplicate_weight_use()
+
+    # # Single operator tests
+    # test_forward_pixel_shuffle()
+    # test_forward_add()
+    # test_forward_subtract()
+    # test_forward_multiply()
+    # test_forward_matmul()
+    # test_forward_rsub()
+    # test_forward_onehot()
+    # test_forward_embedding()
+    # test_forward_reshape()
+    # test_forward_reciprocal()
+    # test_forward_repeat()
+    # test_forward_repeat_interleave()
+    # test_forward_squeeze()
+    # test_forward_unsqueeze()
+    # test_forward_concatenate()
+    # test_forward_reduce_sum()
+    # test_forward_reduce_prod()
+    # test_forward_argmin()
+    # test_forward_argmax()
+    # test_forward_norm()
+    # test_forward_frobenius_norm()
+    # test_forward_std()
+    # test_forward_variance()
+    # test_forward_relu()
+    # test_forward_prelu()
+    # test_forward_leakyrelu()
+    # test_forward_elu()
+    # test_forward_celu()
+    # test_forward_gelu()
+    # test_forward_selu()
+    # test_forward_log_sigmoid()
+    # test_forward_adaptiveavgpool()
+    # test_forward_maxpool2d()
+    # test_forward_maxpool1d()
+    # test_forward_maxpool3d()
+    # test_forward_hardtanh()
+    # test_forward_conv()
+    # test_forward_conv_transpose()
+    # test_forward_threshold()
+    # test_forward_contiguous()
+    # test_forward_batchnorm()
+    # test_forward_instancenorm()
+    # test_forward_layernorm()
+    # test_forward_groupnorm()
+    # test_forward_transpose()
+    # test_forward_size()
+    # test_forward_view()
+    # test_forward_select()
+    # test_forward_take()
+    # test_forward_topk()
+    # test_forward_where()
+    # test_forward_addcdiv()
+    # test_forward_addcmul()
+    # test_forward_true_divide()
+    # test_forward_clone()
+    # test_forward_softplus()
+    # test_forward_softsign()
+    # test_forward_logsoftmax()
+    # test_forward_sigmoid()
+    # test_forward_dense()
+    # test_forward_avgpool()
+    # test_forward_avgpool3d()
+    # test_forward_dropout()
+    # test_forward_slice()
+    # test_forward_mean()
+    # test_forward_expand()
+    # test_forward_pow()
+    # test_forward_unary()
+    # test_forward_clamp()
+    # test_forward_clamp_()
+    # test_forward_logical_not()
+    # test_forward_bitwise_not()
+    # test_forward_bitwise_xor()
+    # test_forward_logical_xor()
+    # test_forward_isfinite()
+    # test_forward_isnan()
+    # test_forward_isinf()
+    # test_forward_ones()
+    # test_forward_ones_like()
+    # test_forward_zeros()
+    # test_forward_zeros_like()
+    # test_forward_full()
+    # test_forward_full_like()
+    # test_forward_linspace()
+    # test_forward_arange()
+    # test_forward_mesh_grid()
+    # test_forward_chunk()
+    # test_forward_split()
+    # test_forward_gather()
+    # test_upsample()
+    # test_forward_upsample3d()
+    # test_forward_nms()
+    # test_forward_roi_align()
+    # test_to()
+    # test_flatten()
+    # test_type_as()
+    # test_forward_functional_pad()
+    # test_forward_zero_pad2d()
+    # test_forward_constant_pad1d()
+    # test_forward_constant_pad2d()
+    # test_forward_constant_pad3d()
+    # test_forward_reflection_pad1d()
+    # test_forward_reflection_pad2d()
+    # test_forward_replication_pad1d()
+    # test_forward_replication_pad2d()
+    # test_forward_replication_pad3d()
+    # test_adaptive_pool3d()
+    # test_conv3d()
+    # test_conv3d_transpose()
+    # test_forward_index()
+    # test_min_max()
+    # test_logsumexp()
+    # test_stack()
+    # test_stack_dynamic()
+    # test_forward_unbind()
+    # test_forward_nonzero()
+    # test_forward_scatter()
+    # test_numel()
+    # test_bincount()
+
+    # # Model tests
+    # test_resnet18()
+    # test_squeezenet1_0()
+    # test_squeezenet1_1()
+    # test_densenet121()
+    # # disable inception test for now, since loading it takes ~5min on torchvision-0.5 due to scipy bug
+    # # See https://discuss.pytorch.org/t/torchvisions-inception-v3-takes-much-longer-to-load-than-other-models/68756
+    # # test_inception_v3()
+    # test_googlenet()
+    # test_mnasnet0_5()
+    # test_mobilenet_v2()
+
+    # test_custom_conversion_map()
+
+    # test_segmentaton_models()
+    # test_3d_models()
+
+    # # Quantization test
+    # from qnn_test import test_quantized_imagenet, test_quantized_modules
+
+    # test_quantized_modules()
+    # test_quantized_imagenet()
+
+    # # Test simple conditionals and loop
+    # test_control_flow()
+    # test_simple_rnn()
+
+    # # More complex recurrent models
+    # from test_lstm import test_custom_lstm
+
+    # test_custom_lstm()
+
+    # # Test bert model
+    # test_forward_pretrained_bert_base_uncased()
+
+    # # Test convert torch script(jit) with specific inputs' types
+    # test_convert_torch_script_with_input_types()

From d9366be78062839c068f0aa24f524760018e603d Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Tue, 24 Nov 2020 22:41:10 +0000
Subject: [PATCH 17/34] Re-enable full __main__ test list alongside Mask R-CNN ResNet-50 test

---
 tests/python/frontend/pytorch/test_forward.py | 328 +++++++++---------
 1 file changed, 164 insertions(+), 164 deletions(-)

diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py
index e612ff75d674..404614db71b9 100644
--- a/tests/python/frontend/pytorch/test_forward.py
+++ b/tests/python/frontend/pytorch/test_forward.py
@@ -3478,168 +3478,168 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray:
 
 
 if __name__ == "__main__":
+    # some structural tests
+    test_forward_traced_function()
+    test_forward_dtypes()
+    test_weight_names()
+    test_duplicate_weight_use()
+
+    # Single operator tests
+    test_forward_pixel_shuffle()
+    test_forward_add()
+    test_forward_subtract()
+    test_forward_multiply()
+    test_forward_matmul()
+    test_forward_rsub()
+    test_forward_onehot()
+    test_forward_embedding()
+    test_forward_reshape()
+    test_forward_reciprocal()
+    test_forward_repeat()
+    test_forward_repeat_interleave()
+    test_forward_squeeze()
+    test_forward_unsqueeze()
+    test_forward_concatenate()
+    test_forward_reduce_sum()
+    test_forward_reduce_prod()
+    test_forward_argmin()
+    test_forward_argmax()
+    test_forward_norm()
+    test_forward_frobenius_norm()
+    test_forward_std()
+    test_forward_variance()
+    test_forward_relu()
+    test_forward_prelu()
+    test_forward_leakyrelu()
+    test_forward_elu()
+    test_forward_celu()
+    test_forward_gelu()
+    test_forward_selu()
+    test_forward_log_sigmoid()
+    test_forward_adaptiveavgpool()
+    test_forward_maxpool2d()
+    test_forward_maxpool1d()
+    test_forward_maxpool3d()
+    test_forward_hardtanh()
+    test_forward_conv()
+    test_forward_conv_transpose()
+    test_forward_threshold()
+    test_forward_contiguous()
+    test_forward_batchnorm()
+    test_forward_instancenorm()
+    test_forward_layernorm()
+    test_forward_groupnorm()
+    test_forward_transpose()
+    test_forward_size()
+    test_forward_view()
+    test_forward_select()
+    test_forward_take()
+    test_forward_topk()
+    test_forward_where()
+    test_forward_addcdiv()
+    test_forward_addcmul()
+    test_forward_true_divide()
+    test_forward_clone()
+    test_forward_softplus()
+    test_forward_softsign()
+    test_forward_logsoftmax()
+    test_forward_sigmoid()
+    test_forward_dense()
+    test_forward_avgpool()
+    test_forward_avgpool3d()
+    test_forward_dropout()
+    test_forward_slice()
+    test_forward_mean()
+    test_forward_expand()
+    test_forward_pow()
+    test_forward_unary()
+    test_forward_clamp()
+    test_forward_clamp_()
+    test_forward_logical_not()
+    test_forward_bitwise_not()
+    test_forward_bitwise_xor()
+    test_forward_logical_xor()
+    test_forward_isfinite()
+    test_forward_isnan()
+    test_forward_isinf()
+    test_forward_ones()
+    test_forward_ones_like()
+    test_forward_zeros()
+    test_forward_zeros_like()
+    test_forward_full()
+    test_forward_full_like()
+    test_forward_linspace()
+    test_forward_arange()
+    test_forward_mesh_grid()
+    test_forward_chunk()
+    test_forward_split()
+    test_forward_gather()
+    test_upsample()
+    test_forward_upsample3d()
+    test_forward_nms()
+    test_forward_roi_align()
+    test_to()
+    test_flatten()
+    test_type_as()
+    test_forward_functional_pad()
+    test_forward_zero_pad2d()
+    test_forward_constant_pad1d()
+    test_forward_constant_pad2d()
+    test_forward_constant_pad3d()
+    test_forward_reflection_pad1d()
+    test_forward_reflection_pad2d()
+    test_forward_replication_pad1d()
+    test_forward_replication_pad2d()
+    test_forward_replication_pad3d()
+    test_adaptive_pool3d()
+    test_conv3d()
+    test_conv3d_transpose()
+    test_forward_index()
+    test_min_max()
+    test_logsumexp()
+    test_stack()
+    test_stack_dynamic()
+    test_forward_unbind()
+    test_forward_nonzero()
+    test_forward_scatter()
+    test_numel()
+    test_bincount()
+
+    # Model tests
+    test_resnet18()
+    test_squeezenet1_0()
+    test_squeezenet1_1()
+    test_densenet121()
+    # disable inception test for now, since loading it takes ~5min on torchvision-0.5 due to scipy bug
+    # See https://discuss.pytorch.org/t/torchvisions-inception-v3-takes-much-longer-to-load-than-other-models/68756
+    # test_inception_v3()
+    test_googlenet()
+    test_mnasnet0_5()
+    test_mobilenet_v2()
+
+    test_custom_conversion_map()
+
+    test_segmentaton_models()
+    test_3d_models()
     test_maskrcnn_resnet50()
-    # # some structural tests
-    # test_forward_traced_function()
-    # test_forward_dtypes()
-    # test_weight_names()
-    # test_duplicate_weight_use()
-
-    # # Single operator tests
-    # test_forward_pixel_shuffle()
-    # test_forward_add()
-    # test_forward_subtract()
-    # test_forward_multiply()
-    # test_forward_matmul()
-    # test_forward_rsub()
-    # test_forward_onehot()
-    # test_forward_embedding()
-    # test_forward_reshape()
-    # test_forward_reciprocal()
-    # test_forward_repeat()
-    # test_forward_repeat_interleave()
-    # test_forward_squeeze()
-    # test_forward_unsqueeze()
-    # test_forward_concatenate()
-    # test_forward_reduce_sum()
-    # test_forward_reduce_prod()
-    # test_forward_argmin()
-    # test_forward_argmax()
-    # test_forward_norm()
-    # test_forward_frobenius_norm()
-    # test_forward_std()
-    # test_forward_variance()
-    # test_forward_relu()
-    # test_forward_prelu()
-    # test_forward_leakyrelu()
-    # test_forward_elu()
-    # test_forward_celu()
-    # test_forward_gelu()
-    # test_forward_selu()
-    # test_forward_log_sigmoid()
-    # test_forward_adaptiveavgpool()
-    # test_forward_maxpool2d()
-    # test_forward_maxpool1d()
-    # test_forward_maxpool3d()
-    # test_forward_hardtanh()
-    # test_forward_conv()
-    # test_forward_conv_transpose()
-    # test_forward_threshold()
-    # test_forward_contiguous()
-    # test_forward_batchnorm()
-    # test_forward_instancenorm()
-    # test_forward_layernorm()
-    # test_forward_groupnorm()
-    # test_forward_transpose()
-    # test_forward_size()
-    # test_forward_view()
-    # test_forward_select()
-    # test_forward_take()
-    # test_forward_topk()
-    # test_forward_where()
-    # test_forward_addcdiv()
-    # test_forward_addcmul()
-    # test_forward_true_divide()
-    # test_forward_clone()
-    # test_forward_softplus()
-    # test_forward_softsign()
-    # test_forward_logsoftmax()
-    # test_forward_sigmoid()
-    # test_forward_dense()
-    # test_forward_avgpool()
-    # test_forward_avgpool3d()
-    # test_forward_dropout()
-    # test_forward_slice()
-    # test_forward_mean()
-    # test_forward_expand()
-    # test_forward_pow()
-    # test_forward_unary()
-    # test_forward_clamp()
-    # test_forward_clamp_()
-    # test_forward_logical_not()
-    # test_forward_bitwise_not()
-    # test_forward_bitwise_xor()
-    # test_forward_logical_xor()
-    # test_forward_isfinite()
-    # test_forward_isnan()
-    # test_forward_isinf()
-    # test_forward_ones()
-    # test_forward_ones_like()
-    # test_forward_zeros()
-    # test_forward_zeros_like()
-    # test_forward_full()
-    # test_forward_full_like()
-    # test_forward_linspace()
-    # test_forward_arange()
-    # test_forward_mesh_grid()
-    # test_forward_chunk()
-    # test_forward_split()
-    # test_forward_gather()
-    # test_upsample()
-    # test_forward_upsample3d()
-    # test_forward_nms()
-    # test_forward_roi_align()
-    # test_to()
-    # test_flatten()
-    # test_type_as()
-    # test_forward_functional_pad()
-    # test_forward_zero_pad2d()
-    # test_forward_constant_pad1d()
-    # test_forward_constant_pad2d()
-    # test_forward_constant_pad3d()
-    # test_forward_reflection_pad1d()
-    # test_forward_reflection_pad2d()
-    # test_forward_replication_pad1d()
-    # test_forward_replication_pad2d()
-    # test_forward_replication_pad3d()
-    # test_adaptive_pool3d()
-    # test_conv3d()
-    # test_conv3d_transpose()
-    # test_forward_index()
-    # test_min_max()
-    # test_logsumexp()
-    # test_stack()
-    # test_stack_dynamic()
-    # test_forward_unbind()
-    # test_forward_nonzero()
-    # test_forward_scatter()
-    # test_numel()
-    # test_bincount()
-
-    # # Model tests
-    # test_resnet18()
-    # test_squeezenet1_0()
-    # test_squeezenet1_1()
-    # test_densenet121()
-    # # disable inception test for now, since loading it takes ~5min on torchvision-0.5 due to scipy bug
-    # # See https://discuss.pytorch.org/t/torchvisions-inception-v3-takes-much-longer-to-load-than-other-models/68756
-    # # test_inception_v3()
-    # test_googlenet()
-    # test_mnasnet0_5()
-    # test_mobilenet_v2()
-
-    # test_custom_conversion_map()
-
-    # test_segmentaton_models()
-    # test_3d_models()
-
-    # # Quantization test
-    # from qnn_test import test_quantized_imagenet, test_quantized_modules
-
-    # test_quantized_modules()
-    # test_quantized_imagenet()
-
-    # # Test simple conditionals and loop
-    # test_control_flow()
-    # test_simple_rnn()
-
-    # # More complex recurrent models
-    # from test_lstm import test_custom_lstm
-
-    # test_custom_lstm()
-
-    # # Test bert model
-    # test_forward_pretrained_bert_base_uncased()
-
-    # # Test convert torch script(jit) with specific inputs' types
-    # test_convert_torch_script_with_input_types()
+
+    # Quantization test
+    from qnn_test import test_quantized_imagenet, test_quantized_modules
+
+    test_quantized_modules()
+    test_quantized_imagenet()
+
+    # Test simple conditionals and loop
+    test_control_flow()
+    test_simple_rnn()
+
+    # More complex recurrent models
+    from test_lstm import test_custom_lstm
+
+    test_custom_lstm()
+
+    # Test bert model
+    test_forward_pretrained_bert_base_uncased()
+
+    # Test convert torch script(jit) with specific inputs' types
+    test_convert_torch_script_with_input_types()

From 13be588c7b0bc602f1ec78f05dbb0c503dddac5e Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Tue, 24 Nov 2020 22:54:22 +0000
Subject: [PATCH 18/34] Fix missing space in TensorRT engine log message

---
 src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index b8d42a1b5671..9654d699217b 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -213,7 +213,7 @@ class TensorRTRuntime : public JSONRuntimeBase {
     // Build engine.
     trt_engine_cache_[std::make_pair(symbol_name_, batch_size_)] = builder.BuildEngine();
     LOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_
-              << " with batch size" << batch_size_;
+              << " with batch size " << batch_size_;
     CacheEngineToDisk();
   }
 

From 87c92f8deb230166079b0a26f7779da5a7cd186e Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Tue, 24 Nov 2020 22:56:59 +0000
Subject: [PATCH 19/34] Change LOG to DLOG for TensorRT engine build messages

---
 src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index 9654d699217b..805e7e1bc7c3 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -178,8 +178,8 @@ class TensorRTRuntime : public JSONRuntimeBase {
   void BuildEngine() {
     batch_size_ = data_entry_[input_var_eid_[0]]->shape[0];
     if (trt_engine_cache_.count(std::make_pair(symbol_name_, batch_size_))) return;
-    LOG(INFO) << "Building new TensorRT engine for subgraph " << symbol_name_ << " with batch size "
-              << batch_size_;
+    DLOG(INFO) << "Building new TensorRT engine for subgraph " << symbol_name_
+               << " with batch size " << batch_size_;
     const bool use_fp16 = dmlc::GetEnv("TVM_TENSORRT_USE_FP16", false);
     TensorRTBuilder builder(&logger_, data_entry_, max_workspace_size_, use_implicit_batch_,
                             use_fp16, batch_size_);
@@ -212,8 +212,8 @@ class TensorRTRuntime : public JSONRuntimeBase {
 
     // Build engine.
     trt_engine_cache_[std::make_pair(symbol_name_, batch_size_)] = builder.BuildEngine();
-    LOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_
-              << " with batch size " << batch_size_;
+    DLOG(INFO) << "Finished building TensorRT engine for subgraph " << symbol_name_
+               << " with batch size " << batch_size_;
     CacheEngineToDisk();
   }
 

From 230c1257c18b32134b63212dd6a06193d6088614 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Wed, 25 Nov 2020 17:56:20 +0000
Subject: [PATCH 20/34] Move test to tensorrt.py

---
 tests/python/contrib/test_tensorrt.py         | 99 +++++++++++++++++++
 tests/python/frontend/pytorch/test_forward.py | 94 ------------------
 2 files changed, 99 insertions(+), 94 deletions(-)

diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py
index 8b61323a71ad..d32268d5f80d 100644
--- a/tests/python/contrib/test_tensorrt.py
+++ b/tests/python/contrib/test_tensorrt.py
@@ -26,6 +26,10 @@
 from tvm.contrib import graph_runtime, utils
 from tvm.runtime.vm import VirtualMachine
 from tvm.relay import Any, GlobalVar, transform
+from typing import Dict, Tuple, Union
+from tvm.contrib.download import download
+import cv2
+from tvm.relay.op.contrib import tensorrt
 
 
 def skip_codegen_test():
@@ -1034,5 +1038,100 @@ def set_func_attr(func, compile_name, symbol_name):
     tvm.ir.assert_structural_equal(mod_trt, mod_exp, map_free_vars=True)
 
 
+def test_maskrcnn_resnet50() -> None:
+    """
+    This function tests the working of pytorch maskrcnn with resnet50 as backbone with
+    VM and VM + TRT. Since the order of compiled model outputs is a bit different from
+    original pytorch model, it uses a custom logic for comparison check.
+    """
+    if skip_codegen_test() or skip_runtime_test():
+        return
+
+    class TraceWrapper(torch.nn.Module):
+        """
+        This class is a wrapper over the torch module to convert the outputs into traceable form
+        """
+
+        def __init__(self, model: torch.nn.Module) -> None:
+            super().__init__()
+            self.model = model
+
+        def forward(
+            self, inp: torch.Tensor
+        ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+            out = self.model(inp)
+            return out[0]["boxes"], out[0]["scores"], out[0]["labels"], out[0]["masks"]
+
+    def get_traced_maskrcnn_model(np_sample_input: np.ndarray) -> torch.jit.TopLevelTracedModule:
+        """
+        This function takes a sample input and returns the traced maskrcnn model
+        """
+        model_func = torchvision.models.detection.maskrcnn_resnet50_fpn
+        model = TraceWrapper(model_func(pretrained=True))
+        model.eval()
+        inp = torch.Tensor(np.random.uniform(0.0, 250.0, size=np_sample_input.shape))
+
+        with torch.no_grad():
+            out = model(inp)
+            traced_module = torch.jit.trace(model, inp)
+            traced_module.eval()
+
+        return traced_module
+
+    def get_maskrcnn_input(in_size: int) -> np.ndarray:
+        """
+        This function gets a real image with multiple objects of interest and returns it.
+        """
+        input_shape = (1, 3, in_size, in_size)
+        img_path = "test_street_small.jpg"
+        img_url = (
+            "https://raw.githubusercontent.com/dmlc/web-data/"
+            "master/gluoncv/detection/street_small.jpg"
+        )
+        download(img_url, img_path)
+
+        img = cv2.imread(img_path).astype("float32")
+        img = cv2.resize(img, (in_size, in_size))
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        img = np.transpose(img / 255.0, [2, 0, 1])
+        img = np.expand_dims(img, axis=0)
+
+        return img
+
+    in_size = 300
+    np_sample_input = get_maskrcnn_input(in_size)
+    traced_module = get_traced_maskrcnn_model(np_sample_input)
+    vm_trt_exec = convert_traced_model_to_vm_trt(traced_module, np_sample_input, target="llvm")
+    ctx = tvm.cpu()
+    vm = tvm.runtime.vm.VirtualMachine(vm_trt_exec, ctx)
+    vm.set_input("main", **{"input0": np_sample_input})
+    tvm_res = vm.run()
+
+    # Descending sort by scores and get the high confidence indices. In this example 9 is chosen,
+    # because this image has 9 boxes over 0.9 confidence
+    num_high_confidence_boxes = 9
+    tvm_indices = np.argsort(-1 * tvm_res[1].asnumpy())[:num_high_confidence_boxes]
+
+    with torch.no_grad():
+        out = traced_module(torch.Tensor(np_sample_input))
+        # Descending sort by scores and get the high confidence indices
+        pt_indices = np.argsort(-1 * out[1].numpy())[:num_high_confidence_boxes]
+
+    tol = [1e-1, 5e-3, 1e-5, 4e-1]  # [Box Tol, Score Tol, Label Tol, Mask Tol]
+    # Because of certain ops, there are minor differences between TVM outputs and PT outputs.
+    # This means that the tolerance can't be 1e-4 or 1e-5 throughout. The ideal way to get around
+    # this is to test it on an entire dataset and compare mAP with the original model.
+    # However, since that is not practically possible on CI, the following compromise is made.
+    # These tolerances are chosen based on their impact (or lack thereof) on the mAP score, e.g.
+    # a 0.1 pixel difference of a box in a 300X300 image won't make any change.
+    for i, tol_val in zip(range(4), tol):
+        np.testing.assert_allclose(
+            tvm_res[i].asnumpy()[tvm_indices],
+            out[i].numpy()[pt_indices],
+            rtol=tol_val,
+            atol=tol_val,
+        )
+
+
 if __name__ == "__main__":
     pytest.main([__file__])
diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py
index 404614db71b9..362046fc5817 100644
--- a/tests/python/frontend/pytorch/test_forward.py
+++ b/tests/python/frontend/pytorch/test_forward.py
@@ -3384,99 +3384,6 @@ def convert_traced_model_to_vm_trt(
     return vm_trt_exec
 
 
-def test_maskrcnn_resnet50() -> None:
-    """
-    This function tests the working of pytorch maskrcnn with resnet50 as backbone with
-    VM and VM + TRT. Since the order of compiled model outputs is a bit different from
-    original pytorch model, it uses a custom logic for comparison check.
-    """
-
-    class TraceWrapper(torch.nn.Module):
-        """
-        This class is a wrapper over the torch module to convert the outputs into traceable form
-        """
-
-        def __init__(self, model: torch.nn.Module) -> None:
-            super().__init__()
-            self.model = model
-
-        def forward(
-            self, inp: torch.Tensor
-        ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-            out = self.model(inp)
-            return out[0]["boxes"], out[0]["scores"], out[0]["labels"], out[0]["masks"]
-
-    def get_traced_maskrcnn_model(np_sample_input: np.ndarray) -> torch.jit.TopLevelTracedModule:
-        """
-        This function takes a sample input and returns the traced maskrcnn model
-        """
-        model_func = torchvision.models.detection.maskrcnn_resnet50_fpn
-        model = TraceWrapper(model_func(pretrained=True))
-        model.eval()
-        inp = torch.Tensor(np.random.uniform(0.0, 250.0, size=np_sample_input.shape))
-
-        with torch.no_grad():
-            out = model(inp)
-            traced_module = torch.jit.trace(model, inp)
-            traced_module.eval()
-
-        return traced_module
-
-    def get_maskrcnn_input(in_size: int) -> np.ndarray:
-        """
-        This function gets a real image with multiple objects of interest and returns it.
-        """
-        input_shape = (1, 3, in_size, in_size)
-        img_path = "test_street_small.jpg"
-        img_url = (
-            "https://raw.githubusercontent.com/dmlc/web-data/"
-            "master/gluoncv/detection/street_small.jpg"
-        )
-        download(img_url, img_path)
-
-        img = cv2.imread(img_path).astype("float32")
-        img = cv2.resize(img, (in_size, in_size))
-        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-        img = np.transpose(img / 255.0, [2, 0, 1])
-        img = np.expand_dims(img, axis=0)
-
-        return img
-
-    in_size = 300
-    np_sample_input = get_maskrcnn_input(in_size)
-    traced_module = get_traced_maskrcnn_model(np_sample_input)
-    vm_trt_exec = convert_traced_model_to_vm_trt(traced_module, np_sample_input, target="llvm")
-    ctx = tvm.cpu()
-    vm = tvm.runtime.vm.VirtualMachine(vm_trt_exec, ctx)
-    vm.set_input("main", **{"input0": np_sample_input})
-    tvm_res = vm.run()
-
-    # Descending sort by scores and get the high confidence indices. In this example 9 is chosen,
-    # because this image has 9 boxes over 0.9 confidence
-    num_high_confidence_boxes = 9
-    tvm_indices = np.argsort(-1 * tvm_res[1].asnumpy())[:num_high_confidence_boxes]
-
-    with torch.no_grad():
-        out = traced_module(torch.Tensor(np_sample_input))
-        # Descending sort by scores and get the high confidence indices
-        pt_indices = np.argsort(-1 * out[1].numpy())[:num_high_confidence_boxes]
-
-    tol = [1e-1, 5e-3, 1e-5, 4e-1]  # [Box Tol, Score Tol, Label Tol, Mask Tol]
-    # Because of certain ops, there are certain minor differences in TVM outputs and PT outputs,
-    # This means that the tolerance can't be 1e-4 or 1e-5 throughout. The ideal way to get around
-    # this is to test it on an entire dataset and compare mAP with the original model.
-    # However, since that is not practically possible on CI, the following compromise is made.
-    # These tolerances are chosen based on their impact or lack thereof to the mAP score, e.g:
-    # 0.1 pixel difference of a box in a 300X300 image wont make any change.
-    for i, tol_val in zip(range(4), tol):
-        np.testing.assert_allclose(
-            tvm_res[i].asnumpy()[tvm_indices],
-            out[i].numpy()[pt_indices],
-            rtol=tol_val,
-            atol=tol_val,
-        )
-
-
 if __name__ == "__main__":
     # some structural tests
     test_forward_traced_function()
@@ -3621,7 +3528,6 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray:
 
     test_segmentaton_models()
     test_3d_models()
-    test_maskrcnn_resnet50()
 
     # Quantization test
     from qnn_test import test_quantized_imagenet, test_quantized_modules

From d17c4d02bdf72eb7b65f337d66989f0d23d8f262 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Wed, 25 Nov 2020 17:57:02 +0000
Subject: [PATCH 21/34] Remove imports

---
 tests/python/frontend/pytorch/test_forward.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py
index 362046fc5817..f4a960bbfd55 100644
--- a/tests/python/frontend/pytorch/test_forward.py
+++ b/tests/python/frontend/pytorch/test_forward.py
@@ -29,11 +29,7 @@
 from tvm.contrib import graph_runtime
 from tvm.contrib.nvcc import have_fp16
 import tvm.testing
-from typing import Dict, Tuple, Union
 from packaging import version as package_version
-from tvm.contrib.download import download
-import cv2
-from tvm.relay.op.contrib import tensorrt
 
 sys.setrecursionlimit(10000)
 

From 5bdc028f9bc6ee08ed37de8aaf2afec711e9ddf3 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Wed, 25 Nov 2020 17:58:25 +0000
Subject: [PATCH 22/34] Remove function

---
 tests/python/frontend/pytorch/test_forward.py | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py
index f4a960bbfd55..6250dfff811a 100644
--- a/tests/python/frontend/pytorch/test_forward.py
+++ b/tests/python/frontend/pytorch/test_forward.py
@@ -3363,23 +3363,6 @@ def test_fn(x, weights=None):
     verify_trace_model(test_fn, [inp, weights.to(torch.float64)], ["llvm"])
 
 
-def convert_traced_model_to_vm_trt(
-    traced_module: torch.jit.TopLevelTracedModule, np_sample_input: np.ndarray, target: str
-) -> tvm.runtime.vm.Executable:
-    """
-    This function converts a traced pytorch model to VM + TRT.
-    """
-    input_shape = np_sample_input.shape
-    input_name = "input0"
-    shape_list = [(input_name, input_shape)]
-    mod, params = relay.frontend.from_pytorch(traced_module, shape_list)
-    mod, config = tensorrt.partition_for_tensorrt(mod, params, remove_no_mac_subgraphs=True)
-    with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]):
-        vm_trt_exec = relay.vm.compile(mod, target=target, params=params)
-
-    return vm_trt_exec
-
-
 if __name__ == "__main__":
     # some structural tests
     test_forward_traced_function()

From cab81b7c4177edfaa682dfcbb4b7ebff4c37cd52 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Wed, 25 Nov 2020 17:58:55 +0000
Subject: [PATCH 23/34] Add it to trt

---
 tests/python/contrib/test_tensorrt.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py
index d32268d5f80d..b3a8e1070033 100644
--- a/tests/python/contrib/test_tensorrt.py
+++ b/tests/python/contrib/test_tensorrt.py
@@ -1038,6 +1038,23 @@ def set_func_attr(func, compile_name, symbol_name):
     tvm.ir.assert_structural_equal(mod_trt, mod_exp, map_free_vars=True)
 
 
+def convert_traced_model_to_vm_trt(
+    traced_module: torch.jit.TopLevelTracedModule, np_sample_input: np.ndarray, target: str
+) -> tvm.runtime.vm.Executable:
+    """
+    This function converts a traced pytorch model to VM + TRT.
+    """
+    input_shape = np_sample_input.shape
+    input_name = "input0"
+    shape_list = [(input_name, input_shape)]
+    mod, params = relay.frontend.from_pytorch(traced_module, shape_list)
+    mod, config = tensorrt.partition_for_tensorrt(mod, params, remove_no_mac_subgraphs=True)
+    with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]):
+        vm_trt_exec = relay.vm.compile(mod, target=target, params=params)
+
+    return vm_trt_exec
+
+
 def test_maskrcnn_resnet50() -> None:
     """
     This function tests the working of pytorch maskrcnn with resnet50 as backbone with

From 555505df262b9e7e2b5d0f0f4e89ce39d521f2a7 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Wed, 25 Nov 2020 20:41:51 +0000
Subject: [PATCH 24/34] import error

---
 tests/python/contrib/test_tensorrt.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py
index b3a8e1070033..1a0fddc23ce7 100644
--- a/tests/python/contrib/test_tensorrt.py
+++ b/tests/python/contrib/test_tensorrt.py
@@ -28,7 +28,6 @@
 from tvm.relay import Any, GlobalVar, transform
 from typing import Dict, Tuple, Union
 from tvm.contrib.download import download
-import cv2
 from tvm.relay.op.contrib import tensorrt
 
 
@@ -1106,6 +1105,7 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray:
             "master/gluoncv/detection/street_small.jpg"
         )
         download(img_url, img_path)
+        import cv2
 
         img = cv2.imread(img_path).astype("float32")
         img = cv2.resize(img, (in_size, in_size))

From ee41e0775877932335b10680532ad2ec1b691e75 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Mon, 30 Nov 2020 08:05:54 +0000
Subject: [PATCH 25/34] Imports

---
 tests/python/contrib/test_tensorrt.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py
index 1a0fddc23ce7..6b4e5c0fce97 100644
--- a/tests/python/contrib/test_tensorrt.py
+++ b/tests/python/contrib/test_tensorrt.py
@@ -29,6 +29,8 @@
 from typing import Dict, Tuple, Union
 from tvm.contrib.download import download
 from tvm.relay.op.contrib import tensorrt
+import torch
+import torchvision
 
 
 def skip_codegen_test():
@@ -1151,4 +1153,5 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray:
 
 
 if __name__ == "__main__":
-    pytest.main([__file__])
+    test_maskrcnn_resnet50()
+    # pytest.main([__file__])

From 51b5c8acaa456c4a1157926ff8f86dcaf142cc97 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Mon, 30 Nov 2020 08:57:57 +0000
Subject: [PATCH 26/34] Add torch to CI

---
 tests/scripts/task_ci_python_setup.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/scripts/task_ci_python_setup.sh b/tests/scripts/task_ci_python_setup.sh
index fe88ac650cc8..9d9f03ef71d8 100755
--- a/tests/scripts/task_ci_python_setup.sh
+++ b/tests/scripts/task_ci_python_setup.sh
@@ -31,3 +31,4 @@ set -o pipefail
 echo "Addtiional setup in" ${CI_IMAGE_NAME}
 
 python3 -m pip install --user tlcpack-sphinx-addon==0.1.2 synr==0.2.1
+python3 -m pip install --user torch==1.6.0+cpu torchvision==0.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html

From 6fea65875838be02271033fb7cb075a4dd40b536 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Mon, 30 Nov 2020 17:49:22 +0000
Subject: [PATCH 27/34] trt_test

---
 tests/python/contrib/test_tensorrt.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py
index 6b4e5c0fce97..6510f14adcde 100644
--- a/tests/python/contrib/test_tensorrt.py
+++ b/tests/python/contrib/test_tensorrt.py
@@ -21,6 +21,9 @@
 
 import tvm
 import tvm.relay.testing
+import torch
+import torchvision
+
 from tvm import relay
 from tvm.relay.op.contrib import tensorrt
 from tvm.contrib import graph_runtime, utils
@@ -29,8 +32,6 @@
 from typing import Dict, Tuple, Union
 from tvm.contrib.download import download
 from tvm.relay.op.contrib import tensorrt
-import torch
-import torchvision
 
 
 def skip_codegen_test():
@@ -1153,5 +1154,4 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray:
 
 
 if __name__ == "__main__":
-    test_maskrcnn_resnet50()
-    # pytest.main([__file__])
+    pytest.main([__file__])

From ea4af58486b33c61376b936c3a5a086ba8e70e15 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Mon, 30 Nov 2020 20:34:25 +0000
Subject: [PATCH 28/34] Check test

---
 tests/python/contrib/test_tensorrt.py | 31 ++++++++++++++++-----------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py
index 6510f14adcde..ccc3dadc4c45 100644
--- a/tests/python/contrib/test_tensorrt.py
+++ b/tests/python/contrib/test_tensorrt.py
@@ -21,8 +21,6 @@
 
 import tvm
 import tvm.relay.testing
-import torch
-import torchvision
 
 from tvm import relay
 from tvm.relay.op.contrib import tensorrt
@@ -1040,7 +1038,20 @@ def set_func_attr(func, compile_name, symbol_name):
     tvm.ir.assert_structural_equal(mod_trt, mod_exp, map_free_vars=True)
 
 
-def convert_traced_model_to_vm_trt(
+
+def test_maskrcnn_resnet50() -> None:
+    """
+    This function tests the working of pytorch maskrcnn with resnet50 as backbone with
+    VM and VM + TRT. Since the order of compiled model outputs is a bit different from
+    original pytorch model, it uses a custom logic for comparison check.
+    """
+    if skip_codegen_test():
+        return
+
+    import torch
+    import torchvision
+
+    def convert_traced_model_to_vm_trt(
     traced_module: torch.jit.TopLevelTracedModule, np_sample_input: np.ndarray, target: str
 ) -> tvm.runtime.vm.Executable:
     """
@@ -1056,16 +1067,6 @@ def convert_traced_model_to_vm_trt(
 
     return vm_trt_exec
 
-
-def test_maskrcnn_resnet50() -> None:
-    """
-    This function tests the working of pytorch maskrcnn with resnet50 as backbone with
-    VM and VM + TRT. Since the order of compiled model outputs is a bit different from
-    original pytorch model, it uses a custom logic for comparison check.
-    """
-    if skip_codegen_test() or skip_runtime_test():
-        return
-
     class TraceWrapper(torch.nn.Module):
         """
         This class is a wrapper over the torch module to convert the outputs into traceable form
@@ -1122,6 +1123,10 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray:
     np_sample_input = get_maskrcnn_input(in_size)
     traced_module = get_traced_maskrcnn_model(np_sample_input)
     vm_trt_exec = convert_traced_model_to_vm_trt(traced_module, np_sample_input, target="llvm")
+
+    if skip_runtime_test():
+        return 
+
     ctx = tvm.cpu()
     vm = tvm.runtime.vm.VirtualMachine(vm_trt_exec, ctx)
     vm.set_input("main", **{"input0": np_sample_input})

From 49a4fa707fab5801474701f32f6b8fa801798fbd Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Mon, 30 Nov 2020 20:35:34 +0000
Subject: [PATCH 29/34] Revert Pytorch install

---
 tests/scripts/task_ci_python_setup.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/scripts/task_ci_python_setup.sh b/tests/scripts/task_ci_python_setup.sh
index 9d9f03ef71d8..fe88ac650cc8 100755
--- a/tests/scripts/task_ci_python_setup.sh
+++ b/tests/scripts/task_ci_python_setup.sh
@@ -31,4 +31,3 @@ set -o pipefail
 echo "Addtiional setup in" ${CI_IMAGE_NAME}
 
 python3 -m pip install --user tlcpack-sphinx-addon==0.1.2 synr==0.2.1
-python3 -m pip install --user torch==1.6.0+cpu torchvision==0.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html

From 28bc758299cdde0e0dc9a683ec550a289eede7d4 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Mon, 30 Nov 2020 20:44:21 +0000
Subject: [PATCH 30/34] Fix

---
 tests/python/contrib/test_tensorrt.py | 29 +++++++++++++--------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py
index ccc3dadc4c45..67d2e917b7dc 100644
--- a/tests/python/contrib/test_tensorrt.py
+++ b/tests/python/contrib/test_tensorrt.py
@@ -1038,7 +1038,6 @@ def set_func_attr(func, compile_name, symbol_name):
     tvm.ir.assert_structural_equal(mod_trt, mod_exp, map_free_vars=True)
 
 
-
 def test_maskrcnn_resnet50() -> None:
     """
     This function tests the working of pytorch maskrcnn with resnet50 as backbone with
@@ -1052,20 +1051,20 @@ def test_maskrcnn_resnet50() -> None:
     import torchvision
 
     def convert_traced_model_to_vm_trt(
-    traced_module: torch.jit.TopLevelTracedModule, np_sample_input: np.ndarray, target: str
-) -> tvm.runtime.vm.Executable:
-    """
-    This function converts a traced pytorch model to VM + TRT.
-    """
-    input_shape = np_sample_input.shape
-    input_name = "input0"
-    shape_list = [(input_name, input_shape)]
-    mod, params = relay.frontend.from_pytorch(traced_module, shape_list)
-    mod, config = tensorrt.partition_for_tensorrt(mod, params, remove_no_mac_subgraphs=True)
-    with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]):
-        vm_trt_exec = relay.vm.compile(mod, target=target, params=params)
+        traced_module: torch.jit.TopLevelTracedModule, np_sample_input: np.ndarray, target: str
+    ) -> tvm.runtime.vm.Executable:
+        """
+        This function converts a traced pytorch model to VM + TRT.
+        """
+        input_shape = np_sample_input.shape
+        input_name = "input0"
+        shape_list = [(input_name, input_shape)]
+        mod, params = relay.frontend.from_pytorch(traced_module, shape_list)
+        mod, config = tensorrt.partition_for_tensorrt(mod, params, remove_no_mac_subgraphs=True)
+        with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]):
+            vm_trt_exec = relay.vm.compile(mod, target=target, params=params)
 
-    return vm_trt_exec
+        return vm_trt_exec
 
     class TraceWrapper(torch.nn.Module):
         """
@@ -1125,7 +1124,7 @@ def get_maskrcnn_input(in_size: int) -> np.ndarray:
     vm_trt_exec = convert_traced_model_to_vm_trt(traced_module, np_sample_input, target="llvm")
 
     if skip_runtime_test():
-        return 
+        return
 
     ctx = tvm.cpu()
     vm = tvm.runtime.vm.VirtualMachine(vm_trt_exec, ctx)

From cf3016bfcac8ce1031f81792736d0c18bee64455 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Mon, 30 Nov 2020 21:02:31 +0000
Subject: [PATCH 31/34] test dynamic batch

---
 tests/python/contrib/test_tensorrt.py | 62 +++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py
index 67d2e917b7dc..4be6d84f47b1 100644
--- a/tests/python/contrib/test_tensorrt.py
+++ b/tests/python/contrib/test_tensorrt.py
@@ -1038,6 +1038,68 @@ def set_func_attr(func, compile_name, symbol_name):
     tvm.ir.assert_structural_equal(mod_trt, mod_exp, map_free_vars=True)
 
 
+def test_tensorrt_dynamic_batch():
+    if skip_codegen_test():
+        return
+
+    batches_to_test = [1, 1, 2, 3, 1, 3, 2]
+    x_shape = (relay.Any(), 1, 8, 8)
+    x_data = np.ones([max(batches_to_test)] + list(x_shape)[1:]).astype("float32")
+    result_dict = {}
+    for use_trt in [True, False]:
+        x = relay.var("x", shape=x_shape, dtype="float32")
+        out = relay.nn.relu(x)
+        f = relay.Function([x], out)
+        mod = tvm.IRModule()
+        mod["main"] = f
+        if use_trt:
+            mod = relay.tensorrt.EnableTrt(mod)
+
+        if not skip_runtime_test():
+            with relay.build_config(opt_level=3):
+                relay_exec = relay.create_executor("vm", mod=mod, ctx=tvm.cpu(0), target="llvm")
+
+            for i, batch_size in enumerate(batches_to_test):
+                result_dict[(i, use_trt)] = relay_exec.evaluate()(x_data[:batch_size, ...])
+
+    if not skip_runtime_test():
+        for i in range(len(batches_to_test)):
+            assert_result_matches(result_dict[(i, True)], result_dict[(i, False)])
+
+
+def test_tensorrt_dynamic_batch_conv():
+    if skip_codegen_test():
+        return
+    batches_to_test = [1, 1, 2, 3, 1, 3, 2]
+    x_shape = (relay.Any(), 32, 8, 8)
+    x_data = np.ones([max(batches_to_test)] + list(x_shape)[1:]).astype("float32")
+    k_shape = (16, 32, 3, 3)
+    params = {"kernel": np.random.uniform(-1, 1, k_shape).astype("float32")}
+    result_dict = {}
+    for use_trt in [True, False]:
+        x = relay.var("x", shape=x_shape, dtype="float32")
+        kernel = relay.var("kernel", shape=k_shape, dtype="float32")
+        out = relay.nn.conv2d(x, kernel, channels=16, kernel_size=(3, 3), groups=1)
+        f = relay.Function([x, kernel], out)
+        mod = tvm.IRModule()
+        mod["main"] = f
+        if use_trt:
+            mod = tensorrt.partition_for_tensorrt(mod, params)
+
+        if not skip_runtime_test():
+            with relay.build_config(opt_level=3):
+                relay_exec = relay.create_executor("vm", mod=mod, ctx=tvm.cpu(0), target="llvm")
+
+            for i, batch_size in enumerate(batches_to_test):
+                result_dict[(i, use_trt)] = relay_exec.evaluate()(
+                    x=x_data[:batch_size, ...], **params
+                )
+
+    if not skip_runtime_test():
+        for i in range(len(batches_to_test)):
+            assert_result_matches(result_dict[(i, True)], result_dict[(i, False)])
+
+
 def test_maskrcnn_resnet50() -> None:
     """
     This function tests the working of pytorch maskrcnn with resnet50 as backbone with

From 56c75bf80d783fd54c66ab722cd9cbc2d5f5fbe7 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Mon, 30 Nov 2020 21:19:48 +0000
Subject: [PATCH 32/34] TRT

---
 python/tvm/relay/op/contrib/tensorrt.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py
index 3fc3e5a0393f..3b697f6bdfcd 100644
--- a/python/tvm/relay/op/contrib/tensorrt.py
+++ b/python/tvm/relay/op/contrib/tensorrt.py
@@ -652,7 +652,7 @@ def reshape_annotate_fn(expr):  # pylint: disable=unused-variable
                 new_shape[i] = original_volume // np.prod([x for x in new_shape if x != -1])
         # Remove batch dimension and see if volumes match
         if shape[0] != new_shape[0]:
-            print("reshape: can't modify batch dimension.")
+            logger.info("reshape: can't modify batch dimension.")
             return False
     return True
 
@@ -850,7 +850,7 @@ def __init__(self):
         self.is_compute_intensive = False
 
     def visit_call(self, call):
-        heavy_ops = set(
+        compute_intensive_ops = set(
             [
                 "nn.conv2d",
                 "nn.conv2d_transpose",
@@ -861,12 +861,15 @@ def visit_call(self, call):
             ]
         )
         if isinstance(call.op, tvm.tir.op.Op):
-            if str(call.op) in heavy_ops:
+            if str(call.op) in compute_intensive_ops:
                 self.is_compute_intensive = True
 
         return super().visit_call(call)
 
-    def is_graph_compute_intensive(self, subgraph):
+    def is_graph_compute_intensive(self, subgraph) -> bool:
+        """
+        This function recursively visits the graph and checks if it's compute intensive.
+        """
         self.visit(subgraph)
         return self.is_compute_intensive
 

From dc6aaef908c9e83496d1d2b2ee1f60561196fd56 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Mon, 30 Nov 2020 21:38:01 +0000
Subject: [PATCH 33/34] Resolve PR comments

---
 python/tvm/relay/op/contrib/tensorrt.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/tvm/relay/op/contrib/tensorrt.py b/python/tvm/relay/op/contrib/tensorrt.py
index 3b697f6bdfcd..acd4f4740b2d 100644
--- a/python/tvm/relay/op/contrib/tensorrt.py
+++ b/python/tvm/relay/op/contrib/tensorrt.py
@@ -206,6 +206,9 @@ def _func_wrapper(expr):
                 ]
                 for arg in args
             ]
+            # Batched multiply operations don't work in implicit batch mode. The following shapes
+            # have been excluded because they occur in the PT MaskRCNN model. The long-term fix is
+            # to switch to explicit batch mode after performance regressions are solved.
             if all(
                 [list(map(int, shape)) in [[300, 64, 7, 7], [300, 1, 1, 1]] for shape in shapes]
             ):
@@ -881,12 +884,15 @@ def is_valid_subgraph(params, body):
         input_batch_sizes = []
         for var in params:
             # In implicit batch mode, all inputs must have same batch size
+            # TODO(codeislife99): Fix different dynamic batch size inputs
+
             if isinstance(var.checked_type, relay.TupleType):
                 for tupe_type in var.checked_type.fields:
                     # Scalar inputs not allowed
                     if len(tupe_type.shape) == 0:
                         logger.info("tensorrt: scalar inputs not supported")
                         return False
+
                     if not isinstance(tupe_type.shape[0], tvm.tir.expr.Any):
                         input_batch_sizes.append(int(tupe_type.shape[0]))
             else:

From 121c46bc95f5c13fddf1dc0afb3188968772fab9 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-27-149.us-east-2.compute.internal>
Date: Mon, 30 Nov 2020 22:58:54 +0000
Subject: [PATCH 34/34] Zero batch size add

---
 src/runtime/contrib/tensorrt/tensorrt_runtime.cc | 1 +
 tests/python/contrib/test_tensorrt.py            | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
index 805e7e1bc7c3..3f87f8d00ee6 100644
--- a/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
+++ b/src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -113,6 +113,7 @@ class TensorRTRuntime : public JSONRuntimeBase {
   void Run() override {
     BuildEngine();
     batch_size_ = data_entry_[input_var_eid_[0]]->shape[0];
+    if (batch_size_ == 0) return;
     auto& engine_and_context = trt_engine_cache_.at(std::make_pair(symbol_name_, batch_size_));
     auto engine = engine_and_context.engine;
     auto context = engine_and_context.context;
diff --git a/tests/python/contrib/test_tensorrt.py b/tests/python/contrib/test_tensorrt.py
index 4be6d84f47b1..10c311a6d363 100644
--- a/tests/python/contrib/test_tensorrt.py
+++ b/tests/python/contrib/test_tensorrt.py
@@ -1042,7 +1042,7 @@ def test_tensorrt_dynamic_batch():
     if skip_codegen_test():
         return
 
-    batches_to_test = [1, 1, 2, 3, 1, 3, 2]
+    batches_to_test = [1, 1, 0, 2, 3, 0, 1, 3, 2]
     x_shape = (relay.Any(), 1, 8, 8)
     x_data = np.ones([max(batches_to_test)] + list(x_shape)[1:]).astype("float32")
     result_dict = {}
@@ -1070,7 +1070,7 @@ def test_tensorrt_dynamic_batch():
 def test_tensorrt_dynamic_batch_conv():
     if skip_codegen_test():
         return
-    batches_to_test = [1, 1, 2, 3, 1, 3, 2]
+    batches_to_test = [1, 1, 0, 2, 3, 0, 1, 3, 2]
     x_shape = (relay.Any(), 32, 8, 8)
     x_data = np.ones([max(batches_to_test)] + list(x_shape)[1:]).astype("float32")
     k_shape = (16, 32, 3, 3)