From f2b9b1f60a67f6d3c39e0ebabc722d757dd9e5ad Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 08:01:00 +0900 Subject: [PATCH 01/18] Add partition test case for conv + bias + relu pattern --- .../python/relay/test_pass_partition_graph.py | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 4ffb37311696..213b8992ac6d 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -27,6 +27,7 @@ from tvm.contrib import util from tvm.relay.annotation import compiler_begin, compiler_end from tvm.relay.expr_functor import ExprMutator +from tvm.relay import analysis, expr as _expr # Leverage the pass manager to write a simple white list based annotator @transform.function_pass(opt_level=0) @@ -165,6 +166,48 @@ def visit_call(self, call): return new_call +class ConvBiasAddReLUAnnotator(ExprMutator): + import enum + state = enum.Enum("State", "Init Conv Bias ReLU") + + def __init__(self, backend): + super().__init__() + self.current_state = self.state.Init + self.backend = backend + + def annotate_call(self, call): + new_args = [] + for arg in call.args: + new_arg = super().visit(arg) + if call.op.name == "nn.conv2d" or isinstance(new_arg, relay.expr.Var): + new_arg = compiler_begin(new_arg, self.backend) + new_args.append(new_arg) + return relay.Call(call.op, new_args, call.attrs, call.type_args) + + def visit_call(self, call): + if call.op.name == "nn.conv2d": + if self.current_state == self.state.Bias: + self.current_state = self.state.Conv + ret = self.annotate_call(call) + self.current_state = self.state.Conv + return ret + self.current_state = self.state.Init + elif call.op.name == "add": + if self.current_state == self.state.ReLU: + self.current_state = self.state.Bias + return self.annotate_call(call) + self.current_state = self.state.Init + elif call.op.name == "nn.relu": + self.current_state = self.state.ReLU + op = self.annotate_call(call) + if self.current_state == self.state.Conv: + op = compiler_end(op, self.backend) + self.current_state = self.state.Init + return op + self.current_state = self.state.Init + return super().visit_call(call) + + def check_result(mod, map_inputs, out_shape, result, tol=1e-5, target="llvm", ctx=tvm.cpu(), params=None): if sys.platform == "win32": @@ -425,6 +468,88 @@ def test_extern_dnnl_mobilenet(): (1, 1000), ref_res.asnumpy(), tol=1e-5, params=params) +def test_partition_conv_bias_relu(): + def get_layers(prefix, data, in_channel, out_channel, + include_bn=True, include_sigmoid=False): + weight = relay.const(np.random.randn(out_channel, in_channel, 3, 3)) + bn_gamma = relay.const(np.random.randn(out_channel)) + bn_beta = relay.const(np.random.randn(out_channel)) + bn_mmean = relay.const(np.random.randn(out_channel)) + bn_mvar = relay.const(np.random.randn(out_channel)) + + layer = relay.nn.conv2d(data=data, weight=weight, kernel_size=(3, 3), + channels=out_channel, padding=(1, 1)) + if include_bn: + bn_output = relay.nn.batch_norm(layer, bn_gamma, bn_beta, + bn_mmean, bn_mvar) + layer = bn_output[0] + if include_sigmoid: + # dummy layer to prevent pattern detection + layer = relay.sigmoid(layer) + layer = relay.nn.relu(layer) + return layer + + def get_net(include_bn=True, include_sigmoid=False): + data = relay.var("data", relay.TensorType((1, 3, 224, 224), "float32")) + layer1 = get_layers("layer1_", data, 3, 16, include_bn, include_sigmoid) + layer2 = 
get_layers("layer2_", layer1, 16, 16, include_bn, include_sigmoid) + last = layer2 + return relay.Function(relay.analysis.free_vars(last), last) + + def get_partitoned_mod(net): + remove_bn_pass = transform.Sequential([ + relay.transform.InferType(), + relay.transform.SimplifyInference(), + relay.transform.FoldConstant(), + relay.transform.FoldScaleAxis(), + ]) + mod, params = tvm.relay.testing.create_workload(net) + + with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): + mod = remove_bn_pass(mod) + + mod["main"] = ConvBiasAddReLUAnnotator("dnnl").visit(mod["main"]) + mod = transform.PartitionGraph()(mod) + return mod, params + + def get_partitions(mod): + partitions = [] + + def visit_func(expr): + if isinstance(expr, _expr.Function) and expr != mod["main"]: + partitions.append(expr) + analysis.post_order_visit(mod["main"], visit_func) + return partitions + + def test_detect_pattern(include_bn, include_sigmoid, num_expected_partition): + net = get_net(include_bn, include_sigmoid) + mod, _ = get_partitoned_mod(net) + assert(len(get_partitions(mod)) == num_expected_partition) + + def test_partition(): + # conv + bn + relu -> detection succeed + test_detect_pattern(True, False, 2) + # conv + relu -> fail + test_detect_pattern(False, False, 0) + # conv + bn + sigmoid + relu -> fail + test_detect_pattern(True, True, 0) + + test_partition() + + # TODO: Enable executor check once the runtime signature issue is resolved + # net = get_net() + # mod, params = get_partitoned_mod(net) + + # ref_mod, params = tvm.relay.testing.create_workload(net) + # ishape = (1, 3, 224, 224) + # i_data = np.random.randn(*ishape).astype(np.float32) + # ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0)) + # ref_res = ref_ex.evaluate()(i_data, **params) + + # check_result(mod, {"data": i_data}, + # ishape, ref_res.asnumpy(), tol=1e-5, params=params) + + if __name__ == "__main__": test_multi_node_compiler() test_extern_ccompiler_single_op() @@ -432,3 +557,4 @@ def test_extern_dnnl_mobilenet(): test_extern_ccompiler() test_extern_dnnl() test_extern_dnnl_mobilenet() + test_partition_conv_bias_relu() From 99e023365af5bb8457862f427e3859042724fb04 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 09:43:07 +0900 Subject: [PATCH 02/18] partitioning mobilenet works --- src/relay/backend/build_module.cc | 37 +++++++++++++ .../python/relay/test_pass_partition_graph.py | 55 +++++++++++++++---- 2 files changed, 81 insertions(+), 11 deletions(-) diff --git a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc index 035ab1ba5bee..2d617db51f9a 100644 --- a/src/relay/backend/build_module.cc +++ b/src/relay/backend/build_module.cc @@ -145,6 +145,43 @@ struct GraphCodegen { } }; +/*! 
+ * \brief Bind params to function by using name + * \param func Relay function + * \param params params dict + * \return relay::Function + */ +relay::Function BindParamsByName(relay::Function func, + const std::unordered_map& params) { + std::unordered_map name_dict; + std::unordered_set repeat_var; + for (auto arg : func->params) { + const auto& name = arg->name_hint(); + if (name_dict.count(name)) { + repeat_var.insert(arg); + } else { + name_dict[name] = arg; + } + } + + std::unordered_map bind_dict; + for (auto& kv : params) { + if (name_dict.count(kv.first) == 0) { + continue; + } + auto arg = name_dict.at(kv.first); + if (repeat_var.count(arg)) { + LOG(FATAL) << "Multiple args in the function have name " << kv.first; + } + bind_dict[arg] = ConstantNode::make(kv.second); + } + Expr bound_expr = relay::Bind(func, bind_dict); + Function ret = Downcast(bound_expr); + CHECK(ret.defined()) << "The returning type is expected to be a Relay Function." + << "\n"; + return ret; +} + /*! * \brief Relay build module * diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 213b8992ac6d..b5f72058ae97 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -184,6 +184,12 @@ def annotate_call(self, call): new_args.append(new_arg) return relay.Call(call.op, new_args, call.attrs, call.type_args) + # def visit_function(self, func): + # print("visiting function") + # new_body = super().visit(func.body) + # return relay.Function(func.params, new_body, + # func.ret_type, func.type_params, func.attrs) + def visit_call(self, call): if call.op.name == "nn.conv2d": if self.current_state == self.state.Bias: @@ -469,6 +475,10 @@ def test_extern_dnnl_mobilenet(): def test_partition_conv_bias_relu(): + if not tvm.get_global_func("relay.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + def get_layers(prefix, data, in_channel, out_channel, include_bn=True, include_sigmoid=False): weight = relay.const(np.random.randn(out_channel, in_channel, 3, 3)) @@ -496,21 +506,33 @@ def get_net(include_bn=True, include_sigmoid=False): last = layer2 return relay.Function(relay.analysis.free_vars(last), last) - def get_partitoned_mod(net): + def pre_optimize(mod, params): remove_bn_pass = transform.Sequential([ relay.transform.InferType(), relay.transform.SimplifyInference(), relay.transform.FoldConstant(), relay.transform.FoldScaleAxis(), ]) - mod, params = tvm.relay.testing.create_workload(net) + + inputs = {} + for name, param in params.items(): + if isinstance(param, np.ndarray): + param = tvm.nd.array(param) + inputs[name] = _expr.const(param) + + from tvm.relay._build_module import BindParamsByName + mod["main"] = BindParamsByName(mod["main"], inputs) with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): mod = remove_bn_pass(mod) + return mod + + def get_partitoned_mod(mod): mod["main"] = ConvBiasAddReLUAnnotator("dnnl").visit(mod["main"]) + #print(mod["main"]) mod = transform.PartitionGraph()(mod) - return mod, params + return mod def get_partitions(mod): partitions = [] @@ -523,7 +545,9 @@ def visit_func(expr): def test_detect_pattern(include_bn, include_sigmoid, num_expected_partition): net = get_net(include_bn, include_sigmoid) - mod, _ = get_partitoned_mod(net) + mod, params = tvm.relay.testing.create_workload(net) + mod = pre_optimize(mod, params) + mod = get_partitoned_mod(mod) assert(len(get_partitions(mod)) == num_expected_partition) def 
test_partition(): @@ -534,7 +558,16 @@ def test_partition(): # conv + bn + sigmoid + relu -> fail test_detect_pattern(True, True, 0) - test_partition() + def test_partition_mobilenet(): + mod, params = relay.testing.mobilenet.get_workload() + mod = pre_optimize(mod, params) + # print(mod["main"]) + mod = get_partitoned_mod(mod) + print(mod["main"]) + print(len(get_partitions(mod["main"]))) + + # test_partition() + test_partition_mobilenet() # TODO: Enable executor check once the runtime signature issue is resolved # net = get_net() @@ -551,10 +584,10 @@ def test_partition(): if __name__ == "__main__": - test_multi_node_compiler() - test_extern_ccompiler_single_op() - test_extern_ccompiler_default_ops() - test_extern_ccompiler() - test_extern_dnnl() - test_extern_dnnl_mobilenet() + # test_multi_node_compiler() + # test_extern_ccompiler_single_op() + # test_extern_ccompiler_default_ops() + # test_extern_ccompiler() + # test_extern_dnnl() + # test_extern_dnnl_mobilenet() test_partition_conv_bias_relu() From c72b071e3382b6814987a30929dc8a18e2d513a1 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 09:50:28 +0900 Subject: [PATCH 03/18] enable all tests --- .../python/relay/test_pass_partition_graph.py | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index b5f72058ae97..fd82e2116931 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -514,14 +514,15 @@ def pre_optimize(mod, params): relay.transform.FoldScaleAxis(), ]) - inputs = {} - for name, param in params.items(): - if isinstance(param, np.ndarray): - param = tvm.nd.array(param) - inputs[name] = _expr.const(param) + if params != {}: + inputs = {} + for name, param in params.items(): + if isinstance(param, np.ndarray): + param = tvm.nd.array(param) + inputs[name] = _expr.const(param) - from tvm.relay._build_module import BindParamsByName - mod["main"] = BindParamsByName(mod["main"], inputs) + from tvm.relay._build_module import BindParamsByName + mod["main"] = BindParamsByName(mod["main"], inputs) with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): mod = remove_bn_pass(mod) @@ -530,7 +531,6 @@ def pre_optimize(mod, params): def get_partitoned_mod(mod): mod["main"] = ConvBiasAddReLUAnnotator("dnnl").visit(mod["main"]) - #print(mod["main"]) mod = transform.PartitionGraph()(mod) return mod @@ -561,12 +561,10 @@ def test_partition(): def test_partition_mobilenet(): mod, params = relay.testing.mobilenet.get_workload() mod = pre_optimize(mod, params) - # print(mod["main"]) mod = get_partitoned_mod(mod) - print(mod["main"]) - print(len(get_partitions(mod["main"]))) + assert(len(get_partitions(mod)) == 27) - # test_partition() + test_partition() test_partition_mobilenet() # TODO: Enable executor check once the runtime signature issue is resolved @@ -584,10 +582,10 @@ def test_partition_mobilenet(): if __name__ == "__main__": - # test_multi_node_compiler() - # test_extern_ccompiler_single_op() - # test_extern_ccompiler_default_ops() - # test_extern_ccompiler() - # test_extern_dnnl() - # test_extern_dnnl_mobilenet() + test_multi_node_compiler() + test_extern_ccompiler_single_op() + test_extern_ccompiler_default_ops() + test_extern_ccompiler() + test_extern_dnnl() + test_extern_dnnl_mobilenet() test_partition_conv_bias_relu() From 693474b59c5a501243c0cdaaaacee11989b1865e Mon Sep 17 00:00:00 2001 From: Masahiro 
Masuda Date: Sun, 19 Jan 2020 10:06:50 +0900 Subject: [PATCH 04/18] introduce bind_params_by_name as reusable api --- python/tvm/relay/build_module.py | 5 ++++- tests/python/relay/test_pass_partition_graph.py | 12 ++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py index d848d9030c48..1170855d32bb 100644 --- a/python/tvm/relay/build_module.py +++ b/python/tvm/relay/build_module.py @@ -158,7 +158,6 @@ def optimize(self, func, target=None, params=None): return mod, params - def _set_params(self, params): self._set_params_func(_convert_param_map(params)) @@ -318,6 +317,10 @@ def bind_params_by_name(func, params): ------- func : relay.Function The function with parameters bound +<<<<<<< HEAD +======= + +>>>>>>> introduce bind_params_by_name as reusable api """ inputs = _convert_param_map(params) return _build_module.BindParamsByName(func, inputs) diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index fd82e2116931..bf57ceedb7a2 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -28,6 +28,8 @@ from tvm.relay.annotation import compiler_begin, compiler_end from tvm.relay.expr_functor import ExprMutator from tvm.relay import analysis, expr as _expr +from tvm.relay.build_module import bind_params_by_name + # Leverage the pass manager to write a simple white list based annotator @transform.function_pass(opt_level=0) @@ -515,14 +517,8 @@ def pre_optimize(mod, params): ]) if params != {}: - inputs = {} - for name, param in params.items(): - if isinstance(param, np.ndarray): - param = tvm.nd.array(param) - inputs[name] = _expr.const(param) - - from tvm.relay._build_module import BindParamsByName - mod["main"] = BindParamsByName(mod["main"], inputs) + # This is required for constant folding on mobilenet + mod["main"] = bind_params_by_name(mod["main"], params) with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): mod = remove_bn_pass(mod) From e902fa4bdf580529ab6bb2f8658b3e0fb13d9b9f Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 10:13:23 +0900 Subject: [PATCH 05/18] remove unused function --- tests/python/relay/test_pass_partition_graph.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index bf57ceedb7a2..9c253f4f4d5c 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -186,12 +186,6 @@ def annotate_call(self, call): new_args.append(new_arg) return relay.Call(call.op, new_args, call.attrs, call.type_args) - # def visit_function(self, func): - # print("visiting function") - # new_body = super().visit(func.body) - # return relay.Function(func.params, new_body, - # func.ret_type, func.type_params, func.attrs) - def visit_call(self, call): if call.op.name == "nn.conv2d": if self.current_state == self.state.Bias: From 50e78b878055a26e89564ae800c4f0ca4aa2b4a6 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sat, 18 Jan 2020 19:57:15 +0900 Subject: [PATCH 06/18] add fused dnnl op --- src/runtime/contrib/dnnl/dnnl.cc | 63 ++++++++++++++++++-------- src/runtime/contrib/dnnl/dnnl_kernel.h | 6 +++ 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl.cc b/src/runtime/contrib/dnnl/dnnl.cc index cc430b2c7c76..5622d8feeed1 100644 --- 
a/src/runtime/contrib/dnnl/dnnl.cc +++ b/src/runtime/contrib/dnnl/dnnl.cc @@ -52,10 +52,10 @@ inline void read_from_dnnl_memory(void* handle, const memory& mem) { std::copy(src, src + bytes, reinterpret_cast(handle)); } -extern "C" void dnnl_conv2d(float* data, float* weights, float* out, int p_N_, - int p_C_, int p_H_, int p_W_, int p_O_, int p_G_, - int p_Ph_, int p_Pw_, int p_Kh_, int p_Kw_, - int p_Sh_, int p_Sw_) { +void dnnl_conv2d_common(float* data, float* weights, float* bias, float* out, + int p_N_, int p_C_, int p_H_, int p_W_, + int p_O_, int p_G_, int p_Ph_, int p_Pw_, int p_Kh_, + int p_Kw_, int p_Sh_, int p_Sw_, primitive_attr attr) { using tag = memory::format_tag; using dt = memory::data_type; engine eng(engine::kind::cpu, 0); @@ -65,21 +65,16 @@ extern "C" void dnnl_conv2d(float* data, float* weights, float* out, int p_N_, memory::dims conv2d_weights_tz = {p_O_, p_C_, p_Kh_, p_Kw_}; if (p_G_ > 1) conv2d_weights_tz = {p_G_, 1, p_C_ / p_G_, p_Kh_, p_Kw_}; memory::dims conv2d_bias_tz = {p_O_}; - memory::dims conv2d_dst_tz = {p_N_, p_O_, - (p_H_ - p_Kh_ + 2 * p_Ph_ + p_Sh_) / p_Sh_, + memory::dims conv2d_dst_tz = {p_N_, p_O_, (p_H_ - p_Kh_ + 2 * p_Ph_ + p_Sh_) / p_Sh_, (p_W_ - p_Kw_ + 2 * p_Pw_ + p_Sw_) / p_Sw_}; memory::dims conv2d_strides = {p_Sh_, p_Sw_}; memory::dims conv2d_padding = {p_Ph_, p_Pw_}; - std::vector conv2d_bias(p_O_, 0); - - auto user_src_memory = - memory({{conv2d_src_tz}, dt::f32, tag::nchw}, eng, data); - auto user_weights_memory = memory( - {{conv2d_weights_tz}, dt::f32, (p_G_ > 1) ? tag::goihw : tag::oihw}, eng, - weights); + auto user_src_memory = memory({{conv2d_src_tz}, dt::f32, tag::nchw}, eng, data); + auto user_weights_memory = + memory({{conv2d_weights_tz}, dt::f32, (p_G_ > 1) ? tag::goihw : tag::oihw}, eng, weights); auto conv2d_user_bias_memory = - memory({{conv2d_bias_tz}, dt::f32, tag::x}, eng, conv2d_bias.data()); + memory({{conv2d_bias_tz}, dt::f32, tag::x}, eng, bias); auto conv2d_src_md = memory::desc({conv2d_src_tz}, dt::f32, tag::any); auto conv2d_bias_md = memory::desc({conv2d_bias_tz}, dt::f32, tag::any); @@ -87,10 +82,9 @@ extern "C" void dnnl_conv2d(float* data, float* weights, float* out, int p_N_, auto conv2d_dst_md = memory::desc({conv2d_dst_tz}, dt::f32, tag::nchw); auto conv2d_desc = convolution_forward::desc( - prop_kind::forward_inference, algorithm::convolution_direct, - conv2d_src_md, conv2d_weights_md, conv2d_bias_md, conv2d_dst_md, - conv2d_strides, conv2d_padding, conv2d_padding); - auto conv2d_prim_desc = convolution_forward::primitive_desc(conv2d_desc, eng); + prop_kind::forward_inference, algorithm::convolution_direct, conv2d_src_md, conv2d_weights_md, + conv2d_bias_md, conv2d_dst_md, conv2d_strides, conv2d_padding, conv2d_padding); + auto conv2d_prim_desc = convolution_forward::primitive_desc(conv2d_desc, attr, eng); auto conv2d_src_memory = user_src_memory; auto conv2d_weights_memory = user_weights_memory; @@ -105,6 +99,39 @@ extern "C" void dnnl_conv2d(float* data, float* weights, float* out, int p_N_, read_from_dnnl_memory(out, conv2d_dst_memory); } +extern "C" void dnnl_conv2d(float* data, float* weights, float* out, + int p_N_, int p_C_, int p_H_, int p_W_, + int p_O_, int p_G_, int p_Ph_, int p_Pw_, + int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_) { + primitive_attr attr; + std::vector bias(p_O_, 0); + return dnnl_conv2d_common(data, weights, bias.data(), out, + p_N_, p_C_, p_H_, p_W_, p_O_, p_G_, + p_Ph_, p_Pw_, p_Kh_, p_Kw_, p_Sh_, p_Sw_, + attr); +} + +primitive_attr create_attr_with_relu_post_op() { + post_ops 
ops; + ops.append_eltwise(1.f, algorithm::eltwise_relu, 0.f, 0.f); + + primitive_attr attr; + attr.set_post_ops(ops); + + return attr; +} + +extern "C" void dnnl_fused_conv2d_bias_relu(float* data, float* weights, float* bias, float* out, + int p_N_, int p_C_, int p_H_, int p_W_, int p_O_, + int p_G_, int p_Ph_, int p_Pw_, int p_Kh_, int p_Kw_, + int p_Sh_, int p_Sw_) { + return dnnl_conv2d_common(data, weights, bias, out, + p_N_, p_C_, p_H_, p_W_, + p_O_, p_G_, p_Ph_, p_Pw_, + p_Kh_, p_Kw_, p_Sh_, p_Sw_, + create_attr_with_relu_post_op()); +} + extern "C" void dnnl_dense(float* data, float* weight, float* out, int p_B_, int p_I_, int p_O_) { using tag = memory::format_tag; diff --git a/src/runtime/contrib/dnnl/dnnl_kernel.h b/src/runtime/contrib/dnnl/dnnl_kernel.h index 4d0b100b92ec..b3111030becf 100644 --- a/src/runtime/contrib/dnnl/dnnl_kernel.h +++ b/src/runtime/contrib/dnnl/dnnl_kernel.h @@ -38,6 +38,12 @@ extern "C" TVM_DLL void dnnl_conv2d(float* data, float* weights, float* out, int int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph_, int p_Pw_, int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_); +extern "C" TVM_DLL void dnnl_fused_conv2d_bias_relu(float* data, float* weights, float* bias, + float* out, int p_N_, int p_C_, int p_H_, + int p_W_, int p_O_, int p_G_, int p_Ph_, + int p_Pw_, int p_Kh_, int p_Kw_, int p_Sh_, + int p_Sw_); + extern "C" TVM_DLL void dnnl_dense(float* data, float* weight, float* out, int p_B_, int p_I_, int p_O_); From 0d41ce7dcf40fcc1804920bca51c8c99b61a5697 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sat, 18 Jan 2020 21:41:59 +0900 Subject: [PATCH 07/18] refactoring dnnl codegen --- src/relay/backend/contrib/dnnl/codegen.cc | 143 +++++++++++++++------- 1 file changed, 98 insertions(+), 45 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 759a4421bc1d..72446b825486 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -31,6 +31,7 @@ #include #include +#include #include "../codegen_c/codegen_c.h" @@ -55,66 +56,95 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { } void VisitExpr_(const CallNode* call) final { - std::ostringstream decl_stream; - std::ostringstream buf_stream; - // Args: ID - std::vector args; - // Get the arguments for various DNNL kernels. 
- if (IsOp(call, "nn.conv2d")) { - decl_stream << "dnnl_conv2d"; + auto generate_body = [&](const CallNode* root_call, + const std::vector& args, + const std::vector& fused_func_args) { + // Make function call with input buffers when visiting arguments + bool first = true; + std::ostringstream arg_stream; + arg_stream << "("; + for (size_t i = 0; i < root_call->args.size(); ++i) { + VisitExpr(root_call->args[i]); + for (auto out : out_) { + if (!first) { + arg_stream << ", "; + } + first = false; + arg_stream << out.first; + } + } + for (auto arg_name : fused_func_args) { + arg_stream << ", " << arg_name; + } + + // Analyze the output buffer + auto type_node = root_call->checked_type().as(); + CHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype, kDLFloat, 32)) + << "Only support single output tensor with float type"; + + auto out = "buf_" + std::to_string(buf_idx_++); + auto out_shape = GetShape(root_call->checked_type()); + auto out_size = std::accumulate(out_shape.begin(), out_shape.end(), + 1, std::multiplies()); + this->PrintIndents(); + + std::ostringstream buf_stream; + buf_stream << "float* " << out << " = (float*)std::malloc(4 * " << out_size << ");"; + + arg_stream << ", " << out; + + // Attach attribute arguments + for (size_t i = 0; i < args.size(); ++i) { + arg_stream << ", " << args[i]; + } + arg_stream << ");"; + return std::make_tuple(arg_stream.str(), buf_stream.str(), out, out_size); + }; + + std::string decl, buf; + int out_size = 1; + std::string out; + std::vector args; + if (auto conv_call = DetectFusedConv2DBiasReLU(call)) { + LOG(INFO) << "found fused op, num_args = " << call->args.size(); + auto ret = generate_body(conv_call, FusedConv2dBiasReLU(conv_call), + ext_fused_func_args_); + decl = "dnnl_fused_conv2d_bias_relu" + std::get<0>(ret); + buf = std::get<1>(ret); + out = std::get<2>(ret); + out_size = std::get<3>(ret); + } else if (IsOp(call, "nn.conv2d")) { + LOG(INFO) << "found conv"; + decl = "dnnl_conv2d"; args = Conv2d(call); } else if (IsOp(call, "nn.dense")) { - decl_stream << "dnnl_dense"; + decl = "dnnl_dense"; args = Dense(call); } else if (IsOp(call, "nn.relu")) { - decl_stream << "dnnl_relu"; + LOG(INFO) << "found relu"; + decl = "dnnl_relu"; args = Relu(call); } else if (IsOp(call, "nn.batch_norm")) { - decl_stream << "dnnl_bn"; + decl = "dnnl_bn"; args = BatchNorm(call); } else if (IsOp(call, "add")) { - decl_stream << "dnnl_add"; + decl = "dnnl_add"; args = Add(call); } else { LOG(FATAL) << "Unsupported op: " << AsText(call->op, false); } - // Make function call with input buffers when visiting arguments - bool first = true; - decl_stream << "("; - for (size_t i = 0; i < call->args.size(); ++i) { - VisitExpr(call->args[i]); - for (auto out : out_) { - if (!first) { - decl_stream << ", "; - } - first = false; - decl_stream << out.first; - } + if (out == "") { + auto ret = generate_body(call, args, {}); + decl += std::get<0>(ret); + buf = std::get<1>(ret); + out = std::get<2>(ret); + out_size = std::get<3>(ret); } - // Analyze the output buffer - auto type_node = call->checked_type().as(); - CHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype, kDLFloat, 32)) - << "Only support single output tensor with float type"; - std::string out = "buf_" + std::to_string(buf_idx_++); - auto out_shape = GetShape(call->checked_type()); - int out_size = 1; - for (size_t i = 0; i < out_shape.size(); ++i) { - out_size *= out_shape[i]; - } - this->PrintIndents(); - buf_stream << "float* " << out << " = (float*)std::malloc(4 * " << out_size << 
");"; - buf_decl_.push_back(buf_stream.str()); - decl_stream << ", " << out; - - // Attach attribute arguments - for (size_t i = 0; i < args.size(); ++i) { - decl_stream << ", " << args[i]; - } - decl_stream << ");"; - ext_func_body.push_back(decl_stream.str()); + buf_decl_.push_back(buf); + ext_func_body.push_back(decl); // Update output buffer out_.clear(); @@ -122,10 +152,22 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { } std::string JIT(void) { + ext_func_args_.insert(ext_func_args_.end(), + ext_fused_func_args_.begin(), + ext_fused_func_args_.end()); return JitImpl(ext_func_id_, ext_func_args_, buf_decl_, ext_func_body, out_); } private: + const CallNode* DetectFusedConv2DBiasReLU(const CallNode* call) { + auto arg = call->args[0]; + if (auto next_call = arg.as()) { + if (IsOp(next_call, "nn.conv2d")) { + } + } + return nullptr; + } + std::vector Conv2d(const CallNode* call) { std::vector args; const auto* conv2d_attr = call->attrs.as(); @@ -152,6 +194,10 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { return args; } + std::vector FusedConv2dBiasReLU(const CallNode* call) { + return Conv2d(call); + } + std::vector Dense(const CallNode* call) { std::vector args; auto ishape = GetShape(call->args[0]->checked_type()); @@ -214,6 +260,7 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { int buf_idx_{0}; /*! \brief The arguments used by a wrapped function that calls DNNL kernels. */ std::vector ext_func_args_; + std::vector ext_fused_func_args_; /*! \brief statement of the function that will be compiled using DNNL kernels. */ std::vector ext_func_body; /*! \brief The declaration of intermeidate buffers. */ @@ -270,10 +317,12 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { code_stream_ << "\n"; if (ref->IsInstance()) { + LOG(INFO) << "Invoking GenDNNLFunc on FuncNode"; GenDNNLFunc(Downcast(ref)); } else if (ref->IsInstance()) { IRModule mod = Downcast(ref); for (const auto& it : mod->functions) { + LOG(INFO) << "Invoking GenDNNLFunc"; GenDNNLFunc(Downcast(it.second)); } } else { @@ -284,6 +333,7 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { // Create a CSourceModule const auto* pf = runtime::Registry::Get("module.csource_module_create"); CHECK(pf != nullptr) << "Cannot find csource module to create the external runtime module"; + LOG(INFO) << code_stream_.str(); return (*pf)(code_stream_.str(), "cc"); } @@ -301,7 +351,10 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { */ runtime::Module DNNLCompiler(const ObjectRef& ref) { DNNLModuleCodegen dnnl; - return dnnl.CreateCSourceModule(ref); + LOG(INFO) << "Invoking DNNLCompiler"; + auto ret = dnnl.CreateCSourceModule(ref); + LOG(INFO) << "Done invoking DNNLCompiler"; + return ret; } TVM_REGISTER_GLOBAL("relay.ext.dnnl").set_body_typed(DNNLCompiler); From 17f825ff42d77e8721943174ea575e2806927c0f Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sat, 18 Jan 2020 21:56:26 +0900 Subject: [PATCH 08/18] cleanup --- src/relay/backend/contrib/dnnl/codegen.cc | 92 ++++++++++------------- 1 file changed, 38 insertions(+), 54 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 72446b825486..44b8e25ddba5 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -51,15 +51,16 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { out_.push_back({node->name_hint(), 0}); } - void VisitExpr_(const TupleGetItemNode* op) final { - // Do nothing 
- } - void VisitExpr_(const CallNode* call) final { - // Get the arguments for various DNNL kernels. - auto generate_body = [&](const CallNode* root_call, - const std::vector& args, - const std::vector& fused_func_args) { + struct Output { + std::string decl, buf; + int out_size = 1; + std::string out; + }; + + auto generate_body = [&](const CallNode* root_call, const std::string& func_name, + const std::vector& args, + const std::vector& fused_func_args) { // Make function call with input buffers when visiting arguments bool first = true; std::ostringstream arg_stream; @@ -74,6 +75,7 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { arg_stream << out.first; } } + for (auto arg_name : fused_func_args) { arg_stream << ", " << arg_name; } @@ -83,88 +85,70 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { CHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype, kDLFloat, 32)) << "Only support single output tensor with float type"; - auto out = "buf_" + std::to_string(buf_idx_++); auto out_shape = GetShape(root_call->checked_type()); - auto out_size = std::accumulate(out_shape.begin(), out_shape.end(), - 1, std::multiplies()); + + Output ret; + ret.out = "buf_" + std::to_string(buf_idx_++); + ret.out_size = std::accumulate(out_shape.begin(), out_shape.end(), 1, std::multiplies()); + this->PrintIndents(); std::ostringstream buf_stream; - buf_stream << "float* " << out << " = (float*)std::malloc(4 * " << out_size << ");"; - - arg_stream << ", " << out; + buf_stream << "float* " << ret.out << " = (float*)std::malloc(4 * " << ret.out_size << ");"; + ret.buf = buf_stream.str(); + arg_stream << ", " << ret.out; // Attach attribute arguments for (size_t i = 0; i < args.size(); ++i) { arg_stream << ", " << args[i]; } arg_stream << ");"; - return std::make_tuple(arg_stream.str(), buf_stream.str(), out, out_size); + ret.decl = func_name + arg_stream.str(); + + return ret; }; - std::string decl, buf; - int out_size = 1; - std::string out; - std::vector args; + Output ret; if (auto conv_call = DetectFusedConv2DBiasReLU(call)) { LOG(INFO) << "found fused op, num_args = " << call->args.size(); - auto ret = generate_body(conv_call, FusedConv2dBiasReLU(conv_call), - ext_fused_func_args_); - decl = "dnnl_fused_conv2d_bias_relu" + std::get<0>(ret); - buf = std::get<1>(ret); - out = std::get<2>(ret); - out_size = std::get<3>(ret); + ret = generate_body(conv_call, "dnnl_fused_conv2d_bias_relu", + FusedConv2dBiasReLU(conv_call), ext_fused_func_args_); } else if (IsOp(call, "nn.conv2d")) { - LOG(INFO) << "found conv"; - decl = "dnnl_conv2d"; - args = Conv2d(call); + ret = generate_body(call, "dnnl_conv2d", Conv2d(call), {}); } else if (IsOp(call, "nn.dense")) { - decl = "dnnl_dense"; - args = Dense(call); + ret = generate_body(call, "dnnl_dense", Dense(call), {}); } else if (IsOp(call, "nn.relu")) { - LOG(INFO) << "found relu"; - decl = "dnnl_relu"; - args = Relu(call); + ret = generate_body(call, "dnnl_relu", Relu(call), {}); } else if (IsOp(call, "nn.batch_norm")) { - decl = "dnnl_bn"; - args = BatchNorm(call); + ret = generate_body(call, "dnnl_bn", BatchNorm(call), {}); } else if (IsOp(call, "add")) { - decl = "dnnl_add"; - args = Add(call); + ret = generate_body(call, "dnnl_add", Add(call), {}); } else { LOG(FATAL) << "Unsupported op: " << AsText(call->op, false); } - if (out == "") { - auto ret = generate_body(call, args, {}); - decl += std::get<0>(ret); - buf = std::get<1>(ret); - out = std::get<2>(ret); - out_size = std::get<3>(ret); - } - - buf_decl_.push_back(buf); - 
ext_func_body.push_back(decl); + buf_decl_.push_back(ret.buf); + ext_func_body.push_back(ret.decl); // Update output buffer out_.clear(); - out_.push_back({out, out_size}); + out_.push_back({ret.out, ret.out_size}); } std::string JIT(void) { ext_func_args_.insert(ext_func_args_.end(), - ext_fused_func_args_.begin(), - ext_fused_func_args_.end()); + ext_fused_func_args_.begin(), + ext_fused_func_args_.end()); return JitImpl(ext_func_id_, ext_func_args_, buf_decl_, ext_func_body, out_); } private: const CallNode* DetectFusedConv2DBiasReLU(const CallNode* call) { auto arg = call->args[0]; - if (auto next_call = arg.as()) { - if (IsOp(next_call, "nn.conv2d")) { - } - } + // if (auto next_call = arg.as()) { + // if (IsOp(next_call, "nn.conv2d")) { + // } + // } return nullptr; } From 40a41675881e5e26eaed9c3aa07a77c82a787598 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sat, 18 Jan 2020 23:35:31 +0900 Subject: [PATCH 09/18] add pattern detection --- src/relay/backend/contrib/dnnl/codegen.cc | 31 +++++++++++++++-------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 44b8e25ddba5..01aee57efa6d 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -51,6 +51,14 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { out_.push_back({node->name_hint(), 0}); } + void VisitExpr_(const ConstantNode* node) final { + LOG(INFO) << "Visiting constant node"; + auto name = "dnnl_input" + std::to_string(ext_func_args_.size()); + ext_func_args_.push_back(name); + out_.clear(); + out_.push_back({name, 0}); + } + void VisitExpr_(const CallNode* call) final { struct Output { std::string decl, buf; @@ -58,7 +66,7 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { std::string out; }; - auto generate_body = [&](const CallNode* root_call, const std::string& func_name, + auto generate_body = [=](const CallNode* root_call, const std::string& func_name, const std::vector& args, const std::vector& fused_func_args) { // Make function call with input buffers when visiting arguments @@ -110,7 +118,6 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { Output ret; if (auto conv_call = DetectFusedConv2DBiasReLU(call)) { - LOG(INFO) << "found fused op, num_args = " << call->args.size(); ret = generate_body(conv_call, "dnnl_fused_conv2d_bias_relu", FusedConv2dBiasReLU(conv_call), ext_fused_func_args_); } else if (IsOp(call, "nn.conv2d")) { @@ -136,20 +143,24 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { } std::string JIT(void) { - ext_func_args_.insert(ext_func_args_.end(), - ext_fused_func_args_.begin(), + ext_func_args_.insert(ext_func_args_.end(), ext_fused_func_args_.begin(), ext_fused_func_args_.end()); return JitImpl(ext_func_id_, ext_func_args_, buf_decl_, ext_func_body, out_); } private: const CallNode* DetectFusedConv2DBiasReLU(const CallNode* call) { - auto arg = call->args[0]; - // if (auto next_call = arg.as()) { - // if (IsOp(next_call, "nn.conv2d")) { - // } - // } - return nullptr; + if (!IsOp(call, "nn.relu")) return nullptr; + auto relu_arg = call->args[0]; + // TODO: a better way to get CallNode* from Expr? 
+ const CallNode* add_call = Downcast(relu_arg).operator->(); + if (!add_call || !IsOp(add_call, "add")) return nullptr; + auto add_arg = add_call->args[0]; + const CallNode* conv_call = Downcast(add_arg).operator->(); + if (!conv_call || !IsOp(conv_call, "nn.conv2d")) return nullptr; + ext_fused_func_args_.push_back("dnnl_input_bias"); + LOG(INFO) << "fused op found"; + return conv_call; } std::vector Conv2d(const CallNode* call) { From 03fba62b2a4f31f9624c5d6e5aa0c5454c87147a Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 05:44:10 +0900 Subject: [PATCH 10/18] improve Expr to CallNode* conversion --- src/relay/backend/contrib/dnnl/codegen.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 01aee57efa6d..085fc86032f4 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -152,13 +152,13 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { const CallNode* DetectFusedConv2DBiasReLU(const CallNode* call) { if (!IsOp(call, "nn.relu")) return nullptr; auto relu_arg = call->args[0]; - // TODO: a better way to get CallNode* from Expr? - const CallNode* add_call = Downcast(relu_arg).operator->(); + const CallNode* add_call = relu_arg.as(); if (!add_call || !IsOp(add_call, "add")) return nullptr; auto add_arg = add_call->args[0]; - const CallNode* conv_call = Downcast(add_arg).operator->(); + const CallNode* conv_call = add_arg.as(); if (!conv_call || !IsOp(conv_call, "nn.conv2d")) return nullptr; - ext_fused_func_args_.push_back("dnnl_input_bias"); + auto bias_name = "dnnl_fused_input" + std::to_string(ext_fused_func_args_.size()); + ext_fused_func_args_.push_back(bias_name); LOG(INFO) << "fused op found"; return conv_call; } From c572c6b3f618459630f97bdffbd900ef7cffb257 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 06:02:38 +0900 Subject: [PATCH 11/18] add fuse test --- src/relay/backend/contrib/dnnl/codegen.cc | 2 -- .../python/relay/test_pass_partition_graph.py | 34 ++++++++++--------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 085fc86032f4..79b387d8e76a 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -346,9 +346,7 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { */ runtime::Module DNNLCompiler(const ObjectRef& ref) { DNNLModuleCodegen dnnl; - LOG(INFO) << "Invoking DNNLCompiler"; auto ret = dnnl.CreateCSourceModule(ref); - LOG(INFO) << "Done invoking DNNLCompiler"; return ret; } diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 9c253f4f4d5c..5c13695b43e8 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -555,27 +555,29 @@ def test_partition_mobilenet(): assert(len(get_partitions(mod)) == 27) test_partition() - test_partition_mobilenet() + # test_partition_mobilenet() # TODO: Enable executor check once the runtime signature issue is resolved - # net = get_net() - # mod, params = get_partitoned_mod(net) + net = get_net() + mod, params = tvm.relay.testing.create_workload(net) + mod = pre_optimize(mod, params) + mod = get_partitoned_mod(mod) - # ref_mod, params = tvm.relay.testing.create_workload(net) - # ishape = (1, 3, 224, 224) - # i_data = 
np.random.randn(*ishape).astype(np.float32) - # ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0)) - # ref_res = ref_ex.evaluate()(i_data, **params) + ref_mod, params = tvm.relay.testing.create_workload(net) + ishape = (1, 3, 224, 224) + i_data = np.random.randn(*ishape).astype(np.float32) + ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0)) + ref_res = ref_ex.evaluate()(i_data, **params) - # check_result(mod, {"data": i_data}, - # ishape, ref_res.asnumpy(), tol=1e-5, params=params) + check_result(mod, {"data": i_data}, + ishape, ref_res.asnumpy(), tol=1e-5, params=params) if __name__ == "__main__": - test_multi_node_compiler() - test_extern_ccompiler_single_op() - test_extern_ccompiler_default_ops() - test_extern_ccompiler() - test_extern_dnnl() - test_extern_dnnl_mobilenet() + # test_multi_node_compiler() + # test_extern_ccompiler_single_op() + # test_extern_ccompiler_default_ops() + # test_extern_ccompiler() + # test_extern_dnnl() + # test_extern_dnnl_mobilenet() test_partition_conv_bias_relu() From a56829b743a930ddc817812af92068a79537d3f0 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 07:56:13 +0900 Subject: [PATCH 12/18] uncomment other tests --- tests/python/relay/test_pass_partition_graph.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 5c13695b43e8..9027017c9ad8 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -574,10 +574,10 @@ def test_partition_mobilenet(): if __name__ == "__main__": - # test_multi_node_compiler() - # test_extern_ccompiler_single_op() - # test_extern_ccompiler_default_ops() - # test_extern_ccompiler() - # test_extern_dnnl() - # test_extern_dnnl_mobilenet() + test_multi_node_compiler() + test_extern_ccompiler_single_op() + test_extern_ccompiler_default_ops() + test_extern_ccompiler() + test_extern_dnnl() + test_extern_dnnl_mobilenet() test_partition_conv_bias_relu() From 92e06c47937fa74a0e3560bb40122307e345d42a Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 16:33:07 +0900 Subject: [PATCH 13/18] add compiler_begin on bias param --- src/relay/backend/contrib/dnnl/codegen.cc | 12 ----- .../python/relay/test_pass_partition_graph.py | 52 +++++++++++-------- 2 files changed, 30 insertions(+), 34 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 79b387d8e76a..7ca95c3c1254 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -51,14 +51,6 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { out_.push_back({node->name_hint(), 0}); } - void VisitExpr_(const ConstantNode* node) final { - LOG(INFO) << "Visiting constant node"; - auto name = "dnnl_input" + std::to_string(ext_func_args_.size()); - ext_func_args_.push_back(name); - out_.clear(); - out_.push_back({name, 0}); - } - void VisitExpr_(const CallNode* call) final { struct Output { std::string decl, buf; @@ -159,7 +151,6 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { if (!conv_call || !IsOp(conv_call, "nn.conv2d")) return nullptr; auto bias_name = "dnnl_fused_input" + std::to_string(ext_fused_func_args_.size()); ext_fused_func_args_.push_back(bias_name); - LOG(INFO) << "fused op found"; return conv_call; } @@ -312,12 +303,10 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { 
code_stream_ << "\n"; if (ref->IsInstance()) { - LOG(INFO) << "Invoking GenDNNLFunc on FuncNode"; GenDNNLFunc(Downcast(ref)); } else if (ref->IsInstance()) { IRModule mod = Downcast(ref); for (const auto& it : mod->functions) { - LOG(INFO) << "Invoking GenDNNLFunc"; GenDNNLFunc(Downcast(it.second)); } } else { @@ -328,7 +317,6 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { // Create a CSourceModule const auto* pf = runtime::Registry::Get("module.csource_module_create"); CHECK(pf != nullptr) << "Cannot find csource module to create the external runtime module"; - LOG(INFO) << code_stream_.str(); return (*pf)(code_stream_.str(), "cc"); } diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 9027017c9ad8..10d1c2dbd88d 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -179,9 +179,10 @@ def __init__(self, backend): def annotate_call(self, call): new_args = [] + has_arg = "nn.conv2d" for arg in call.args: new_arg = super().visit(arg) - if call.op.name == "nn.conv2d" or isinstance(new_arg, relay.expr.Var): + if call.op.name == "nn.conv2d" or isinstance(new_arg, (relay.expr.Var, relay.expr.Constant)): new_arg = compiler_begin(new_arg, self.backend) new_args.append(new_arg) return relay.Call(call.op, new_args, call.attrs, call.type_args) @@ -245,6 +246,7 @@ def check_vm_result(): def check_graph_runtime_result(): with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): json, lib, param = relay.build(mod, target=target, params=params) + # print(json) lib = update_lib(lib) rt_mod = tvm.contrib.graph_runtime.create(json, lib, ctx) @@ -257,7 +259,7 @@ def check_graph_runtime_result(): tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol) - check_vm_result() + # check_vm_result() check_graph_runtime_result() @@ -554,30 +556,36 @@ def test_partition_mobilenet(): mod = get_partitoned_mod(mod) assert(len(get_partitions(mod)) == 27) - test_partition() - # test_partition_mobilenet() + def test_exec(mod, params, ref_mod, ref_params, out_shape): + ishape = (1, 3, 224, 224) + i_data = np.random.randn(*ishape).astype(np.float32) + ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0)) + ref_res = ref_ex.evaluate()(i_data, **ref_params) - # TODO: Enable executor check once the runtime signature issue is resolved - net = get_net() - mod, params = tvm.relay.testing.create_workload(net) - mod = pre_optimize(mod, params) - mod = get_partitoned_mod(mod) + mod = pre_optimize(mod, params) + mod = get_partitoned_mod(mod) - ref_mod, params = tvm.relay.testing.create_workload(net) - ishape = (1, 3, 224, 224) - i_data = np.random.randn(*ishape).astype(np.float32) - ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0)) - ref_res = ref_ex.evaluate()(i_data, **params) + check_result(mod, {"data": i_data}, + out_shape, ref_res.asnumpy(), tol=1e-5, params=params) - check_result(mod, {"data": i_data}, - ishape, ref_res.asnumpy(), tol=1e-5, params=params) + test_partition() + test_partition_mobilenet() + + # net = get_net() + # mod, params = tvm.relay.testing.create_workload(net) + # ref_mod, ref_params = tvm.relay.testing.create_workload(net) + # test_exec(mod, params, ref_mod, ref_params, (1, 16, 224, 224)) + + mod, params = relay.testing.mobilenet.get_workload() + ref_mod, ref_params = relay.testing.mobilenet.get_workload() + test_exec(mod, params, ref_mod, ref_params, (1, 1000)) if __name__ == "__main__": - 
test_multi_node_compiler() - test_extern_ccompiler_single_op() - test_extern_ccompiler_default_ops() - test_extern_ccompiler() - test_extern_dnnl() - test_extern_dnnl_mobilenet() + # test_multi_node_compiler() + # test_extern_ccompiler_single_op() + # test_extern_ccompiler_default_ops() + # test_extern_ccompiler() + # test_extern_dnnl() + # test_extern_dnnl_mobilenet() test_partition_conv_bias_relu() From 264e054f5902ac6b9f144225d5e0b3a87257fa96 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 16:35:26 +0900 Subject: [PATCH 14/18] enable other tests --- .../python/relay/test_pass_partition_graph.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 10d1c2dbd88d..a244c5087214 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -179,7 +179,6 @@ def __init__(self, backend): def annotate_call(self, call): new_args = [] - has_arg = "nn.conv2d" for arg in call.args: new_arg = super().visit(arg) if call.op.name == "nn.conv2d" or isinstance(new_arg, (relay.expr.Var, relay.expr.Constant)): @@ -571,9 +570,9 @@ def test_exec(mod, params, ref_mod, ref_params, out_shape): test_partition() test_partition_mobilenet() - # net = get_net() - # mod, params = tvm.relay.testing.create_workload(net) - # ref_mod, ref_params = tvm.relay.testing.create_workload(net) + net = get_net() + mod, params = tvm.relay.testing.create_workload(net) + ref_mod, ref_params = tvm.relay.testing.create_workload(net) # test_exec(mod, params, ref_mod, ref_params, (1, 16, 224, 224)) mod, params = relay.testing.mobilenet.get_workload() @@ -582,10 +581,10 @@ def test_exec(mod, params, ref_mod, ref_params, out_shape): if __name__ == "__main__": - # test_multi_node_compiler() - # test_extern_ccompiler_single_op() - # test_extern_ccompiler_default_ops() - # test_extern_ccompiler() - # test_extern_dnnl() - # test_extern_dnnl_mobilenet() + test_multi_node_compiler() + test_extern_ccompiler_single_op() + test_extern_ccompiler_default_ops() + test_extern_ccompiler() + test_extern_dnnl() + test_extern_dnnl_mobilenet() test_partition_conv_bias_relu() From 3ef57f66fccdd8dd97bb26756b67ed0d387c7264 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 16:42:22 +0900 Subject: [PATCH 15/18] minor fix --- src/relay/backend/contrib/dnnl/codegen.cc | 3 +-- tests/python/relay/test_pass_partition_graph.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 7ca95c3c1254..807714a33bd5 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -334,8 +334,7 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { */ runtime::Module DNNLCompiler(const ObjectRef& ref) { DNNLModuleCodegen dnnl; - auto ret = dnnl.CreateCSourceModule(ref); - return ret; + return dnnl.CreateCSourceModule(ref); } TVM_REGISTER_GLOBAL("relay.ext.dnnl").set_body_typed(DNNLCompiler); diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index a244c5087214..69be0e602d43 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -245,7 +245,6 @@ def check_vm_result(): def check_graph_runtime_result(): with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): 
json, lib, param = relay.build(mod, target=target, params=params) - # print(json) lib = update_lib(lib) rt_mod = tvm.contrib.graph_runtime.create(json, lib, ctx) @@ -258,7 +257,7 @@ def check_graph_runtime_result(): tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol) - # check_vm_result() + check_vm_result() check_graph_runtime_result() From ee50eed9c5af29886627b94896ac2cbe8bbb629f Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 16:57:11 +0900 Subject: [PATCH 16/18] fixed test on simple net --- tests/python/relay/test_pass_partition_graph.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 69be0e602d43..45951a8f3b15 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -477,11 +477,11 @@ def test_partition_conv_bias_relu(): def get_layers(prefix, data, in_channel, out_channel, include_bn=True, include_sigmoid=False): - weight = relay.const(np.random.randn(out_channel, in_channel, 3, 3)) - bn_gamma = relay.const(np.random.randn(out_channel)) - bn_beta = relay.const(np.random.randn(out_channel)) - bn_mmean = relay.const(np.random.randn(out_channel)) - bn_mvar = relay.const(np.random.randn(out_channel)) + weight = relay.var(prefix + "weight") + bn_gamma = relay.var(prefix + "bn_gamma") + bn_beta = relay.var(prefix + "bn_beta") + bn_mmean = relay.var(prefix + "bn_mean") + bn_mvar = relay.var(prefix + "bn_var") layer = relay.nn.conv2d(data=data, weight=weight, kernel_size=(3, 3), channels=out_channel, padding=(1, 1)) @@ -511,7 +511,7 @@ def pre_optimize(mod, params): ]) if params != {}: - # This is required for constant folding on mobilenet + # This is required for constant folding mod["main"] = bind_params_by_name(mod["main"], params) with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): @@ -572,7 +572,7 @@ def test_exec(mod, params, ref_mod, ref_params, out_shape): net = get_net() mod, params = tvm.relay.testing.create_workload(net) ref_mod, ref_params = tvm.relay.testing.create_workload(net) - # test_exec(mod, params, ref_mod, ref_params, (1, 16, 224, 224)) + test_exec(mod, params, ref_mod, ref_params, (1, 16, 224, 224)) mod, params = relay.testing.mobilenet.get_workload() ref_mod, ref_params = relay.testing.mobilenet.get_workload() From 2e2da6b08da478b104c32d043375dcbd54ca2c95 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Tue, 21 Jan 2020 07:53:06 +0900 Subject: [PATCH 17/18] rebase and address comments --- src/relay/backend/build_module.cc | 37 ------------------- .../python/relay/test_pass_partition_graph.py | 13 ++++--- 2 files changed, 7 insertions(+), 43 deletions(-) diff --git a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc index 2d617db51f9a..035ab1ba5bee 100644 --- a/src/relay/backend/build_module.cc +++ b/src/relay/backend/build_module.cc @@ -145,43 +145,6 @@ struct GraphCodegen { } }; -/*! 
- * \brief Bind params to function by using name - * \param func Relay function - * \param params params dict - * \return relay::Function - */ -relay::Function BindParamsByName(relay::Function func, - const std::unordered_map& params) { - std::unordered_map name_dict; - std::unordered_set repeat_var; - for (auto arg : func->params) { - const auto& name = arg->name_hint(); - if (name_dict.count(name)) { - repeat_var.insert(arg); - } else { - name_dict[name] = arg; - } - } - - std::unordered_map bind_dict; - for (auto& kv : params) { - if (name_dict.count(kv.first) == 0) { - continue; - } - auto arg = name_dict.at(kv.first); - if (repeat_var.count(arg)) { - LOG(FATAL) << "Multiple args in the function have name " << kv.first; - } - bind_dict[arg] = ConstantNode::make(kv.second); - } - Expr bound_expr = relay::Bind(func, bind_dict); - Function ret = Downcast(bound_expr); - CHECK(ret.defined()) << "The returning type is expected to be a Relay Function." - << "\n"; - return ret; -} - /*! * \brief Relay build module * diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 45951a8f3b15..4459505595c4 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -29,6 +29,7 @@ from tvm.relay.expr_functor import ExprMutator from tvm.relay import analysis, expr as _expr from tvm.relay.build_module import bind_params_by_name +from tvm.relay.backend import compile_engine # Leverage the pass manager to write a simple white list based annotator @@ -475,7 +476,7 @@ def test_partition_conv_bias_relu(): print("skip because DNNL codegen is not available") return - def get_layers(prefix, data, in_channel, out_channel, + def get_blocks(prefix, data, in_channel, out_channel, include_bn=True, include_sigmoid=False): weight = relay.var(prefix + "weight") bn_gamma = relay.var(prefix + "bn_gamma") @@ -497,8 +498,8 @@ def get_layers(prefix, data, in_channel, out_channel, def get_net(include_bn=True, include_sigmoid=False): data = relay.var("data", relay.TensorType((1, 3, 224, 224), "float32")) - layer1 = get_layers("layer1_", data, 3, 16, include_bn, include_sigmoid) - layer2 = get_layers("layer2_", layer1, 16, 16, include_bn, include_sigmoid) + layer1 = get_blocks("layer1_", data, 3, 16, include_bn, include_sigmoid) + layer2 = get_blocks("layer2_", layer1, 16, 16, include_bn, include_sigmoid) last = layer2 return relay.Function(relay.analysis.free_vars(last), last) @@ -510,9 +511,8 @@ def pre_optimize(mod, params): relay.transform.FoldScaleAxis(), ]) - if params != {}: - # This is required for constant folding - mod["main"] = bind_params_by_name(mod["main"], params) + # This is required for constant folding + mod["main"] = bind_params_by_name(mod["main"], params) with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): mod = remove_bn_pass(mod) @@ -559,6 +559,7 @@ def test_exec(mod, params, ref_mod, ref_params, out_shape): i_data = np.random.randn(*ishape).astype(np.float32) ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0)) ref_res = ref_ex.evaluate()(i_data, **ref_params) + compile_engine.get().clear() mod = pre_optimize(mod, params) mod = get_partitoned_mod(mod) From af627a93381fd1b59d4c23fef5f7b91bc0bcf9fd Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Tue, 21 Jan 2020 07:55:41 +0900 Subject: [PATCH 18/18] rebase fix --- python/tvm/relay/build_module.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/python/tvm/relay/build_module.py 
b/python/tvm/relay/build_module.py index 1170855d32bb..774ab07d8dc4 100644 --- a/python/tvm/relay/build_module.py +++ b/python/tvm/relay/build_module.py @@ -317,10 +317,6 @@ def bind_params_by_name(func, params): ------- func : relay.Function The function with parameters bound -<<<<<<< HEAD -======= - ->>>>>>> introduce bind_params_by_name as reusable api """ inputs = _convert_param_map(params) return _build_module.BindParamsByName(func, inputs)
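Usage sketch (illustrative, not part of the patches above): the flow below mirrors what test_partition_conv_bias_relu and test_partition_mobilenet in this series exercise. bind_params_by_name (introduced here as a reusable API) binds the parameters into the module so SimplifyInference / FoldConstant / FoldScaleAxis can reduce batch_norm into the conv2d + add + relu chain, the test-local ConvBiasAddReLUAnnotator marks that chain for the "dnnl" backend, and PartitionGraph lifts it into external functions handled by the fused DNNL kernel. ConvBiasAddReLUAnnotator and relay.ext.dnnl come from this patch series; the rest is the stock Relay API of the same vintage (relay.build_config, transform.PartitionGraph), so treat this as a sketch rather than a guaranteed recipe.

import tvm
import tvm.relay.testing
from tvm import relay
from tvm.relay import transform
from tvm.relay.build_module import bind_params_by_name

mod, params = relay.testing.mobilenet.get_workload()
# Bind params so constant folding can fold the batch_norm scales into the conv weights.
mod["main"] = bind_params_by_name(mod["main"], params)
remove_bn = transform.Sequential([
    relay.transform.InferType(),
    relay.transform.SimplifyInference(),
    relay.transform.FoldConstant(),
    relay.transform.FoldScaleAxis(),
])
with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]):
    mod = remove_bn(mod)
# Annotate conv2d + add + relu chains for the "dnnl" backend and partition them out.
mod["main"] = ConvBiasAddReLUAnnotator("dnnl").visit(mod["main"])  # annotator defined in the test file above
mod = transform.PartitionGraph()(mod)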