From f2b9b1f60a67f6d3c39e0ebabc722d757dd9e5ad Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 08:01:00 +0900 Subject: [PATCH 01/18] Add partition test case for conv + bias + relu pattern --- .../python/relay/test_pass_partition_graph.py | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 4ffb37311696..213b8992ac6d 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -27,6 +27,7 @@ from tvm.contrib import util from tvm.relay.annotation import compiler_begin, compiler_end from tvm.relay.expr_functor import ExprMutator +from tvm.relay import analysis, expr as _expr # Leverage the pass manager to write a simple white list based annotator @transform.function_pass(opt_level=0) @@ -165,6 +166,48 @@ def visit_call(self, call): return new_call +class ConvBiasAddReLUAnnotator(ExprMutator): + import enum + state = enum.Enum("State", "Init Conv Bias ReLU") + + def __init__(self, backend): + super().__init__() + self.current_state = self.state.Init + self.backend = backend + + def annotate_call(self, call): + new_args = [] + for arg in call.args: + new_arg = super().visit(arg) + if call.op.name == "nn.conv2d" or isinstance(new_arg, relay.expr.Var): + new_arg = compiler_begin(new_arg, self.backend) + new_args.append(new_arg) + return relay.Call(call.op, new_args, call.attrs, call.type_args) + + def visit_call(self, call): + if call.op.name == "nn.conv2d": + if self.current_state == self.state.Bias: + self.current_state = self.state.Conv + ret = self.annotate_call(call) + self.current_state = self.state.Conv + return ret + self.current_state = self.state.Init + elif call.op.name == "add": + if self.current_state == self.state.ReLU: + self.current_state = self.state.Bias + return self.annotate_call(call) + self.current_state = self.state.Init + elif call.op.name == "nn.relu": + self.current_state = self.state.ReLU + op = self.annotate_call(call) + if self.current_state == self.state.Conv: + op = compiler_end(op, self.backend) + self.current_state = self.state.Init + return op + self.current_state = self.state.Init + return super().visit_call(call) + + def check_result(mod, map_inputs, out_shape, result, tol=1e-5, target="llvm", ctx=tvm.cpu(), params=None): if sys.platform == "win32": @@ -425,6 +468,88 @@ def test_extern_dnnl_mobilenet(): (1, 1000), ref_res.asnumpy(), tol=1e-5, params=params) +def test_partition_conv_bias_relu(): + def get_layers(prefix, data, in_channel, out_channel, + include_bn=True, include_sigmoid=False): + weight = relay.const(np.random.randn(out_channel, in_channel, 3, 3)) + bn_gamma = relay.const(np.random.randn(out_channel)) + bn_beta = relay.const(np.random.randn(out_channel)) + bn_mmean = relay.const(np.random.randn(out_channel)) + bn_mvar = relay.const(np.random.randn(out_channel)) + + layer = relay.nn.conv2d(data=data, weight=weight, kernel_size=(3, 3), + channels=out_channel, padding=(1, 1)) + if include_bn: + bn_output = relay.nn.batch_norm(layer, bn_gamma, bn_beta, + bn_mmean, bn_mvar) + layer = bn_output[0] + if include_sigmoid: + # dummy layer to prevent pattern detection + layer = relay.sigmoid(layer) + layer = relay.nn.relu(layer) + return layer + + def get_net(include_bn=True, include_sigmoid=False): + data = relay.var("data", relay.TensorType((1, 3, 224, 224), "float32")) + layer1 = get_layers("layer1_", data, 3, 16, include_bn, include_sigmoid) + layer2 = 
get_layers("layer2_", layer1, 16, 16, include_bn, include_sigmoid) + last = layer2 + return relay.Function(relay.analysis.free_vars(last), last) + + def get_partitoned_mod(net): + remove_bn_pass = transform.Sequential([ + relay.transform.InferType(), + relay.transform.SimplifyInference(), + relay.transform.FoldConstant(), + relay.transform.FoldScaleAxis(), + ]) + mod, params = tvm.relay.testing.create_workload(net) + + with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): + mod = remove_bn_pass(mod) + + mod["main"] = ConvBiasAddReLUAnnotator("dnnl").visit(mod["main"]) + mod = transform.PartitionGraph()(mod) + return mod, params + + def get_partitions(mod): + partitions = [] + + def visit_func(expr): + if isinstance(expr, _expr.Function) and expr != mod["main"]: + partitions.append(expr) + analysis.post_order_visit(mod["main"], visit_func) + return partitions + + def test_detect_pattern(include_bn, include_sigmoid, num_expected_partition): + net = get_net(include_bn, include_sigmoid) + mod, _ = get_partitoned_mod(net) + assert(len(get_partitions(mod)) == num_expected_partition) + + def test_partition(): + # conv + bn + relu -> detection succeed + test_detect_pattern(True, False, 2) + # conv + relu -> fail + test_detect_pattern(False, False, 0) + # conv + bn + sigmoid + relu -> fail + test_detect_pattern(True, True, 0) + + test_partition() + + # TODO: Enable executor check once the runtime signature issue is resolved + # net = get_net() + # mod, params = get_partitoned_mod(net) + + # ref_mod, params = tvm.relay.testing.create_workload(net) + # ishape = (1, 3, 224, 224) + # i_data = np.random.randn(*ishape).astype(np.float32) + # ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0)) + # ref_res = ref_ex.evaluate()(i_data, **params) + + # check_result(mod, {"data": i_data}, + # ishape, ref_res.asnumpy(), tol=1e-5, params=params) + + if __name__ == "__main__": test_multi_node_compiler() test_extern_ccompiler_single_op() @@ -432,3 +557,4 @@ def test_extern_dnnl_mobilenet(): test_extern_ccompiler() test_extern_dnnl() test_extern_dnnl_mobilenet() + test_partition_conv_bias_relu() From 99e023365af5bb8457862f427e3859042724fb04 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 09:43:07 +0900 Subject: [PATCH 02/18] partitioning mobilenet works --- src/relay/backend/build_module.cc | 37 +++++++++++++ .../python/relay/test_pass_partition_graph.py | 55 +++++++++++++++---- 2 files changed, 81 insertions(+), 11 deletions(-) diff --git a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc index 035ab1ba5bee..2d617db51f9a 100644 --- a/src/relay/backend/build_module.cc +++ b/src/relay/backend/build_module.cc @@ -145,6 +145,43 @@ struct GraphCodegen { } }; +/*! 
+ * \brief Bind params to function by using name + * \param func Relay function + * \param params params dict + * \return relay::Function + */ +relay::Function BindParamsByName(relay::Function func, + const std::unordered_map& params) { + std::unordered_map name_dict; + std::unordered_set repeat_var; + for (auto arg : func->params) { + const auto& name = arg->name_hint(); + if (name_dict.count(name)) { + repeat_var.insert(arg); + } else { + name_dict[name] = arg; + } + } + + std::unordered_map bind_dict; + for (auto& kv : params) { + if (name_dict.count(kv.first) == 0) { + continue; + } + auto arg = name_dict.at(kv.first); + if (repeat_var.count(arg)) { + LOG(FATAL) << "Multiple args in the function have name " << kv.first; + } + bind_dict[arg] = ConstantNode::make(kv.second); + } + Expr bound_expr = relay::Bind(func, bind_dict); + Function ret = Downcast(bound_expr); + CHECK(ret.defined()) << "The returning type is expected to be a Relay Function." + << "\n"; + return ret; +} + /*! * \brief Relay build module * diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 213b8992ac6d..b5f72058ae97 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -184,6 +184,12 @@ def annotate_call(self, call): new_args.append(new_arg) return relay.Call(call.op, new_args, call.attrs, call.type_args) + # def visit_function(self, func): + # print("visiting function") + # new_body = super().visit(func.body) + # return relay.Function(func.params, new_body, + # func.ret_type, func.type_params, func.attrs) + def visit_call(self, call): if call.op.name == "nn.conv2d": if self.current_state == self.state.Bias: @@ -469,6 +475,10 @@ def test_extern_dnnl_mobilenet(): def test_partition_conv_bias_relu(): + if not tvm.get_global_func("relay.ext.dnnl", True): + print("skip because DNNL codegen is not available") + return + def get_layers(prefix, data, in_channel, out_channel, include_bn=True, include_sigmoid=False): weight = relay.const(np.random.randn(out_channel, in_channel, 3, 3)) @@ -496,21 +506,33 @@ def get_net(include_bn=True, include_sigmoid=False): last = layer2 return relay.Function(relay.analysis.free_vars(last), last) - def get_partitoned_mod(net): + def pre_optimize(mod, params): remove_bn_pass = transform.Sequential([ relay.transform.InferType(), relay.transform.SimplifyInference(), relay.transform.FoldConstant(), relay.transform.FoldScaleAxis(), ]) - mod, params = tvm.relay.testing.create_workload(net) + + inputs = {} + for name, param in params.items(): + if isinstance(param, np.ndarray): + param = tvm.nd.array(param) + inputs[name] = _expr.const(param) + + from tvm.relay._build_module import BindParamsByName + mod["main"] = BindParamsByName(mod["main"], inputs) with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): mod = remove_bn_pass(mod) + return mod + + def get_partitoned_mod(mod): mod["main"] = ConvBiasAddReLUAnnotator("dnnl").visit(mod["main"]) + #print(mod["main"]) mod = transform.PartitionGraph()(mod) - return mod, params + return mod def get_partitions(mod): partitions = [] @@ -523,7 +545,9 @@ def visit_func(expr): def test_detect_pattern(include_bn, include_sigmoid, num_expected_partition): net = get_net(include_bn, include_sigmoid) - mod, _ = get_partitoned_mod(net) + mod, params = tvm.relay.testing.create_workload(net) + mod = pre_optimize(mod, params) + mod = get_partitoned_mod(mod) assert(len(get_partitions(mod)) == num_expected_partition) def 
test_partition(): @@ -534,7 +558,16 @@ def test_partition(): # conv + bn + sigmoid + relu -> fail test_detect_pattern(True, True, 0) - test_partition() + def test_partition_mobilenet(): + mod, params = relay.testing.mobilenet.get_workload() + mod = pre_optimize(mod, params) + # print(mod["main"]) + mod = get_partitoned_mod(mod) + print(mod["main"]) + print(len(get_partitions(mod["main"]))) + + # test_partition() + test_partition_mobilenet() # TODO: Enable executor check once the runtime signature issue is resolved # net = get_net() @@ -551,10 +584,10 @@ def test_partition(): if __name__ == "__main__": - test_multi_node_compiler() - test_extern_ccompiler_single_op() - test_extern_ccompiler_default_ops() - test_extern_ccompiler() - test_extern_dnnl() - test_extern_dnnl_mobilenet() + # test_multi_node_compiler() + # test_extern_ccompiler_single_op() + # test_extern_ccompiler_default_ops() + # test_extern_ccompiler() + # test_extern_dnnl() + # test_extern_dnnl_mobilenet() test_partition_conv_bias_relu() From c72b071e3382b6814987a30929dc8a18e2d513a1 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 09:50:28 +0900 Subject: [PATCH 03/18] enable all tests --- .../python/relay/test_pass_partition_graph.py | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index b5f72058ae97..fd82e2116931 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -514,14 +514,15 @@ def pre_optimize(mod, params): relay.transform.FoldScaleAxis(), ]) - inputs = {} - for name, param in params.items(): - if isinstance(param, np.ndarray): - param = tvm.nd.array(param) - inputs[name] = _expr.const(param) + if params != {}: + inputs = {} + for name, param in params.items(): + if isinstance(param, np.ndarray): + param = tvm.nd.array(param) + inputs[name] = _expr.const(param) - from tvm.relay._build_module import BindParamsByName - mod["main"] = BindParamsByName(mod["main"], inputs) + from tvm.relay._build_module import BindParamsByName + mod["main"] = BindParamsByName(mod["main"], inputs) with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): mod = remove_bn_pass(mod) @@ -530,7 +531,6 @@ def pre_optimize(mod, params): def get_partitoned_mod(mod): mod["main"] = ConvBiasAddReLUAnnotator("dnnl").visit(mod["main"]) - #print(mod["main"]) mod = transform.PartitionGraph()(mod) return mod @@ -561,12 +561,10 @@ def test_partition(): def test_partition_mobilenet(): mod, params = relay.testing.mobilenet.get_workload() mod = pre_optimize(mod, params) - # print(mod["main"]) mod = get_partitoned_mod(mod) - print(mod["main"]) - print(len(get_partitions(mod["main"]))) + assert(len(get_partitions(mod)) == 27) - # test_partition() + test_partition() test_partition_mobilenet() # TODO: Enable executor check once the runtime signature issue is resolved @@ -584,10 +582,10 @@ def test_partition_mobilenet(): if __name__ == "__main__": - # test_multi_node_compiler() - # test_extern_ccompiler_single_op() - # test_extern_ccompiler_default_ops() - # test_extern_ccompiler() - # test_extern_dnnl() - # test_extern_dnnl_mobilenet() + test_multi_node_compiler() + test_extern_ccompiler_single_op() + test_extern_ccompiler_default_ops() + test_extern_ccompiler() + test_extern_dnnl() + test_extern_dnnl_mobilenet() test_partition_conv_bias_relu() From 693474b59c5a501243c0cdaaaacee11989b1865e Mon Sep 17 00:00:00 2001 From: Masahiro 
Masuda Date: Sun, 19 Jan 2020 10:06:50 +0900 Subject: [PATCH 04/18] introduce bind_params_by_name as reusable api --- python/tvm/relay/build_module.py | 5 ++++- tests/python/relay/test_pass_partition_graph.py | 12 ++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py index d848d9030c48..1170855d32bb 100644 --- a/python/tvm/relay/build_module.py +++ b/python/tvm/relay/build_module.py @@ -158,7 +158,6 @@ def optimize(self, func, target=None, params=None): return mod, params - def _set_params(self, params): self._set_params_func(_convert_param_map(params)) @@ -318,6 +317,10 @@ def bind_params_by_name(func, params): ------- func : relay.Function The function with parameters bound +<<<<<<< HEAD +======= + +>>>>>>> introduce bind_params_by_name as reusable api """ inputs = _convert_param_map(params) return _build_module.BindParamsByName(func, inputs) diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index fd82e2116931..bf57ceedb7a2 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -28,6 +28,8 @@ from tvm.relay.annotation import compiler_begin, compiler_end from tvm.relay.expr_functor import ExprMutator from tvm.relay import analysis, expr as _expr +from tvm.relay.build_module import bind_params_by_name + # Leverage the pass manager to write a simple white list based annotator @transform.function_pass(opt_level=0) @@ -515,14 +517,8 @@ def pre_optimize(mod, params): ]) if params != {}: - inputs = {} - for name, param in params.items(): - if isinstance(param, np.ndarray): - param = tvm.nd.array(param) - inputs[name] = _expr.const(param) - - from tvm.relay._build_module import BindParamsByName - mod["main"] = BindParamsByName(mod["main"], inputs) + # This is required for constant folding on mobilenet + mod["main"] = bind_params_by_name(mod["main"], params) with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): mod = remove_bn_pass(mod) From e902fa4bdf580529ab6bb2f8658b3e0fb13d9b9f Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 10:13:23 +0900 Subject: [PATCH 05/18] remove unused function --- tests/python/relay/test_pass_partition_graph.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index bf57ceedb7a2..9c253f4f4d5c 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -186,12 +186,6 @@ def annotate_call(self, call): new_args.append(new_arg) return relay.Call(call.op, new_args, call.attrs, call.type_args) - # def visit_function(self, func): - # print("visiting function") - # new_body = super().visit(func.body) - # return relay.Function(func.params, new_body, - # func.ret_type, func.type_params, func.attrs) - def visit_call(self, call): if call.op.name == "nn.conv2d": if self.current_state == self.state.Bias: From 50e78b878055a26e89564ae800c4f0ca4aa2b4a6 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sat, 18 Jan 2020 19:57:15 +0900 Subject: [PATCH 06/18] add fused dnnl op --- src/runtime/contrib/dnnl/dnnl.cc | 63 ++++++++++++++++++-------- src/runtime/contrib/dnnl/dnnl_kernel.h | 6 +++ 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl.cc b/src/runtime/contrib/dnnl/dnnl.cc index cc430b2c7c76..5622d8feeed1 100644 --- 
a/src/runtime/contrib/dnnl/dnnl.cc +++ b/src/runtime/contrib/dnnl/dnnl.cc @@ -52,10 +52,10 @@ inline void read_from_dnnl_memory(void* handle, const memory& mem) { std::copy(src, src + bytes, reinterpret_cast(handle)); } -extern "C" void dnnl_conv2d(float* data, float* weights, float* out, int p_N_, - int p_C_, int p_H_, int p_W_, int p_O_, int p_G_, - int p_Ph_, int p_Pw_, int p_Kh_, int p_Kw_, - int p_Sh_, int p_Sw_) { +void dnnl_conv2d_common(float* data, float* weights, float* bias, float* out, + int p_N_, int p_C_, int p_H_, int p_W_, + int p_O_, int p_G_, int p_Ph_, int p_Pw_, int p_Kh_, + int p_Kw_, int p_Sh_, int p_Sw_, primitive_attr attr) { using tag = memory::format_tag; using dt = memory::data_type; engine eng(engine::kind::cpu, 0); @@ -65,21 +65,16 @@ extern "C" void dnnl_conv2d(float* data, float* weights, float* out, int p_N_, memory::dims conv2d_weights_tz = {p_O_, p_C_, p_Kh_, p_Kw_}; if (p_G_ > 1) conv2d_weights_tz = {p_G_, 1, p_C_ / p_G_, p_Kh_, p_Kw_}; memory::dims conv2d_bias_tz = {p_O_}; - memory::dims conv2d_dst_tz = {p_N_, p_O_, - (p_H_ - p_Kh_ + 2 * p_Ph_ + p_Sh_) / p_Sh_, + memory::dims conv2d_dst_tz = {p_N_, p_O_, (p_H_ - p_Kh_ + 2 * p_Ph_ + p_Sh_) / p_Sh_, (p_W_ - p_Kw_ + 2 * p_Pw_ + p_Sw_) / p_Sw_}; memory::dims conv2d_strides = {p_Sh_, p_Sw_}; memory::dims conv2d_padding = {p_Ph_, p_Pw_}; - std::vector conv2d_bias(p_O_, 0); - - auto user_src_memory = - memory({{conv2d_src_tz}, dt::f32, tag::nchw}, eng, data); - auto user_weights_memory = memory( - {{conv2d_weights_tz}, dt::f32, (p_G_ > 1) ? tag::goihw : tag::oihw}, eng, - weights); + auto user_src_memory = memory({{conv2d_src_tz}, dt::f32, tag::nchw}, eng, data); + auto user_weights_memory = + memory({{conv2d_weights_tz}, dt::f32, (p_G_ > 1) ? tag::goihw : tag::oihw}, eng, weights); auto conv2d_user_bias_memory = - memory({{conv2d_bias_tz}, dt::f32, tag::x}, eng, conv2d_bias.data()); + memory({{conv2d_bias_tz}, dt::f32, tag::x}, eng, bias); auto conv2d_src_md = memory::desc({conv2d_src_tz}, dt::f32, tag::any); auto conv2d_bias_md = memory::desc({conv2d_bias_tz}, dt::f32, tag::any); @@ -87,10 +82,9 @@ extern "C" void dnnl_conv2d(float* data, float* weights, float* out, int p_N_, auto conv2d_dst_md = memory::desc({conv2d_dst_tz}, dt::f32, tag::nchw); auto conv2d_desc = convolution_forward::desc( - prop_kind::forward_inference, algorithm::convolution_direct, - conv2d_src_md, conv2d_weights_md, conv2d_bias_md, conv2d_dst_md, - conv2d_strides, conv2d_padding, conv2d_padding); - auto conv2d_prim_desc = convolution_forward::primitive_desc(conv2d_desc, eng); + prop_kind::forward_inference, algorithm::convolution_direct, conv2d_src_md, conv2d_weights_md, + conv2d_bias_md, conv2d_dst_md, conv2d_strides, conv2d_padding, conv2d_padding); + auto conv2d_prim_desc = convolution_forward::primitive_desc(conv2d_desc, attr, eng); auto conv2d_src_memory = user_src_memory; auto conv2d_weights_memory = user_weights_memory; @@ -105,6 +99,39 @@ extern "C" void dnnl_conv2d(float* data, float* weights, float* out, int p_N_, read_from_dnnl_memory(out, conv2d_dst_memory); } +extern "C" void dnnl_conv2d(float* data, float* weights, float* out, + int p_N_, int p_C_, int p_H_, int p_W_, + int p_O_, int p_G_, int p_Ph_, int p_Pw_, + int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_) { + primitive_attr attr; + std::vector bias(p_O_, 0); + return dnnl_conv2d_common(data, weights, bias.data(), out, + p_N_, p_C_, p_H_, p_W_, p_O_, p_G_, + p_Ph_, p_Pw_, p_Kh_, p_Kw_, p_Sh_, p_Sw_, + attr); +} + +primitive_attr create_attr_with_relu_post_op() { + post_ops 
ops; + ops.append_eltwise(1.f, algorithm::eltwise_relu, 0.f, 0.f); + + primitive_attr attr; + attr.set_post_ops(ops); + + return attr; +} + +extern "C" void dnnl_fused_conv2d_bias_relu(float* data, float* weights, float* bias, float* out, + int p_N_, int p_C_, int p_H_, int p_W_, int p_O_, + int p_G_, int p_Ph_, int p_Pw_, int p_Kh_, int p_Kw_, + int p_Sh_, int p_Sw_) { + return dnnl_conv2d_common(data, weights, bias, out, + p_N_, p_C_, p_H_, p_W_, + p_O_, p_G_, p_Ph_, p_Pw_, + p_Kh_, p_Kw_, p_Sh_, p_Sw_, + create_attr_with_relu_post_op()); +} + extern "C" void dnnl_dense(float* data, float* weight, float* out, int p_B_, int p_I_, int p_O_) { using tag = memory::format_tag; diff --git a/src/runtime/contrib/dnnl/dnnl_kernel.h b/src/runtime/contrib/dnnl/dnnl_kernel.h index 4d0b100b92ec..b3111030becf 100644 --- a/src/runtime/contrib/dnnl/dnnl_kernel.h +++ b/src/runtime/contrib/dnnl/dnnl_kernel.h @@ -38,6 +38,12 @@ extern "C" TVM_DLL void dnnl_conv2d(float* data, float* weights, float* out, int int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph_, int p_Pw_, int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_); +extern "C" TVM_DLL void dnnl_fused_conv2d_bias_relu(float* data, float* weights, float* bias, + float* out, int p_N_, int p_C_, int p_H_, + int p_W_, int p_O_, int p_G_, int p_Ph_, + int p_Pw_, int p_Kh_, int p_Kw_, int p_Sh_, + int p_Sw_); + extern "C" TVM_DLL void dnnl_dense(float* data, float* weight, float* out, int p_B_, int p_I_, int p_O_); From 0d41ce7dcf40fcc1804920bca51c8c99b61a5697 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sat, 18 Jan 2020 21:41:59 +0900 Subject: [PATCH 07/18] refactoring dnnl codegen --- src/relay/backend/contrib/dnnl/codegen.cc | 143 +++++++++++++++------- 1 file changed, 98 insertions(+), 45 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 759a4421bc1d..72446b825486 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -31,6 +31,7 @@ #include #include +#include #include "../codegen_c/codegen_c.h" @@ -55,66 +56,95 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { } void VisitExpr_(const CallNode* call) final { - std::ostringstream decl_stream; - std::ostringstream buf_stream; - // Args: ID - std::vector args; - // Get the arguments for various DNNL kernels. 
- if (IsOp(call, "nn.conv2d")) { - decl_stream << "dnnl_conv2d"; + auto generate_body = [&](const CallNode* root_call, + const std::vector& args, + const std::vector& fused_func_args) { + // Make function call with input buffers when visiting arguments + bool first = true; + std::ostringstream arg_stream; + arg_stream << "("; + for (size_t i = 0; i < root_call->args.size(); ++i) { + VisitExpr(root_call->args[i]); + for (auto out : out_) { + if (!first) { + arg_stream << ", "; + } + first = false; + arg_stream << out.first; + } + } + for (auto arg_name : fused_func_args) { + arg_stream << ", " << arg_name; + } + + // Analyze the output buffer + auto type_node = root_call->checked_type().as(); + CHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype, kDLFloat, 32)) + << "Only support single output tensor with float type"; + + auto out = "buf_" + std::to_string(buf_idx_++); + auto out_shape = GetShape(root_call->checked_type()); + auto out_size = std::accumulate(out_shape.begin(), out_shape.end(), + 1, std::multiplies()); + this->PrintIndents(); + + std::ostringstream buf_stream; + buf_stream << "float* " << out << " = (float*)std::malloc(4 * " << out_size << ");"; + + arg_stream << ", " << out; + + // Attach attribute arguments + for (size_t i = 0; i < args.size(); ++i) { + arg_stream << ", " << args[i]; + } + arg_stream << ");"; + return std::make_tuple(arg_stream.str(), buf_stream.str(), out, out_size); + }; + + std::string decl, buf; + int out_size = 1; + std::string out; + std::vector args; + if (auto conv_call = DetectFusedConv2DBiasReLU(call)) { + LOG(INFO) << "found fused op, num_args = " << call->args.size(); + auto ret = generate_body(conv_call, FusedConv2dBiasReLU(conv_call), + ext_fused_func_args_); + decl = "dnnl_fused_conv2d_bias_relu" + std::get<0>(ret); + buf = std::get<1>(ret); + out = std::get<2>(ret); + out_size = std::get<3>(ret); + } else if (IsOp(call, "nn.conv2d")) { + LOG(INFO) << "found conv"; + decl = "dnnl_conv2d"; args = Conv2d(call); } else if (IsOp(call, "nn.dense")) { - decl_stream << "dnnl_dense"; + decl = "dnnl_dense"; args = Dense(call); } else if (IsOp(call, "nn.relu")) { - decl_stream << "dnnl_relu"; + LOG(INFO) << "found relu"; + decl = "dnnl_relu"; args = Relu(call); } else if (IsOp(call, "nn.batch_norm")) { - decl_stream << "dnnl_bn"; + decl = "dnnl_bn"; args = BatchNorm(call); } else if (IsOp(call, "add")) { - decl_stream << "dnnl_add"; + decl = "dnnl_add"; args = Add(call); } else { LOG(FATAL) << "Unsupported op: " << AsText(call->op, false); } - // Make function call with input buffers when visiting arguments - bool first = true; - decl_stream << "("; - for (size_t i = 0; i < call->args.size(); ++i) { - VisitExpr(call->args[i]); - for (auto out : out_) { - if (!first) { - decl_stream << ", "; - } - first = false; - decl_stream << out.first; - } + if (out == "") { + auto ret = generate_body(call, args, {}); + decl += std::get<0>(ret); + buf = std::get<1>(ret); + out = std::get<2>(ret); + out_size = std::get<3>(ret); } - // Analyze the output buffer - auto type_node = call->checked_type().as(); - CHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype, kDLFloat, 32)) - << "Only support single output tensor with float type"; - std::string out = "buf_" + std::to_string(buf_idx_++); - auto out_shape = GetShape(call->checked_type()); - int out_size = 1; - for (size_t i = 0; i < out_shape.size(); ++i) { - out_size *= out_shape[i]; - } - this->PrintIndents(); - buf_stream << "float* " << out << " = (float*)std::malloc(4 * " << out_size << 
");"; - buf_decl_.push_back(buf_stream.str()); - decl_stream << ", " << out; - - // Attach attribute arguments - for (size_t i = 0; i < args.size(); ++i) { - decl_stream << ", " << args[i]; - } - decl_stream << ");"; - ext_func_body.push_back(decl_stream.str()); + buf_decl_.push_back(buf); + ext_func_body.push_back(decl); // Update output buffer out_.clear(); @@ -122,10 +152,22 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { } std::string JIT(void) { + ext_func_args_.insert(ext_func_args_.end(), + ext_fused_func_args_.begin(), + ext_fused_func_args_.end()); return JitImpl(ext_func_id_, ext_func_args_, buf_decl_, ext_func_body, out_); } private: + const CallNode* DetectFusedConv2DBiasReLU(const CallNode* call) { + auto arg = call->args[0]; + if (auto next_call = arg.as()) { + if (IsOp(next_call, "nn.conv2d")) { + } + } + return nullptr; + } + std::vector Conv2d(const CallNode* call) { std::vector args; const auto* conv2d_attr = call->attrs.as(); @@ -152,6 +194,10 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { return args; } + std::vector FusedConv2dBiasReLU(const CallNode* call) { + return Conv2d(call); + } + std::vector Dense(const CallNode* call) { std::vector args; auto ishape = GetShape(call->args[0]->checked_type()); @@ -214,6 +260,7 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { int buf_idx_{0}; /*! \brief The arguments used by a wrapped function that calls DNNL kernels. */ std::vector ext_func_args_; + std::vector ext_fused_func_args_; /*! \brief statement of the function that will be compiled using DNNL kernels. */ std::vector ext_func_body; /*! \brief The declaration of intermeidate buffers. */ @@ -270,10 +317,12 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { code_stream_ << "\n"; if (ref->IsInstance()) { + LOG(INFO) << "Invoking GenDNNLFunc on FuncNode"; GenDNNLFunc(Downcast(ref)); } else if (ref->IsInstance()) { IRModule mod = Downcast(ref); for (const auto& it : mod->functions) { + LOG(INFO) << "Invoking GenDNNLFunc"; GenDNNLFunc(Downcast(it.second)); } } else { @@ -284,6 +333,7 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { // Create a CSourceModule const auto* pf = runtime::Registry::Get("module.csource_module_create"); CHECK(pf != nullptr) << "Cannot find csource module to create the external runtime module"; + LOG(INFO) << code_stream_.str(); return (*pf)(code_stream_.str(), "cc"); } @@ -301,7 +351,10 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { */ runtime::Module DNNLCompiler(const ObjectRef& ref) { DNNLModuleCodegen dnnl; - return dnnl.CreateCSourceModule(ref); + LOG(INFO) << "Invoking DNNLCompiler"; + auto ret = dnnl.CreateCSourceModule(ref); + LOG(INFO) << "Done invoking DNNLCompiler"; + return ret; } TVM_REGISTER_GLOBAL("relay.ext.dnnl").set_body_typed(DNNLCompiler); From 17f825ff42d77e8721943174ea575e2806927c0f Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sat, 18 Jan 2020 21:56:26 +0900 Subject: [PATCH 08/18] cleanup --- src/relay/backend/contrib/dnnl/codegen.cc | 92 ++++++++++------------- 1 file changed, 38 insertions(+), 54 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 72446b825486..44b8e25ddba5 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -51,15 +51,16 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { out_.push_back({node->name_hint(), 0}); } - void VisitExpr_(const TupleGetItemNode* op) final { - // Do nothing 
- } - void VisitExpr_(const CallNode* call) final { - // Get the arguments for various DNNL kernels. - auto generate_body = [&](const CallNode* root_call, - const std::vector& args, - const std::vector& fused_func_args) { + struct Output { + std::string decl, buf; + int out_size = 1; + std::string out; + }; + + auto generate_body = [&](const CallNode* root_call, const std::string& func_name, + const std::vector& args, + const std::vector& fused_func_args) { // Make function call with input buffers when visiting arguments bool first = true; std::ostringstream arg_stream; @@ -74,6 +75,7 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { arg_stream << out.first; } } + for (auto arg_name : fused_func_args) { arg_stream << ", " << arg_name; } @@ -83,88 +85,70 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { CHECK(type_node != nullptr && runtime::TypeMatch(type_node->dtype, kDLFloat, 32)) << "Only support single output tensor with float type"; - auto out = "buf_" + std::to_string(buf_idx_++); auto out_shape = GetShape(root_call->checked_type()); - auto out_size = std::accumulate(out_shape.begin(), out_shape.end(), - 1, std::multiplies()); + + Output ret; + ret.out = "buf_" + std::to_string(buf_idx_++); + ret.out_size = std::accumulate(out_shape.begin(), out_shape.end(), 1, std::multiplies()); + this->PrintIndents(); std::ostringstream buf_stream; - buf_stream << "float* " << out << " = (float*)std::malloc(4 * " << out_size << ");"; - - arg_stream << ", " << out; + buf_stream << "float* " << ret.out << " = (float*)std::malloc(4 * " << ret.out_size << ");"; + ret.buf = buf_stream.str(); + arg_stream << ", " << ret.out; // Attach attribute arguments for (size_t i = 0; i < args.size(); ++i) { arg_stream << ", " << args[i]; } arg_stream << ");"; - return std::make_tuple(arg_stream.str(), buf_stream.str(), out, out_size); + ret.decl = func_name + arg_stream.str(); + + return ret; }; - std::string decl, buf; - int out_size = 1; - std::string out; - std::vector args; + Output ret; if (auto conv_call = DetectFusedConv2DBiasReLU(call)) { LOG(INFO) << "found fused op, num_args = " << call->args.size(); - auto ret = generate_body(conv_call, FusedConv2dBiasReLU(conv_call), - ext_fused_func_args_); - decl = "dnnl_fused_conv2d_bias_relu" + std::get<0>(ret); - buf = std::get<1>(ret); - out = std::get<2>(ret); - out_size = std::get<3>(ret); + ret = generate_body(conv_call, "dnnl_fused_conv2d_bias_relu", + FusedConv2dBiasReLU(conv_call), ext_fused_func_args_); } else if (IsOp(call, "nn.conv2d")) { - LOG(INFO) << "found conv"; - decl = "dnnl_conv2d"; - args = Conv2d(call); + ret = generate_body(call, "dnnl_conv2d", Conv2d(call), {}); } else if (IsOp(call, "nn.dense")) { - decl = "dnnl_dense"; - args = Dense(call); + ret = generate_body(call, "dnnl_dense", Dense(call), {}); } else if (IsOp(call, "nn.relu")) { - LOG(INFO) << "found relu"; - decl = "dnnl_relu"; - args = Relu(call); + ret = generate_body(call, "dnnl_relu", Relu(call), {}); } else if (IsOp(call, "nn.batch_norm")) { - decl = "dnnl_bn"; - args = BatchNorm(call); + ret = generate_body(call, "dnnl_bn", BatchNorm(call), {}); } else if (IsOp(call, "add")) { - decl = "dnnl_add"; - args = Add(call); + ret = generate_body(call, "dnnl_add", Add(call), {}); } else { LOG(FATAL) << "Unsupported op: " << AsText(call->op, false); } - if (out == "") { - auto ret = generate_body(call, args, {}); - decl += std::get<0>(ret); - buf = std::get<1>(ret); - out = std::get<2>(ret); - out_size = std::get<3>(ret); - } - - buf_decl_.push_back(buf); - 
ext_func_body.push_back(decl); + buf_decl_.push_back(ret.buf); + ext_func_body.push_back(ret.decl); // Update output buffer out_.clear(); - out_.push_back({out, out_size}); + out_.push_back({ret.out, ret.out_size}); } std::string JIT(void) { ext_func_args_.insert(ext_func_args_.end(), - ext_fused_func_args_.begin(), - ext_fused_func_args_.end()); + ext_fused_func_args_.begin(), + ext_fused_func_args_.end()); return JitImpl(ext_func_id_, ext_func_args_, buf_decl_, ext_func_body, out_); } private: const CallNode* DetectFusedConv2DBiasReLU(const CallNode* call) { auto arg = call->args[0]; - if (auto next_call = arg.as()) { - if (IsOp(next_call, "nn.conv2d")) { - } - } + // if (auto next_call = arg.as()) { + // if (IsOp(next_call, "nn.conv2d")) { + // } + // } return nullptr; } From 40a41675881e5e26eaed9c3aa07a77c82a787598 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sat, 18 Jan 2020 23:35:31 +0900 Subject: [PATCH 09/18] add pattern detection --- src/relay/backend/contrib/dnnl/codegen.cc | 31 +++++++++++++++-------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 44b8e25ddba5..01aee57efa6d 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -51,6 +51,14 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { out_.push_back({node->name_hint(), 0}); } + void VisitExpr_(const ConstantNode* node) final { + LOG(INFO) << "Visiting constant node"; + auto name = "dnnl_input" + std::to_string(ext_func_args_.size()); + ext_func_args_.push_back(name); + out_.clear(); + out_.push_back({name, 0}); + } + void VisitExpr_(const CallNode* call) final { struct Output { std::string decl, buf; @@ -58,7 +66,7 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { std::string out; }; - auto generate_body = [&](const CallNode* root_call, const std::string& func_name, + auto generate_body = [=](const CallNode* root_call, const std::string& func_name, const std::vector& args, const std::vector& fused_func_args) { // Make function call with input buffers when visiting arguments @@ -110,7 +118,6 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { Output ret; if (auto conv_call = DetectFusedConv2DBiasReLU(call)) { - LOG(INFO) << "found fused op, num_args = " << call->args.size(); ret = generate_body(conv_call, "dnnl_fused_conv2d_bias_relu", FusedConv2dBiasReLU(conv_call), ext_fused_func_args_); } else if (IsOp(call, "nn.conv2d")) { @@ -136,20 +143,24 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { } std::string JIT(void) { - ext_func_args_.insert(ext_func_args_.end(), - ext_fused_func_args_.begin(), + ext_func_args_.insert(ext_func_args_.end(), ext_fused_func_args_.begin(), ext_fused_func_args_.end()); return JitImpl(ext_func_id_, ext_func_args_, buf_decl_, ext_func_body, out_); } private: const CallNode* DetectFusedConv2DBiasReLU(const CallNode* call) { - auto arg = call->args[0]; - // if (auto next_call = arg.as()) { - // if (IsOp(next_call, "nn.conv2d")) { - // } - // } - return nullptr; + if (!IsOp(call, "nn.relu")) return nullptr; + auto relu_arg = call->args[0]; + // TODO: a better way to get CallNode* from Expr? 
+ const CallNode* add_call = Downcast(relu_arg).operator->(); + if (!add_call || !IsOp(add_call, "add")) return nullptr; + auto add_arg = add_call->args[0]; + const CallNode* conv_call = Downcast(add_arg).operator->(); + if (!conv_call || !IsOp(conv_call, "nn.conv2d")) return nullptr; + ext_fused_func_args_.push_back("dnnl_input_bias"); + LOG(INFO) << "fused op found"; + return conv_call; } std::vector Conv2d(const CallNode* call) { From 03fba62b2a4f31f9624c5d6e5aa0c5454c87147a Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 05:44:10 +0900 Subject: [PATCH 10/18] improve Expr to CallNode* conversion --- src/relay/backend/contrib/dnnl/codegen.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 01aee57efa6d..085fc86032f4 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -152,13 +152,13 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { const CallNode* DetectFusedConv2DBiasReLU(const CallNode* call) { if (!IsOp(call, "nn.relu")) return nullptr; auto relu_arg = call->args[0]; - // TODO: a better way to get CallNode* from Expr? - const CallNode* add_call = Downcast(relu_arg).operator->(); + const CallNode* add_call = relu_arg.as(); if (!add_call || !IsOp(add_call, "add")) return nullptr; auto add_arg = add_call->args[0]; - const CallNode* conv_call = Downcast(add_arg).operator->(); + const CallNode* conv_call = add_arg.as(); if (!conv_call || !IsOp(conv_call, "nn.conv2d")) return nullptr; - ext_fused_func_args_.push_back("dnnl_input_bias"); + auto bias_name = "dnnl_fused_input" + std::to_string(ext_fused_func_args_.size()); + ext_fused_func_args_.push_back(bias_name); LOG(INFO) << "fused op found"; return conv_call; } From c572c6b3f618459630f97bdffbd900ef7cffb257 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 06:02:38 +0900 Subject: [PATCH 11/18] add fuse test --- src/relay/backend/contrib/dnnl/codegen.cc | 2 -- .../python/relay/test_pass_partition_graph.py | 34 ++++++++++--------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 085fc86032f4..79b387d8e76a 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -346,9 +346,7 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { */ runtime::Module DNNLCompiler(const ObjectRef& ref) { DNNLModuleCodegen dnnl; - LOG(INFO) << "Invoking DNNLCompiler"; auto ret = dnnl.CreateCSourceModule(ref); - LOG(INFO) << "Done invoking DNNLCompiler"; return ret; } diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 9c253f4f4d5c..5c13695b43e8 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -555,27 +555,29 @@ def test_partition_mobilenet(): assert(len(get_partitions(mod)) == 27) test_partition() - test_partition_mobilenet() + # test_partition_mobilenet() # TODO: Enable executor check once the runtime signature issue is resolved - # net = get_net() - # mod, params = get_partitoned_mod(net) + net = get_net() + mod, params = tvm.relay.testing.create_workload(net) + mod = pre_optimize(mod, params) + mod = get_partitoned_mod(mod) - # ref_mod, params = tvm.relay.testing.create_workload(net) - # ishape = (1, 3, 224, 224) - # i_data = 
np.random.randn(*ishape).astype(np.float32) - # ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0)) - # ref_res = ref_ex.evaluate()(i_data, **params) + ref_mod, params = tvm.relay.testing.create_workload(net) + ishape = (1, 3, 224, 224) + i_data = np.random.randn(*ishape).astype(np.float32) + ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0)) + ref_res = ref_ex.evaluate()(i_data, **params) - # check_result(mod, {"data": i_data}, - # ishape, ref_res.asnumpy(), tol=1e-5, params=params) + check_result(mod, {"data": i_data}, + ishape, ref_res.asnumpy(), tol=1e-5, params=params) if __name__ == "__main__": - test_multi_node_compiler() - test_extern_ccompiler_single_op() - test_extern_ccompiler_default_ops() - test_extern_ccompiler() - test_extern_dnnl() - test_extern_dnnl_mobilenet() + # test_multi_node_compiler() + # test_extern_ccompiler_single_op() + # test_extern_ccompiler_default_ops() + # test_extern_ccompiler() + # test_extern_dnnl() + # test_extern_dnnl_mobilenet() test_partition_conv_bias_relu() From a56829b743a930ddc817812af92068a79537d3f0 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 07:56:13 +0900 Subject: [PATCH 12/18] uncomment other tests --- tests/python/relay/test_pass_partition_graph.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 5c13695b43e8..9027017c9ad8 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -574,10 +574,10 @@ def test_partition_mobilenet(): if __name__ == "__main__": - # test_multi_node_compiler() - # test_extern_ccompiler_single_op() - # test_extern_ccompiler_default_ops() - # test_extern_ccompiler() - # test_extern_dnnl() - # test_extern_dnnl_mobilenet() + test_multi_node_compiler() + test_extern_ccompiler_single_op() + test_extern_ccompiler_default_ops() + test_extern_ccompiler() + test_extern_dnnl() + test_extern_dnnl_mobilenet() test_partition_conv_bias_relu() From 92e06c47937fa74a0e3560bb40122307e345d42a Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 16:33:07 +0900 Subject: [PATCH 13/18] add compiler_begin on bias param --- src/relay/backend/contrib/dnnl/codegen.cc | 12 ----- .../python/relay/test_pass_partition_graph.py | 52 +++++++++++-------- 2 files changed, 30 insertions(+), 34 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 79b387d8e76a..7ca95c3c1254 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -51,14 +51,6 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { out_.push_back({node->name_hint(), 0}); } - void VisitExpr_(const ConstantNode* node) final { - LOG(INFO) << "Visiting constant node"; - auto name = "dnnl_input" + std::to_string(ext_func_args_.size()); - ext_func_args_.push_back(name); - out_.clear(); - out_.push_back({name, 0}); - } - void VisitExpr_(const CallNode* call) final { struct Output { std::string decl, buf; @@ -159,7 +151,6 @@ class CodegenDNNL : public ExprVisitor, public CodegenCBase { if (!conv_call || !IsOp(conv_call, "nn.conv2d")) return nullptr; auto bias_name = "dnnl_fused_input" + std::to_string(ext_fused_func_args_.size()); ext_fused_func_args_.push_back(bias_name); - LOG(INFO) << "fused op found"; return conv_call; } @@ -312,12 +303,10 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { 
code_stream_ << "\n"; if (ref->IsInstance()) { - LOG(INFO) << "Invoking GenDNNLFunc on FuncNode"; GenDNNLFunc(Downcast(ref)); } else if (ref->IsInstance()) { IRModule mod = Downcast(ref); for (const auto& it : mod->functions) { - LOG(INFO) << "Invoking GenDNNLFunc"; GenDNNLFunc(Downcast(it.second)); } } else { @@ -328,7 +317,6 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { // Create a CSourceModule const auto* pf = runtime::Registry::Get("module.csource_module_create"); CHECK(pf != nullptr) << "Cannot find csource module to create the external runtime module"; - LOG(INFO) << code_stream_.str(); return (*pf)(code_stream_.str(), "cc"); } diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 9027017c9ad8..10d1c2dbd88d 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -179,9 +179,10 @@ def __init__(self, backend): def annotate_call(self, call): new_args = [] + has_arg = "nn.conv2d" for arg in call.args: new_arg = super().visit(arg) - if call.op.name == "nn.conv2d" or isinstance(new_arg, relay.expr.Var): + if call.op.name == "nn.conv2d" or isinstance(new_arg, (relay.expr.Var, relay.expr.Constant)): new_arg = compiler_begin(new_arg, self.backend) new_args.append(new_arg) return relay.Call(call.op, new_args, call.attrs, call.type_args) @@ -245,6 +246,7 @@ def check_vm_result(): def check_graph_runtime_result(): with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): json, lib, param = relay.build(mod, target=target, params=params) + # print(json) lib = update_lib(lib) rt_mod = tvm.contrib.graph_runtime.create(json, lib, ctx) @@ -257,7 +259,7 @@ def check_graph_runtime_result(): tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol) - check_vm_result() + # check_vm_result() check_graph_runtime_result() @@ -554,30 +556,36 @@ def test_partition_mobilenet(): mod = get_partitoned_mod(mod) assert(len(get_partitions(mod)) == 27) - test_partition() - # test_partition_mobilenet() + def test_exec(mod, params, ref_mod, ref_params, out_shape): + ishape = (1, 3, 224, 224) + i_data = np.random.randn(*ishape).astype(np.float32) + ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0)) + ref_res = ref_ex.evaluate()(i_data, **ref_params) - # TODO: Enable executor check once the runtime signature issue is resolved - net = get_net() - mod, params = tvm.relay.testing.create_workload(net) - mod = pre_optimize(mod, params) - mod = get_partitoned_mod(mod) + mod = pre_optimize(mod, params) + mod = get_partitoned_mod(mod) - ref_mod, params = tvm.relay.testing.create_workload(net) - ishape = (1, 3, 224, 224) - i_data = np.random.randn(*ishape).astype(np.float32) - ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0)) - ref_res = ref_ex.evaluate()(i_data, **params) + check_result(mod, {"data": i_data}, + out_shape, ref_res.asnumpy(), tol=1e-5, params=params) - check_result(mod, {"data": i_data}, - ishape, ref_res.asnumpy(), tol=1e-5, params=params) + test_partition() + test_partition_mobilenet() + + # net = get_net() + # mod, params = tvm.relay.testing.create_workload(net) + # ref_mod, ref_params = tvm.relay.testing.create_workload(net) + # test_exec(mod, params, ref_mod, ref_params, (1, 16, 224, 224)) + + mod, params = relay.testing.mobilenet.get_workload() + ref_mod, ref_params = relay.testing.mobilenet.get_workload() + test_exec(mod, params, ref_mod, ref_params, (1, 1000)) if __name__ == "__main__": - 
test_multi_node_compiler() - test_extern_ccompiler_single_op() - test_extern_ccompiler_default_ops() - test_extern_ccompiler() - test_extern_dnnl() - test_extern_dnnl_mobilenet() + # test_multi_node_compiler() + # test_extern_ccompiler_single_op() + # test_extern_ccompiler_default_ops() + # test_extern_ccompiler() + # test_extern_dnnl() + # test_extern_dnnl_mobilenet() test_partition_conv_bias_relu() From 264e054f5902ac6b9f144225d5e0b3a87257fa96 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 16:35:26 +0900 Subject: [PATCH 14/18] enable other tests --- .../python/relay/test_pass_partition_graph.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 10d1c2dbd88d..a244c5087214 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -179,7 +179,6 @@ def __init__(self, backend): def annotate_call(self, call): new_args = [] - has_arg = "nn.conv2d" for arg in call.args: new_arg = super().visit(arg) if call.op.name == "nn.conv2d" or isinstance(new_arg, (relay.expr.Var, relay.expr.Constant)): @@ -571,9 +570,9 @@ def test_exec(mod, params, ref_mod, ref_params, out_shape): test_partition() test_partition_mobilenet() - # net = get_net() - # mod, params = tvm.relay.testing.create_workload(net) - # ref_mod, ref_params = tvm.relay.testing.create_workload(net) + net = get_net() + mod, params = tvm.relay.testing.create_workload(net) + ref_mod, ref_params = tvm.relay.testing.create_workload(net) # test_exec(mod, params, ref_mod, ref_params, (1, 16, 224, 224)) mod, params = relay.testing.mobilenet.get_workload() @@ -582,10 +581,10 @@ def test_exec(mod, params, ref_mod, ref_params, out_shape): if __name__ == "__main__": - # test_multi_node_compiler() - # test_extern_ccompiler_single_op() - # test_extern_ccompiler_default_ops() - # test_extern_ccompiler() - # test_extern_dnnl() - # test_extern_dnnl_mobilenet() + test_multi_node_compiler() + test_extern_ccompiler_single_op() + test_extern_ccompiler_default_ops() + test_extern_ccompiler() + test_extern_dnnl() + test_extern_dnnl_mobilenet() test_partition_conv_bias_relu() From 3ef57f66fccdd8dd97bb26756b67ed0d387c7264 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 16:42:22 +0900 Subject: [PATCH 15/18] minor fix --- src/relay/backend/contrib/dnnl/codegen.cc | 3 +-- tests/python/relay/test_pass_partition_graph.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 7ca95c3c1254..807714a33bd5 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -334,8 +334,7 @@ class DNNLModuleCodegen : public CSourceModuleCodegenBase { */ runtime::Module DNNLCompiler(const ObjectRef& ref) { DNNLModuleCodegen dnnl; - auto ret = dnnl.CreateCSourceModule(ref); - return ret; + return dnnl.CreateCSourceModule(ref); } TVM_REGISTER_GLOBAL("relay.ext.dnnl").set_body_typed(DNNLCompiler); diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index a244c5087214..69be0e602d43 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -245,7 +245,6 @@ def check_vm_result(): def check_graph_runtime_result(): with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): 
json, lib, param = relay.build(mod, target=target, params=params) - # print(json) lib = update_lib(lib) rt_mod = tvm.contrib.graph_runtime.create(json, lib, ctx) @@ -258,7 +257,7 @@ def check_graph_runtime_result(): tvm.testing.assert_allclose(out.asnumpy(), result, rtol=tol, atol=tol) - # check_vm_result() + check_vm_result() check_graph_runtime_result() From ee50eed9c5af29886627b94896ac2cbe8bbb629f Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 19 Jan 2020 16:57:11 +0900 Subject: [PATCH 16/18] fixed test on simple net --- tests/python/relay/test_pass_partition_graph.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 69be0e602d43..45951a8f3b15 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -477,11 +477,11 @@ def test_partition_conv_bias_relu(): def get_layers(prefix, data, in_channel, out_channel, include_bn=True, include_sigmoid=False): - weight = relay.const(np.random.randn(out_channel, in_channel, 3, 3)) - bn_gamma = relay.const(np.random.randn(out_channel)) - bn_beta = relay.const(np.random.randn(out_channel)) - bn_mmean = relay.const(np.random.randn(out_channel)) - bn_mvar = relay.const(np.random.randn(out_channel)) + weight = relay.var(prefix + "weight") + bn_gamma = relay.var(prefix + "bn_gamma") + bn_beta = relay.var(prefix + "bn_beta") + bn_mmean = relay.var(prefix + "bn_mean") + bn_mvar = relay.var(prefix + "bn_var") layer = relay.nn.conv2d(data=data, weight=weight, kernel_size=(3, 3), channels=out_channel, padding=(1, 1)) @@ -511,7 +511,7 @@ def pre_optimize(mod, params): ]) if params != {}: - # This is required for constant folding on mobilenet + # This is required for constant folding mod["main"] = bind_params_by_name(mod["main"], params) with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): @@ -572,7 +572,7 @@ def test_exec(mod, params, ref_mod, ref_params, out_shape): net = get_net() mod, params = tvm.relay.testing.create_workload(net) ref_mod, ref_params = tvm.relay.testing.create_workload(net) - # test_exec(mod, params, ref_mod, ref_params, (1, 16, 224, 224)) + test_exec(mod, params, ref_mod, ref_params, (1, 16, 224, 224)) mod, params = relay.testing.mobilenet.get_workload() ref_mod, ref_params = relay.testing.mobilenet.get_workload() From 2e2da6b08da478b104c32d043375dcbd54ca2c95 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Tue, 21 Jan 2020 07:53:06 +0900 Subject: [PATCH 17/18] rebase and address comments --- src/relay/backend/build_module.cc | 37 ------------------- .../python/relay/test_pass_partition_graph.py | 13 ++++--- 2 files changed, 7 insertions(+), 43 deletions(-) diff --git a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc index 2d617db51f9a..035ab1ba5bee 100644 --- a/src/relay/backend/build_module.cc +++ b/src/relay/backend/build_module.cc @@ -145,43 +145,6 @@ struct GraphCodegen { } }; -/*! 
- * \brief Bind params to function by using name - * \param func Relay function - * \param params params dict - * \return relay::Function - */ -relay::Function BindParamsByName(relay::Function func, - const std::unordered_map& params) { - std::unordered_map name_dict; - std::unordered_set repeat_var; - for (auto arg : func->params) { - const auto& name = arg->name_hint(); - if (name_dict.count(name)) { - repeat_var.insert(arg); - } else { - name_dict[name] = arg; - } - } - - std::unordered_map bind_dict; - for (auto& kv : params) { - if (name_dict.count(kv.first) == 0) { - continue; - } - auto arg = name_dict.at(kv.first); - if (repeat_var.count(arg)) { - LOG(FATAL) << "Multiple args in the function have name " << kv.first; - } - bind_dict[arg] = ConstantNode::make(kv.second); - } - Expr bound_expr = relay::Bind(func, bind_dict); - Function ret = Downcast(bound_expr); - CHECK(ret.defined()) << "The returning type is expected to be a Relay Function." - << "\n"; - return ret; -} - /*! * \brief Relay build module * diff --git a/tests/python/relay/test_pass_partition_graph.py b/tests/python/relay/test_pass_partition_graph.py index 45951a8f3b15..4459505595c4 100644 --- a/tests/python/relay/test_pass_partition_graph.py +++ b/tests/python/relay/test_pass_partition_graph.py @@ -29,6 +29,7 @@ from tvm.relay.expr_functor import ExprMutator from tvm.relay import analysis, expr as _expr from tvm.relay.build_module import bind_params_by_name +from tvm.relay.backend import compile_engine # Leverage the pass manager to write a simple white list based annotator @@ -475,7 +476,7 @@ def test_partition_conv_bias_relu(): print("skip because DNNL codegen is not available") return - def get_layers(prefix, data, in_channel, out_channel, + def get_blocks(prefix, data, in_channel, out_channel, include_bn=True, include_sigmoid=False): weight = relay.var(prefix + "weight") bn_gamma = relay.var(prefix + "bn_gamma") @@ -497,8 +498,8 @@ def get_layers(prefix, data, in_channel, out_channel, def get_net(include_bn=True, include_sigmoid=False): data = relay.var("data", relay.TensorType((1, 3, 224, 224), "float32")) - layer1 = get_layers("layer1_", data, 3, 16, include_bn, include_sigmoid) - layer2 = get_layers("layer2_", layer1, 16, 16, include_bn, include_sigmoid) + layer1 = get_blocks("layer1_", data, 3, 16, include_bn, include_sigmoid) + layer2 = get_blocks("layer2_", layer1, 16, 16, include_bn, include_sigmoid) last = layer2 return relay.Function(relay.analysis.free_vars(last), last) @@ -510,9 +511,8 @@ def pre_optimize(mod, params): relay.transform.FoldScaleAxis(), ]) - if params != {}: - # This is required for constant folding - mod["main"] = bind_params_by_name(mod["main"], params) + # This is required for constant folding + mod["main"] = bind_params_by_name(mod["main"], params) with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): mod = remove_bn_pass(mod) @@ -559,6 +559,7 @@ def test_exec(mod, params, ref_mod, ref_params, out_shape): i_data = np.random.randn(*ishape).astype(np.float32) ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0)) ref_res = ref_ex.evaluate()(i_data, **ref_params) + compile_engine.get().clear() mod = pre_optimize(mod, params) mod = get_partitoned_mod(mod) From af627a93381fd1b59d4c23fef5f7b91bc0bcf9fd Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Tue, 21 Jan 2020 07:55:41 +0900 Subject: [PATCH 18/18] rebase fix --- python/tvm/relay/build_module.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/python/tvm/relay/build_module.py 
b/python/tvm/relay/build_module.py index 1170855d32bb..774ab07d8dc4 100644 --- a/python/tvm/relay/build_module.py +++ b/python/tvm/relay/build_module.py @@ -317,10 +317,6 @@ def bind_params_by_name(func, params): ------- func : relay.Function The function with parameters bound -<<<<<<< HEAD -======= - ->>>>>>> introduce bind_params_by_name as reusable api """ inputs = _convert_param_map(params) return _build_module.BindParamsByName(func, inputs)
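Usage sketch (illustrative, not part of the patches above): the flow below mirrors what test_partition_conv_bias_relu and test_partition_mobilenet in this series exercise. bind_params_by_name (introduced here as a reusable API) binds the parameters into the module so SimplifyInference / FoldConstant / FoldScaleAxis can reduce batch_norm into the conv2d + add + relu chain, the test-local ConvBiasAddReLUAnnotator marks that chain for the "dnnl" backend, and PartitionGraph lifts it into external functions handled by the fused DNNL kernel. ConvBiasAddReLUAnnotator and relay.ext.dnnl come from this patch series; the rest is the stock Relay API of the same vintage (relay.build_config, transform.PartitionGraph), so treat this as a sketch rather than a guaranteed recipe.

import tvm
import tvm.relay.testing
from tvm import relay
from tvm.relay import transform
from tvm.relay.build_module import bind_params_by_name

mod, params = relay.testing.mobilenet.get_workload()
# Bind params so constant folding can fold the batch_norm scales into the conv weights.
mod["main"] = bind_params_by_name(mod["main"], params)
remove_bn = transform.Sequential([
    relay.transform.InferType(),
    relay.transform.SimplifyInference(),
    relay.transform.FoldConstant(),
    relay.transform.FoldScaleAxis(),
])
with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]):
    mod = remove_bn(mod)
# Annotate conv2d + add + relu chains for the "dnnl" backend and partition them out.
mod["main"] = ConvBiasAddReLUAnnotator("dnnl").visit(mod["main"])  # annotator defined in the test file above
mod = transform.PartitionGraph()(mod)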