From 2408739dcff1f1fc41805a242e8ff767517047b2 Mon Sep 17 00:00:00 2001 From: ligeng Date: Mon, 8 Nov 2021 12:57:16 +0000 Subject: [PATCH 01/27] f wrong type check in conv2d_transpose --- src/relay/op/nn/convolution.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/relay/op/nn/convolution.h b/src/relay/op/nn/convolution.h index c27227b2eb73..a46a6ddec76d 100644 --- a/src/relay/op/nn/convolution.h +++ b/src/relay/op/nn/convolution.h @@ -1053,18 +1053,18 @@ bool Conv2DTransposeRel(const Array& types, int num_inputs, const Attrs& a const auto trans_in_layout = tir::BijectiveLayout(in_layout, kNCHW); ICHECK(trans_in_layout.defined()) - << "Conv only support input layouts that are convertible from NCHW." + << "Conv2DTransposed only support input layouts that are convertible from NCHW." << " But got " << in_layout; const auto trans_kernel_layout = tir::BijectiveLayout(kernel_layout, kOIHW); ICHECK(trans_kernel_layout.defined()) - << "Conv only support kernel layouts that are convertible from OIHW." + << "Conv2DTransposed only support kernel layouts that are convertible from OIHW." << " But got " << kernel_layout; Layout out_layout(param->out_layout == "" ? param->data_layout : param->out_layout); const auto trans_out_layout = tir::BijectiveLayout(out_layout, kNCHW); ICHECK(trans_out_layout.defined()) - << "Conv only support output layouts that are convertible from NCHW." + << "Conv2DTransposed only support output layouts that are convertible from NCHW." << " But got " << out_layout; IndexExpr channels, dilated_ksize_y, dilated_ksize_x; @@ -1099,16 +1099,20 @@ bool Conv2DTransposeRel(const Array& types, int num_inputs, const Attrs& a // check the size ICHECK(reporter->AssertEQ(param->kernel_size[0], wshape[2]) && reporter->AssertEQ(param->kernel_size[1], wshape[3])) - << "Conv2D: shape of weight is inconsistent with kernel_size, " + << "Conv2DTransposed: shape of weight is inconsistent with kernel_size, " << " kernel_size=" << param->kernel_size << " wshape=" << Array(wshape); } if (param->channels.defined()) { - ICHECK(reporter->AssertEQ(param->channels, wshape[1])) - << "Conv2D: shape of weight is inconsistent with channels, " + ICHECK(reporter->AssertEQ(param->channels, wshape[0])) + << "Conv2DTransposed: shape of weight is inconsistent with channels, " << " channels=" << param->channels << " wshape=" << Array(wshape); } if (!dshape_nchw[1].as() && !wshape[0].as()) { - ICHECK(reporter->AssertEQ(indexdiv(dshape_nchw[1], param->groups), wshape[0])); + ICHECK(reporter->AssertEQ(indexdiv(dshape_nchw[1], param->groups), wshape[1])) + << "Conv2DTransposed: data.shape[1] // groups != weight.shape[1], " + << " data.shape= " << Array(dshape_nchw) + << " groups= " << param->groups + << " weight.shape= " << Array(wshape); } channels = wshape[1]; dilated_ksize_y = 1 + (wshape[2] - 1) * param->dilation[0]; From 5dc34a92c06dc7c4d7ec00dbf2845130dc5ece7f Mon Sep 17 00:00:00 2001 From: ligeng Date: Mon, 8 Nov 2021 13:22:25 +0000 Subject: [PATCH 02/27] add test case for conv2d transpose --- tests/python/relay/test_op_level2.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/python/relay/test_op_level2.py b/tests/python/relay/test_op_level2.py index da2877063c45..f9c912809208 100644 --- a/tests/python/relay/test_op_level2.py +++ b/tests/python/relay/test_op_level2.py @@ -858,6 +858,30 @@ def test_conv2d_transpose_nchw_run(): ) tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5, atol=1e-5) +# @tvm.testing.uses_gpu 
+@pytest.mark.skip(reason="Currently Conv2dTranpose only supports groups == 1 as in L284 in python/tvm/relay/op/strategy/x86.py") +def test_conv2d_transpose_nchw_groups_run(): + # Add groups testing + dshape = (1, 16, 18, 18) + kshape = (16, 1, 3, 3) + oshape = (1, 16, 36, 36) + x = relay.var("x", shape=dshape) + w = relay.var("w") + y = relay.nn.conv2d_transpose( + x, w, channels=10, kernel_size=(3, 3), strides=(2, 2), padding=(1, 1), output_padding=(1, 1), groups=16 + ) + func = relay.Function([x, w], y) + dtype = "float32" + data = np.random.uniform(size=dshape).astype(dtype) + kernel = np.random.uniform(size=kshape).astype(dtype) + ref_res = tvm.topi.testing.conv2d_transpose_nchw_python(data, kernel, 2, 1, (1, 1)) + + for target, dev in tvm.testing.enabled_targets(): + op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)( + data, kernel + ) + tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5, atol=1e-5) + @tvm.testing.uses_gpu def test_conv2d_transpose_nhwc_run(): From 2885a5760c98e0745e2b8f585d2f89acd7233481 Mon Sep 17 00:00:00 2001 From: ligeng Date: Mon, 8 Nov 2021 14:59:32 +0000 Subject: [PATCH 03/27] add groups support for conv2d_transpose --- python/tvm/topi/nn/conv2d_transpose.py | 88 ++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/python/tvm/topi/nn/conv2d_transpose.py b/python/tvm/topi/nn/conv2d_transpose.py index 22188bcd45a4..699ca7e72187 100644 --- a/python/tvm/topi/nn/conv2d_transpose.py +++ b/python/tvm/topi/nn/conv2d_transpose.py @@ -16,6 +16,8 @@ # under the License. # pylint: disable=invalid-name, unused-variable, unused-argument """Transposed 2D convolution operators (sometimes called Deconvolution).""" +import collections + import tvm from tvm import te from tvm import relay @@ -24,6 +26,18 @@ from .utils import get_pad_tuple from ..utils import simplify +def _ntuple(n): + def parse(x): + if isinstance(x, collections.abc.Iterable): + assert len(x) == n, f"Input can only have {n} elements, but got {len(x)} instead: {x}." + return x + return tuple(repeat(x, n)) + return parse + +_single = _ntuple(1) +_pair = _ntuple(2) +_triple = _ntuple(3) +_quadruple = _ntuple(4) def conv2d_transpose_nchw(Input, Filter, strides, padding, out_dtype, output_padding): """Transposed 2D convolution nchw forward operator. @@ -111,11 +125,85 @@ def declaration_conv2d_transpose_impl(data, kernel, strides, padding, out_dtype, axis=[dc, dh, dw], ), tag="conv2d_transpose_nchw", + ) return Output +def group_conv2d_transpose_nchw(data, kernel, strides=1, padding=0, output_padding=0, groups=1, dilation=1, out_dtype=None): + # some pre-processing and prelimnary checks + if out_dtype is None: + out_dtype = data.dtype + + # strides = _pair(strides) + # padding = _pair(padding) + # output_padding = _pair(output_padding) + # dilation = _pair(dilation) + batch, in_channels, in_height, in_width = data.shape + _, out_c, filter_h, filter_w = kernel.shape + assert in_channels % groups == 0, "input channels must divide group size" + # assert out_c % groups == 0, "output channels must divide group size" + + batch, in_c, in_h, in_w = data.shape + _, out_c, filter_h, filter_w = kernel.shape + stride_h, stride_w = strides + opad_h, opad_w = output_padding + assert opad_h < stride_h and opad_w < stride_w, f"[{output_padding}] opad_h:{opad_h} < stride_h:{stride_h} and opad_w:{opad_w} < stride_w:{stride_w} does not satisfy." 
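Note: the _ntuple helper introduced earlier in this patch calls repeat(), which is not imported anywhere in the hunks shown, so the scalar branch would likely fail with a NameError at runtime; itertools.repeat is presumably what was intended. A self-contained sketch of the helper under that assumption (illustrative only, not part of the patch):

    import collections.abc
    from itertools import repeat  # assumed import; the patch as posted omits it

    def _ntuple(n):
        # Expand a scalar into an n-tuple, or pass through an iterable of length n.
        def parse(x):
            if isinstance(x, collections.abc.Iterable):
                assert len(x) == n, f"Input can only have {n} elements, but got {len(x)} instead: {x}."
                return x
            return tuple(repeat(x, n))
        return parse

    _pair = _ntuple(2)  # _pair(3) -> (3, 3); _pair((2, 1)) -> (2, 1)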
+ # dilate data + data_dilate = dilate(data, [1, 1, stride_h, stride_w], name="data_dilate") + # pad data + fpad_top, fpad_left, fpad_bottom, fpad_right = get_pad_tuple(padding, (filter_h, filter_w)) + bpad_top = filter_h - 1 - fpad_top + bpad_bottom = filter_h - 1 - fpad_bottom + opad_h + bpad_left = filter_w - 1 - fpad_left + bpad_right = filter_w - 1 - fpad_right + opad_w + data_pad = pad( + data_dilate, [0, 0, bpad_top, bpad_left], [0, 0, bpad_bottom, bpad_right], name="data_pad" + ) + # transform kernel layout from IOHW to OIHW, and rotate kernel by 180 degrees + kernel_transform = te.compute( + (out_c, in_c, filter_h, filter_w), + lambda i, o, h, w: kernel[o][i][filter_h - 1 - h][filter_w - 1 - w], + name="kernel_transform", + ) + + batch, in_c, in_h, in_w = data_pad.shape + out_c, _, filter_h, filter_w = kernel_transform.shape + + # convolution stage + out_c = simplify(out_c) + out_channels = simplify(out_c * groups) + + out_h = simplify(in_h - filter_h + 1) + out_w = simplify(in_w - filter_w + 1) + dc = te.reduce_axis((0, in_c // groups), name="dc") + dh = te.reduce_axis((0, filter_h), name="dh") + dw = te.reduce_axis((0, filter_w), name="dw") + + # data: batch, in_channels, out_h, out_w + # weight: out_channels // G, in_channels, out_h, out_w + return te.compute( + (batch, out_channels, out_h, out_w), + lambda b, c, h, w: te.sum( + data_pad[ + b, + c // (out_channels // groups) * (in_channels // groups) + dc, + h + dh, + w + dw + ].astype(out_dtype) + * kernel_transform[ + c % (out_channels // groups), + c // (out_channels // groups) * (in_channels // groups) + dc, + dh, + dw + ].astype(out_dtype), + axis=[dc, dh, dw], + ), + tag="conv2d_transpose_nchw", + ) + + @tvm.target.generic_func def conv2d_transpose_legalize(attrs, inputs, types): """Legalizes Transposed 2D convolution op. From 8f5a97908d321103c2b4aa97bbbec6fb934a7b24 Mon Sep 17 00:00:00 2001 From: ligeng Date: Mon, 8 Nov 2021 15:00:40 +0000 Subject: [PATCH 04/27] add naive implementation and schedule for conv2d with groups --- python/tvm/relay/op/strategy/arm_cpu.py | 20 +++++++++---- python/tvm/relay/op/strategy/cuda.py | 20 +++++++++---- python/tvm/relay/op/strategy/generic.py | 37 +++++++++++++++++++++---- python/tvm/relay/op/strategy/x86.py | 21 ++++++++++---- python/tvm/topi/generic/nn.py | 17 ++++++++++++ 5 files changed, 92 insertions(+), 23 deletions(-) diff --git a/python/tvm/relay/op/strategy/arm_cpu.py b/python/tvm/relay/op/strategy/arm_cpu.py index 06dfc87038fe..7dc7806cc30e 100644 --- a/python/tvm/relay/op/strategy/arm_cpu.py +++ b/python/tvm/relay/op/strategy/arm_cpu.py @@ -373,13 +373,21 @@ def conv2d_transpose_strategy_arm_cpu(attrs, inputs, out_type, target): groups = attrs.groups assert layout == "NCHW", "only support nchw for now" assert dilation == (1, 1), "not support dilate now" - assert groups == 1, "only support groups == 1 for now" + # assert groups == 1, "only support groups == 1 for now" strategy = _op.OpStrategy() - strategy.add_implementation( - wrap_compute_conv2d_transpose(topi.arm_cpu.conv2d_transpose_nchw), - wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_transpose_nchw), - name="conv2d_tranpose_nchw.arm_cpu", - ) + if groups == 1: + strategy.add_implementation( + wrap_compute_conv2d_transpose(topi.arm_cpu.conv2d_transpose_nchw), + wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_transpose_nchw), + name="conv2d_tranpose_nchw.arm_cpu", + ) + else: + # FIXME: Here should be a specialized implementation and schedule instead general one. 
+ strategy.add_implementation( + wrap_compute_group_conv2d_transpose(topi.nn.group_conv2d_transpose_nchw), + wrap_topi_schedule(topi.generic.schedule_group_conv2d_transpose_nchw), + name="group_conv2d_transpose_nchw.arm_cpu", + ) return strategy diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index 5f24dbda9d35..e139bb4d6c5e 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ b/python/tvm/relay/op/strategy/cuda.py @@ -557,13 +557,21 @@ def conv2d_transpose_strategy_cuda(attrs, inputs, out_type, target): groups = attrs.groups assert layout == "NCHW", "only support nchw for now" assert dilation == (1, 1), "not support dilate now" - assert groups == 1, "only support groups == 1 for now" + # assert groups == 1, "only support groups == 1 for now" strategy = _op.OpStrategy() - strategy.add_implementation( - wrap_compute_conv2d_transpose(topi.cuda.conv2d_transpose_nchw), - wrap_topi_schedule(topi.cuda.schedule_conv2d_transpose_nchw), - name="conv2d_transpose_nchw.cuda", - ) + if groups == 1: + strategy.add_implementation( + wrap_compute_conv2d_transpose(topi.cuda.conv2d_transpose_nchw), + wrap_topi_schedule(topi.cuda.schedule_conv2d_transpose_nchw), + name="conv2d_transpose_nchw.cuda", + ) + else: + # FIXME: Here should be a specialized implementation and schedule instead general one. + strategy.add_implementation( + wrap_compute_group_conv2d_transpose(topi.nn.group_conv2d_transpose_nchw), + wrap_topi_schedule(topi.generic.schedule_group_conv2d_transpose_nchw), + name="group_conv2d_transpose_nchw.cuda", + ) return strategy diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index 777f17ba6084..6b35a87f1ad0 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -461,6 +461,26 @@ def compute_conv2d_transpose(attrs, inputs, out_dtype): return compute_conv2d_transpose +# FIXME: This is a temporal fix for groups > 1 in Conv2dTranspose. This should be merged with above functions in the future. 
+def wrap_compute_group_conv2d_transpose(topi_compute): + """wrap conv2d_transpose topi compute""" + def compute_conv2d_transpose(attrs, inputs, out_dtype): + """Compute definition of conv2d_transpose""" + padding = get_const_tuple(attrs.padding) + strides = get_const_tuple(attrs.strides) + out_dtype = attrs.out_dtype + out_dtype = inputs[0].dtype if out_dtype in ("same", "") else out_dtype + groups = attrs.groups + dilation = attrs.dilation + output_padding = get_const_tuple(attrs.output_padding) + out = topi_compute(inputs[0], inputs[1], strides, padding, + output_padding, groups, dilation, out_dtype) + return [out] + + return compute_conv2d_transpose + + + @override_native_generic_func("conv2d_transpose_strategy") def conv2d_transpose_strategy(attrs, inputs, out_type, target): @@ -473,11 +493,18 @@ def conv2d_transpose_strategy(attrs, inputs, out_type, target): assert dilation == (1, 1), "not support dilate now" assert groups == 1, "only support groups == 1 for now" strategy = _op.OpStrategy() - strategy.add_implementation( - wrap_compute_conv2d_transpose(topi.nn.conv2d_transpose_nchw), - wrap_topi_schedule(topi.generic.schedule_conv2d_transpose_nchw), - name="conv2d_transpose_nchw.generic", - ) + if groups == 1: + strategy.add_implementation( + wrap_compute_conv2d_transpose(topi.nn.conv2d_transpose_nchw), + wrap_topi_schedule(topi.generic.schedule_conv2d_transpose_nchw), + name="conv2d_transpose_nchw.generic", + ) + else: + strategy.add_implementation( + wrap_compute_group_conv2d_transpose(topi.nn.group_conv2d_transpose_nchw), + wrap_topi_schedule(topi.generic.schedule_group_conv2d_transpose_nchw), + name="group_conv2d_transpose_nchw.generic", + ) return strategy diff --git a/python/tvm/relay/op/strategy/x86.py b/python/tvm/relay/op/strategy/x86.py index 1c8d1b478cb1..99a80942ceb7 100644 --- a/python/tvm/relay/op/strategy/x86.py +++ b/python/tvm/relay/op/strategy/x86.py @@ -281,13 +281,22 @@ def conv2d_transpose_strategy_cpu(attrs, inputs, out_type, target): groups = attrs.groups assert layout == "NCHW", "only support nchw for now" assert dilation == (1, 1), "not support dilate now" - assert groups == 1, "only support groups == 1 for now" + # assert groups == 1, "only support groups == 1 for now" strategy = _op.OpStrategy() - strategy.add_implementation( - wrap_compute_conv2d_transpose(topi.x86.conv2d_transpose_nchw), - wrap_topi_schedule(topi.x86.schedule_conv2d_transpose_nchw), - name="conv2d_transpose_nchw.x86", - ) + + if groups == 1: + strategy.add_implementation( + wrap_compute_conv2d_transpose(topi.x86.conv2d_transpose_nchw), + wrap_topi_schedule(topi.x86.schedule_conv2d_transpose_nchw), + name="conv2d_transpose_nchw.x86", + ) + else: + strategy.add_implementation( + wrap_compute_group_conv2d_transpose(topi.nn.group_conv2d_transpose_nchw), + wrap_topi_schedule(topi.generic.schedule_group_conv2d_transpose_nchw), + # FIXME: Here should be a manually written schedule instead default one. + name="group_conv2d_transpose_nchw.x86", + ) return strategy diff --git a/python/tvm/topi/generic/nn.py b/python/tvm/topi/generic/nn.py index 1b3214154687..22a90aa2cd07 100644 --- a/python/tvm/topi/generic/nn.py +++ b/python/tvm/topi/generic/nn.py @@ -428,6 +428,23 @@ def schedule_group_conv2d_nchw(outs): return _default_schedule(outs, False) +def schedule_group_conv2d_transpose_nchw(outs): + """Schedule for group_conv2d_transpose_nchw + + Parameters + ---------- + outs: Array of Tensor + The computation graph description of group_conv2d_nhwc + in the format of an array of tensors. 
+ + Returns + ------- + sch: Schedule + The computation schedule for the op. + """ + return _default_schedule(outs, False) + + def schedule_group_conv2d_nhwc(outs): """Schedule for group_conv2d_nhwc From 9bea044ba5b54eaa8c283d004d24d8e9b76f10f6 Mon Sep 17 00:00:00 2001 From: ligeng Date: Mon, 8 Nov 2021 16:02:38 +0000 Subject: [PATCH 05/27] enable tests for cpu and arm_cpu, raise error for cuda platform --- python/tvm/relay/op/strategy/cuda.py | 3 ++- tests/python/relay/test_op_level2.py | 14 ++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index e139bb4d6c5e..05bb26ecc399 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ b/python/tvm/relay/op/strategy/cuda.py @@ -566,10 +566,11 @@ def conv2d_transpose_strategy_cuda(attrs, inputs, out_type, target): name="conv2d_transpose_nchw.cuda", ) else: + raise NotImplementedError("CUDA schedule is not enable for conv2d transpose when groups > 1. See https://github.com/apache/tvm/pull/9465 for details.") # FIXME: Here should be a specialized implementation and schedule instead general one. strategy.add_implementation( wrap_compute_group_conv2d_transpose(topi.nn.group_conv2d_transpose_nchw), - wrap_topi_schedule(topi.generic.schedule_group_conv2d_transpose_nchw), + wrap_topi_schedule(topi.cuda.schedule_group_conv2d_transpose_nchw), name="group_conv2d_transpose_nchw.cuda", ) return strategy diff --git a/tests/python/relay/test_op_level2.py b/tests/python/relay/test_op_level2.py index f9c912809208..ecd479d53e42 100644 --- a/tests/python/relay/test_op_level2.py +++ b/tests/python/relay/test_op_level2.py @@ -858,8 +858,8 @@ def test_conv2d_transpose_nchw_run(): ) tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5, atol=1e-5) -# @tvm.testing.uses_gpu -@pytest.mark.skip(reason="Currently Conv2dTranpose only supports groups == 1 as in L284 in python/tvm/relay/op/strategy/x86.py") + +@tvm.testing.uses_gpu def test_conv2d_transpose_nchw_groups_run(): # Add groups testing dshape = (1, 16, 18, 18) @@ -868,19 +868,21 @@ def test_conv2d_transpose_nchw_groups_run(): x = relay.var("x", shape=dshape) w = relay.var("w") y = relay.nn.conv2d_transpose( - x, w, channels=10, kernel_size=(3, 3), strides=(2, 2), padding=(1, 1), output_padding=(1, 1), groups=16 + x, w, channels=16, kernel_size=(3, 3), strides=(2, 2), padding=(1, 1), output_padding=(1, 1), groups=16 ) func = relay.Function([x, w], y) dtype = "float32" data = np.random.uniform(size=dshape).astype(dtype) kernel = np.random.uniform(size=kshape).astype(dtype) - ref_res = tvm.topi.testing.conv2d_transpose_nchw_python(data, kernel, 2, 1, (1, 1)) - + # ref_res = tvm.topi.testing.conv2d_transpose_nchw_python(data, kernel, 2, 1, (1, 1)) + # FIXME: the testing.conv2d_transpose does not support groups yet. 
Currently only test whether it is exectuable for target, dev in tvm.testing.enabled_targets(): + if "cuda" in target: + continue op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)( data, kernel ) - tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5, atol=1e-5) + # tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-7, atol=1e-7) @tvm.testing.uses_gpu From a1c7308668ca66931681b05e533ae2b46e4b8c2e Mon Sep 17 00:00:00 2001 From: ligeng Date: Wed, 10 Nov 2021 17:17:16 +0000 Subject: [PATCH 06/27] revert the cuda and generic strategy --- python/tvm/relay/op/strategy/cuda.py | 23 +++++---------- python/tvm/relay/op/strategy/generic.py | 39 ++++--------------------- 2 files changed, 13 insertions(+), 49 deletions(-) diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index 05bb26ecc399..2c33837de89e 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ b/python/tvm/relay/op/strategy/cuda.py @@ -557,22 +557,13 @@ def conv2d_transpose_strategy_cuda(attrs, inputs, out_type, target): groups = attrs.groups assert layout == "NCHW", "only support nchw for now" assert dilation == (1, 1), "not support dilate now" - # assert groups == 1, "only support groups == 1 for now" + assert groups == 1, "only support groups == 1 for now" strategy = _op.OpStrategy() - if groups == 1: - strategy.add_implementation( - wrap_compute_conv2d_transpose(topi.cuda.conv2d_transpose_nchw), - wrap_topi_schedule(topi.cuda.schedule_conv2d_transpose_nchw), - name="conv2d_transpose_nchw.cuda", - ) - else: - raise NotImplementedError("CUDA schedule is not enable for conv2d transpose when groups > 1. See https://github.com/apache/tvm/pull/9465 for details.") - # FIXME: Here should be a specialized implementation and schedule instead general one. - strategy.add_implementation( - wrap_compute_group_conv2d_transpose(topi.nn.group_conv2d_transpose_nchw), - wrap_topi_schedule(topi.cuda.schedule_group_conv2d_transpose_nchw), - name="group_conv2d_transpose_nchw.cuda", - ) + strategy.add_implementation( + wrap_compute_conv2d_transpose(topi.cuda.conv2d_transpose_nchw), + wrap_topi_schedule(topi.cuda.schedule_conv2d_transpose_nchw), + name="conv2d_transpose_nchw.cuda", + ) return strategy @@ -1248,4 +1239,4 @@ def einsum_strategy_cuda(attrs, inputs, out_type, target): wrap_topi_schedule(topi.generic.schedule_extern), name="einsum.cuda", ) - return strategy + return strategy \ No newline at end of file diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index 6b35a87f1ad0..8ef54ceaca2e 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -461,26 +461,6 @@ def compute_conv2d_transpose(attrs, inputs, out_dtype): return compute_conv2d_transpose -# FIXME: This is a temporal fix for groups > 1 in Conv2dTranspose. This should be merged with above functions in the future. 
-def wrap_compute_group_conv2d_transpose(topi_compute): - """wrap conv2d_transpose topi compute""" - def compute_conv2d_transpose(attrs, inputs, out_dtype): - """Compute definition of conv2d_transpose""" - padding = get_const_tuple(attrs.padding) - strides = get_const_tuple(attrs.strides) - out_dtype = attrs.out_dtype - out_dtype = inputs[0].dtype if out_dtype in ("same", "") else out_dtype - groups = attrs.groups - dilation = attrs.dilation - output_padding = get_const_tuple(attrs.output_padding) - out = topi_compute(inputs[0], inputs[1], strides, padding, - output_padding, groups, dilation, out_dtype) - return [out] - - return compute_conv2d_transpose - - - @override_native_generic_func("conv2d_transpose_strategy") def conv2d_transpose_strategy(attrs, inputs, out_type, target): @@ -493,18 +473,11 @@ def conv2d_transpose_strategy(attrs, inputs, out_type, target): assert dilation == (1, 1), "not support dilate now" assert groups == 1, "only support groups == 1 for now" strategy = _op.OpStrategy() - if groups == 1: - strategy.add_implementation( - wrap_compute_conv2d_transpose(topi.nn.conv2d_transpose_nchw), - wrap_topi_schedule(topi.generic.schedule_conv2d_transpose_nchw), - name="conv2d_transpose_nchw.generic", - ) - else: - strategy.add_implementation( - wrap_compute_group_conv2d_transpose(topi.nn.group_conv2d_transpose_nchw), - wrap_topi_schedule(topi.generic.schedule_group_conv2d_transpose_nchw), - name="group_conv2d_transpose_nchw.generic", - ) + strategy.add_implementation( + wrap_compute_conv2d_transpose(topi.nn.conv2d_transpose_nchw), + wrap_topi_schedule(topi.generic.schedule_conv2d_transpose_nchw), + name="conv2d_transpose_nchw.generic", + ) return strategy @@ -1757,4 +1730,4 @@ def einsum_strategy(attrs, inputs, out_type, target): wrap_topi_schedule(topi.generic.schedule_einsum), name="einsum.generic", ) - return strategy + return strategy \ No newline at end of file From fcc2f00d54a97bad166a8a04c5edd49974acd517 Mon Sep 17 00:00:00 2001 From: ligeng Date: Wed, 10 Nov 2021 17:42:36 +0000 Subject: [PATCH 07/27] revert back the x86 strategy --- python/tvm/relay/op/strategy/x86.py | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/python/tvm/relay/op/strategy/x86.py b/python/tvm/relay/op/strategy/x86.py index 99a80942ceb7..8e032d6e8115 100644 --- a/python/tvm/relay/op/strategy/x86.py +++ b/python/tvm/relay/op/strategy/x86.py @@ -281,22 +281,13 @@ def conv2d_transpose_strategy_cpu(attrs, inputs, out_type, target): groups = attrs.groups assert layout == "NCHW", "only support nchw for now" assert dilation == (1, 1), "not support dilate now" - # assert groups == 1, "only support groups == 1 for now" + assert groups == 1, "only support groups == 1 for now" strategy = _op.OpStrategy() - - if groups == 1: - strategy.add_implementation( - wrap_compute_conv2d_transpose(topi.x86.conv2d_transpose_nchw), - wrap_topi_schedule(topi.x86.schedule_conv2d_transpose_nchw), - name="conv2d_transpose_nchw.x86", - ) - else: - strategy.add_implementation( - wrap_compute_group_conv2d_transpose(topi.nn.group_conv2d_transpose_nchw), - wrap_topi_schedule(topi.generic.schedule_group_conv2d_transpose_nchw), - # FIXME: Here should be a manually written schedule instead default one. 
- name="group_conv2d_transpose_nchw.x86", - ) + strategy.add_implementation( + wrap_compute_conv2d_transpose(topi.x86.conv2d_transpose_nchw), + wrap_topi_schedule(topi.x86.schedule_conv2d_transpose_nchw), + name="conv2d_transpose_nchw.x86", + ) return strategy @@ -691,4 +682,4 @@ def conv2d_winograd_without_weight_transfrom_strategy_cpu(attrs, inputs, out_typ raise RuntimeError( "Unsupported conv2d_winograd_without_weight_transfrom layout {}".format(layout) ) - return strategy + return strategy \ No newline at end of file From 59e807baf7fa9fdacbfc3510115c9f12120081aa Mon Sep 17 00:00:00 2001 From: ligeng Date: Wed, 10 Nov 2021 17:44:33 +0000 Subject: [PATCH 08/27] revert back the arm_cpu strategy --- python/tvm/relay/op/strategy/arm_cpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/relay/op/strategy/arm_cpu.py b/python/tvm/relay/op/strategy/arm_cpu.py index 7dc7806cc30e..10db6cc86324 100644 --- a/python/tvm/relay/op/strategy/arm_cpu.py +++ b/python/tvm/relay/op/strategy/arm_cpu.py @@ -422,4 +422,4 @@ def schedule_bitserial_dense_arm_cpu(attrs, inputs, out_type, target): wrap_topi_schedule(topi.arm_cpu.schedule_bitserial_dense), name="bitserial_dense.arm_cpu", ) - return strategy + return strategy \ No newline at end of file From 3eacec4070f55bd456ef2e86c7daeaf0005c6012 Mon Sep 17 00:00:00 2001 From: ligeng Date: Wed, 10 Nov 2021 17:46:09 +0000 Subject: [PATCH 09/27] revert back the arm_cpu strategy --- python/tvm/relay/op/strategy/arm_cpu.py | 2 +- python/tvm/relay/op/strategy/cuda.py | 2 +- python/tvm/relay/op/strategy/generic.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/tvm/relay/op/strategy/arm_cpu.py b/python/tvm/relay/op/strategy/arm_cpu.py index 10db6cc86324..7dc7806cc30e 100644 --- a/python/tvm/relay/op/strategy/arm_cpu.py +++ b/python/tvm/relay/op/strategy/arm_cpu.py @@ -422,4 +422,4 @@ def schedule_bitserial_dense_arm_cpu(attrs, inputs, out_type, target): wrap_topi_schedule(topi.arm_cpu.schedule_bitserial_dense), name="bitserial_dense.arm_cpu", ) - return strategy \ No newline at end of file + return strategy diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index 2c33837de89e..5f24dbda9d35 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ b/python/tvm/relay/op/strategy/cuda.py @@ -1239,4 +1239,4 @@ def einsum_strategy_cuda(attrs, inputs, out_type, target): wrap_topi_schedule(topi.generic.schedule_extern), name="einsum.cuda", ) - return strategy \ No newline at end of file + return strategy diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index 8ef54ceaca2e..777f17ba6084 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -1730,4 +1730,4 @@ def einsum_strategy(attrs, inputs, out_type, target): wrap_topi_schedule(topi.generic.schedule_einsum), name="einsum.generic", ) - return strategy \ No newline at end of file + return strategy From 478ff9aad244d20cb241a9c902ebb4b99df145e7 Mon Sep 17 00:00:00 2001 From: ligeng Date: Wed, 10 Nov 2021 17:47:43 +0000 Subject: [PATCH 10/27] revert back the arm_cpu strategy --- python/tvm/relay/op/strategy/arm_cpu.py | 20 ++++++-------------- src/relay/op/nn/convolution.h | 13 +++++++------ 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/python/tvm/relay/op/strategy/arm_cpu.py b/python/tvm/relay/op/strategy/arm_cpu.py index 7dc7806cc30e..06dfc87038fe 100644 --- a/python/tvm/relay/op/strategy/arm_cpu.py +++ 
b/python/tvm/relay/op/strategy/arm_cpu.py @@ -373,21 +373,13 @@ def conv2d_transpose_strategy_arm_cpu(attrs, inputs, out_type, target): groups = attrs.groups assert layout == "NCHW", "only support nchw for now" assert dilation == (1, 1), "not support dilate now" - # assert groups == 1, "only support groups == 1 for now" + assert groups == 1, "only support groups == 1 for now" strategy = _op.OpStrategy() - if groups == 1: - strategy.add_implementation( - wrap_compute_conv2d_transpose(topi.arm_cpu.conv2d_transpose_nchw), - wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_transpose_nchw), - name="conv2d_tranpose_nchw.arm_cpu", - ) - else: - # FIXME: Here should be a specialized implementation and schedule instead general one. - strategy.add_implementation( - wrap_compute_group_conv2d_transpose(topi.nn.group_conv2d_transpose_nchw), - wrap_topi_schedule(topi.generic.schedule_group_conv2d_transpose_nchw), - name="group_conv2d_transpose_nchw.arm_cpu", - ) + strategy.add_implementation( + wrap_compute_conv2d_transpose(topi.arm_cpu.conv2d_transpose_nchw), + wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_transpose_nchw), + name="conv2d_tranpose_nchw.arm_cpu", + ) return strategy diff --git a/src/relay/op/nn/convolution.h b/src/relay/op/nn/convolution.h index a46a6ddec76d..717cfa426a61 100644 --- a/src/relay/op/nn/convolution.h +++ b/src/relay/op/nn/convolution.h @@ -1044,7 +1044,7 @@ bool Conv2DTransposeRel(const Array& types, int num_inputs, const Attrs& a if (data == nullptr) return false; static const Layout kNCHW("NCHW"); - static const Layout kOIHW("OIHW"); + static const Layout kOIHW("OIHW"); // FIXME: weight layout should be IOHW as discussed in https://github.com/apache/tvm/pull/9336 const Conv2DTransposeAttrs* param = attrs.as(); ICHECK(param != nullptr); @@ -1103,13 +1103,14 @@ bool Conv2DTransposeRel(const Array& types, int num_inputs, const Attrs& a << " kernel_size=" << param->kernel_size << " wshape=" << Array(wshape); } if (param->channels.defined()) { - ICHECK(reporter->AssertEQ(param->channels, wshape[0])) - << "Conv2DTransposed: shape of weight is inconsistent with channels, " - << " channels=" << param->channels << " wshape=" << Array(wshape); + ICHECK(reporter->AssertEQ(indexdiv(param->channels, param->groups), wshape[1])) + << "Conv2DTransposed: shape of weight is inconsistent with out_channels, " + << " out_channels // groups != weigt.shape[1] " + << " out_channels=" << param->channels << " weigt.shape=" << Array(wshape); } if (!dshape_nchw[1].as() && !wshape[0].as()) { - ICHECK(reporter->AssertEQ(indexdiv(dshape_nchw[1], param->groups), wshape[1])) - << "Conv2DTransposed: data.shape[1] // groups != weight.shape[1], " + ICHECK(reporter->AssertEQ(dshape_nchw[1], wshape[0])) + << "Conv2DTransposed: shape of weight is inconsistent with in_channels." 
<< " data.shape= " << Array(dshape_nchw) << " groups= " << param->groups << " weight.shape= " << Array(wshape); From 11c2c7564ff2d49625845a988cba506a1c034fcf Mon Sep 17 00:00:00 2001 From: ligeng Date: Wed, 10 Nov 2021 17:49:11 +0000 Subject: [PATCH 11/27] fix EOF of x86 --- python/tvm/relay/op/strategy/x86.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/relay/op/strategy/x86.py b/python/tvm/relay/op/strategy/x86.py index 8e032d6e8115..1c8d1b478cb1 100644 --- a/python/tvm/relay/op/strategy/x86.py +++ b/python/tvm/relay/op/strategy/x86.py @@ -682,4 +682,4 @@ def conv2d_winograd_without_weight_transfrom_strategy_cpu(attrs, inputs, out_typ raise RuntimeError( "Unsupported conv2d_winograd_without_weight_transfrom layout {}".format(layout) ) - return strategy \ No newline at end of file + return strategy From 064ee1a4ae97fc4640f39a6066c19d09f1883bc2 Mon Sep 17 00:00:00 2001 From: ligeng Date: Wed, 10 Nov 2021 17:52:42 +0000 Subject: [PATCH 12/27] clang lint updated c++ code --- src/relay/op/nn/convolution.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/relay/op/nn/convolution.h b/src/relay/op/nn/convolution.h index 717cfa426a61..70f480e8d8cf 100644 --- a/src/relay/op/nn/convolution.h +++ b/src/relay/op/nn/convolution.h @@ -1105,8 +1105,8 @@ bool Conv2DTransposeRel(const Array& types, int num_inputs, const Attrs& a if (param->channels.defined()) { ICHECK(reporter->AssertEQ(indexdiv(param->channels, param->groups), wshape[1])) << "Conv2DTransposed: shape of weight is inconsistent with out_channels, " - << " out_channels // groups != weigt.shape[1] " - << " out_channels=" << param->channels << " weigt.shape=" << Array(wshape); + << " out_channels // groups != weight.shape[1] " + << " out_channels=" << param->channels << " weight.shape=" << Array(wshape); } if (!dshape_nchw[1].as() && !wshape[0].as()) { ICHECK(reporter->AssertEQ(dshape_nchw[1], wshape[0])) From efea36a171c8bf6fbe3873e369d11d00af3a52f9 Mon Sep 17 00:00:00 2001 From: ligeng Date: Wed, 10 Nov 2021 18:07:10 +0000 Subject: [PATCH 13/27] update topi implementation --- python/tvm/topi/nn/conv2d_transpose.py | 61 +++++++++++++++++++++----- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/python/tvm/topi/nn/conv2d_transpose.py b/python/tvm/topi/nn/conv2d_transpose.py index 699ca7e72187..652a6a8f5c99 100644 --- a/python/tvm/topi/nn/conv2d_transpose.py +++ b/python/tvm/topi/nn/conv2d_transpose.py @@ -39,6 +39,7 @@ def parse(x): _triple = _ntuple(3) _quadruple = _ntuple(4) + def conv2d_transpose_nchw(Input, Filter, strides, padding, out_dtype, output_padding): """Transposed 2D convolution nchw forward operator. @@ -131,25 +132,63 @@ def declaration_conv2d_transpose_impl(data, kernel, strides, padding, out_dtype, return Output -def group_conv2d_transpose_nchw(data, kernel, strides=1, padding=0, output_padding=0, groups=1, dilation=1, out_dtype=None): +def group_conv2d_transpose_nchw(data, kernel, stride, padding, out_dtype, output_padding, groups): + """Group convolution operator in NCHW layout. 
+ + Parameters + ---------- + data : tvm.te.Tensor + 4-D with shape [batch, in_channel, in_height, in_width] + + kernel : tvm.te.Tensor + 4-D with shape [in_channel, out_channel // groups, filter_height, filter_width] + + stride : int or a list/tuple of two ints + Stride size, or [stride_height, stride_width] + + padding : int or a list/tuple of 2 or 4 ints + padding size, or + [pad_height, pad_width] for 2 ints, or + [pad_top, pad_left, pad_bottom, pad_right] for 4 ints + + out_dtype : str + The output data type. This is used for mixed precision. + + output_padding : tuple of ints + Used to get the right output shape for gradients + + groups : int + number of groups + + out_dtype : str + The output type. This is used for mixed precision. + + Returns + ------- + Output : tvm.te.Tensor + 4-D with shape [batch, out_channel, out_height, out_width] + """ + if groups == 1: + return conv2d_transpose_nchw(data, kernel, stride, padding, out_dtype, output_padding) + # some pre-processing and prelimnary checks if out_dtype is None: out_dtype = data.dtype - # strides = _pair(strides) + batch, in_c, in_h, in_w = data.shape + _, out_c, filter_h, filter_w = kernel.shape + # assert in_channels % groups == 0, "input channels must divide group size" + assert out_c % groups == 0, "output channels must divide group size" + + strides = _pair(strides) # padding = _pair(padding) # output_padding = _pair(output_padding) # dilation = _pair(dilation) - batch, in_channels, in_height, in_width = data.shape - _, out_c, filter_h, filter_w = kernel.shape - assert in_channels % groups == 0, "input channels must divide group size" - # assert out_c % groups == 0, "output channels must divide group size" - - batch, in_c, in_h, in_w = data.shape - _, out_c, filter_h, filter_w = kernel.shape + stride_h, stride_w = strides opad_h, opad_w = output_padding - assert opad_h < stride_h and opad_w < stride_w, f"[{output_padding}] opad_h:{opad_h} < stride_h:{stride_h} and opad_w:{opad_w} < stride_w:{stride_w} does not satisfy." + assert opad_h < stride_h and opad_w < stride_w, \ + f"[{output_padding}] opad_h:{opad_h} < stride_h:{stride_h} and opad_w:{opad_w} < stride_w:{stride_w} does not satisfy." 
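For the shape bookkeeping: with the IOHW kernel layout used here, kernel.shape[0] is in_channels and kernel.shape[1] is out_channels // groups, so the result has kernel.shape[1] * groups output channels. A minimal, illustrative sketch of the output-size arithmetic (the helper below is hypothetical and only mirrors the dilate-pad-convolve construction that follows):

    def _transposed_out_size(in_size, kernel, stride, pad, opad):
        # (in - 1) * stride - 2 * pad + kernel + output_padding, for symmetric padding
        return (in_size - 1) * stride - 2 * pad + kernel + opad

    # Depthwise-style case from the relay test earlier in this series:
    # data (1, 16, 18, 18), IOHW kernel (16, 1, 3, 3), groups=16,
    # strides=(2, 2), padding=(1, 1), output_padding=(1, 1)
    assert _transposed_out_size(18, 3, 2, 1, 1) == 36  # output shape (1, 16, 36, 36)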
# dilate data data_dilate = dilate(data, [1, 1, stride_h, stride_w], name="data_dilate") # pad data @@ -200,7 +239,7 @@ def group_conv2d_transpose_nchw(data, kernel, strides=1, padding=0, output_paddi ].astype(out_dtype), axis=[dc, dh, dw], ), - tag="conv2d_transpose_nchw", + tag="group_conv2d_transpose_nchw", ) From 7e212001c931592fbb57219902523f5eea644df0 Mon Sep 17 00:00:00 2001 From: ligeng Date: Wed, 10 Nov 2021 18:08:13 +0000 Subject: [PATCH 14/27] Revert test --- tests/python/relay/test_op_level2.py | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/tests/python/relay/test_op_level2.py b/tests/python/relay/test_op_level2.py index ecd479d53e42..bb74795def7d 100644 --- a/tests/python/relay/test_op_level2.py +++ b/tests/python/relay/test_op_level2.py @@ -859,32 +859,6 @@ def test_conv2d_transpose_nchw_run(): tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-5, atol=1e-5) -@tvm.testing.uses_gpu -def test_conv2d_transpose_nchw_groups_run(): - # Add groups testing - dshape = (1, 16, 18, 18) - kshape = (16, 1, 3, 3) - oshape = (1, 16, 36, 36) - x = relay.var("x", shape=dshape) - w = relay.var("w") - y = relay.nn.conv2d_transpose( - x, w, channels=16, kernel_size=(3, 3), strides=(2, 2), padding=(1, 1), output_padding=(1, 1), groups=16 - ) - func = relay.Function([x, w], y) - dtype = "float32" - data = np.random.uniform(size=dshape).astype(dtype) - kernel = np.random.uniform(size=kshape).astype(dtype) - # ref_res = tvm.topi.testing.conv2d_transpose_nchw_python(data, kernel, 2, 1, (1, 1)) - # FIXME: the testing.conv2d_transpose does not support groups yet. Currently only test whether it is exectuable - for target, dev in tvm.testing.enabled_targets(): - if "cuda" in target: - continue - op_res1 = relay.create_executor("graph", device=dev, target=target).evaluate(func)( - data, kernel - ) - # tvm.testing.assert_allclose(op_res1.numpy(), ref_res, rtol=1e-7, atol=1e-7) - - @tvm.testing.uses_gpu def test_conv2d_transpose_nhwc_run(): dshape_nhwc = (1, 18, 18, 3) @@ -1913,4 +1887,4 @@ def _test_correlation( if __name__ == "__main__": - sys.exit(pytest.main(sys.argv)) + sys.exit(pytest.main(sys.argv)) \ No newline at end of file From 14c771bd9a592481c8561b6e51aaefac1d3a6fbe Mon Sep 17 00:00:00 2001 From: ligeng Date: Wed, 10 Nov 2021 18:08:36 +0000 Subject: [PATCH 15/27] Revert test --- tests/python/relay/test_op_level2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/relay/test_op_level2.py b/tests/python/relay/test_op_level2.py index bb74795def7d..da2877063c45 100644 --- a/tests/python/relay/test_op_level2.py +++ b/tests/python/relay/test_op_level2.py @@ -1887,4 +1887,4 @@ def _test_correlation( if __name__ == "__main__": - sys.exit(pytest.main(sys.argv)) \ No newline at end of file + sys.exit(pytest.main(sys.argv)) From 8d48f22eca1875ca8098bd5136f9722252e057d9 Mon Sep 17 00:00:00 2001 From: ligeng Date: Wed, 10 Nov 2021 19:28:59 +0000 Subject: [PATCH 16/27] add generic/x86/arm specialization for conv2d_transpose with groups > 1 --- python/tvm/relay/op/strategy/cuda.py | 2 +- python/tvm/relay/op/strategy/generic.py | 32 ++++++++++++++++++------- python/tvm/relay/op/strategy/x86.py | 24 ++++++++++++++----- python/tvm/topi/nn/conv2d_transpose.py | 16 ++++++------- 4 files changed, 50 insertions(+), 24 deletions(-) diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index 5f24dbda9d35..eee5d9a685b3 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ 
b/python/tvm/relay/op/strategy/cuda.py @@ -557,7 +557,7 @@ def conv2d_transpose_strategy_cuda(attrs, inputs, out_type, target): groups = attrs.groups assert layout == "NCHW", "only support nchw for now" assert dilation == (1, 1), "not support dilate now" - assert groups == 1, "only support groups == 1 for now" + assert groups == 1, "only support groups == 1 when targetting cuda/gpu" strategy = _op.OpStrategy() strategy.add_implementation( wrap_compute_conv2d_transpose(topi.cuda.conv2d_transpose_nchw), diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index 777f17ba6084..a82b4c8f708a 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -446,7 +446,7 @@ def deformable_conv2d_strategy(attrs, inputs, out_type, target): # conv2d_transpose -def wrap_compute_conv2d_transpose(topi_compute): +def wrap_compute_conv2d_transpose(topi_compute, has_groups=False): """wrap conv2d_transpose topi compute""" def compute_conv2d_transpose(attrs, inputs, out_dtype): @@ -456,7 +456,11 @@ def compute_conv2d_transpose(attrs, inputs, out_dtype): out_dtype = attrs.out_dtype out_dtype = inputs[0].dtype if out_dtype in ("same", "") else out_dtype output_padding = get_const_tuple(attrs.output_padding) - out = topi_compute(inputs[0], inputs[1], strides, padding, out_dtype, output_padding) + # out = topi_compute(inputs[0], inputs[1], strides, padding, out_dtype, output_padding) + args = [inputs[0], inputs[1], strides, padding, out_dtype, output_padding] + if has_groups: + args.append(attrs.groups) + out = topi_compute(*args) return [out] return compute_conv2d_transpose @@ -471,13 +475,25 @@ def conv2d_transpose_strategy(attrs, inputs, out_type, target): groups = attrs.groups assert layout == "NCHW", "only support nchw for now" assert dilation == (1, 1), "not support dilate now" - assert groups == 1, "only support groups == 1 for now" + # assert groups == 1, "only support groups == 1 for now" strategy = _op.OpStrategy() - strategy.add_implementation( - wrap_compute_conv2d_transpose(topi.nn.conv2d_transpose_nchw), - wrap_topi_schedule(topi.generic.schedule_conv2d_transpose_nchw), - name="conv2d_transpose_nchw.generic", - ) + # strategy.add_implementation( + # wrap_compute_conv2d_transpose(topi.nn.conv2d_transpose_nchw), + # wrap_topi_schedule(topi.generic.schedule_conv2d_transpose_nchw), + # name="conv2d_transpose_nchw.generic", + # ) + if groups == 1: + strategy.add_implementation( + wrap_compute_conv2d_transpose(topi.nn.conv2d_transpose_nchw), + wrap_topi_schedule(topi.generic.schedule_conv2d_transpose_nchw), + name="conv2d_transpose_nchw.generic", + ) + else: # group_transpose_conv2d + strategy.add_implementation( + wrap_compute_conv2d_transpose(topi.nn.group_conv2d_transpose_nchw, has_groups=True), + wrap_topi_schedule(topi.generic.schedule_group_conv2d_transpose_nchw), + name="group_conv2d_transpose_nchw.generic", + ) return strategy diff --git a/python/tvm/relay/op/strategy/x86.py b/python/tvm/relay/op/strategy/x86.py index 1c8d1b478cb1..b25cf35b2000 100644 --- a/python/tvm/relay/op/strategy/x86.py +++ b/python/tvm/relay/op/strategy/x86.py @@ -281,13 +281,25 @@ def conv2d_transpose_strategy_cpu(attrs, inputs, out_type, target): groups = attrs.groups assert layout == "NCHW", "only support nchw for now" assert dilation == (1, 1), "not support dilate now" - assert groups == 1, "only support groups == 1 for now" + # assert groups == 1, "only support groups == 1 for now" strategy = _op.OpStrategy() - strategy.add_implementation( 
- wrap_compute_conv2d_transpose(topi.x86.conv2d_transpose_nchw), - wrap_topi_schedule(topi.x86.schedule_conv2d_transpose_nchw), - name="conv2d_transpose_nchw.x86", - ) + # strategy.add_implementation( + # wrap_compute_conv2d_transpose(topi.x86.conv2d_transpose_nchw), + # wrap_topi_schedule(topi.x86.schedule_conv2d_transpose_nchw), + # name="conv2d_transpose_nchw.x86", + # ) + if groups == 1: + strategy.add_implementation( + wrap_compute_conv2d_transpose(topi.x86.conv2d_transpose_nchw), + wrap_topi_schedule(topi.x86.schedule_conv2d_transpose_nchw), + name="conv2d_transpose_nchw.x86", + ) + else: + strategy.add_implementation( + wrap_compute_conv2d_transpose(topi.nn.group_conv2d_transpose_nchw, has_groups=True), + wrap_topi_schedule(topi.generic.schedule_group_conv2d_transpose_nchw), + name="group_conv2d_transpose_nchw.x86", + ) return strategy diff --git a/python/tvm/topi/nn/conv2d_transpose.py b/python/tvm/topi/nn/conv2d_transpose.py index 652a6a8f5c99..8173b6c002ef 100644 --- a/python/tvm/topi/nn/conv2d_transpose.py +++ b/python/tvm/topi/nn/conv2d_transpose.py @@ -126,7 +126,6 @@ def declaration_conv2d_transpose_impl(data, kernel, strides, padding, out_dtype, axis=[dc, dh, dw], ), tag="conv2d_transpose_nchw", - ) return Output @@ -175,12 +174,12 @@ def group_conv2d_transpose_nchw(data, kernel, stride, padding, out_dtype, output if out_dtype is None: out_dtype = data.dtype - batch, in_c, in_h, in_w = data.shape + batch, in_channels, in_h, in_w = data.shape _, out_c, filter_h, filter_w = kernel.shape - # assert in_channels % groups == 0, "input channels must divide group size" - assert out_c % groups == 0, "output channels must divide group size" + assert in_channels % groups == 0, f"input channels {in_channels} must divide group size {groups}" + # assert out_c % groups == 0, f"output channels {in_c} must divide group size {groups}" - strides = _pair(strides) + strides = _pair(stride) # padding = _pair(padding) # output_padding = _pair(output_padding) # dilation = _pair(dilation) @@ -202,21 +201,20 @@ def group_conv2d_transpose_nchw(data, kernel, stride, padding, out_dtype, output ) # transform kernel layout from IOHW to OIHW, and rotate kernel by 180 degrees kernel_transform = te.compute( - (out_c, in_c, filter_h, filter_w), + (out_c, in_channels, filter_h, filter_w), lambda i, o, h, w: kernel[o][i][filter_h - 1 - h][filter_w - 1 - w], name="kernel_transform", ) - batch, in_c, in_h, in_w = data_pad.shape + batch, in_channels, in_h, in_w = data_pad.shape out_c, _, filter_h, filter_w = kernel_transform.shape # convolution stage - out_c = simplify(out_c) out_channels = simplify(out_c * groups) out_h = simplify(in_h - filter_h + 1) out_w = simplify(in_w - filter_w + 1) - dc = te.reduce_axis((0, in_c // groups), name="dc") + dc = te.reduce_axis((0, in_channels // groups), name="dc") dh = te.reduce_axis((0, filter_h), name="dh") dw = te.reduce_axis((0, filter_w), name="dw") From 2d766f2117cbe68c5e7396c3a2219dc39559ed53 Mon Sep 17 00:00:00 2001 From: ligeng Date: Wed, 10 Nov 2021 19:32:26 +0000 Subject: [PATCH 17/27] remove commentted codes --- python/tvm/relay/op/strategy/generic.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index a82b4c8f708a..1f7a7554b10e 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -477,11 +477,6 @@ def conv2d_transpose_strategy(attrs, inputs, out_type, target): assert dilation == (1, 1), "not support dilate now" # assert groups 
== 1, "only support groups == 1 for now" strategy = _op.OpStrategy() - # strategy.add_implementation( - # wrap_compute_conv2d_transpose(topi.nn.conv2d_transpose_nchw), - # wrap_topi_schedule(topi.generic.schedule_conv2d_transpose_nchw), - # name="conv2d_transpose_nchw.generic", - # ) if groups == 1: strategy.add_implementation( wrap_compute_conv2d_transpose(topi.nn.conv2d_transpose_nchw), From 795053e6fc7d3f4537c1f942b5d01032c76d17d2 Mon Sep 17 00:00:00 2001 From: Alicja Kwasniewska Date: Wed, 10 Nov 2021 19:49:18 +0000 Subject: [PATCH 18/27] fix lint --- .../topi/testing/conv2d_transpose_python.py | 40 ++++- .../test_topi_group_conv2d_transpose.py | 156 ++++++++++++++++++ 2 files changed, 195 insertions(+), 1 deletion(-) create mode 100644 tests/python/topi/python/test_topi_group_conv2d_transpose.py diff --git a/python/tvm/topi/testing/conv2d_transpose_python.py b/python/tvm/topi/testing/conv2d_transpose_python.py index c7c0d9f2529a..5a0f1b235eb0 100644 --- a/python/tvm/topi/testing/conv2d_transpose_python.py +++ b/python/tvm/topi/testing/conv2d_transpose_python.py @@ -22,7 +22,7 @@ from tvm.topi.nn.utils import get_pad_tuple -def conv2d_transpose_nchw_python(a_np, w_np, stride, padding, output_padding): +def _conv2d_transpose_nchw_python(a_np, w_np, stride, padding, output_padding): """Transposed convolution operator in NCHW layout. Parameters @@ -141,3 +141,41 @@ def conv2d_transpose_nhwc_python( ) res_nhwc = np.transpose(res_nchw, (0, 2, 3, 1)) return res_nhwc + + +def conv2d_transpose_nchw_python(a_np, w_np, stride, padding, output_padding, groups=1): + """Convolution operator in NCHW layout. + + Parameters + ---------- + a_np : numpy.ndarray + 4-D with shape [batch, in_channel, in_height, in_width] + + w_np : numpy.ndarray + 4-D with shape [in_channel, num_filter // groups, filter_height, filter_width] + + stride : int or a list/tuple of two ints + Stride size, or [stride_height, stride_width] + + padding : int or str + Padding size, or ['VALID', 'SAME'] + + output_padding : int or a list/tuple of two ints + Use to disambiguate the output shape. + + groups : int + Number of groups + + Returns + ------- + b_np : np.ndarray + 4-D with shape [batch, out_channel, out_height, out_width] + """ + a_slices = np.array_split(a_np, groups, axis=1) + w_slices = np.array_split(w_np, groups, axis=0) + b_slices = [ + _conv2d_transpose_nchw_python(a_slice, w_slice, stride, padding, output_padding) + for a_slice, w_slice in zip(a_slices, w_slices) + ] + b_np = np.concatenate(b_slices, axis=1) + return b_np diff --git a/tests/python/topi/python/test_topi_group_conv2d_transpose.py b/tests/python/topi/python/test_topi_group_conv2d_transpose.py new file mode 100644 index 000000000000..90b7500c6cd4 --- /dev/null +++ b/tests/python/topi/python/test_topi_group_conv2d_transpose.py @@ -0,0 +1,156 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +"""Example code to do group transpose convolution.""" + +import numpy as np +import tvm +import tvm.testing +import tvm.topi.testing +from tvm import te, topi +from tvm.contrib.pickle_memoize import memoize +from tvm.topi.utils import get_const_tuple + +_group_conv2d_nchw_implement = { + "generic": ( + topi.nn.group_conv2d_transpose_nchw, + topi.generic.schedule_group_conv2d_transpose_nchw, + ), +} + + +def verify_group_conv2d_transpose_nchw( + batch, + in_channel, + in_size, + num_filter, + kernel, + stride, + padding, + output_padding, + groups, +): + print( + "Workload: (%d, %d, %s, %d, %s, %s, %s, %s, %d)" + % (batch, in_channel, in_size, num_filter, kernel, stride, padding, output_padding, groups) + ) + + in_height, in_width = in_size + kernel_height, kernel_width = kernel + + A = te.placeholder((batch, in_channel, in_height, in_width), name="A") + W = te.placeholder((in_channel, num_filter // groups, kernel_height, kernel_width), name="W") + bias = te.placeholder((num_filter, 1, 1), name="bias") + + a_shape = get_const_tuple(A.shape) + w_shape = get_const_tuple(W.shape) + bias_shape = get_const_tuple(bias.shape) + dtype = A.dtype + + @memoize("topi.tests.test_topi_group_conv2d_transpose.verify_group_conv2d_transpose_nchw") + def get_ref_data(): + a_np = np.random.uniform(size=a_shape).astype(dtype) + w_np = np.random.uniform(size=w_shape).astype(dtype) + b_np = np.random.uniform(size=bias_shape).astype(dtype) + c_np = tvm.topi.testing.conv2d_transpose_nchw_python( + a_np, w_np, stride, padding, output_padding, groups + ).astype(dtype) + + return a_np, w_np, b_np, c_np + + a_np, w_np, b_np, c_np = get_ref_data() + + def check_target(target): + dev = tvm.device(target, 0) + if not tvm.testing.device_enabled(target): + print("Skip because %s is not enabled" % target) + return + + print("Running on target: %s" % target) + with tvm.target.Target(target): + fcompute, fschedule = tvm.topi.testing.dispatch(target, _group_conv2d_nchw_implement) + C = fcompute(A, W, stride, padding, dtype, output_padding, groups) + s = fschedule([C]) + + a = tvm.nd.array(a_np, dev) + w = tvm.nd.array(w_np, dev) + c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), dev) + func = tvm.build( + s, + [A, W, C], + target, + name="group_conv2d_transpose_%d_%d_%s_%d_%s_%s_%s_%s_%d" + % ( + batch, + in_channel, + in_size, + num_filter, + kernel, + stride, + padding, + output_padding, + groups, + ), + ) + func(a, w, c) + c = c.numpy() + for measurement, reference in zip(c, c_np): + tvm.testing.assert_allclose(measurement, reference, rtol=1e-5) + + for target in ["llvm"]: + check_target(target) + + +@tvm.testing.uses_gpu +def test_group_conv2d_transpose_nchw(): + verify_group_conv2d_transpose_nchw(1, 1, (224, 224), 1, (1, 1), (1, 1), (0, 0, 0, 0), (0, 0), 1) + verify_group_conv2d_transpose_nchw( + 1, 3, (224, 224), 32, (3, 3), (1, 1), (0, 0, 0, 0), (0, 0), 1 + ) + verify_group_conv2d_transpose_nchw( + 1, 3, (224, 224), 32, (3, 3), (3, 3), (0, 0, 0, 0), (0, 0), 1 + ) + verify_group_conv2d_transpose_nchw( + 1, 3, (224, 224), 32, (3, 3), (1, 1), (0, 0, 0, 0), (0, 0), 1 + ) + verify_group_conv2d_transpose_nchw( + 1, 3, (224, 224), 32, (3, 3), (2, 2), (1, 1, 1, 1), (0, 0), 1 + ) + verify_group_conv2d_transpose_nchw(1, 4, (32, 32), 4, (5, 5), (1, 1), (0, 0, 0, 0), (0, 0), 2) + verify_group_conv2d_transpose_nchw(1, 9, (32, 32), 9, (5, 5), (1, 1), (0, 0, 0, 0), (0, 0), 3) + 
verify_group_conv2d_transpose_nchw(1, 4, (32, 32), 16, (5, 5), (2, 2), (1, 1, 1, 1), (0, 0), 4) + verify_group_conv2d_transpose_nchw( + 1, 32, (8192, 1), 8, (31, 1), (2, 1), (14, 0, 15, 0), (0, 0), 2 + ) + verify_group_conv2d_transpose_nchw( + 1, 512, (8, 1), 256, (31, 1), (2, 1), (14, 0, 15, 0), (0, 0), 16 + ) + verify_group_conv2d_transpose_nchw( + 1, 512, (8, 1), 256, (31, 1), (2, 1), (14, 0, 15, 0), (1, 0), 16 + ) + verify_group_conv2d_transpose_nchw( + 1, 64, (64, 64), 64, (4, 4), (1, 1), (0, 0, 0, 0), (0, 0), 64 + ) + verify_group_conv2d_transpose_nchw( + 1, 128, (32, 32), 128, (4, 4), (1, 1), (0, 0, 0, 0), (0, 0), 128 + ) + verify_group_conv2d_transpose_nchw( + 1, 256, (16, 16), 256, (4, 4), (1, 1), (0, 0, 0, 0), (0, 0), 256 + ) + + +if __name__ == "__main__": + test_group_conv2d_transpose_nchw() From 61db0d3f65b26f6b5466fff1a74ab242c02b80ea Mon Sep 17 00:00:00 2001 From: ligeng Date: Wed, 10 Nov 2021 19:57:00 +0000 Subject: [PATCH 19/27] fix lint --- src/relay/op/nn/convolution.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/relay/op/nn/convolution.h b/src/relay/op/nn/convolution.h index 70f480e8d8cf..c0cd6c5637a0 100644 --- a/src/relay/op/nn/convolution.h +++ b/src/relay/op/nn/convolution.h @@ -1044,7 +1044,7 @@ bool Conv2DTransposeRel(const Array& types, int num_inputs, const Attrs& a if (data == nullptr) return false; static const Layout kNCHW("NCHW"); - static const Layout kOIHW("OIHW"); // FIXME: weight layout should be IOHW as discussed in https://github.com/apache/tvm/pull/9336 + static const Layout kOIHW("OIHW"); // FIXME: weight layout should be IOHW as discussed in https://github.com/apache/tvm/pull/9336 const Conv2DTransposeAttrs* param = attrs.as(); ICHECK(param != nullptr); From 7af455c60dc621ef9bcd41b52b1f6d914593f8b2 Mon Sep 17 00:00:00 2001 From: ligeng Date: Thu, 11 Nov 2021 08:22:25 +0000 Subject: [PATCH 20/27] fix c++ lint --- src/relay/op/nn/convolution.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/relay/op/nn/convolution.h b/src/relay/op/nn/convolution.h index c0cd6c5637a0..4eeab316c3c8 100644 --- a/src/relay/op/nn/convolution.h +++ b/src/relay/op/nn/convolution.h @@ -1044,7 +1044,8 @@ bool Conv2DTransposeRel(const Array& types, int num_inputs, const Attrs& a if (data == nullptr) return false; static const Layout kNCHW("NCHW"); - static const Layout kOIHW("OIHW"); // FIXME: weight layout should be IOHW as discussed in https://github.com/apache/tvm/pull/9336 + // FIXME: weight layout should be IOHW as discussed in https://github.com/apache/tvm/pull/9336 + static const Layout kOIHW("OIHW"); const Conv2DTransposeAttrs* param = attrs.as(); ICHECK(param != nullptr); From 6bc2ced4eab90a193da877b0f6addcbbc4fde226 Mon Sep 17 00:00:00 2001 From: ligeng Date: Thu, 11 Nov 2021 09:06:47 +0000 Subject: [PATCH 21/27] fix lint --- src/relay/op/nn/convolution.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/relay/op/nn/convolution.h b/src/relay/op/nn/convolution.h index 4eeab316c3c8..52958c2824b2 100644 --- a/src/relay/op/nn/convolution.h +++ b/src/relay/op/nn/convolution.h @@ -1044,8 +1044,8 @@ bool Conv2DTransposeRel(const Array& types, int num_inputs, const Attrs& a if (data == nullptr) return false; static const Layout kNCHW("NCHW"); - // FIXME: weight layout should be IOHW as discussed in https://github.com/apache/tvm/pull/9336 - static const Layout kOIHW("OIHW"); + // FIXME: weight layout should be IOHW. 
+ static const Layout kOIHW("OIHW"); const Conv2DTransposeAttrs* param = attrs.as(); ICHECK(param != nullptr); @@ -1110,10 +1110,9 @@ bool Conv2DTransposeRel(const Array& types, int num_inputs, const Attrs& a << " out_channels=" << param->channels << " weight.shape=" << Array(wshape); } if (!dshape_nchw[1].as() && !wshape[0].as()) { - ICHECK(reporter->AssertEQ(dshape_nchw[1], wshape[0])) + ICHECK(reporter->AssertEQ(dshape_nchw[1], wshape[0])) << "Conv2DTransposed: shape of weight is inconsistent with in_channels." - << " data.shape= " << Array(dshape_nchw) - << " groups= " << param->groups + << " data.shape= " << Array(dshape_nchw) << " groups= " << param->groups << " weight.shape= " << Array(wshape); } channels = wshape[1]; From 3d7ce973a0f798f4213542fe61c9613971b30381 Mon Sep 17 00:00:00 2001 From: ligeng Date: Thu, 11 Nov 2021 11:53:08 +0000 Subject: [PATCH 22/27] fix python lint --- python/tvm/topi/nn/conv2d_transpose.py | 31 ++++++++++++++------------ 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/python/tvm/topi/nn/conv2d_transpose.py b/python/tvm/topi/nn/conv2d_transpose.py index 8173b6c002ef..997ef6da5451 100644 --- a/python/tvm/topi/nn/conv2d_transpose.py +++ b/python/tvm/topi/nn/conv2d_transpose.py @@ -26,14 +26,17 @@ from .utils import get_pad_tuple from ..utils import simplify + def _ntuple(n): def parse(x): if isinstance(x, collections.abc.Iterable): assert len(x) == n, f"Input can only have {n} elements, but got {len(x)} instead: {x}." return x return tuple(repeat(x, n)) + return parse + _single = _ntuple(1) _pair = _ntuple(2) _triple = _ntuple(3) @@ -169,25 +172,28 @@ def group_conv2d_transpose_nchw(data, kernel, stride, padding, out_dtype, output """ if groups == 1: return conv2d_transpose_nchw(data, kernel, stride, padding, out_dtype, output_padding) - - # some pre-processing and prelimnary checks + + # some pre-processing and prelimnary checks if out_dtype is None: out_dtype = data.dtype batch, in_channels, in_h, in_w = data.shape _, out_c, filter_h, filter_w = kernel.shape - assert in_channels % groups == 0, f"input channels {in_channels} must divide group size {groups}" + assert ( + in_channels % groups == 0 + ), f"input channels {in_channels} must divide group size {groups}" # assert out_c % groups == 0, f"output channels {in_c} must divide group size {groups}" strides = _pair(stride) # padding = _pair(padding) # output_padding = _pair(output_padding) # dilation = _pair(dilation) - + stride_h, stride_w = strides opad_h, opad_w = output_padding - assert opad_h < stride_h and opad_w < stride_w, \ - f"[{output_padding}] opad_h:{opad_h} < stride_h:{stride_h} and opad_w:{opad_w} < stride_w:{stride_w} does not satisfy." + assert ( + opad_h < stride_h and opad_w < stride_w + ), f"[{output_padding}] opad_h:{opad_h} < stride_h:{stride_h} and opad_w:{opad_w} < stride_w:{stride_w} does not satisfy." 
# dilate data data_dilate = dilate(data, [1, 1, stride_h, stride_w], name="data_dilate") # pad data @@ -224,16 +230,13 @@ def group_conv2d_transpose_nchw(data, kernel, stride, padding, out_dtype, output (batch, out_channels, out_h, out_w), lambda b, c, h, w: te.sum( data_pad[ - b, - c // (out_channels // groups) * (in_channels // groups) + dc, - h + dh, - w + dw + b, c // (out_channels // groups) * (in_channels // groups) + dc, h + dh, w + dw ].astype(out_dtype) * kernel_transform[ - c % (out_channels // groups), - c // (out_channels // groups) * (in_channels // groups) + dc, - dh, - dw + c % (out_channels // groups), + c // (out_channels // groups) * (in_channels // groups) + dc, + dh, + dw, ].astype(out_dtype), axis=[dc, dh, dw], ), From a30b5d5f8724bb5f1d2c6d3100447ad163674b90 Mon Sep 17 00:00:00 2001 From: ligeng Date: Thu, 11 Nov 2021 14:55:56 +0000 Subject: [PATCH 23/27] remove comments and reformat --- python/tvm/relay/op/strategy/generic.py | 8 +------- python/tvm/relay/op/strategy/x86.py | 14 ++------------ 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index 1f7a7554b10e..e2c9dbea28cd 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -475,7 +475,6 @@ def conv2d_transpose_strategy(attrs, inputs, out_type, target): groups = attrs.groups assert layout == "NCHW", "only support nchw for now" assert dilation == (1, 1), "not support dilate now" - # assert groups == 1, "only support groups == 1 for now" strategy = _op.OpStrategy() if groups == 1: strategy.add_implementation( @@ -1189,12 +1188,7 @@ def _compute_nms(attrs, inputs, out_type): score_threshold = inputs[4] output_format = attrs.output_format return topi_compute( - inputs[0], - inputs[1], - max_output_size, - iou_threshold, - score_threshold, - output_format, + inputs[0], inputs[1], max_output_size, iou_threshold, score_threshold, output_format, ) return _compute_nms diff --git a/python/tvm/relay/op/strategy/x86.py b/python/tvm/relay/op/strategy/x86.py index b25cf35b2000..6e86982b67ae 100644 --- a/python/tvm/relay/op/strategy/x86.py +++ b/python/tvm/relay/op/strategy/x86.py @@ -281,13 +281,7 @@ def conv2d_transpose_strategy_cpu(attrs, inputs, out_type, target): groups = attrs.groups assert layout == "NCHW", "only support nchw for now" assert dilation == (1, 1), "not support dilate now" - # assert groups == 1, "only support groups == 1 for now" strategy = _op.OpStrategy() - # strategy.add_implementation( - # wrap_compute_conv2d_transpose(topi.x86.conv2d_transpose_nchw), - # wrap_topi_schedule(topi.x86.schedule_conv2d_transpose_nchw), - # name="conv2d_transpose_nchw.x86", - # ) if groups == 1: strategy.add_implementation( wrap_compute_conv2d_transpose(topi.x86.conv2d_transpose_nchw), @@ -331,9 +325,7 @@ def conv3d_strategy_cpu(attrs, inputs, out_type, target): # or packed layouts. if layout == "NCDHW": strategy.add_implementation( - wrap_compute_conv3d(topi.nn.conv3d_ncdhw), - naive_schedule, - name="conv3d_ncdhw.x86", + wrap_compute_conv3d(topi.nn.conv3d_ncdhw), naive_schedule, name="conv3d_ncdhw.x86", ) elif layout == "NDHWC": strategy.add_implementation( @@ -452,9 +444,7 @@ def matmul_strategy_cpu(attrs, inputs, out_type, target): "Recommend to use cblas/mkl/mkldnn for better performance." 
) strategy.add_implementation( - wrap_compute_matmul(topi.nn.matmul), - naive_schedule, - name="matmul.generic", + wrap_compute_matmul(topi.nn.matmul), naive_schedule, name="matmul.generic", ) return strategy From 56fa3bcb71fc7618e3b8b5f00e6a733e81a8ccdd Mon Sep 17 00:00:00 2001 From: ligeng Date: Thu, 11 Nov 2021 15:09:18 +0000 Subject: [PATCH 24/27] lint file --- python/tvm/relay/op/strategy/x86.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/op/strategy/x86.py b/python/tvm/relay/op/strategy/x86.py index 6e86982b67ae..a421b120fab4 100644 --- a/python/tvm/relay/op/strategy/x86.py +++ b/python/tvm/relay/op/strategy/x86.py @@ -325,7 +325,9 @@ def conv3d_strategy_cpu(attrs, inputs, out_type, target): # or packed layouts. if layout == "NCDHW": strategy.add_implementation( - wrap_compute_conv3d(topi.nn.conv3d_ncdhw), naive_schedule, name="conv3d_ncdhw.x86", + wrap_compute_conv3d(topi.nn.conv3d_ncdhw), + naive_schedule, + name="conv3d_ncdhw.x86", ) elif layout == "NDHWC": strategy.add_implementation( @@ -444,7 +446,9 @@ def matmul_strategy_cpu(attrs, inputs, out_type, target): "Recommend to use cblas/mkl/mkldnn for better performance." ) strategy.add_implementation( - wrap_compute_matmul(topi.nn.matmul), naive_schedule, name="matmul.generic", + wrap_compute_matmul(topi.nn.matmul), + naive_schedule, + name="matmul.generic", ) return strategy From 0a1a437e1c50afd9186f4e94752094b433013c5a Mon Sep 17 00:00:00 2001 From: ligeng Date: Thu, 11 Nov 2021 15:14:37 +0000 Subject: [PATCH 25/27] lint code --- python/tvm/relay/op/strategy/generic.py | 7 ++++++- python/tvm/topi/nn/conv2d_transpose.py | 16 ++++++++-------- .../tvm/topi/testing/conv2d_transpose_python.py | 14 +++++++------- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index e2c9dbea28cd..ab12be16e17e 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -1188,7 +1188,12 @@ def _compute_nms(attrs, inputs, out_type): score_threshold = inputs[4] output_format = attrs.output_format return topi_compute( - inputs[0], inputs[1], max_output_size, iou_threshold, score_threshold, output_format, + inputs[0], + inputs[1], + max_output_size, + iou_threshold, + score_threshold, + output_format, ) return _compute_nms diff --git a/python/tvm/topi/nn/conv2d_transpose.py b/python/tvm/topi/nn/conv2d_transpose.py index 997ef6da5451..3f3723f43592 100644 --- a/python/tvm/topi/nn/conv2d_transpose.py +++ b/python/tvm/topi/nn/conv2d_transpose.py @@ -141,30 +141,30 @@ def group_conv2d_transpose_nchw(data, kernel, stride, padding, out_dtype, output ---------- data : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - + kernel : tvm.te.Tensor 4-D with shape [in_channel, out_channel // groups, filter_height, filter_width] - + stride : int or a list/tuple of two ints Stride size, or [stride_height, stride_width] - + padding : int or a list/tuple of 2 or 4 ints padding size, or [pad_height, pad_width] for 2 ints, or [pad_top, pad_left, pad_bottom, pad_right] for 4 ints - + out_dtype : str The output data type. This is used for mixed precision. - + output_padding : tuple of ints Used to get the right output shape for gradients - + groups : int number of groups - + out_dtype : str The output type. This is used for mixed precision. 
- + Returns ------- Output : tvm.te.Tensor diff --git a/python/tvm/topi/testing/conv2d_transpose_python.py b/python/tvm/topi/testing/conv2d_transpose_python.py index 5a0f1b235eb0..a38d8bc9f031 100644 --- a/python/tvm/topi/testing/conv2d_transpose_python.py +++ b/python/tvm/topi/testing/conv2d_transpose_python.py @@ -145,27 +145,27 @@ def conv2d_transpose_nhwc_python( def conv2d_transpose_nchw_python(a_np, w_np, stride, padding, output_padding, groups=1): """Convolution operator in NCHW layout. - + Parameters ---------- a_np : numpy.ndarray 4-D with shape [batch, in_channel, in_height, in_width] - + w_np : numpy.ndarray 4-D with shape [in_channel, num_filter // groups, filter_height, filter_width] - + stride : int or a list/tuple of two ints Stride size, or [stride_height, stride_width] - + padding : int or str Padding size, or ['VALID', 'SAME'] - + output_padding : int or a list/tuple of two ints Use to disambiguate the output shape. - + groups : int Number of groups - + Returns ------- b_np : np.ndarray From 562fdabad786be751bd8f471fde9167e10d6a2eb Mon Sep 17 00:00:00 2001 From: ligeng Date: Thu, 11 Nov 2021 15:33:56 +0000 Subject: [PATCH 26/27] fix lint --- python/tvm/topi/nn/conv2d_transpose.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/tvm/topi/nn/conv2d_transpose.py b/python/tvm/topi/nn/conv2d_transpose.py index 3f3723f43592..e3c6c94eb290 100644 --- a/python/tvm/topi/nn/conv2d_transpose.py +++ b/python/tvm/topi/nn/conv2d_transpose.py @@ -193,7 +193,8 @@ def group_conv2d_transpose_nchw(data, kernel, stride, padding, out_dtype, output opad_h, opad_w = output_padding assert ( opad_h < stride_h and opad_w < stride_w - ), f"[{output_padding}] opad_h:{opad_h} < stride_h:{stride_h} and opad_w:{opad_w} < stride_w:{stride_w} does not satisfy." + ), f"[{output_padding}] opad_h:{opad_h} < stride_h:{stride_h} \ + and opad_w:{opad_w} < stride_w:{stride_w} does not satisfy." # dilate data data_dilate = dilate(data, [1, 1, stride_h, stride_w], name="data_dilate") # pad data From 3e0db602f4be6bf6ab8f4b7329c1dfbc519fb9e7 Mon Sep 17 00:00:00 2001 From: ligeng Date: Thu, 11 Nov 2021 16:36:42 +0000 Subject: [PATCH 27/27] update logging information in convolution.h --- src/relay/op/nn/convolution.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/relay/op/nn/convolution.h b/src/relay/op/nn/convolution.h index 52958c2824b2..5c67a047487f 100644 --- a/src/relay/op/nn/convolution.h +++ b/src/relay/op/nn/convolution.h @@ -1107,7 +1107,8 @@ bool Conv2DTransposeRel(const Array& types, int num_inputs, const Attrs& a ICHECK(reporter->AssertEQ(indexdiv(param->channels, param->groups), wshape[1])) << "Conv2DTransposed: shape of weight is inconsistent with out_channels, " << " out_channels // groups != weight.shape[1] " - << " out_channels=" << param->channels << " weight.shape=" << Array(wshape); + << " out_channels=" << param->channels << " groups=" << param->groups + << " weight.shape=" << Array(wshape); } if (!dshape_nchw[1].as() && !wshape[0].as()) { ICHECK(reporter->AssertEQ(dshape_nchw[1], wshape[0]))
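
Note on the semantics exercised by the grouped test and compute above: a grouped transposed convolution is equivalent to splitting the input channels (and the leading in_channel axis of the weight) into groups equal blocks, running an ordinary transposed convolution on each block independently, and concatenating the per-group results along the output-channel axis, which is exactly what the c // (out_channels // groups) * (in_channels // groups) indexing in the topi compute expresses. The sketch below illustrates that equivalence in plain NumPy. It is not part of these patches or of the TVM API: the function names are made up for illustration, padding is simplified to a symmetric [pad_h, pad_w] pair (the topi op and its python reference also accept 4-tuple padding), and the weight is assumed to be in the [in_channel, out_channel // groups, kernel_h, kernel_w] layout used by conv2d_transpose.

import numpy as np


def conv2d_transpose_nchw_ref(a, w, stride, padding, output_padding):
    # Single-group transposed convolution, scatter formulation.
    # a: [batch, in_c, in_h, in_w]; w: [in_c, out_c, k_h, k_w] (IOHW).
    # Symmetric [pad_h, pad_w] padding is assumed for brevity.
    batch, in_c, in_h, in_w = a.shape
    _, out_c, k_h, k_w = w.shape
    s_h, s_w = stride
    p_h, p_w = padding
    opad_h, opad_w = output_padding
    out_h = (in_h - 1) * s_h - 2 * p_h + k_h + opad_h
    out_w = (in_w - 1) * s_w - 2 * p_w + k_w + opad_w
    out = np.zeros((batch, out_c, out_h, out_w), dtype=a.dtype)
    for n in range(batch):
        for ci in range(in_c):
            for co in range(out_c):
                for y in range(in_h):
                    for x in range(in_w):
                        for ky in range(k_h):
                            for kx in range(k_w):
                                # Each input pixel scatters a kernel-sized patch into the
                                # output; positions that fall into the cropped padding
                                # region are dropped by the bounds check.
                                oy = y * s_h + ky - p_h
                                ox = x * s_w + kx - p_w
                                if 0 <= oy < out_h and 0 <= ox < out_w:
                                    out[n, co, oy, ox] += a[n, ci, y, x] * w[ci, co, ky, kx]
    return out


def group_conv2d_transpose_nchw_ref(a, w, stride, padding, output_padding, groups):
    # Grouped transposed convolution as `groups` independent transposed convolutions:
    # slice the input channels and the leading weight axis into equal blocks and
    # concatenate the per-group outputs along the channel axis.
    in_c = a.shape[1]
    assert in_c % groups == 0, "in_channels must be divisible by groups"
    ic_per_group = in_c // groups
    outs = []
    for g in range(groups):
        sl = slice(g * ic_per_group, (g + 1) * ic_per_group)
        outs.append(conv2d_transpose_nchw_ref(a[:, sl], w[sl], stride, padding, output_padding))
    return np.concatenate(outs, axis=1)


For example, with a of shape (1, 4, 32, 32), w of shape (4, 4, 5, 5), stride (2, 2), padding (1, 1), output_padding (1, 1) and groups=2, the result has shape (1, 8, 66, 66), since out_h = (32 - 1) * 2 - 2 * 1 + 5 + 1 = 66 and the 2 groups each contribute 4 output channels. That is the same per-group decomposition the verify_group_conv2d_transpose_nchw test above relies on when it compares the topi compute against the python reference.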