From bfbcf42e2452919539a8f6d26f9a6d52053a82d9 Mon Sep 17 00:00:00 2001
From: wangyucheng
Date: Wed, 9 Jun 2021 20:08:10 +0800
Subject: [PATCH 1/4] add conv2d leg

---
 python/tvm/topi/cuda/conv2d_alter_op.py      | 153 +++++++++++++++---
 python/tvm/topi/cuda/tensorcore_alter_op.py  |  10 +-
 .../relay/test_pass_legalize_tensorcore.py   | 127 ++++++++++++---
 3 files changed, 246 insertions(+), 44 deletions(-)

diff --git a/python/tvm/topi/cuda/conv2d_alter_op.py b/python/tvm/topi/cuda/conv2d_alter_op.py
index 067f27262b06..331cc0aad403 100644
--- a/python/tvm/topi/cuda/conv2d_alter_op.py
+++ b/python/tvm/topi/cuda/conv2d_alter_op.py
@@ -270,6 +270,60 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
     return None
 
 
+def _pad_conv2d_HWNC(db, di, do, data, kernel, out_channel, new_attrs, output_tensor):
+    # Pad batch size
+    if db != 0:
+        data = relay.nn.pad(data, pad_width=((0, 0), (0, 0), (0, db), (0, 0)))
+
+    # Pad input channel
+    if di != 0:
+        data = relay.nn.pad(data, pad_width=((0, 0), (0, 0), (0, 0), (0, di)))
+        kernel = relay.nn.pad(kernel, pad_width=((0, 0), (0, 0), (0, 0), (0, di)))
+
+    # Pad output channel
+    if do != 0:
+        kernel = relay.nn.pad(kernel, pad_width=((0, 0), (0, 0), (0, do), (0, 0)))
+
+    if do != 0:
+        new_out_channel = out_channel + do
+        new_attrs["channels"] = new_out_channel
+
+    out = relay.nn.conv2d(data, kernel, **new_attrs)
+
+    if db != 0 or do != 0:
+        original_out_shape = [x.value for x in output_tensor.shape]
+        out = relay.strided_slice(out, begin=[0, 0, 0, 0], end=original_out_shape)
+
+    return out
+
+
+def _pad_conv2d_NHWC(db, di, do, data, kernel, out_channel, new_attrs, output_tensor):
+    # Pad batch size
+    if db != 0:
+        data = relay.nn.pad(data, pad_width=((0, db), (0, 0), (0, 0), (0, 0)))
+
+    # Pad input channel
+    if di != 0:
+        data = relay.nn.pad(data, pad_width=((0, 0), (0, 0), (0, 0), (0, di)))
+        kernel = relay.nn.pad(kernel, pad_width=((0, 0), (0, 0), (0, di), (0, 0)))
+
+    # Pad output channel
+    if do != 0:
+        kernel = relay.nn.pad(kernel, pad_width=((0, 0), (0, 0), (0, 0), (0, do)))
+
+    if do != 0:
+        new_out_channel = out_channel + do
+        new_attrs["channels"] = new_out_channel
+
+    out = relay.nn.conv2d(data, kernel, **new_attrs)
+
+    if db != 0 or do != 0:
+        original_out_shape = [x.value for x in output_tensor.shape]
+        out = relay.strided_slice(out, begin=[0, 0, 0, 0], end=original_out_shape)
+
+    return out
+
+
 @conv2d_legalize.register("cuda")
 def _conv2d_legalize(attrs, inputs, arg_types):
     """Legalizes Conv2D op.
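Note: the two helpers above differ only in which pad_width slots carry the batch (db), input-channel (di), and output-channel (do) padding; the conv2d call, the channels update, and the strided_slice that restores the original output shape are identical. Read off the pad_width tuples, the implied axis mapping is:

# Axis mapping implied by the pad_width tuples in the two helpers:
#   NHWC data   (N, H, W, C): db -> axis 0, di -> axis 3
#   HWIO kernel (H, W, I, O): di -> axis 2, do -> axis 3
#   HWNC data   (H, W, N, C): db -> axis 2, di -> axis 3
#   HWOI kernel (H, W, O, I): do -> axis 2, di -> axis 3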
@@ -347,7 +401,47 @@ def _conv2d_legalize(attrs, inputs, arg_types):
             else:
                 out = relay.nn.conv2d(data, kernel, **new_attrs)
             return out
-    elif data_dtype in ["float16"]:  # todo: support int8/int4
+        elif data_layout == "NHWC" and kernel_layout == "HWIO":
+            batch = data_tensor.shape[0].value
+            in_channel = data_tensor.shape[3].value
+            out_channel = kernel_tensor.shape[3].value
+
+            if (
+                (batch % 8 == 0 and in_channel % 16 == 0 and out_channel % 32 == 0)
+                or (batch % 16 == 0 and in_channel % 16 == 0 and out_channel % 16 == 0)
+                or (batch % 32 == 0 and in_channel % 16 == 0 and out_channel % 8 == 0)
+            ):
+                # no need to pad
+                return None
+
+            candidates = [(16, 16, 16), (32, 16, 8), (8, 16, 32)]
+            (db, di, do), extra_flops = pad_to_tensorcore(batch, in_channel, out_channel, candidates)
+
+            if extra_flops > 2:
+                logger.info("conv2d pad_to_tensorcore skipped, extra_flops %s", extra_flops)
+                return None
+
+            logger.info("conv2d pad_to_tensorcore, extra_flops %s", extra_flops)
+
+            return _pad_conv2d_NHWC(db, di, do, data, kernel, out_channel, new_attrs, output_tensor)
+        elif data_layout == "HWNC" and kernel_layout == "HWOI":
+            batch = data_tensor.shape[2].value
+            in_channel = data_tensor.shape[3].value
+            out_channel = kernel_tensor.shape[2].value
+
+            if batch % 8 == 0 and in_channel % 16 == 0 and out_channel % 32 == 0:
+                return None
+
+            candidates = [(8, 16, 32)]
+            (db, di, do), extra_flops = pad_to_tensorcore(batch, in_channel, out_channel, candidates)
+
+            if extra_flops > 2:
+                logger.info("conv2d pad_to_tensorcore skipped, extra_flops %s", extra_flops)
+                return None
+            logger.info("conv2d pad_to_tensorcore, extra_flops %s", extra_flops)
+
+            return _pad_conv2d_HWNC(db, di, do, data, kernel, out_channel, new_attrs, output_tensor)
+    elif data_dtype in ["float16"]:
         if data_layout == "NHWC" and kernel_layout == "HWIO":
             batch = data_tensor.shape[0].value
             in_channel = data_tensor.shape[3].value
@@ -361,7 +455,8 @@ def _conv2d_legalize(attrs, inputs, arg_types):
                 # no need to pad
                 return None
 
-            (db, di, do), extra_flops = pad_to_tensorcore(batch, in_channel, out_channel)
+            candidates = [(16, 16, 16), (32, 16, 8), (8, 16, 32)]
+            (db, di, do), extra_flops = pad_to_tensorcore(batch, in_channel, out_channel, candidates)
 
             if extra_flops > 2:
                 logger.info("conv2d pad_to_tensorcore skipped, extra_flops %s", extra_flops)
@@ -369,28 +464,46 @@ def _conv2d_legalize(attrs, inputs, arg_types):
                 return None
 
             logger.info("conv2d pad_to_tensorcore, extra_flops %s", extra_flops)
 
-            # Pad batch size
-            if db != 0:
-                data = relay.nn.pad(data, pad_width=((0, db), (0, 0), (0, 0), (0, 0)))
-
-            # Pad input channel
-            if di != 0:
-                data = relay.nn.pad(data, pad_width=((0, 0), (0, 0), (0, 0), (0, di)))
-                kernel = relay.nn.pad(kernel, pad_width=((0, 0), (0, 0), (0, di), (0, 0)))
-
-            # Pad output channel
-            if do != 0:
-                kernel = relay.nn.pad(kernel, pad_width=((0, 0), (0, 0), (0, 0), (0, do)))
-
-            if do != 0:
-                new_out_channel = out_channel + do
-                new_attrs["channels"] = new_out_channel
-
-            out = relay.nn.conv2d(data, kernel, **new_attrs)
-
-            if db != 0 or do != 0:
-                original_out_shape = [x.value for x in output_tensor.shape]
-                out = relay.strided_slice(out, begin=[0, 0, 0, 0], end=original_out_shape)
-
-            return out
+            return _pad_conv2d_NHWC(db, di, do, data, kernel, out_channel, new_attrs, output_tensor)
+    elif data_dtype in ["int4", "uint4"]:
+        if data_layout == "NHWC" and kernel_layout == "HWIO":
+            batch = data_tensor.shape[0].value
+            in_channel = data_tensor.shape[3].value
+            out_channel = kernel_tensor.shape[3].value
+
+            if (
+                (batch % 8 == 0 and in_channel % 16 == 0 and out_channel % 32 == 0)
+                or (batch % 16 == 0 and in_channel % 16 == 0 and out_channel % 16 == 0)
+                or (batch % 32 == 0 and in_channel % 16 == 0 and out_channel % 8 == 0)
+            ):
+                # no need to pad
+                return None
+
+            candidates = [(16, 16, 16), (32, 16, 8), (8, 16, 32)]
+            (db, di, do), extra_flops = pad_to_tensorcore(batch, in_channel, out_channel, candidates)
+
+            if extra_flops > 2:
+                logger.info("conv2d pad_to_tensorcore skipped, extra_flops %s", extra_flops)
+                return None
+
+            logger.info("conv2d pad_to_tensorcore, extra_flops %s", extra_flops)
+
+            return _pad_conv2d_NHWC(db, di, do, data, kernel, out_channel, new_attrs, output_tensor)
+        elif data_layout == "HWNC" and kernel_layout == "HWOI":
+            batch = data_tensor.shape[2].value
+            in_channel = data_tensor.shape[3].value
+            out_channel = kernel_tensor.shape[2].value
+
+            if batch % 8 == 0 and in_channel % 32 == 0 and out_channel % 8 == 0:
+                return None
+
+            candidates = [(8, 32, 8)]
+            (db, di, do), extra_flops = pad_to_tensorcore(batch, in_channel, out_channel, candidates)
+
+            if extra_flops > 2:
+                logger.info("conv2d pad_to_tensorcore skipped, extra_flops %s", extra_flops)
+                return None
+            logger.info("conv2d pad_to_tensorcore, extra_flops %s", extra_flops)
+
+            return _pad_conv2d_HWNC(db, di, do, data, kernel, out_channel, new_attrs, output_tensor)
 
     return None
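All of the new branches share one recipe: return None when the shape already fits a tensorcore tile, otherwise ask pad_to_tensorcore for the cheapest (db, di, do), skip the rewrite when the extra_flops ratio exceeds 2, and otherwise pad, convolve, and slice the padding back off. A minimal sketch of the Relay graph the NHWC path should produce for one of the new test cases (shapes taken from the tests in this series; variable names are illustrative):

import tvm
from tvm import relay

# data (7, 16, 16, 64) x kernel (3, 3, 64, 64): batch 7 is padded up to 8,
# matching the (8, 16, 32) candidate, then the output is sliced back.
x = relay.var("x", shape=(7, 16, 16, 64), dtype="int8")
w = relay.var("w", shape=(3, 3, 64, 64), dtype="int8")
x_pad = relay.nn.pad(x, pad_width=((0, 1), (0, 0), (0, 0), (0, 0)))  # pad batch axis
y = relay.nn.conv2d(
    x_pad, w, channels=64, kernel_size=(3, 3), padding=(1, 1),
    data_layout="NHWC", kernel_layout="HWIO",
)
y = relay.strided_slice(y, begin=[0, 0, 0, 0], end=[7, 16, 16, 64])  # drop the pad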
diff --git a/python/tvm/topi/cuda/tensorcore_alter_op.py b/python/tvm/topi/cuda/tensorcore_alter_op.py
index aec7acbfde56..eb7c71ddf1c9 100644
--- a/python/tvm/topi/cuda/tensorcore_alter_op.py
+++ b/python/tvm/topi/cuda/tensorcore_alter_op.py
@@ -71,7 +71,8 @@ def _batch_matmul_legalize(attrs, inputs, arg_types):
             # no need to pad
             return None
 
-        (dm, dk, dn), extra_flops = pad_to_tensorcore(M, K, N)
+        candidates = [(16, 16, 16), (32, 16, 8), (8, 16, 32)]
+        (dm, dk, dn), extra_flops = pad_to_tensorcore(M, K, N, candidates)
 
         if extra_flops > 2:
             logger.info("batch_matmul pad_to_tensorcore skipped, extra_flops %s", extra_flops)
@@ -145,7 +146,8 @@ def _dense_legalize(attrs, inputs, arg_types):
             # no need to pad
             return None
 
-        (dm, dk, dn), extra_flops_ratio = pad_to_tensorcore(M, K, N)
+        candidates = [(16, 16, 16), (32, 16, 8), (8, 16, 32)]
+        (dm, dk, dn), extra_flops_ratio = pad_to_tensorcore(M, K, N, candidates)
 
         if extra_flops_ratio > 2:
             logger.info("dense pad_to_tensorcore skipped, extra_flops_ratio %s", extra_flops_ratio)
@@ -171,10 +173,8 @@ def _dense_legalize(attrs, inputs, arg_types):
     return None
 
 
-def pad_to_tensorcore(M, K, N):
+def pad_to_tensorcore(M, K, N, candidates):
     """pad shape to enable tensorcore"""
-    candidates = [(16, 16, 16), (32, 16, 8), (8, 16, 32)]
-
     flops = M * K * N
     extra_flops = math.inf
     best_pad = (0, 0, 0)
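pad_to_tensorcore now takes the candidate tile shapes from the caller instead of hard-coding them, which is what lets the int4 path pass (8, 32, 8) and the int8 HWNC path pass (8, 16, 32). The hunk above only shows the start of the function body; a sketch of the selection it feeds (hypothetical helper name, assuming the existing loop minimizes the relative flops cost of padding, consistent with the "extra_flops > 2" guards in the callers):

import math

def _pick_padding(M, K, N, candidates):
    # For each candidate tile (tm, tk, tn), round M/K/N up to multiples of
    # the tile and keep the padding with the smallest relative flops cost.
    flops = M * K * N
    best_pad, extra_flops = (0, 0, 0), math.inf
    for tm, tk, tn in candidates:
        dm, dk, dn = -M % tm, -K % tk, -N % tn
        cost = ((M + dm) * (K + dk) * (N + dn) - flops) / flops
        if cost < extra_flops:
            best_pad, extra_flops = (dm, dk, dn), cost
    return best_pad, extra_flops

# e.g. _pick_padding(7, 64, 64, [(16, 16, 16), (32, 16, 8), (8, 16, 32)])
# -> ((1, 0, 0), 1/7): padding batch 7 to 8 adds ~14% flops, well under
#    the > 2 cutoff used by the legalizations.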
diff --git a/tests/python/relay/test_pass_legalize_tensorcore.py b/tests/python/relay/test_pass_legalize_tensorcore.py
index f45e39047238..57d5865c3b33 100644
--- a/tests/python/relay/test_pass_legalize_tensorcore.py
+++ b/tests/python/relay/test_pass_legalize_tensorcore.py
@@ -36,18 +36,18 @@ def run_opt_pass(expr, passes):
 
 
 @tvm.testing.uses_gpu
-def test_legalize_conv2d():
-    """test legalize conv2d to enable tensorcore"""
+def test_legalize_conv2d_NHWC():
+    """test legalize NHWC conv2d to enable tensorcore"""
 
-    def _test_legalize_conv2d(data_shape, kernel_shape, pad_shape, do_pad=True):
+    def _test_legalize_conv2d(data_shape, kernel_shape, pad_shape, dtype, do_pad=True):
         out_channel = kernel_shape[3]
         out_shape = list(data_shape)
         out_shape[3] = out_channel
        db, di, do = pad_shape
 
         def before():
-            x = relay.var("x", shape=data_shape, dtype="float16")
-            weight = relay.var("weight", shape=kernel_shape, dtype="float16")
+            x = relay.var("x", shape=data_shape, dtype=dtype)
+            weight = relay.var("weight", shape=kernel_shape, dtype=dtype)
             y = relay.nn.conv2d(
                 x,
                 weight,
@@ -67,12 +67,12 @@ def legalize_conv2d(attrs, inputs, types):
         def expected():
             if not do_pad:
                 return before()
-            x = relay.var("x", shape=data_shape, dtype="float16")
+            x = relay.var("x", shape=data_shape, dtype=dtype)
             if db or di:
                 x_pad = relay.nn.pad(x, pad_width=((0, db), (0, 0), (0, 0), (0, di)))
             else:
                 x_pad = x
-            weight = relay.var("weight", shape=(kernel_shape), dtype="float16")
+            weight = relay.var("weight", shape=(kernel_shape), dtype=dtype)
             if di or do:
                 weight_pad = relay.nn.pad(weight, pad_width=((0, 0), (0, 0), (0, di), (0, do)))
             else:
@@ -99,20 +99,108 @@ def expected():
         b = run_opt_pass(expected(), transform.InferType())
         assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + "Expected = \n" + str(b)
 
+    for dtype in ["float16", "int8", "int4"]:
+        # conv2d pad batch
+        _test_legalize_conv2d((7, 16, 16, 64), (3, 3, 64, 64), (1, 0, 0), dtype)
+        _test_legalize_conv2d((3, 16, 16, 64), (3, 3, 64, 64), (5, 0, 0), dtype)
+        _test_legalize_conv2d((2, 16, 16, 64), (3, 3, 64, 64), (0, 0, 0), dtype, False)
+        # conv2d pad in_channel
+        _test_legalize_conv2d((8, 16, 16, 63), (3, 3, 63, 64), (0, 1, 0), dtype)
+        _test_legalize_conv2d((8, 16, 16, 33), (3, 3, 33, 64), (0, 15, 0), dtype)
+        _test_legalize_conv2d((8, 16, 16, 13), (3, 3, 13, 64), (0, 3, 0), dtype)
+        _test_legalize_conv2d((8, 16, 16, 1), (3, 3, 1, 64), (0, 0, 0), dtype, False)
+        # conv2d pad out_channel
+        _test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 63), (0, 0, 1), dtype)
+        _test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 33), (0, 0, 31), dtype)
+        _test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 1), (0, 0, 0), dtype, False)
+
+@tvm.testing.uses_gpu
+def test_legalize_conv2d_HWNC():
+    """test legalize HWNC conv2d to enable tensorcore"""
+
+    def _test_legalize_conv2d(data_shape, kernel_shape, pad_shape, dtype, do_pad=True):
+        out_channel = kernel_shape[2]
+        out_shape = list(data_shape)
+        out_shape[3] = out_channel
+        db, di, do = pad_shape
+
+        def before():
+            x = relay.var("x", shape=data_shape, dtype=dtype)
+            weight = relay.var("weight", shape=kernel_shape, dtype=dtype)
+            y = relay.nn.conv2d(
+                x,
+                weight,
+                channels=out_channel,
+                kernel_size=(3, 3),
+                padding=(1, 1),
+                data_layout="HWNC",
+                kernel_layout="HWOI",
+            )
+            y = relay.Function([x, weight], y)
+            return y
+
+        def legalize_conv2d(attrs, inputs, types):
+            with tvm.target.Target("cuda"):
+                return topi.nn.conv2d_legalize(attrs, inputs, types)
+
+        def expected():
+            if not do_pad:
+                return before()
+            x = relay.var("x", shape=data_shape, dtype=dtype)
+            if db or di:
+                x_pad = relay.nn.pad(x, pad_width=((0, 0), (0, 0), (0, db), (0, di)))
+            else:
+                x_pad = x
+            weight = relay.var("weight", shape=(kernel_shape), dtype=dtype)
+            if di or do:
+                weight_pad = relay.nn.pad(weight, pad_width=((0, 0), (0, 0), (0, do), (0, di)))
+            else:
+                weight_pad = weight
+            y_pad = relay.nn.conv2d(
+                x_pad,
+                weight=weight_pad,
+                channels=out_channel + do,
+                kernel_size=(3, 3),
+                padding=(1, 1),
+                data_layout="HWNC",
+                kernel_layout="HWOI",
+            )
+            if db or do:
+                y = relay.strided_slice(y_pad, begin=[0, 0, 0, 0], end=out_shape)
+            else:
+                y = y_pad
+            y = relay.Function([x, weight], y)
+            return y
+
+        with TempOpAttr("nn.conv2d", "FTVMLegalize", legalize_conv2d):
+            a = before()
+            a = run_opt_pass(a, transform.Legalize())
+            b = run_opt_pass(expected(), transform.InferType())
+            assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + "Expected = \n" + str(b)
+
+    # data_layout="HWNC",kernel_layout="HWOI"
     # conv2d pad batch
-    _test_legalize_conv2d((7, 16, 16, 64), (3, 3, 64, 64), (1, 0, 0))
-    _test_legalize_conv2d((3, 16, 16, 64), (3, 3, 64, 64), (5, 0, 0))
-    _test_legalize_conv2d((2, 16, 16, 64), (3, 3, 64, 64), (0, 0, 0), False)
+    _test_legalize_conv2d((16, 16, 7, 64), (3, 3, 64, 64), (1, 0, 0), "int8")
+    _test_legalize_conv2d((16, 16, 3, 64), (3, 3, 64, 64), (5, 0, 0), "int8")
+    _test_legalize_conv2d((2, 16, 16, 64), (3, 3, 64, 64), (0, 0, 0), "int8", False)
+    _test_legalize_conv2d((16, 16, 7, 64), (3, 3, 64, 64), (1, 0, 0), "int4")
+    _test_legalize_conv2d((16, 16, 3, 64), (3, 3, 64, 64), (5, 0, 0), "int4")
+    _test_legalize_conv2d((2, 16, 16, 64), (3, 3, 64, 64), (0, 0, 0), "int4", False)
     # conv2d pad in_channel
-    _test_legalize_conv2d((8, 16, 16, 63), (3, 3, 63, 64), (0, 1, 0))
-    _test_legalize_conv2d((8, 16, 16, 33), (3, 3, 33, 64), (0, 15, 0))
-    _test_legalize_conv2d((8, 16, 16, 13), (3, 3, 13, 64), (0, 3, 0))
-    _test_legalize_conv2d((8, 16, 16, 1), (3, 3, 1, 64), (0, 0, 0), False)
+    _test_legalize_conv2d((16, 16, 8, 63), (3, 3, 64, 63), (0, 1, 0), "int8")
+    _test_legalize_conv2d((16, 16, 8, 33), (3, 3, 64, 33), (0, 15, 0), "int8")
+    _test_legalize_conv2d((16, 16, 8, 13), (3, 3, 64, 13), (0, 3, 0), "int8")
+    _test_legalize_conv2d((16, 16, 8, 1), (3, 3, 64, 1), (0, 0, 0), "int8", False)
+    _test_legalize_conv2d((16, 16, 8, 63), (3, 3, 64, 63), (0, 1, 0), "int4")
+    _test_legalize_conv2d((16, 16, 8, 33), (3, 3, 64, 33), (0, 31, 0), "int4")
+    _test_legalize_conv2d((16, 16, 8, 13), (3, 3, 64, 13), (0, 19, 0), "int4")
+    _test_legalize_conv2d((16, 16, 8, 1), (3, 3, 64, 1), (0, 0, 0), "int4", False)
     # conv2d pad out_channel
-    _test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 63), (0, 0, 1))
-    _test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 33), (0, 0, 31))
-    _test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 1), (0, 0, 0), False)
-
+    _test_legalize_conv2d((16, 16, 8, 64), (3, 3, 63, 64), (0, 0, 1), "int8")
+    _test_legalize_conv2d((16, 16, 8, 64), (3, 3, 33, 64), (0, 0, 31), "int8")
+    _test_legalize_conv2d((16, 16, 8, 64), (3, 3, 1, 64), (0, 0, 0), "int8", False)
+    _test_legalize_conv2d((16, 16, 8, 64), (3, 3, 63, 64), (0, 0, 1), "int4")
+    _test_legalize_conv2d((16, 16, 8, 64), (3, 3, 33, 64), (0, 0, 7), "int4")
+    _test_legalize_conv2d((16, 16, 8, 64), (3, 3, 1, 64), (0, 0, 0), "int4", False)
 
 @tvm.testing.uses_gpu
 def test_legalize_dense():
     def _test_legalize_dense(data_shape, kernel_shape, pad_shape, do_pad=True):
@@ -234,6 +322,7 @@ def expected():
 
 
 if __name__ == "__main__":
-    test_legalize_conv2d()
+    test_legalize_conv2d_NHWC()
+    test_legalize_conv2d_HWNC()
     test_legalize_dense()
     test_legalize_batch_matmul()
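The pad shapes the tests expect follow directly from the candidate tiles: each dimension is rounded up to the next multiple of the matching tile axis (16 for in_channel under float16/int8, 32 under the int4 HWNC candidate (8, 32, 8)). A few spot checks, using -c % t as the round-up delta:

# Round-up deltas behind the expected pad shapes in the tests above:
assert -7 % 8 == 1     # batch 7 -> 8:        pad_shape (1, 0, 0)
assert -33 % 16 == 15  # in_channel 33 -> 48:  (0, 15, 0) for float16/int8
assert -33 % 32 == 31  # in_channel 33 -> 64:  (0, 31, 0) for int4 HWNC
assert -13 % 32 == 19  # in_channel 13 -> 32:  (0, 19, 0) for int4 HWNC
assert -33 % 8 == 7    # out_channel 33 -> 40: (0, 0, 7) for int4 HWNC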
From b045e365483f87097a6b9478ca0fce747837c578 Mon Sep 17 00:00:00 2001
From: wangyucheng
Date: Wed, 9 Jun 2021 20:11:35 +0800
Subject: [PATCH 2/4] minor fix

---
 tests/python/relay/test_pass_legalize_tensorcore.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/python/relay/test_pass_legalize_tensorcore.py b/tests/python/relay/test_pass_legalize_tensorcore.py
index 57d5865c3b33..e91e48c0c451 100644
--- a/tests/python/relay/test_pass_legalize_tensorcore.py
+++ b/tests/python/relay/test_pass_legalize_tensorcore.py
@@ -114,6 +114,7 @@ def expected():
         _test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 33), (0, 0, 31), dtype)
         _test_legalize_conv2d((8, 16, 16, 64), (3, 3, 64, 1), (0, 0, 0), dtype, False)
 
+
 @tvm.testing.uses_gpu
 def test_legalize_conv2d_HWNC():
     """test legalize HWNC conv2d to enable tensorcore"""
@@ -177,7 +178,6 @@ def expected():
         a = run_opt_pass(a, transform.Legalize())
         b = run_opt_pass(expected(), transform.InferType())
         assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + "Expected = \n" + str(b)
-    # data_layout="HWNC",kernel_layout="HWOI"
     # conv2d pad batch
     _test_legalize_conv2d((16, 16, 7, 64), (3, 3, 64, 64), (1, 0, 0), "int8")
     _test_legalize_conv2d((16, 16, 3, 64), (3, 3, 64, 64), (5, 0, 0), "int8")
@@ -202,6 +202,7 @@ def expected():
     _test_legalize_conv2d((16, 16, 8, 64), (3, 3, 33, 64), (0, 0, 7), "int4")
     _test_legalize_conv2d((16, 16, 8, 64), (3, 3, 1, 64), (0, 0, 0), "int4", False)
 
+
 @tvm.testing.uses_gpu
 def test_legalize_dense():
     def _test_legalize_dense(data_shape, kernel_shape, pad_shape, do_pad=True):
From ced4c69fbfc438d053fa396983f721645fd08d85 Mon Sep 17 00:00:00 2001
From: wangyucheng
Date: Wed, 9 Jun 2021 20:25:57 +0800
Subject: [PATCH 3/4] fix pylint

---
 python/tvm/topi/cuda/conv2d_alter_op.py      | 20 ++++++++++++++-----
 .../relay/test_pass_legalize_tensorcore.py   |  1 +
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/python/tvm/topi/cuda/conv2d_alter_op.py b/python/tvm/topi/cuda/conv2d_alter_op.py
index 331cc0aad403..dc0dc1ade646 100644
--- a/python/tvm/topi/cuda/conv2d_alter_op.py
+++ b/python/tvm/topi/cuda/conv2d_alter_op.py
@@ -415,7 +415,9 @@ def _conv2d_legalize(attrs, inputs, arg_types):
                 return None
 
             candidates = [(16, 16, 16), (32, 16, 8), (8, 16, 32)]
-            (db, di, do), extra_flops = pad_to_tensorcore(batch, in_channel, out_channel, candidates)
+            (db, di, do), extra_flops = pad_to_tensorcore(
+                batch, in_channel, out_channel, candidates
+            )
 
             if extra_flops > 2:
                 logger.info("conv2d pad_to_tensorcore skipped, extra_flops %s", extra_flops)
@@ -433,7 +435,9 @@ def _conv2d_legalize(attrs, inputs, arg_types):
                 return None
 
             candidates = [(8, 16, 32)]
-            (db, di, do), extra_flops = pad_to_tensorcore(batch, in_channel, out_channel, candidates)
+            (db, di, do), extra_flops = pad_to_tensorcore(
+                batch, in_channel, out_channel, candidates
+            )
 
             if extra_flops > 2:
                 logger.info("conv2d pad_to_tensorcore skipped, extra_flops %s", extra_flops)
@@ -456,7 +460,9 @@ def _conv2d_legalize(attrs, inputs, arg_types):
                 return None
 
             candidates = [(16, 16, 16), (32, 16, 8), (8, 16, 32)]
-            (db, di, do), extra_flops = pad_to_tensorcore(batch, in_channel, out_channel, candidates)
+            (db, di, do), extra_flops = pad_to_tensorcore(
+                batch, in_channel, out_channel, candidates
+            )
 
             if extra_flops > 2:
                 logger.info("conv2d pad_to_tensorcore skipped, extra_flops %s", extra_flops)
@@ -480,7 +486,9 @@ def _conv2d_legalize(attrs, inputs, arg_types):
                 return None
 
             candidates = [(16, 16, 16), (32, 16, 8), (8, 16, 32)]
-            (db, di, do), extra_flops = pad_to_tensorcore(batch, in_channel, out_channel, candidates)
+            (db, di, do), extra_flops = pad_to_tensorcore(
+                batch, in_channel, out_channel, candidates
+            )
 
             if extra_flops > 2:
                 logger.info("conv2d pad_to_tensorcore skipped, extra_flops %s", extra_flops)
@@ -498,7 +506,9 @@ def _conv2d_legalize(attrs, inputs, arg_types):
                 return None
 
             candidates = [(8, 32, 8)]
-            (db, di, do), extra_flops = pad_to_tensorcore(batch, in_channel, out_channel, candidates)
+            (db, di, do), extra_flops = pad_to_tensorcore(
+                batch, in_channel, out_channel, candidates
+            )
 
             if extra_flops > 2:
                 logger.info("conv2d pad_to_tensorcore skipped, extra_flops %s", extra_flops)
diff --git a/tests/python/relay/test_pass_legalize_tensorcore.py b/tests/python/relay/test_pass_legalize_tensorcore.py
index e91e48c0c451..1312b396fe4c 100644
--- a/tests/python/relay/test_pass_legalize_tensorcore.py
+++ b/tests/python/relay/test_pass_legalize_tensorcore.py
@@ -178,6 +178,7 @@ def expected():
         a = run_opt_pass(a, transform.Legalize())
         b = run_opt_pass(expected(), transform.InferType())
        assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + "Expected = \n" + str(b)
+
     # conv2d pad batch
     _test_legalize_conv2d((16, 16, 7, 64), (3, 3, 64, 64), (1, 0, 0), "int8")
     _test_legalize_conv2d((16, 16, 3, 64), (3, 3, 64, 64), (5, 0, 0), "int8")
From 0baf0d3e57c5104b3520a7cd01e9f002861c49c7 Mon Sep 17 00:00:00 2001
From: wangyucheng
Date: Wed, 9 Jun 2021 20:34:07 +0800
Subject: [PATCH 4/4] fix pylint

---
 python/tvm/topi/cuda/conv2d_alter_op.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/python/tvm/topi/cuda/conv2d_alter_op.py b/python/tvm/topi/cuda/conv2d_alter_op.py
index dc0dc1ade646..4863a06b728d 100644
--- a/python/tvm/topi/cuda/conv2d_alter_op.py
+++ b/python/tvm/topi/cuda/conv2d_alter_op.py
@@ -401,7 +401,8 @@ def _conv2d_legalize(attrs, inputs, arg_types):
             else:
                 out = relay.nn.conv2d(data, kernel, **new_attrs)
             return out
-        elif data_layout == "NHWC" and kernel_layout == "HWIO":
+
+        if data_layout == "NHWC" and kernel_layout == "HWIO":
             batch = data_tensor.shape[0].value
             in_channel = data_tensor.shape[3].value
             out_channel = kernel_tensor.shape[3].value
@@ -426,7 +427,8 @@ def _conv2d_legalize(attrs, inputs, arg_types):
             logger.info("conv2d pad_to_tensorcore, extra_flops %s", extra_flops)
 
             return _pad_conv2d_NHWC(db, di, do, data, kernel, out_channel, new_attrs, output_tensor)
-        elif data_layout == "HWNC" and kernel_layout == "HWOI":
+
+        if data_layout == "HWNC" and kernel_layout == "HWOI":
             batch = data_tensor.shape[2].value
             in_channel = data_tensor.shape[3].value
             out_channel = kernel_tensor.shape[2].value
@@ -445,6 +447,7 @@ def _conv2d_legalize(attrs, inputs, arg_types):
             logger.info("conv2d pad_to_tensorcore, extra_flops %s", extra_flops)
 
             return _pad_conv2d_HWNC(db, di, do, data, kernel, out_channel, new_attrs, output_tensor)
+
     elif data_dtype in ["float16"]:
         if data_layout == "NHWC" and kernel_layout == "HWIO":
             batch = data_tensor.shape[0].value
@@ -471,6 +474,7 @@ def _conv2d_legalize(attrs, inputs, arg_types):
             logger.info("conv2d pad_to_tensorcore, extra_flops %s", extra_flops)
 
             return _pad_conv2d_NHWC(db, di, do, data, kernel, out_channel, new_attrs, output_tensor)
+
     elif data_dtype in ["int4", "uint4"]:
         if data_layout == "NHWC" and kernel_layout == "HWIO":
             batch = data_tensor.shape[0].value
@@ -497,7 +501,8 @@ def _conv2d_legalize(attrs, inputs, arg_types):
             logger.info("conv2d pad_to_tensorcore, extra_flops %s", extra_flops)
 
             return _pad_conv2d_NHWC(db, di, do, data, kernel, out_channel, new_attrs, output_tensor)
-        elif data_layout == "HWNC" and kernel_layout == "HWOI":
+
+        if data_layout == "HWNC" and kernel_layout == "HWOI":
             batch = data_tensor.shape[2].value
             in_channel = data_tensor.shape[3].value
             out_channel = kernel_tensor.shape[2].value
@@ -516,4 +521,5 @@ def _conv2d_legalize(attrs, inputs, arg_types):
             logger.info("conv2d pad_to_tensorcore, extra_flops %s", extra_flops)
 
             return _pad_conv2d_HWNC(db, di, do, data, kernel, out_channel, new_attrs, output_tensor)
+
     return None
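For reference, a minimal driver in the same pattern as the tests, showing how the legalization is triggered outside the test suite (a sketch only; the int8 NHWC workload and variable names are illustrative):

import tvm
from tvm import relay, topi
from tvm.relay import transform
from tvm.relay.testing.temp_op_attr import TempOpAttr

# Route nn.conv2d legalization to the CUDA implementation patched above.
def legalize_conv2d(attrs, inputs, types):
    with tvm.target.Target("cuda"):
        return topi.nn.conv2d_legalize(attrs, inputs, types)

x = relay.var("x", shape=(7, 16, 16, 64), dtype="int8")
w = relay.var("w", shape=(3, 3, 64, 64), dtype="int8")
y = relay.nn.conv2d(x, w, channels=64, kernel_size=(3, 3), padding=(1, 1),
                    data_layout="NHWC", kernel_layout="HWIO")
f = relay.Function([x, w], y)

with TempOpAttr("nn.conv2d", "FTVMLegalize", legalize_conv2d):
    mod = tvm.IRModule.from_expr(f)
    # After this pass the conv2d is wrapped in pad/strided_slice as above.
    mod = transform.Legalize()(mod)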