From b87684cbdcc02fd3cb9d53283a5f61d8f3ad716d Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sat, 29 Sep 2018 01:03:31 -0700 Subject: [PATCH 1/5] [TOPI] Update tophub and new benchmark results --- apps/benchmark/arm_cpu_imagenet_bench.py | 6 ++++-- apps/benchmark/gpu_imagenet_bench.py | 6 ++++-- apps/benchmark/mobile_gpu_imagenet_bench.py | 8 +++++--- python/tvm/autotvm/tophub.py | 2 +- topi/python/topi/arm_cpu/conv2d.py | 4 ++-- topi/python/topi/mali/conv2d.py | 7 +++++-- 6 files changed, 21 insertions(+), 12 deletions(-) diff --git a/apps/benchmark/arm_cpu_imagenet_bench.py b/apps/benchmark/arm_cpu_imagenet_bench.py index 2d7116475bc5..89a8dc16c756 100644 --- a/apps/benchmark/arm_cpu_imagenet_bench.py +++ b/apps/benchmark/arm_cpu_imagenet_bench.py @@ -58,8 +58,10 @@ def evaluate_network(network, target, target_host, number): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--network", type=str, choices= - ['resnet-18', 'resnet-34', 'vgg-16', - 'mobilenet', 'mobilenet_v2', 'squeezenet v1.0', 'squeezenet v1.1']) + ['resnet-18', 'resnet-34', 'resnet-50', + 'vgg-16', 'vgg-19', 'densenet-121', 'inception_v3', + 'mobilenet', 'mobilenet_v2', 'squeezenet v1.1', 'squeezenet v1.1'], + help='The name of neural network') parser.add_argument("--model", type=str, choices= ['rk3399', 'mate10', 'mate10pro', 'p20', 'p20pro', 'pixel2', 'rasp3b', 'pynq'], default='rk3399', diff --git a/apps/benchmark/gpu_imagenet_bench.py b/apps/benchmark/gpu_imagenet_bench.py index 873e60f82c59..7fddebef019a 100644 --- a/apps/benchmark/gpu_imagenet_bench.py +++ b/apps/benchmark/gpu_imagenet_bench.py @@ -17,8 +17,10 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--network", type=str, choices= - ['resnet-18', 'resnet-34', 'resnet-50', 'vgg-16', 'vgg-19', - 'inception_v3', 'mobilenet', 'mobilenet_v2', 'densenet-121']) + ['resnet-18', 'resnet-34', 'resnet-50', + 'vgg-16', 'vgg-19', 'densenet-121', 'inception_v3', + 'mobilenet', 'mobilenet_v2', 'squeezenet v1.1', 'squeezenet v1.1'], + help='The name of neural network') parser.add_argument("--model", type=str, choices=['1080ti', 'titanx', 'gfx900'], default='1080ti', help="The model of the test device. If your device is not listed in " diff --git a/apps/benchmark/mobile_gpu_imagenet_bench.py b/apps/benchmark/mobile_gpu_imagenet_bench.py index 8e29fa5dab9a..7d1ecdc4d652 100644 --- a/apps/benchmark/mobile_gpu_imagenet_bench.py +++ b/apps/benchmark/mobile_gpu_imagenet_bench.py @@ -58,8 +58,10 @@ def evaluate_network(network, target, target_host, number): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--network", type=str, choices= - ['resnet-18', 'resnet-34', 'vgg-16', - 'mobilenet', 'mobilenet_v2', 'squeezenet v1.1']) + ['resnet-18', 'resnet-34', 'resnet-50', + 'vgg-16', 'vgg-19', 'densenet-121', 'inception_v3', + 'mobilenet', 'mobilenet_v2', 'squeezenet v1.1', 'squeezenet v1.1'], + help='The name of neural network') parser.add_argument("--model", type=str, choices= ['rk3399'], default='rk3399', help="The model of the test device. If your device is not listed in " @@ -67,7 +69,7 @@ def evaluate_network(network, target, target_host, number): parser.add_argument("--host", type=str, default='localhost') parser.add_argument("--port", type=int, default=9190) parser.add_argument("--rpc-key", type=str, required=True) - parser.add_argument("--number", type=int, default=10) + parser.add_argument("--number", type=int, default=20) args = parser.parse_args() dtype = 'float32' diff --git a/python/tvm/autotvm/tophub.py b/python/tvm/autotvm/tophub.py index bde706ee6cfb..4c8abc246d32 100644 --- a/python/tvm/autotvm/tophub.py +++ b/python/tvm/autotvm/tophub.py @@ -20,7 +20,7 @@ # the version of each package PACKAGE_VERSION = { - 'arm_cpu': "v0.01", + 'arm_cpu': "v0.03", 'cuda': "v0.02", 'rocm': "v0.01", diff --git a/topi/python/topi/arm_cpu/conv2d.py b/topi/python/topi/arm_cpu/conv2d.py index 6a924a4b133c..a193e9acf5cb 100644 --- a/topi/python/topi/arm_cpu/conv2d.py +++ b/topi/python/topi/arm_cpu/conv2d.py @@ -506,8 +506,8 @@ def _callback(op): ##### REGISTER ALTER OP LAYOUT ##### -@conv2d_alter_layout.register(["arm_cpu", "mali"]) -def _alter_conv2d_layout(attrs, inputs, tinfos): +@conv2d_alter_layout.register(["arm_cpu"]) +def _alter_conv2d_layout_arm(attrs, inputs, tinfos): """Alter op layout for pre-computing kernel transformation""" import nnvm.symbol as sym copy_inputs = [s for s in inputs] diff --git a/topi/python/topi/mali/conv2d.py b/topi/python/topi/mali/conv2d.py index 6bbf735af18e..26515df9bd0d 100644 --- a/topi/python/topi/mali/conv2d.py +++ b/topi/python/topi/mali/conv2d.py @@ -9,11 +9,11 @@ from ..generic import schedule_conv2d_nchw, schedule_conv2d_winograd_without_weight_transform from ..util import traverse_inline, get_const_int, get_const_tuple, const_matrix from ..nn import conv2d, conv2d_winograd_without_weight_transform, \ - get_pad_tuple, pad + get_pad_tuple, pad, conv2d_alter_layout # reuse some compute declarations from ARM CPU from ..arm_cpu.conv2d import _conv_arg_to_workload, _decl_spatial_pack,\ - _winograd_conv_arg_to_workload + _winograd_conv_arg_to_workload, _alter_conv2d_layout_arm @conv2d.register('mali') @@ -410,6 +410,9 @@ def _schedule_winograd(cfg, s, op): s[Y].compute_at(s[output], tt) +@conv2d_alter_layout.register(["mali"]) +def _alter_conv2d_layout(attrs, inputs, tinfos): + return _alter_conv2d_layout_arm(attrs, inputs, tinfos) ##### REGISTER TOPI COMPUTE / SCHEDULE FOR WINOGRAD WITH WEIGHT TRANSFORM ##### @conv2d_winograd_without_weight_transform.register(['mali']) From d4b37043b670dfe58df9bafb82cc5ebfb1394863 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sat, 29 Sep 2018 08:52:56 -0700 Subject: [PATCH 2/5] [TOPI] update benchmark --- apps/benchmark/arm_cpu_imagenet_bench.py | 4 ++-- apps/benchmark/gpu_imagenet_bench.py | 2 +- apps/benchmark/mobile_gpu_imagenet_bench.py | 2 +- python/tvm/autotvm/tophub.py | 6 ++++-- topi/python/topi/mali/conv2d.py | 5 ++++- 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/apps/benchmark/arm_cpu_imagenet_bench.py b/apps/benchmark/arm_cpu_imagenet_bench.py index 89a8dc16c756..5b666bc9d2e0 100644 --- a/apps/benchmark/arm_cpu_imagenet_bench.py +++ b/apps/benchmark/arm_cpu_imagenet_bench.py @@ -60,7 +60,7 @@ def evaluate_network(network, target, target_host, number): parser.add_argument("--network", type=str, choices= ['resnet-18', 'resnet-34', 'resnet-50', 'vgg-16', 'vgg-19', 'densenet-121', 'inception_v3', - 'mobilenet', 'mobilenet_v2', 'squeezenet v1.1', 'squeezenet v1.1'], + 'mobilenet', 'mobilenet_v2', 'squeezenet_v1.0', 'squeezenet_v1.1'], help='The name of neural network') parser.add_argument("--model", type=str, choices= ['rk3399', 'mate10', 'mate10pro', 'p20', 'p20pro', @@ -70,7 +70,7 @@ def evaluate_network(network, target, target_host, number): parser.add_argument("--host", type=str, default='localhost') parser.add_argument("--port", type=int, default=9190) parser.add_argument("--rpc-key", type=str, required=True) - parser.add_argument("--number", type=int, default=6) + parser.add_argument("--number", type=int, default=3) args = parser.parse_args() dtype = 'float32' diff --git a/apps/benchmark/gpu_imagenet_bench.py b/apps/benchmark/gpu_imagenet_bench.py index 7fddebef019a..a0eb4a055103 100644 --- a/apps/benchmark/gpu_imagenet_bench.py +++ b/apps/benchmark/gpu_imagenet_bench.py @@ -19,7 +19,7 @@ parser.add_argument("--network", type=str, choices= ['resnet-18', 'resnet-34', 'resnet-50', 'vgg-16', 'vgg-19', 'densenet-121', 'inception_v3', - 'mobilenet', 'mobilenet_v2', 'squeezenet v1.1', 'squeezenet v1.1'], + 'mobilenet', 'mobilenet_v2', 'squeezenet_v1.0', 'squeezenet_v1.1'], help='The name of neural network') parser.add_argument("--model", type=str, choices=['1080ti', 'titanx', 'gfx900'], default='1080ti', diff --git a/apps/benchmark/mobile_gpu_imagenet_bench.py b/apps/benchmark/mobile_gpu_imagenet_bench.py index 7d1ecdc4d652..7fbf39760c05 100644 --- a/apps/benchmark/mobile_gpu_imagenet_bench.py +++ b/apps/benchmark/mobile_gpu_imagenet_bench.py @@ -60,7 +60,7 @@ def evaluate_network(network, target, target_host, number): parser.add_argument("--network", type=str, choices= ['resnet-18', 'resnet-34', 'resnet-50', 'vgg-16', 'vgg-19', 'densenet-121', 'inception_v3', - 'mobilenet', 'mobilenet_v2', 'squeezenet v1.1', 'squeezenet v1.1'], + 'mobilenet', 'mobilenet_v2', 'squeezenet_v1.0', 'squeezenet_v1.1'], help='The name of neural network') parser.add_argument("--model", type=str, choices= ['rk3399'], default='rk3399', diff --git a/python/tvm/autotvm/tophub.py b/python/tvm/autotvm/tophub.py index 4c8abc246d32..0a2f8dfa3180 100644 --- a/python/tvm/autotvm/tophub.py +++ b/python/tvm/autotvm/tophub.py @@ -38,7 +38,7 @@ def _alias(name): 'vtacpu': 'vta', 'metal': 'opencl', - 'nvptx': 'cuda' + 'nvptx': 'cuda', } return table.get(name, name) @@ -61,11 +61,12 @@ def context(target, extra_files=None): if isinstance(target, str): target = _target.create(target) - possible_names = [str(target).split()[0]] + possible_names = [] for opt in target.options: if opt.startswith("-device"): device = _alias(opt[8:]) possible_names.append(device) + possible_names.append(target.target_name) all_packages = list(PACKAGE_VERSION.keys()) for name in possible_names: @@ -75,6 +76,7 @@ def context(target, extra_files=None): filename = "%s_%s.log" % (name, PACKAGE_VERSION[name]) best_context.load(os.path.join(AUTOTVM_TOPHUB_ROOT_PATH, filename)) + break # only load one file to avoid some fallback template mismatch problem if extra_files: for filename in extra_files: diff --git a/topi/python/topi/mali/conv2d.py b/topi/python/topi/mali/conv2d.py index 26515df9bd0d..d031acdd9a2b 100644 --- a/topi/python/topi/mali/conv2d.py +++ b/topi/python/topi/mali/conv2d.py @@ -412,7 +412,10 @@ def _schedule_winograd(cfg, s, op): @conv2d_alter_layout.register(["mali"]) def _alter_conv2d_layout(attrs, inputs, tinfos): - return _alter_conv2d_layout_arm(attrs, inputs, tinfos) + try: + return _alter_conv2d_layout_arm(attrs, inputs, tinfos) + except KeyError: # to filter out fallback opencl templates + return None ##### REGISTER TOPI COMPUTE / SCHEDULE FOR WINOGRAD WITH WEIGHT TRANSFORM ##### @conv2d_winograd_without_weight_transform.register(['mali']) From 4b59b4d7b5e61ad161ae88970aa35dad5a58ef23 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Tue, 2 Oct 2018 11:31:30 -0700 Subject: [PATCH 3/5] fix number --- apps/benchmark/mobile_gpu_imagenet_bench.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/benchmark/mobile_gpu_imagenet_bench.py b/apps/benchmark/mobile_gpu_imagenet_bench.py index 7fbf39760c05..a75620b3fe08 100644 --- a/apps/benchmark/mobile_gpu_imagenet_bench.py +++ b/apps/benchmark/mobile_gpu_imagenet_bench.py @@ -69,7 +69,7 @@ def evaluate_network(network, target, target_host, number): parser.add_argument("--host", type=str, default='localhost') parser.add_argument("--port", type=int, default=9190) parser.add_argument("--rpc-key", type=str, required=True) - parser.add_argument("--number", type=int, default=20) + parser.add_argument("--number", type=int, default=30) args = parser.parse_args() dtype = 'float32' From f31221ba66a38cd9b6f3462373dc34cc8b7f75f2 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Tue, 2 Oct 2018 11:38:02 -0700 Subject: [PATCH 4/5] update for mali --- python/tvm/autotvm/tophub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/autotvm/tophub.py b/python/tvm/autotvm/tophub.py index 0a2f8dfa3180..9a309fd5b338 100644 --- a/python/tvm/autotvm/tophub.py +++ b/python/tvm/autotvm/tophub.py @@ -25,7 +25,7 @@ 'cuda': "v0.02", 'rocm': "v0.01", 'opencl': "v0.01", - 'mali': "v0.01", + 'mali': "v0.02", 'vta': "v0.01", } From c5c626e4a491a59f4220c469167d5163c9f04a5b Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Tue, 2 Oct 2018 14:56:09 -0700 Subject: [PATCH 5/5] fix --- topi/tests/python/test_topi_conv2d_nchw.py | 9 ++------- topi/tests/python/test_topi_depthwise_conv2d.py | 12 ++++-------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/topi/tests/python/test_topi_conv2d_nchw.py b/topi/tests/python/test_topi_conv2d_nchw.py index f65832a14bdb..14aa0b742a8a 100644 --- a/topi/tests/python/test_topi_conv2d_nchw.py +++ b/topi/tests/python/test_topi_conv2d_nchw.py @@ -69,16 +69,11 @@ def check_device(device): np.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5) for device in get_all_backend(): - check_device(device) + with autotvm.tophub.context(device): # load tophub pre-tuned parameters + check_device(device) def test_conv2d_nchw(): - # load tophub - ctx = autotvm.apply_history_best([]) - for device in get_all_backend(): - context = autotvm.tophub.context(device) - context.__enter__() - # ResNet18 workloads verify_conv2d_nchw(1, 3, 224, 64, 7, 2, 3) verify_conv2d_nchw(1, 64, 56, 64, 3, 1, 1) diff --git a/topi/tests/python/test_topi_depthwise_conv2d.py b/topi/tests/python/test_topi_depthwise_conv2d.py index 4d3c45763dfb..b03916b9ba09 100644 --- a/topi/tests/python/test_topi_depthwise_conv2d.py +++ b/topi/tests/python/test_topi_depthwise_conv2d.py @@ -102,7 +102,8 @@ def get_ref_data(): np.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5) for device in get_all_backend(): - check_device(device) + with autotvm.tophub.context(device): # load tophub pre-tuned parameters + check_device(device) def depthwise_conv2d_with_workload_nhwc(batch, in_channel, in_height, channel_multiplier, filter_height, stride_h, padding, dilation=1): @@ -201,16 +202,11 @@ def get_ref_data(): np.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5) for device in get_all_backend(): - check_device(device) + with autotvm.tophub.context(device): # load tophub pre-tuned parameters + check_device(device) def test_depthwise_conv2d(): - # load tophub - ctx = autotvm.apply_history_best([]) - for device in get_all_backend(): - context = autotvm.tophub.context(device) - context.__enter__() - # mobilenet workloads depthwise_conv2d_with_workload_nchw(1, 32, 112, 1, 3, 1, "SAME") depthwise_conv2d_with_workload_nchw(1, 64, 112, 1, 3, 2, "SAME")