diff --git a/apps/benchmark/arm_cpu_imagenet_bench.py b/apps/benchmark/arm_cpu_imagenet_bench.py index 2d7116475bc5..5b666bc9d2e0 100644 --- a/apps/benchmark/arm_cpu_imagenet_bench.py +++ b/apps/benchmark/arm_cpu_imagenet_bench.py @@ -58,8 +58,10 @@ def evaluate_network(network, target, target_host, number): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--network", type=str, choices= - ['resnet-18', 'resnet-34', 'vgg-16', - 'mobilenet', 'mobilenet_v2', 'squeezenet v1.0', 'squeezenet v1.1']) + ['resnet-18', 'resnet-34', 'resnet-50', + 'vgg-16', 'vgg-19', 'densenet-121', 'inception_v3', + 'mobilenet', 'mobilenet_v2', 'squeezenet_v1.0', 'squeezenet_v1.1'], + help='The name of neural network') parser.add_argument("--model", type=str, choices= ['rk3399', 'mate10', 'mate10pro', 'p20', 'p20pro', 'pixel2', 'rasp3b', 'pynq'], default='rk3399', @@ -68,7 +70,7 @@ def evaluate_network(network, target, target_host, number): parser.add_argument("--host", type=str, default='localhost') parser.add_argument("--port", type=int, default=9190) parser.add_argument("--rpc-key", type=str, required=True) - parser.add_argument("--number", type=int, default=6) + parser.add_argument("--number", type=int, default=3) args = parser.parse_args() dtype = 'float32' diff --git a/apps/benchmark/gpu_imagenet_bench.py b/apps/benchmark/gpu_imagenet_bench.py index 873e60f82c59..a0eb4a055103 100644 --- a/apps/benchmark/gpu_imagenet_bench.py +++ b/apps/benchmark/gpu_imagenet_bench.py @@ -17,8 +17,10 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--network", type=str, choices= - ['resnet-18', 'resnet-34', 'resnet-50', 'vgg-16', 'vgg-19', - 'inception_v3', 'mobilenet', 'mobilenet_v2', 'densenet-121']) + ['resnet-18', 'resnet-34', 'resnet-50', + 'vgg-16', 'vgg-19', 'densenet-121', 'inception_v3', + 'mobilenet', 'mobilenet_v2', 'squeezenet_v1.0', 'squeezenet_v1.1'], + help='The name of neural network') parser.add_argument("--model", type=str, choices=['1080ti', 'titanx', 'gfx900'], default='1080ti', help="The model of the test device. If your device is not listed in " diff --git a/apps/benchmark/mobile_gpu_imagenet_bench.py b/apps/benchmark/mobile_gpu_imagenet_bench.py index 8e29fa5dab9a..a75620b3fe08 100644 --- a/apps/benchmark/mobile_gpu_imagenet_bench.py +++ b/apps/benchmark/mobile_gpu_imagenet_bench.py @@ -58,8 +58,10 @@ def evaluate_network(network, target, target_host, number): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--network", type=str, choices= - ['resnet-18', 'resnet-34', 'vgg-16', - 'mobilenet', 'mobilenet_v2', 'squeezenet v1.1']) + ['resnet-18', 'resnet-34', 'resnet-50', + 'vgg-16', 'vgg-19', 'densenet-121', 'inception_v3', + 'mobilenet', 'mobilenet_v2', 'squeezenet_v1.0', 'squeezenet_v1.1'], + help='The name of neural network') parser.add_argument("--model", type=str, choices= ['rk3399'], default='rk3399', help="The model of the test device. If your device is not listed in " @@ -67,7 +69,7 @@ def evaluate_network(network, target, target_host, number): parser.add_argument("--host", type=str, default='localhost') parser.add_argument("--port", type=int, default=9190) parser.add_argument("--rpc-key", type=str, required=True) - parser.add_argument("--number", type=int, default=10) + parser.add_argument("--number", type=int, default=30) args = parser.parse_args() dtype = 'float32' diff --git a/python/tvm/autotvm/tophub.py b/python/tvm/autotvm/tophub.py index bde706ee6cfb..9a309fd5b338 100644 --- a/python/tvm/autotvm/tophub.py +++ b/python/tvm/autotvm/tophub.py @@ -20,12 +20,12 @@ # the version of each package PACKAGE_VERSION = { - 'arm_cpu': "v0.01", + 'arm_cpu': "v0.03", 'cuda': "v0.02", 'rocm': "v0.01", 'opencl': "v0.01", - 'mali': "v0.01", + 'mali': "v0.02", 'vta': "v0.01", } @@ -38,7 +38,7 @@ def _alias(name): 'vtacpu': 'vta', 'metal': 'opencl', - 'nvptx': 'cuda' + 'nvptx': 'cuda', } return table.get(name, name) @@ -61,11 +61,12 @@ def context(target, extra_files=None): if isinstance(target, str): target = _target.create(target) - possible_names = [str(target).split()[0]] + possible_names = [] for opt in target.options: if opt.startswith("-device"): device = _alias(opt[8:]) possible_names.append(device) + possible_names.append(target.target_name) all_packages = list(PACKAGE_VERSION.keys()) for name in possible_names: @@ -75,6 +76,7 @@ def context(target, extra_files=None): filename = "%s_%s.log" % (name, PACKAGE_VERSION[name]) best_context.load(os.path.join(AUTOTVM_TOPHUB_ROOT_PATH, filename)) + break # only load one file to avoid some fallback template mismatch problem if extra_files: for filename in extra_files: diff --git a/topi/python/topi/arm_cpu/conv2d.py b/topi/python/topi/arm_cpu/conv2d.py index 6a924a4b133c..a193e9acf5cb 100644 --- a/topi/python/topi/arm_cpu/conv2d.py +++ b/topi/python/topi/arm_cpu/conv2d.py @@ -506,8 +506,8 @@ def _callback(op): ##### REGISTER ALTER OP LAYOUT ##### -@conv2d_alter_layout.register(["arm_cpu", "mali"]) -def _alter_conv2d_layout(attrs, inputs, tinfos): +@conv2d_alter_layout.register(["arm_cpu"]) +def _alter_conv2d_layout_arm(attrs, inputs, tinfos): """Alter op layout for pre-computing kernel transformation""" import nnvm.symbol as sym copy_inputs = [s for s in inputs] diff --git a/topi/python/topi/mali/conv2d.py b/topi/python/topi/mali/conv2d.py index 6bbf735af18e..d031acdd9a2b 100644 --- a/topi/python/topi/mali/conv2d.py +++ b/topi/python/topi/mali/conv2d.py @@ -9,11 +9,11 @@ from ..generic import schedule_conv2d_nchw, schedule_conv2d_winograd_without_weight_transform from ..util import traverse_inline, get_const_int, get_const_tuple, const_matrix from ..nn import conv2d, conv2d_winograd_without_weight_transform, \ - get_pad_tuple, pad + get_pad_tuple, pad, conv2d_alter_layout # reuse some compute declarations from ARM CPU from ..arm_cpu.conv2d import _conv_arg_to_workload, _decl_spatial_pack,\ - _winograd_conv_arg_to_workload + _winograd_conv_arg_to_workload, _alter_conv2d_layout_arm @conv2d.register('mali') @@ -410,6 +410,12 @@ def _schedule_winograd(cfg, s, op): s[Y].compute_at(s[output], tt) +@conv2d_alter_layout.register(["mali"]) +def _alter_conv2d_layout(attrs, inputs, tinfos): + try: + return _alter_conv2d_layout_arm(attrs, inputs, tinfos) + except KeyError: # to filter out fallback opencl templates + return None ##### REGISTER TOPI COMPUTE / SCHEDULE FOR WINOGRAD WITH WEIGHT TRANSFORM ##### @conv2d_winograd_without_weight_transform.register(['mali']) diff --git a/topi/tests/python/test_topi_conv2d_nchw.py b/topi/tests/python/test_topi_conv2d_nchw.py index f65832a14bdb..14aa0b742a8a 100644 --- a/topi/tests/python/test_topi_conv2d_nchw.py +++ b/topi/tests/python/test_topi_conv2d_nchw.py @@ -69,16 +69,11 @@ def check_device(device): np.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5) for device in get_all_backend(): - check_device(device) + with autotvm.tophub.context(device): # load tophub pre-tuned parameters + check_device(device) def test_conv2d_nchw(): - # load tophub - ctx = autotvm.apply_history_best([]) - for device in get_all_backend(): - context = autotvm.tophub.context(device) - context.__enter__() - # ResNet18 workloads verify_conv2d_nchw(1, 3, 224, 64, 7, 2, 3) verify_conv2d_nchw(1, 64, 56, 64, 3, 1, 1) diff --git a/topi/tests/python/test_topi_depthwise_conv2d.py b/topi/tests/python/test_topi_depthwise_conv2d.py index 4d3c45763dfb..b03916b9ba09 100644 --- a/topi/tests/python/test_topi_depthwise_conv2d.py +++ b/topi/tests/python/test_topi_depthwise_conv2d.py @@ -102,7 +102,8 @@ def get_ref_data(): np.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5) for device in get_all_backend(): - check_device(device) + with autotvm.tophub.context(device): # load tophub pre-tuned parameters + check_device(device) def depthwise_conv2d_with_workload_nhwc(batch, in_channel, in_height, channel_multiplier, filter_height, stride_h, padding, dilation=1): @@ -201,16 +202,11 @@ def get_ref_data(): np.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5) for device in get_all_backend(): - check_device(device) + with autotvm.tophub.context(device): # load tophub pre-tuned parameters + check_device(device) def test_depthwise_conv2d(): - # load tophub - ctx = autotvm.apply_history_best([]) - for device in get_all_backend(): - context = autotvm.tophub.context(device) - context.__enter__() - # mobilenet workloads depthwise_conv2d_with_workload_nchw(1, 32, 112, 1, 3, 1, "SAME") depthwise_conv2d_with_workload_nchw(1, 64, 112, 1, 3, 2, "SAME")