diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h index f16e1e936d96..7f7f083cf303 100644 --- a/src/runtime/opencl/opencl_common.h +++ b/src/runtime/opencl/opencl_common.h @@ -439,9 +439,9 @@ class OpenCLTimerNode : public TimerNode { public: // Timer start virtual void Start() { + this->duration = 0; if (count_timer_execs == 0) { cl::OpenCLWorkspace::Global()->GetEventQueue(dev_).clear(); - this->duration = 0; // Very first call of Start() leads to the recreation of // OpenCL command queue in profiling mode. This allows to run profile after inference. recreateCommandQueue(); diff --git a/tests/cpp-runtime/opencl/opencl_timer_test.cc b/tests/cpp-runtime/opencl/opencl_timer_test.cc index 40ec65d8dfe2..6faf2f6a1482 100644 --- a/tests/cpp-runtime/opencl/opencl_timer_test.cc +++ b/tests/cpp-runtime/opencl/opencl_timer_test.cc @@ -46,6 +46,7 @@ TEST(OpenCLTimerNode, nested_timers) { cl_mem cl_buf = clCreateBuffer(workspace->context, CL_MEM_READ_ONLY, BUFF_SIZE * sizeof(cl_int), NULL, &err); OPENCL_CHECK_ERROR(err); + queue = workspace->GetQueue(thr->device); OPENCL_CALL(clEnqueueWriteBuffer(queue, cl_buf, false, 0, BUFF_SIZE * sizeof(cl_int), tmp_buf, 0, NULL, &ev)); OPENCL_CALL(clReleaseMemObject(cl_buf)); diff --git a/tests/cpp-runtime/opencl/run_gtests.cc b/tests/cpp-runtime/opencl/run_gtests.cc index b16ae3efc74d..ffe86a7f52c0 100644 --- a/tests/cpp-runtime/opencl/run_gtests.cc +++ b/tests/cpp-runtime/opencl/run_gtests.cc @@ -40,7 +40,7 @@ TVM_REGISTER_GLOBAL("opencl.run_gtests").set_body([](TVMArgs args, TVMRetValue* argv.push_back(const_cast("opencl_run_gtests")); // add parsed arguments - for (int i = 0; i < parsed_args.size(); ++i) { + for (size_t i = 0; i < parsed_args.size(); ++i) { argv.push_back(const_cast(parsed_args[i].data())); } diff --git a/tests/python/contrib/test_opencl/test_run_gtests.py b/tests/python/contrib/test_opencl/test_run_gtests.py index 4afcf7ee8d66..ee59086b25f1 100644 --- 
a/tests/python/contrib/test_opencl/test_run_gtests.py +++ b/tests/python/contrib/test_opencl/test_run_gtests.py @@ -28,6 +28,7 @@ # for example to run all "foo" tests twice and observe gtest output run # pytest -sv --gtests_args="--gtest_filter=*foo* --gtest_repeat=2" @tvm.testing.requires_opencl +@pytest.mark.skipif(tvm.testing.utils.IS_IN_CI, reason="failed due to nvidia libOpencl in the CI") def test_run_gtests(gtest_args): if ( "TVM_TRACKER_HOST" in os.environ diff --git a/tests/python/driver/tvmc/test_compiler.py b/tests/python/driver/tvmc/test_compiler.py index 27cd78d436c7..5535fc02249f 100644 --- a/tests/python/driver/tvmc/test_compiler.py +++ b/tests/python/driver/tvmc/test_compiler.py @@ -367,8 +367,9 @@ def test_compile_opencl(tflite_mobilenet_v1_0_25_128): tvmc_model = tvmc.load(tflite_mobilenet_v1_0_25_128) tvmc_package = tvmc.compile( tvmc_model, - target="opencl --host=llvm", + target="opencl -host=llvm", desired_layout="NCHW", + dump_code="asm", ) dumps_path = tvmc_package.package_path + ".asm" diff --git a/tests/python/relay/test_conv2d_nchw_texture.py b/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py similarity index 90% rename from tests/python/relay/test_conv2d_nchw_texture.py rename to tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py index ab12e40b39cb..504a2b4e3ed3 100644 --- a/tests/python/relay/test_conv2d_nchw_texture.py +++ b/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py @@ -22,13 +22,15 @@ from tvm.relay import testing from tvm.contrib import utils from utils.adreno_utils import gpu_preprocess, build_run_compare +import pytest -@tvm.testing.requires_opencl -def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad(): - target = "opencl --device=adreno" - dtype = "float16" +dtype = tvm.testing.parameter("float32") + +@tvm.testing.requires_opencl +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad(target, dtype): input_shape = (1, 32, 42, 42) 
filter_shape = (96, 32, 3, 3) bias_shape = (1, 96, 1, 1) @@ -67,10 +69,8 @@ def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad(): @tvm.testing.requires_opencl -def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass(target, dtype): input_shape = (1, 32, 40, 40) filter_shape = (96, 32, 2, 2) bias_shape = (1, 96, 1, 1) @@ -109,10 +109,8 @@ def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass(): @tvm.testing.requires_opencl -def test_conv2d_inceptionv3_35_35_strides(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_inceptionv3_35_35_strides(target, dtype): input_shape = (1, 48, 35, 35) filter_shape = (64, 48, 5, 5) bias_shape = (1, 64, 1, 1) @@ -151,10 +149,8 @@ def test_conv2d_inceptionv3_35_35_strides(): @tvm.testing.requires_opencl -def test_conv2d_resnet50_v2_nchw_3c(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_resnet50_v2_nchw_3c(target, dtype): input_shape = (1, 3, 224, 224) filter_shape = (64, 3, 7, 7) bias_shape = (1, 64, 1, 1) @@ -194,10 +190,8 @@ def test_conv2d_resnet50_v2_nchw_3c(): @tvm.testing.requires_opencl -def test_conv2d_inceptionv3_nchw_3c(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_inceptionv3_nchw_3c(target, dtype): input_shape = (1, 3, 299, 299) filter_shape = (64, 3, 3, 3) bias_shape = (1, 64, 1, 1) @@ -236,10 +230,8 @@ def test_conv2d_inceptionv3_nchw_3c(): @tvm.testing.requires_opencl -def test_conv2d_1x1_16c16spatial(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_1x1_16c16spatial(target, dtype): 
input_shape = (1, 16, 256, 256) filter_shape = (32, 16, 4, 4) bias_shape = (1, 32, 1, 1) @@ -278,10 +270,8 @@ def test_conv2d_1x1_16c16spatial(): @tvm.testing.requires_opencl -def test_conv2d_4x4_16c16pad(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_4x4_16c16pad(target, dtype): input_shape = (1, 32, 256, 256) filter_shape = (32, 32, 4, 4) bias_shape = (1, 32, 1, 1) @@ -320,10 +310,8 @@ def test_conv2d_4x4_16c16pad(): @tvm.testing.requires_opencl -def test_conv2d_4x4x4_16c16pad(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_4x4x4_16c16pad(target, dtype): input_shape = (1, 32, 256, 256) filter_shape = (4, 32, 4, 4) bias_shape = (1, 4, 1, 1) @@ -362,10 +350,8 @@ def test_conv2d_4x4x4_16c16pad(): @tvm.testing.requires_opencl -def test_conv2d_yolov3_v2_nchw_3c(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_yolov3_v2_nchw_3c(target, dtype): input_shape = (1, 1024, 13, 13) filter_shape = (255, 1024, 1, 1) A = relay.var("data", shape=input_shape, dtype=dtype) @@ -397,10 +383,8 @@ def test_conv2d_yolov3_v2_nchw_3c(): @tvm.testing.requires_opencl -def test_conv2d_vgg16_winograd_4d(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_vgg16_winograd_4d(target, dtype): input_shape = (1, 512, 28, 28) filter_shape = (512, 512, 3, 3) bias_shape = (1, 512, 1, 1) @@ -437,7 +421,7 @@ def test_conv2d_vgg16_winograd_4d(): stat_file = temp.relpath("stat.log") with open(stat_file, "w") as f: f.write( - '{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nchw_winograd.image2d", [["TENSOR", [1, 512, 28, 28], "float16"], ["TENSOR", [512, 512, 3, 3], "float16"], [1, 1], [1, 1, 1, 1], [1, 1], "float16"], 
{}], "config": {"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}\n' + f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nchw_winograd.image2d", [["TENSOR", [1, 512, 28, 28], "{dtype}"], ["TENSOR", [512, 512, 3, 3], "{dtype}"], [1, 1], [1, 1, 1, 1], [1, 1], "{dtype}"], {{}}], "config": {{"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}}\n' ) graph = build_run_compare( mod, params1, {"data": input_shape}, dtype, target, stat_file=stat_file @@ -447,10 +431,8 @@ def test_conv2d_vgg16_winograd_4d(): @tvm.testing.requires_opencl -def test_conv2d_winograd_conv(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_winograd_conv(target, dtype): input_shape = (1, 4, 3, 3) A = relay.var("data", shape=input_shape, dtype=dtype) filter_shape3 = (8, 4, 3, 3) @@ -486,7 +468,7 @@ def test_conv2d_winograd_conv(): stat_file = temp.relpath("stat.log") with open(stat_file, "w") as f: f.write( - '{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nchw_winograd.image2d", [["TENSOR", [1, 4, 3, 3], "float16"], ["TENSOR", [8, 4, 3, 3], "float16"], [1, 1], [1, 1, 1, 1], [1, 1], "float16"], {}], "config": {"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": 
"0.8.dev0"}\n' + f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nchw_winograd.image2d", [["TENSOR", [1, 4, 3, 3], "{dtype}"], ["TENSOR", [8, 4, 3, 3], "{dtype}"], [1, 1], [1, 1, 1, 1], [1, 1], "{dtype}"], {{}}], "config": {{"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}}\n' ) graph = build_run_compare( mod, params1, {"data": input_shape}, dtype, target, stat_file=stat_file @@ -496,7 +478,9 @@ def test_conv2d_winograd_conv(): @tvm.testing.requires_opencl -def test_residual_block(): +@tvm.testing.parametrize_targets("opencl -device=adreno") +@pytest.mark.skipif(tvm.testing.utils.IS_IN_CI, reason="failed due to nvidia libOpencl in the CI") +def test_residual_block(target, dtype): """ - some kind of residual block followed by convolution to have texture after residual block - scalar data type verification which should be mapped to global memory scope @@ -515,9 +499,6 @@ def test_residual_block(): | <- buffer layout_transform (NCHW4c->NCHW) """ - target = "opencl --device=adreno" - dtype = "float16" - input_shape = (1, 32, 40, 40) filter_shape1 = (32, 32, 2, 2) filter_shape2 = (32, 32, 1, 1) @@ -555,7 +536,7 @@ def test_residual_block(): kernel_size=(1, 1), ) D = relay.op.add(conv2, D) - D = D * relay.const(0.15, "float16") + D = D * relay.const(0.15, dtype) D = relay.op.nn.relu(D) conv3 = relay.nn.conv2d( @@ -607,7 +588,8 @@ def test_residual_block(): @tvm.testing.requires_opencl -def test_concat(): +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_concat(target, dtype): """ layout_transform (NCHW->NCHW4c) | <- buffer @@ -619,9 +601,6 @@ def test_concat(): | <- buffer layout_transform (NCHW4c->NCHW) """ - target = "opencl --device=adreno" - dtype = "float16" - input_shape = 
(1, 32, 40, 40) filter_shape1 = (96, 32, 2, 2) filter_shape2 = (32, 96, 2, 2) @@ -721,7 +700,8 @@ def test_concat(): @tvm.testing.requires_opencl -def test_pooling_branching_texture_params(): +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_pooling_branching_texture_params(target, dtype): """ Verification of the pooling and many branches having textures layout_transform (NCHW->NCHW4c) @@ -738,9 +718,6 @@ def test_pooling_branching_texture_params(): | <- buffer layout_transform (NCHW4c->NCHW) """ - target = "opencl --device=adreno" - dtype = "float16" - input_shape = (1, 32, 40, 40) filter_shape0 = (32, 32, 1, 1) filter_shape1 = (32, 32, 2, 2) @@ -849,7 +826,8 @@ def test_pooling_branching_texture_params(): @tvm.testing.requires_opencl -def test_branching_texture_params(): +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_branching_texture_params(target, dtype): """ Verification of passing texture to several consumers markup of relay variables in primary functions + on_device @@ -866,9 +844,6 @@ def test_branching_texture_params(): | <- buffer layout_transform (NCHW4c->NCHW) """ - target = "opencl --device=adreno" - dtype = "float16" - input_shape = (1, 32, 40, 40) filter_shape0 = (32, 32, 1, 1) filter_shape1 = (32, 32, 2, 2) @@ -976,7 +951,8 @@ def test_branching_texture_params(): # function repeat, params scope are different in reused functions @tvm.testing.requires_opencl -def test_conv2d_different_lowering_same_op(): +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_different_lowering_same_op(target, dtype): """ Use case for verification of caching compiled functions Three convolutions following by each other in this case should be @@ -993,9 +969,6 @@ def test_conv2d_different_lowering_same_op(): | <- buffer layout_transform (NCHW4c->NCHW) """ - target = "opencl --device=adreno" - dtype = "float16" - input_shape = (1, 32, 40, 40) filter_shape1 = (32, 32, 1, 1) A = relay.var("data", 
shape=input_shape, dtype=dtype) diff --git a/tests/python/relay/test_conv2d_nhwc_texture.py b/tests/python/relay/opencl_texture/test_conv2d_nhwc_texture.py similarity index 87% rename from tests/python/relay/test_conv2d_nhwc_texture.py rename to tests/python/relay/opencl_texture/test_conv2d_nhwc_texture.py index cf8116c076cc..37c22137f035 100644 --- a/tests/python/relay/test_conv2d_nhwc_texture.py +++ b/tests/python/relay/opencl_texture/test_conv2d_nhwc_texture.py @@ -23,13 +23,15 @@ from tvm.relay import testing from tvm.contrib import utils from utils.adreno_utils import gpu_preprocess, build_run_compare +import pytest -@tvm.testing.requires_opencl -def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16(): - target = "opencl --device=adreno" - dtype = "float16" +dtype = tvm.testing.parameter("float32") + +@tvm.testing.requires_opencl +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16(target, dtype): input_shape = (1, 257, 257, 32) filter_shape = (1, 1, 32, 16) bias_shape = (filter_shape[-1],) @@ -65,10 +67,8 @@ def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16(): @tvm.testing.requires_opencl -def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16_with_padding(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16_with_padding(target, dtype): input_shape = (1, 257, 257, 32) filter_shape = (1, 1, 32, 16) bias_shape = (filter_shape[-1],) @@ -107,10 +107,8 @@ def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16_with_padding(): @tvm.testing.requires_opencl -def test_conv2d_4_35_35_32x3_3_144_16(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_4_35_35_32x3_3_144_16(target, dtype): input_shape = (4, 35, 35, 32) filter_shape = (3, 3, 32, 16) bias_shape = (filter_shape[-1],) @@ -147,10 +145,8 @@ def 
test_conv2d_4_35_35_32x3_3_144_16(): @tvm.testing.requires_opencl -def test_conv2d_deeplabv3_1_513_513_3x3_3_3_32(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_deeplabv3_1_513_513_3x3_3_3_32(target, dtype): input_shape = (1, 513, 513, 3) filter_shape = (3, 3, 3, 32) bias_shape = (filter_shape[-1],) @@ -187,10 +183,8 @@ def test_conv2d_deeplabv3_1_513_513_3x3_3_3_32(): @tvm.testing.requires_opencl -def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad(target, dtype): input_shape = (1, 42, 42, 32) filter_shape = (3, 3, 32, 96) bias_shape = (1, 1, 1, 96) @@ -229,10 +223,8 @@ def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad(): @tvm.testing.requires_opencl -def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass(target, dtype): input_shape = (1, 40, 40, 32) filter_shape = (2, 2, 32, 96) bias_shape = (1, 1, 1, 96) @@ -271,10 +263,8 @@ def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass(): @tvm.testing.requires_opencl -def test_conv2d_inceptionv3_35_35_strides(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_inceptionv3_35_35_strides(target, dtype): input_shape = (1, 35, 35, 48) filter_shape = (5, 5, 48, 64) bias_shape = (1, 1, 1, 64) @@ -313,10 +303,8 @@ def test_conv2d_inceptionv3_35_35_strides(): @tvm.testing.requires_opencl -def test_conv2d_resnet50_v2_nhwc_3c(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_resnet50_v2_nhwc_3c(target, dtype): 
input_shape = (1, 224, 224, 3) filter_shape = (7, 7, 3, 64) bias_shape = (1, 1, 1, 64) @@ -356,10 +344,8 @@ def test_conv2d_resnet50_v2_nhwc_3c(): @tvm.testing.requires_opencl -def test_conv2d_inceptionv3_nhwc_3c(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_inceptionv3_nhwc_3c(target, dtype): input_shape = (1, 299, 299, 3) filter_shape = (3, 3, 3, 64) bias_shape = (1, 1, 1, 64) @@ -398,11 +384,9 @@ def test_conv2d_inceptionv3_nhwc_3c(): @tvm.testing.requires_opencl -def test_conv2d_1x1_16c16spatial(): - target = "opencl --device=adreno" - dtype = "float16" - - input_shape = (1, 256, 256, 16) +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_1x1_16c16spatial(target, dtype): + input_shape = (1, 128, 128, 16) filter_shape = (4, 4, 16, 32) bias_shape = (1, 1, 1, 32) A = relay.var("data", shape=input_shape, dtype=dtype) @@ -440,10 +424,8 @@ def test_conv2d_1x1_16c16spatial(): @tvm.testing.requires_opencl -def test_conv2d_4x4_16c16pad(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_4x4_16c16pad(target, dtype): input_shape = (1, 256, 256, 32) filter_shape = (4, 4, 32, 32) bias_shape = (1, 1, 1, 32) @@ -482,10 +464,8 @@ def test_conv2d_4x4_16c16pad(): @tvm.testing.requires_opencl -def test_conv2d_4x4x4_16c16pad(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_4x4x4_16c16pad(target, dtype): input_shape = (1, 256, 256, 32) filter_shape = (4, 4, 32, 4) bias_shape = (1, 1, 1, 4) @@ -523,10 +503,8 @@ def test_conv2d_4x4x4_16c16pad(): @tvm.testing.requires_opencl -def test_conv2d_yolov3_v2_nhwc_3c(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_yolov3_v2_nhwc_3c(target, dtype): input_shape = (1, 13, 
13, 1024) filter_shape = (1, 1, 1024, 255) A = relay.var("data", shape=input_shape, dtype=dtype) @@ -558,10 +536,8 @@ def test_conv2d_yolov3_v2_nhwc_3c(): @tvm.testing.requires_opencl -def test_conv2d_vgg16_winograd_4d(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_vgg16_winograd_4d(target, dtype): input_shape = (1, 28, 28, 512) filter_shape = (3, 3, 512, 512) bias_shape = (1, 1, 1, 512) @@ -598,7 +574,7 @@ def test_conv2d_vgg16_winograd_4d(): stat_file = temp.relpath("stat.log") with open(stat_file, "w") as f: f.write( - '{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nhwc_winograd.image2d", [["TENSOR", [1, 28, 28, 512], "float16"], ["TENSOR", [3, 3, 512, 512], "float16"], [1, 1], [1, 1, 1, 1], [1, 1], "float16"], {}], "config": {"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}\n' + f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nhwc_winograd.image2d", [["TENSOR", [1, 28, 28, 512], "{dtype}"], ["TENSOR", [3, 3, 512, 512], "{dtype}"], [1, 1], [1, 1, 1, 1], [1, 1], "{dtype}"], {{}}], "config": {{"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}}\n' ) graph = build_run_compare( mod, params1, {"data": input_shape}, dtype, target, stat_file=stat_file diff --git a/tests/python/relay/test_depthwise_conv2d_nchw_texture.py b/tests/python/relay/opencl_texture/test_depthwise_conv2d_nchw_texture.py similarity index 91% rename from 
tests/python/relay/test_depthwise_conv2d_nchw_texture.py rename to tests/python/relay/opencl_texture/test_depthwise_conv2d_nchw_texture.py index c94d085b5115..0ac92d03b6f9 100644 --- a/tests/python/relay/test_depthwise_conv2d_nchw_texture.py +++ b/tests/python/relay/opencl_texture/test_depthwise_conv2d_nchw_texture.py @@ -22,12 +22,12 @@ from tvm.relay import testing from utils.adreno_utils import gpu_preprocess, build_run_compare +dtype = tvm.testing.parameter("float32") -@tvm.testing.requires_opencl -def test_depthwise_conv2d_bias_nchwc(): - target = "opencl --device=adreno" - dtype = "float16" +@tvm.testing.requires_opencl +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_depthwise_conv2d_bias_nchwc(target, dtype): input_shape = (1, 64, 112, 112) filter_shape = (64, 1, 3, 3) bias_shape = (1, 64, 1, 1) @@ -68,10 +68,8 @@ def test_depthwise_conv2d_bias_nchwc(): @tvm.testing.requires_opencl -def test_depthwise_conv2d_nchwc(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_depthwise_conv2d_nchwc(target, dtype): input_shape = (1, 64, 112, 112) filter_shape = (64, 1, 3, 3) bias_shape = (1, 64, 1, 1) @@ -107,10 +105,8 @@ def test_depthwise_conv2d_nchwc(): @tvm.testing.requires_opencl -def test_depthwise_conv2d_bias_nchw(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_depthwise_conv2d_bias_nchw(target, dtype): input_shape = (1, 64, 112, 112) filter_shape = (64, 1, 3, 3) bias_shape = (1, 64, 1, 1) @@ -151,10 +147,8 @@ def test_depthwise_conv2d_bias_nchw(): @tvm.testing.requires_opencl -def test_depthwise_conv2d_repack_bias_nchw(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_depthwise_conv2d_repack_bias_nchw(target, dtype): input_shape = (1, 63, 112, 112) filter_shape = (63, 1, 3, 3) bias_shape = (1, 63, 1, 1) 
diff --git a/tests/python/relay/test_depthwise_conv2d_nhwc_texture.py b/tests/python/relay/opencl_texture/test_depthwise_conv2d_nhwc_texture.py similarity index 91% rename from tests/python/relay/test_depthwise_conv2d_nhwc_texture.py rename to tests/python/relay/opencl_texture/test_depthwise_conv2d_nhwc_texture.py index 16f9b8749909..3af7db3a4e1f 100644 --- a/tests/python/relay/test_depthwise_conv2d_nhwc_texture.py +++ b/tests/python/relay/opencl_texture/test_depthwise_conv2d_nhwc_texture.py @@ -22,12 +22,12 @@ from tvm.relay import testing from utils.adreno_utils import build_run_compare +dtype = tvm.testing.parameter("float32") -@tvm.testing.requires_opencl -def test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1(): - target = "opencl --device=adreno" - dtype = "float16" +@tvm.testing.requires_opencl +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1(target, dtype): input_shape = (1, 129, 129, 144) filter_shape = (3, 3, 144, 1) kernel_size = (filter_shape[0], filter_shape[1]) @@ -66,10 +66,8 @@ def test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1(): @tvm.testing.requires_opencl -def test_depthwise_conv2d_deeplabv3_4_35_35_576x3_3_576_1(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_depthwise_conv2d_deeplabv3_4_35_35_576x3_3_576_1(target, dtype): input_shape = (4, 35, 35, 576) filter_shape = (3, 3, 576, 1) kernel_size = (filter_shape[0], filter_shape[1]) @@ -108,10 +106,8 @@ def test_depthwise_conv2d_deeplabv3_4_35_35_576x3_3_576_1(): @tvm.testing.requires_opencl -def test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1_with_padding(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1_with_padding(target, dtype): input_shape = (1, 129, 129, 144) filter_shape = (3, 3, 144, 1) 
kernel_size = (filter_shape[0], filter_shape[1]) @@ -152,10 +148,8 @@ def test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1_with_padding(): @tvm.testing.requires_opencl -def test_depthwise_conv2d_1_513_513_7x3_3_7_1(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_depthwise_conv2d_1_513_513_7x3_3_7_1(target, dtype): input_shape = (1, 513, 513, 7) filter_shape = (3, 3, 7, 1) bias_shape = (filter_shape[2],) @@ -193,10 +187,8 @@ def test_depthwise_conv2d_1_513_513_7x3_3_7_1(): @tvm.testing.requires_opencl -def test_depthwise_conv2d_1_513_513_3x3_3_3_1(): - target = "opencl --device=adreno" - dtype = "float16" - +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_depthwise_conv2d_1_513_513_3x3_3_3_1(target, dtype): input_shape = (1, 513, 513, 3) filter_shape = (3, 3, 3, 1) bias_shape = (filter_shape[2],) diff --git a/tests/python/relay/utils/adreno_utils.py b/tests/python/relay/opencl_texture/utils/adreno_utils.py similarity index 100% rename from tests/python/relay/utils/adreno_utils.py rename to tests/python/relay/opencl_texture/utils/adreno_utils.py diff --git a/tests/python/unittest/test_target_codegen_vulkan.py b/tests/python/unittest/test_target_codegen_vulkan.py index 73e840208549..76cad250e053 100644 --- a/tests/python/unittest/test_target_codegen_vulkan.py +++ b/tests/python/unittest/test_target_codegen_vulkan.py @@ -16,6 +16,7 @@ # under the License. 
import os +from posixpath import split import random import re import threading @@ -91,6 +92,8 @@ def test_array_copy(dev, dtype, fuzz_seed): def test_array_vectorize_add(target, dev, dtype): arr_size = 64 lanes = 2 + if "opencl" in target and dtype == "float16": + pytest.xfail("Opencl target does not support float16") num_thread = 8 diff --git a/tests/scripts/task_config_build_gpu.sh b/tests/scripts/task_config_build_gpu.sh index f79076e213cb..5163a16da3cd 100755 --- a/tests/scripts/task_config_build_gpu.sh +++ b/tests/scripts/task_config_build_gpu.sh @@ -28,6 +28,7 @@ echo set\(USE_CUDNN ON\) >> config.cmake echo set\(USE_CUDA ON\) >> config.cmake echo set\(USE_VULKAN ON\) >> config.cmake echo set\(USE_OPENGL ON\) >> config.cmake +echo set\(USE_OPENCL ON\) >> config.cmake echo set\(USE_MICRO ON\) >> config.cmake echo set\(USE_MICRO_STANDALONE_RUNTIME ON\) >> config.cmake echo set\(USE_LLVM \"/usr/bin/llvm-config-9 --link-static\"\) >> config.cmake diff --git a/tests/scripts/task_python_integration.sh b/tests/scripts/task_python_integration.sh index fc7cbf3a88e7..5eac7b45ba61 100755 --- a/tests/scripts/task_python_integration.sh +++ b/tests/scripts/task_python_integration.sh @@ -61,12 +61,14 @@ run_pytest cython ${TVM_INTEGRATION_TESTSUITE_NAME}-dso_plugin_module-1 apps/dso run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-integration tests/python/integration # Ignoring Arm(R) Ethos(TM)-U NPU tests in the collective to run to run them in parallel in the next step. 
-run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib tests/python/contrib --ignore=tests/python/contrib/test_ethosu --ignore=tests/python/contrib/test_cmsisnn - +run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib tests/python/contrib --ignore=tests/python/contrib/test_ethosu --ignore=tests/python/contrib/test_cmsisnn # forked is needed because the global registry gets contaminated TVM_TEST_TARGETS="${TVM_RELAY_TEST_TARGETS:-llvm;cuda}" \ run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-relay tests/python/relay +# OpenCL texture test. Deselected specific tests that fail in CI +TVM_TEST_TARGETS="${TVM_RELAY_OPENCL_TEXTURE_TARGETS:-opencl}" \ + run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-opencl-texture tests/python/relay/opencl_texture # Command line driver test run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-driver tests/python/driver diff --git a/tests/scripts/task_python_integration_gpuonly.sh b/tests/scripts/task_python_integration_gpuonly.sh index 3ce5571caa0e..432984c95561 100755 --- a/tests/scripts/task_python_integration_gpuonly.sh +++ b/tests/scripts/task_python_integration_gpuonly.sh @@ -18,9 +18,10 @@ set -exo pipefail -export TVM_TEST_TARGETS="cuda;opencl;metal;rocm;nvptx;opencl -device=mali,aocl_sw_emu" +export TVM_TEST_TARGETS="cuda;opencl;metal;rocm;nvptx;opencl -device=mali,aocl_sw_emu,adreno" export PYTEST_ADDOPTS="-m gpu $PYTEST_ADDOPTS" export TVM_RELAY_TEST_TARGETS="cuda" +export TVM_RELAY_OPENCL_TEXTURE_TARGETS="opencl -device=adreno" export TVM_INTEGRATION_TESTSUITE_NAME=python-integration-gpu export TVM_INTEGRATION_GPU_ONLY=1