diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h
index f16e1e936d96..7f7f083cf303 100644
--- a/src/runtime/opencl/opencl_common.h
+++ b/src/runtime/opencl/opencl_common.h
@@ -439,9 +439,9 @@ class OpenCLTimerNode : public TimerNode {
  public:
   // Timer start
   virtual void Start() {
+    this->duration = 0;
     if (count_timer_execs == 0) {
       cl::OpenCLWorkspace::Global()->GetEventQueue(dev_).clear();
-      this->duration = 0;
       // Very first call of Start() leads to the recreation of
       // OpenCL command queue in profiling mode. This allows to run profile after inference.
       recreateCommandQueue();
diff --git a/tests/cpp-runtime/opencl/opencl_timer_test.cc b/tests/cpp-runtime/opencl/opencl_timer_test.cc
index 40ec65d8dfe2..6faf2f6a1482 100644
--- a/tests/cpp-runtime/opencl/opencl_timer_test.cc
+++ b/tests/cpp-runtime/opencl/opencl_timer_test.cc
@@ -46,6 +46,7 @@ TEST(OpenCLTimerNode, nested_timers) {
     cl_mem cl_buf = clCreateBuffer(workspace->context, CL_MEM_READ_ONLY, BUFF_SIZE * sizeof(cl_int),
                                    NULL, &err);
     OPENCL_CHECK_ERROR(err);
+    queue = workspace->GetQueue(thr->device);
     OPENCL_CALL(clEnqueueWriteBuffer(queue, cl_buf, false, 0, BUFF_SIZE * sizeof(cl_int), tmp_buf,
                                      0, NULL, &ev));
     OPENCL_CALL(clReleaseMemObject(cl_buf));
diff --git a/tests/cpp-runtime/opencl/run_gtests.cc b/tests/cpp-runtime/opencl/run_gtests.cc
index b16ae3efc74d..ffe86a7f52c0 100644
--- a/tests/cpp-runtime/opencl/run_gtests.cc
+++ b/tests/cpp-runtime/opencl/run_gtests.cc
@@ -40,7 +40,7 @@ TVM_REGISTER_GLOBAL("opencl.run_gtests").set_body([](TVMArgs args, TVMRetValue*
   argv.push_back(const_cast<char*>("opencl_run_gtests"));
 
   // add parsed arguments
-  for (int i = 0; i < parsed_args.size(); ++i) {
+  for (size_t i = 0; i < parsed_args.size(); ++i) {
     argv.push_back(const_cast<char*>(parsed_args[i].data()));
   }
 
diff --git a/tests/python/contrib/test_opencl/test_run_gtests.py b/tests/python/contrib/test_opencl/test_run_gtests.py
index 4afcf7ee8d66..ee59086b25f1 100644
--- a/tests/python/contrib/test_opencl/test_run_gtests.py
+++ b/tests/python/contrib/test_opencl/test_run_gtests.py
@@ -28,6 +28,7 @@
 # for example to run all "foo" tests twice and observe gtest output run
 # pytest -sv <this file> --gtests_args="--gtest_filter=*foo* --gtest_repeat=2"
 @tvm.testing.requires_opencl
+@pytest.mark.skipif(tvm.testing.utils.IS_IN_CI, reason="failed due to nvidia libOpencl in the CI")
 def test_run_gtests(gtest_args):
     if (
         "TVM_TRACKER_HOST" in os.environ
diff --git a/tests/python/driver/tvmc/test_compiler.py b/tests/python/driver/tvmc/test_compiler.py
index 27cd78d436c7..5535fc02249f 100644
--- a/tests/python/driver/tvmc/test_compiler.py
+++ b/tests/python/driver/tvmc/test_compiler.py
@@ -367,8 +367,9 @@ def test_compile_opencl(tflite_mobilenet_v1_0_25_128):
     tvmc_model = tvmc.load(tflite_mobilenet_v1_0_25_128)
     tvmc_package = tvmc.compile(
         tvmc_model,
-        target="opencl --host=llvm",
+        target="opencl -host=llvm",
         desired_layout="NCHW",
+        dump_code="asm",
     )
     dumps_path = tvmc_package.package_path + ".asm"
 
diff --git a/tests/python/relay/test_conv2d_nchw_texture.py b/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py
similarity index 90%
rename from tests/python/relay/test_conv2d_nchw_texture.py
rename to tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py
index ab12e40b39cb..504a2b4e3ed3 100644
--- a/tests/python/relay/test_conv2d_nchw_texture.py
+++ b/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py
@@ -22,13 +22,15 @@
 from tvm.relay import testing
 from tvm.contrib import utils
 from utils.adreno_utils import gpu_preprocess, build_run_compare
+import pytest
 
 
-@tvm.testing.requires_opencl
-def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad():
-    target = "opencl --device=adreno"
-    dtype = "float16"
+dtype = tvm.testing.parameter("float32")
+
 
+@tvm.testing.requires_opencl
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad(target, dtype):
     input_shape = (1, 32, 42, 42)
     filter_shape = (96, 32, 3, 3)
     bias_shape = (1, 96, 1, 1)
@@ -67,10 +69,8 @@ def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass(target, dtype):
     input_shape = (1, 32, 40, 40)
     filter_shape = (96, 32, 2, 2)
     bias_shape = (1, 96, 1, 1)
@@ -109,10 +109,8 @@ def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_inceptionv3_35_35_strides():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_inceptionv3_35_35_strides(target, dtype):
     input_shape = (1, 48, 35, 35)
     filter_shape = (64, 48, 5, 5)
     bias_shape = (1, 64, 1, 1)
@@ -151,10 +149,8 @@ def test_conv2d_inceptionv3_35_35_strides():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_resnet50_v2_nchw_3c():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_resnet50_v2_nchw_3c(target, dtype):
     input_shape = (1, 3, 224, 224)
     filter_shape = (64, 3, 7, 7)
     bias_shape = (1, 64, 1, 1)
@@ -194,10 +190,8 @@ def test_conv2d_resnet50_v2_nchw_3c():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_inceptionv3_nchw_3c():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_inceptionv3_nchw_3c(target, dtype):
     input_shape = (1, 3, 299, 299)
     filter_shape = (64, 3, 3, 3)
     bias_shape = (1, 64, 1, 1)
@@ -236,10 +230,8 @@ def test_conv2d_inceptionv3_nchw_3c():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_1x1_16c16spatial():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_1x1_16c16spatial(target, dtype):
     input_shape = (1, 16, 256, 256)
     filter_shape = (32, 16, 4, 4)
     bias_shape = (1, 32, 1, 1)
@@ -278,10 +270,8 @@ def test_conv2d_1x1_16c16spatial():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_4x4_16c16pad():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_4x4_16c16pad(target, dtype):
     input_shape = (1, 32, 256, 256)
     filter_shape = (32, 32, 4, 4)
     bias_shape = (1, 32, 1, 1)
@@ -320,10 +310,8 @@ def test_conv2d_4x4_16c16pad():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_4x4x4_16c16pad():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_4x4x4_16c16pad(target, dtype):
     input_shape = (1, 32, 256, 256)
     filter_shape = (4, 32, 4, 4)
     bias_shape = (1, 4, 1, 1)
@@ -362,10 +350,8 @@ def test_conv2d_4x4x4_16c16pad():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_yolov3_v2_nchw_3c():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_yolov3_v2_nchw_3c(target, dtype):
     input_shape = (1, 1024, 13, 13)
     filter_shape = (255, 1024, 1, 1)
     A = relay.var("data", shape=input_shape, dtype=dtype)
@@ -397,10 +383,8 @@ def test_conv2d_yolov3_v2_nchw_3c():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_vgg16_winograd_4d():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_vgg16_winograd_4d(target, dtype):
     input_shape = (1, 512, 28, 28)
     filter_shape = (512, 512, 3, 3)
     bias_shape = (1, 512, 1, 1)
@@ -437,7 +421,7 @@ def test_conv2d_vgg16_winograd_4d():
     stat_file = temp.relpath("stat.log")
     with open(stat_file, "w") as f:
         f.write(
-            '{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nchw_winograd.image2d", [["TENSOR", [1, 512, 28, 28], "float16"], ["TENSOR", [512, 512, 3, 3], "float16"], [1, 1], [1, 1, 1, 1], [1, 1], "float16"], {}], "config": {"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}\n'
+            f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nchw_winograd.image2d", [["TENSOR", [1, 512, 28, 28], "{dtype}"], ["TENSOR", [512, 512, 3, 3], "{dtype}"], [1, 1], [1, 1, 1, 1], [1, 1], "{dtype}"], {{}}], "config": {{"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}}\n'
         )
     graph = build_run_compare(
         mod, params1, {"data": input_shape}, dtype, target, stat_file=stat_file
@@ -447,10 +431,8 @@ def test_conv2d_vgg16_winograd_4d():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_winograd_conv():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_winograd_conv(target, dtype):
     input_shape = (1, 4, 3, 3)
     A = relay.var("data", shape=input_shape, dtype=dtype)
     filter_shape3 = (8, 4, 3, 3)
@@ -486,7 +468,7 @@ def test_conv2d_winograd_conv():
     stat_file = temp.relpath("stat.log")
     with open(stat_file, "w") as f:
         f.write(
-            '{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nchw_winograd.image2d", [["TENSOR", [1, 4, 3, 3], "float16"], ["TENSOR", [8, 4, 3, 3], "float16"], [1, 1], [1, 1, 1, 1], [1, 1], "float16"], {}], "config": {"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}\n'
+            f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nchw_winograd.image2d", [["TENSOR", [1, 4, 3, 3], "{dtype}"], ["TENSOR", [8, 4, 3, 3], "{dtype}"], [1, 1], [1, 1, 1, 1], [1, 1], "{dtype}"], {{}}], "config": {{"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}}\n'
         )
     graph = build_run_compare(
         mod, params1, {"data": input_shape}, dtype, target, stat_file=stat_file
@@ -496,7 +478,9 @@ def test_conv2d_winograd_conv():
 
 
 @tvm.testing.requires_opencl
-def test_residual_block():
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+@pytest.mark.skipif(tvm.testing.utils.IS_IN_CI, reason="failed due to nvidia libOpencl in the CI")
+def test_residual_block(target, dtype):
     """
     - some kind of residual block followed by convolution to have texture after residual block
     - scalar data type verification which should be mapped to global memory scope
@@ -515,9 +499,6 @@ def test_residual_block():
                      |                      <- buffer
                layout_transform (NCHW4c->NCHW)
     """
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
     input_shape = (1, 32, 40, 40)
     filter_shape1 = (32, 32, 2, 2)
     filter_shape2 = (32, 32, 1, 1)
@@ -555,7 +536,7 @@ def test_residual_block():
         kernel_size=(1, 1),
     )
     D = relay.op.add(conv2, D)
-    D = D * relay.const(0.15, "float16")
+    D = D * relay.const(0.15, dtype)
     D = relay.op.nn.relu(D)
 
     conv3 = relay.nn.conv2d(
@@ -607,7 +588,8 @@ def test_residual_block():
 
 
 @tvm.testing.requires_opencl
-def test_concat():
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_concat(target, dtype):
     """
         layout_transform (NCHW->NCHW4c)
                   |                      <- buffer
@@ -619,9 +601,6 @@ def test_concat():
                      |                   <- buffer
                layout_transform (NCHW4c->NCHW)
     """
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
     input_shape = (1, 32, 40, 40)
     filter_shape1 = (96, 32, 2, 2)
     filter_shape2 = (32, 96, 2, 2)
@@ -721,7 +700,8 @@ def test_concat():
 
 
 @tvm.testing.requires_opencl
-def test_pooling_branching_texture_params():
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_pooling_branching_texture_params(target, dtype):
     """
     Verification of the pooling and many branches having textures
                 layout_transform (NCHW->NCHW4c)
@@ -738,9 +718,6 @@ def test_pooling_branching_texture_params():
                              |                   <- buffer
                     layout_transform (NCHW4c->NCHW)
     """
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
     input_shape = (1, 32, 40, 40)
     filter_shape0 = (32, 32, 1, 1)
     filter_shape1 = (32, 32, 2, 2)
@@ -849,7 +826,8 @@ def test_pooling_branching_texture_params():
 
 
 @tvm.testing.requires_opencl
-def test_branching_texture_params():
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_branching_texture_params(target, dtype):
     """
     Verification of passing texture to several consumers markup of relay variables in
     primary functions + on_device
@@ -866,9 +844,6 @@ def test_branching_texture_params():
                             |                   <- buffer
                     layout_transform (NCHW4c->NCHW)
     """
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
     input_shape = (1, 32, 40, 40)
     filter_shape0 = (32, 32, 1, 1)
     filter_shape1 = (32, 32, 2, 2)
@@ -976,7 +951,8 @@ def test_branching_texture_params():
 
 # function repeat, params scope are different in reused functions
 @tvm.testing.requires_opencl
-def test_conv2d_different_lowering_same_op():
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_different_lowering_same_op(target, dtype):
     """
     Use case for verification of caching compiled functions
     Three convolutions following by each other in this case should be
@@ -993,9 +969,6 @@ def test_conv2d_different_lowering_same_op():
                          |                      <- buffer
                     layout_transform (NCHW4c->NCHW)
     """
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
     input_shape = (1, 32, 40, 40)
     filter_shape1 = (32, 32, 1, 1)
     A = relay.var("data", shape=input_shape, dtype=dtype)
diff --git a/tests/python/relay/test_conv2d_nhwc_texture.py b/tests/python/relay/opencl_texture/test_conv2d_nhwc_texture.py
similarity index 87%
rename from tests/python/relay/test_conv2d_nhwc_texture.py
rename to tests/python/relay/opencl_texture/test_conv2d_nhwc_texture.py
index cf8116c076cc..37c22137f035 100644
--- a/tests/python/relay/test_conv2d_nhwc_texture.py
+++ b/tests/python/relay/opencl_texture/test_conv2d_nhwc_texture.py
@@ -23,13 +23,15 @@
 from tvm.relay import testing
 from tvm.contrib import utils
 from utils.adreno_utils import gpu_preprocess, build_run_compare
+import pytest
 
 
-@tvm.testing.requires_opencl
-def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16():
-    target = "opencl --device=adreno"
-    dtype = "float16"
+dtype = tvm.testing.parameter("float32")
+
 
+@tvm.testing.requires_opencl
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16(target, dtype):
     input_shape = (1, 257, 257, 32)
     filter_shape = (1, 1, 32, 16)
     bias_shape = (filter_shape[-1],)
@@ -65,10 +67,8 @@ def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16_with_padding():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16_with_padding(target, dtype):
     input_shape = (1, 257, 257, 32)
     filter_shape = (1, 1, 32, 16)
     bias_shape = (filter_shape[-1],)
@@ -107,10 +107,8 @@ def test_conv2d_deeplabv3_1_257_257_32x1_1_32_16_with_padding():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_4_35_35_32x3_3_144_16():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_4_35_35_32x3_3_144_16(target, dtype):
     input_shape = (4, 35, 35, 32)
     filter_shape = (3, 3, 32, 16)
     bias_shape = (filter_shape[-1],)
@@ -147,10 +145,8 @@ def test_conv2d_4_35_35_32x3_3_144_16():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_deeplabv3_1_513_513_3x3_3_3_32():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_deeplabv3_1_513_513_3x3_3_3_32(target, dtype):
     input_shape = (1, 513, 513, 3)
     filter_shape = (3, 3, 3, 32)
     bias_shape = (filter_shape[-1],)
@@ -187,10 +183,8 @@ def test_conv2d_deeplabv3_1_513_513_3x3_3_3_32():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad(target, dtype):
     input_shape = (1, 42, 42, 32)
     filter_shape = (3, 3, 32, 96)
     bias_shape = (1, 1, 1, 96)
@@ -229,10 +223,8 @@ def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass(target, dtype):
     input_shape = (1, 40, 40, 32)
     filter_shape = (2, 2, 32, 96)
     bias_shape = (1, 1, 1, 96)
@@ -271,10 +263,8 @@ def test_conv2d_inceptionv3_64x35x35_96x64x3x3_nopad_pass():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_inceptionv3_35_35_strides():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_inceptionv3_35_35_strides(target, dtype):
     input_shape = (1, 35, 35, 48)
     filter_shape = (5, 5, 48, 64)
     bias_shape = (1, 1, 1, 64)
@@ -313,10 +303,8 @@ def test_conv2d_inceptionv3_35_35_strides():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_resnet50_v2_nhwc_3c():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_resnet50_v2_nhwc_3c(target, dtype):
     input_shape = (1, 224, 224, 3)
     filter_shape = (7, 7, 3, 64)
     bias_shape = (1, 1, 1, 64)
@@ -356,10 +344,8 @@ def test_conv2d_resnet50_v2_nhwc_3c():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_inceptionv3_nhwc_3c():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_inceptionv3_nhwc_3c(target, dtype):
     input_shape = (1, 299, 299, 3)
     filter_shape = (3, 3, 3, 64)
     bias_shape = (1, 1, 1, 64)
@@ -398,11 +384,9 @@ def test_conv2d_inceptionv3_nhwc_3c():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_1x1_16c16spatial():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
-    input_shape = (1, 256, 256, 16)
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_1x1_16c16spatial(target, dtype):
+    input_shape = (1, 128, 128, 16)
     filter_shape = (4, 4, 16, 32)
     bias_shape = (1, 1, 1, 32)
     A = relay.var("data", shape=input_shape, dtype=dtype)
@@ -440,10 +424,8 @@ def test_conv2d_1x1_16c16spatial():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_4x4_16c16pad():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_4x4_16c16pad(target, dtype):
     input_shape = (1, 256, 256, 32)
     filter_shape = (4, 4, 32, 32)
     bias_shape = (1, 1, 1, 32)
@@ -482,10 +464,8 @@ def test_conv2d_4x4_16c16pad():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_4x4x4_16c16pad():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_4x4x4_16c16pad(target, dtype):
     input_shape = (1, 256, 256, 32)
     filter_shape = (4, 4, 32, 4)
     bias_shape = (1, 1, 1, 4)
@@ -523,10 +503,8 @@ def test_conv2d_4x4x4_16c16pad():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_yolov3_v2_nhwc_3c():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_yolov3_v2_nhwc_3c(target, dtype):
     input_shape = (1, 13, 13, 1024)
     filter_shape = (1, 1, 1024, 255)
     A = relay.var("data", shape=input_shape, dtype=dtype)
@@ -558,10 +536,8 @@ def test_conv2d_yolov3_v2_nhwc_3c():
 
 
 @tvm.testing.requires_opencl
-def test_conv2d_vgg16_winograd_4d():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_vgg16_winograd_4d(target, dtype):
     input_shape = (1, 28, 28, 512)
     filter_shape = (3, 3, 512, 512)
     bias_shape = (1, 1, 1, 512)
@@ -598,7 +574,7 @@ def test_conv2d_vgg16_winograd_4d():
     stat_file = temp.relpath("stat.log")
     with open(stat_file, "w") as f:
         f.write(
-            '{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nhwc_winograd.image2d", [["TENSOR", [1, 28, 28, 512], "float16"], ["TENSOR", [3, 3, 512, 512], "float16"], [1, 1], [1, 1, 1, 1], [1, 1], "float16"], {}], "config": {"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}\n'
+            f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nhwc_winograd.image2d", [["TENSOR", [1, 28, 28, 512], "{dtype}"], ["TENSOR", [3, 3, 512, 512], "{dtype}"], [1, 1], [1, 1, 1, 1], [1, 1], "{dtype}"], {{}}], "config": {{"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}}\n'
         )
     graph = build_run_compare(
         mod, params1, {"data": input_shape}, dtype, target, stat_file=stat_file
diff --git a/tests/python/relay/test_depthwise_conv2d_nchw_texture.py b/tests/python/relay/opencl_texture/test_depthwise_conv2d_nchw_texture.py
similarity index 91%
rename from tests/python/relay/test_depthwise_conv2d_nchw_texture.py
rename to tests/python/relay/opencl_texture/test_depthwise_conv2d_nchw_texture.py
index c94d085b5115..0ac92d03b6f9 100644
--- a/tests/python/relay/test_depthwise_conv2d_nchw_texture.py
+++ b/tests/python/relay/opencl_texture/test_depthwise_conv2d_nchw_texture.py
@@ -22,12 +22,12 @@
 from tvm.relay import testing
 from utils.adreno_utils import gpu_preprocess, build_run_compare
 
+dtype = tvm.testing.parameter("float32")
 
-@tvm.testing.requires_opencl
-def test_depthwise_conv2d_bias_nchwc():
-    target = "opencl --device=adreno"
-    dtype = "float16"
 
+@tvm.testing.requires_opencl
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_depthwise_conv2d_bias_nchwc(target, dtype):
     input_shape = (1, 64, 112, 112)
     filter_shape = (64, 1, 3, 3)
     bias_shape = (1, 64, 1, 1)
@@ -68,10 +68,8 @@ def test_depthwise_conv2d_bias_nchwc():
 
 
 @tvm.testing.requires_opencl
-def test_depthwise_conv2d_nchwc():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_depthwise_conv2d_nchwc(target, dtype):
     input_shape = (1, 64, 112, 112)
     filter_shape = (64, 1, 3, 3)
     bias_shape = (1, 64, 1, 1)
@@ -107,10 +105,8 @@ def test_depthwise_conv2d_nchwc():
 
 
 @tvm.testing.requires_opencl
-def test_depthwise_conv2d_bias_nchw():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_depthwise_conv2d_bias_nchw(target, dtype):
     input_shape = (1, 64, 112, 112)
     filter_shape = (64, 1, 3, 3)
     bias_shape = (1, 64, 1, 1)
@@ -151,10 +147,8 @@ def test_depthwise_conv2d_bias_nchw():
 
 
 @tvm.testing.requires_opencl
-def test_depthwise_conv2d_repack_bias_nchw():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_depthwise_conv2d_repack_bias_nchw(target, dtype):
     input_shape = (1, 63, 112, 112)
     filter_shape = (63, 1, 3, 3)
     bias_shape = (1, 63, 1, 1)
diff --git a/tests/python/relay/test_depthwise_conv2d_nhwc_texture.py b/tests/python/relay/opencl_texture/test_depthwise_conv2d_nhwc_texture.py
similarity index 91%
rename from tests/python/relay/test_depthwise_conv2d_nhwc_texture.py
rename to tests/python/relay/opencl_texture/test_depthwise_conv2d_nhwc_texture.py
index 16f9b8749909..3af7db3a4e1f 100644
--- a/tests/python/relay/test_depthwise_conv2d_nhwc_texture.py
+++ b/tests/python/relay/opencl_texture/test_depthwise_conv2d_nhwc_texture.py
@@ -22,12 +22,12 @@
 from tvm.relay import testing
 from utils.adreno_utils import build_run_compare
 
+dtype = tvm.testing.parameter("float32")
 
-@tvm.testing.requires_opencl
-def test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1():
-    target = "opencl --device=adreno"
-    dtype = "float16"
 
+@tvm.testing.requires_opencl
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1(target, dtype):
     input_shape = (1, 129, 129, 144)
     filter_shape = (3, 3, 144, 1)
     kernel_size = (filter_shape[0], filter_shape[1])
@@ -66,10 +66,8 @@ def test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1():
 
 
 @tvm.testing.requires_opencl
-def test_depthwise_conv2d_deeplabv3_4_35_35_576x3_3_576_1():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_depthwise_conv2d_deeplabv3_4_35_35_576x3_3_576_1(target, dtype):
     input_shape = (4, 35, 35, 576)
     filter_shape = (3, 3, 576, 1)
     kernel_size = (filter_shape[0], filter_shape[1])
@@ -108,10 +106,8 @@ def test_depthwise_conv2d_deeplabv3_4_35_35_576x3_3_576_1():
 
 
 @tvm.testing.requires_opencl
-def test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1_with_padding():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1_with_padding(target, dtype):
     input_shape = (1, 129, 129, 144)
     filter_shape = (3, 3, 144, 1)
     kernel_size = (filter_shape[0], filter_shape[1])
@@ -152,10 +148,8 @@ def test_depthwise_conv2d_deeplabv3_1_129_129_144x3_3_144_1_with_padding():
 
 
 @tvm.testing.requires_opencl
-def test_depthwise_conv2d_1_513_513_7x3_3_7_1():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_depthwise_conv2d_1_513_513_7x3_3_7_1(target, dtype):
     input_shape = (1, 513, 513, 7)
     filter_shape = (3, 3, 7, 1)
     bias_shape = (filter_shape[2],)
@@ -193,10 +187,8 @@ def test_depthwise_conv2d_1_513_513_7x3_3_7_1():
 
 
 @tvm.testing.requires_opencl
-def test_depthwise_conv2d_1_513_513_3x3_3_3_1():
-    target = "opencl --device=adreno"
-    dtype = "float16"
-
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_depthwise_conv2d_1_513_513_3x3_3_3_1(target, dtype):
     input_shape = (1, 513, 513, 3)
     filter_shape = (3, 3, 3, 1)
     bias_shape = (filter_shape[2],)
diff --git a/tests/python/relay/utils/adreno_utils.py b/tests/python/relay/opencl_texture/utils/adreno_utils.py
similarity index 100%
rename from tests/python/relay/utils/adreno_utils.py
rename to tests/python/relay/opencl_texture/utils/adreno_utils.py
diff --git a/tests/python/unittest/test_target_codegen_vulkan.py b/tests/python/unittest/test_target_codegen_vulkan.py
index 73e840208549..76cad250e053 100644
--- a/tests/python/unittest/test_target_codegen_vulkan.py
+++ b/tests/python/unittest/test_target_codegen_vulkan.py
@@ -16,6 +16,7 @@
 # under the License.
 
 import os
+from posixpath import split
 import random
 import re
 import threading
@@ -91,6 +92,8 @@ def test_array_copy(dev, dtype, fuzz_seed):
 def test_array_vectorize_add(target, dev, dtype):
     arr_size = 64
     lanes = 2
+    if "opencl" in target and dtype == "float16":
+        pytest.xfail("Opencl target does not support float16")
 
     num_thread = 8
 
diff --git a/tests/scripts/task_config_build_gpu.sh b/tests/scripts/task_config_build_gpu.sh
index f79076e213cb..5163a16da3cd 100755
--- a/tests/scripts/task_config_build_gpu.sh
+++ b/tests/scripts/task_config_build_gpu.sh
@@ -28,6 +28,7 @@ echo set\(USE_CUDNN ON\) >> config.cmake
 echo set\(USE_CUDA ON\) >> config.cmake
 echo set\(USE_VULKAN ON\) >> config.cmake
 echo set\(USE_OPENGL ON\) >> config.cmake
+echo set\(USE_OPENCL ON\) >> config.cmake
 echo set\(USE_MICRO ON\) >> config.cmake
 echo set\(USE_MICRO_STANDALONE_RUNTIME ON\) >> config.cmake
 echo set\(USE_LLVM \"/usr/bin/llvm-config-9 --link-static\"\) >> config.cmake
diff --git a/tests/scripts/task_python_integration.sh b/tests/scripts/task_python_integration.sh
index fc7cbf3a88e7..5eac7b45ba61 100755
--- a/tests/scripts/task_python_integration.sh
+++ b/tests/scripts/task_python_integration.sh
@@ -61,12 +61,14 @@ run_pytest cython ${TVM_INTEGRATION_TESTSUITE_NAME}-dso_plugin_module-1 apps/dso
 run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-integration tests/python/integration
 
 # Ignoring Arm(R) Ethos(TM)-U NPU tests in the collective to run to run them in parallel in the next step.
-run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib tests/python/contrib --ignore=tests/python/contrib/test_ethosu --ignore=tests/python/contrib/test_cmsisnn
-
+run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-contrib tests/python/contrib --ignore=tests/python/contrib/test_ethosu --ignore=tests/python/contrib/test_cmsisnn 
 # forked is needed because the global registry gets contaminated
 TVM_TEST_TARGETS="${TVM_RELAY_TEST_TARGETS:-llvm;cuda}" \
     run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-relay tests/python/relay
 
+# OpenCL texture tests. Specific tests that fail in CI are deselected.
+TVM_TEST_TARGETS="${TVM_RELAY_OPENCL_TEXTURE_TARGETS:-opencl}" \
+    run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-opencl-texture tests/python/relay/opencl_texture
 # Command line driver test
 run_pytest ctypes ${TVM_INTEGRATION_TESTSUITE_NAME}-driver tests/python/driver
 
diff --git a/tests/scripts/task_python_integration_gpuonly.sh b/tests/scripts/task_python_integration_gpuonly.sh
index 3ce5571caa0e..432984c95561 100755
--- a/tests/scripts/task_python_integration_gpuonly.sh
+++ b/tests/scripts/task_python_integration_gpuonly.sh
@@ -18,9 +18,10 @@
 
 set -exo pipefail
 
-export TVM_TEST_TARGETS="cuda;opencl;metal;rocm;nvptx;opencl -device=mali,aocl_sw_emu"
+export TVM_TEST_TARGETS="cuda;opencl;metal;rocm;nvptx;opencl -device=mali,aocl_sw_emu,adreno"
 export PYTEST_ADDOPTS="-m gpu $PYTEST_ADDOPTS"
 export TVM_RELAY_TEST_TARGETS="cuda"
+export TVM_RELAY_OPENCL_TEXTURE_TARGETS="opencl -device=adreno"
 export TVM_INTEGRATION_TESTSUITE_NAME=python-integration-gpu
 export TVM_INTEGRATION_GPU_ONLY=1