diff --git a/python/tvm/topi/adreno/utils.py b/python/tvm/topi/adreno/utils.py index 9716a62fcc7e..6be843571fb3 100644 --- a/python/tvm/topi/adreno/utils.py +++ b/python/tvm/topi/adreno/utils.py @@ -555,8 +555,13 @@ def bind_data_copy(stage, axis_to_vectorize=None): stage.vectorize(axes[-1]) else: ftc = numpy.prod(shape) - vthread = get_div(ftc, 8) fused = stage.fuse(*stage.op.axis) + if ftc % 4 == 0: + ftc = ftc / 4 + fused, vec = stage.split(fused, factor=4) + stage.vectorize(vec) + + vthread = get_div(ftc, 8) ftc = ftc / vthread # 1024 is a maximum work group size on the most Adreno GPU num_thread = get_div(ftc, 1024 // vthread) diff --git a/tests/python/relay/opencl_texture/test_injection_texture.py b/tests/python/relay/opencl_texture/test_injection_texture.py new file mode 100644 index 000000000000..6fdc088f9ed4 --- /dev/null +++ b/tests/python/relay/opencl_texture/test_injection_texture.py @@ -0,0 +1,81 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
import re

import tvm
import numpy as np
from tvm import relay
from tvm.relay import testing
from tvm.contrib import utils
from utils.adreno_utils import gpu_preprocess, build_run_compare


# Declared through tvm.testing.parameter; presumably this is what supplies the
# `dtype` argument of every test below -- confirm against the tvm.testing docs.
dtype = tvm.testing.parameter("float32")


@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
def test_layout_transform_to_block_nchw4c(remote, target, dtype):
    """Run layout_transform NCHW -> NCHW4c on a (1, 32, 720, 1280) input."""
    input_shape = (1, 32, 720, 1280)
    data = relay.var("data", shape=input_shape, dtype=dtype)
    blocked = relay.layout_transform(data, "NCHW", "NCHW4c")
    func = relay.Function([data], blocked)

    shapes = {"data": input_shape}
    dtypes = {"data": dtype}
    # No extra parameters are bound, hence the empty dict.
    build_run_compare(remote, func, {}, shapes, dtypes, target)


@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
def test_layout_transform_to_block_nchw(remote, target, dtype):
    """Run layout_transform NCHW4c -> NCHW on a (1, 36, 1, 1, 4) input."""
    input_shape = (1, 36, 1, 1, 4)
    data = relay.var("data", shape=input_shape, dtype=dtype)
    unblocked = relay.layout_transform(data, "NCHW4c", "NCHW")
    func = relay.Function([data], unblocked)

    shapes = {"data": input_shape}
    dtypes = {"data": dtype}
    build_run_compare(remote, func, {}, shapes, dtypes, target)


@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
def test_layout_transform_to_block_nhwc4c(remote, target, dtype):
    """Run layout_transform NHWC -> NHWC4c on a (1, 1, 1, 144) input."""
    input_shape = (1, 1, 1, 144)
    data = relay.var("data", shape=input_shape, dtype=dtype)
    blocked = relay.layout_transform(data, "NHWC", "NHWC4c")
    func = relay.Function([data], blocked)

    shapes = {"data": input_shape}
    dtypes = {"data": dtype}
    build_run_compare(remote, func, {}, shapes, dtypes, target)


@tvm.testing.requires_opencl
@tvm.testing.parametrize_targets("opencl -device=adreno")
def test_layout_transform_to_block_nhwc(remote, target, dtype):
    """Run layout_transform NHWC4c -> NHWC after a mean and a float16 cast.

    The (1, 80, 80, 36, 4) input is averaged over axes 1 and 2 (keeping the
    reduced dimensions), cast to float16, and only then converted to NHWC.
    """
    input_shape = (1, 80, 80, 36, 4)
    data = relay.var("data", shape=input_shape, dtype=dtype)
    averaged = relay.mean(data, axis=[1, 2], keepdims=True)
    as_half = relay.cast(averaged, "float16")
    unblocked = relay.layout_transform(as_half, "NHWC4c", "NHWC")
    func = relay.Function([data], unblocked)

    shapes = {"data": input_shape}
    dtypes = {"data": dtype}
    build_run_compare(remote, func, {}, shapes, dtypes, target)


if __name__ == "__main__":
    # Direct execution runs only the NHWC4c -> NHWC case, with no remote
    # session and "float16" passed as the dtype argument.
    test_layout_transform_to_block_nhwc(None, "opencl -device=adreno", "float16")