From fa3fbc18a85007e48ede1c9d650cc9a7294df1a8 Mon Sep 17 00:00:00 2001 From: Andrey Malyshev Date: Fri, 13 Jan 2023 23:43:29 +0200 Subject: [PATCH 1/2] [Adreno] fix injective schedule with textures on 4d output --- python/tvm/topi/adreno/utils.py | 7 +- .../opencl_texture/test_injection_texture.py | 80 +++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 tests/python/relay/opencl_texture/test_injection_texture.py diff --git a/python/tvm/topi/adreno/utils.py b/python/tvm/topi/adreno/utils.py index 9716a62fcc7e..6be843571fb3 100644 --- a/python/tvm/topi/adreno/utils.py +++ b/python/tvm/topi/adreno/utils.py @@ -555,8 +555,13 @@ def bind_data_copy(stage, axis_to_vectorize=None): stage.vectorize(axes[-1]) else: ftc = numpy.prod(shape) - vthread = get_div(ftc, 8) fused = stage.fuse(*stage.op.axis) + if ftc % 4 == 0: + ftc = ftc / 4 + fused, vec = stage.split(fused, factor=4) + stage.vectorize(vec) + + vthread = get_div(ftc, 8) ftc = ftc / vthread # 1024 is a maximum work group size on the most Adreno GPU num_thread = get_div(ftc, 1024 // vthread) diff --git a/tests/python/relay/opencl_texture/test_injection_texture.py b/tests/python/relay/opencl_texture/test_injection_texture.py new file mode 100644 index 000000000000..70e74039b725 --- /dev/null +++ b/tests/python/relay/opencl_texture/test_injection_texture.py @@ -0,0 +1,80 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import re +import tvm +import numpy as np +from tvm import relay +from tvm.relay import testing +from tvm.contrib import utils +from utils.adreno_utils import gpu_preprocess, build_run_compare + + +dtype = tvm.testing.parameter("float32") + + +@tvm.testing.requires_opencl +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_layout_transform_to_block_nchw4c(remote, target, dtype): + """Verification of the case NCHW->NCHW4c""" + input_shape = (1, 32, 720, 1280) + A = relay.var("data", shape=input_shape, dtype=dtype) + lt = relay.layout_transform(A, "NCHW", "NCHW4c") + mod = relay.Function([A], lt) + + build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype}, target) + + +@tvm.testing.requires_opencl +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_layout_transform_to_block_nchw(remote, target, dtype): + """Verification of the case NCHW4c->NCHW""" + input_shape = (1, 36, 1, 1, 4) + A = relay.var("data", shape=input_shape, dtype=dtype) + lt = relay.layout_transform(A, "NCHW4c", "NCHW") + mod = relay.Function([A], lt) + + build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype}, target) + + +@tvm.testing.requires_opencl +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_layout_transform_to_block_nhwc4c(remote, target, dtype): + """Verification of the case NHWC->NHWC4c""" + input_shape = (1, 1, 1, 144) + A = relay.var("data", shape=input_shape, dtype=dtype) + lt = relay.layout_transform(A, "NHWC", "NHWC4c") + mod = relay.Function([A], lt) + + build_run_compare(remote, mod, 
{}, {"data": input_shape}, {"data": dtype}, target) + + +@tvm.testing.requires_opencl +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_layout_transform_to_block_nhwc(remote, target, dtype): + """Verification of the case NHWC4c->NHWC""" + input_shape = (1, 80, 80, 36, 4) + A = relay.var("data", shape=input_shape, dtype=dtype) + mean = relay.mean(A, axis=[1,2], keepdims=True) + cast = relay.cast(mean, "float16") + lt = relay.layout_transform(cast, "NHWC4c", "NHWC") + mod = relay.Function([A], lt) + + build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype}, target) + +if __name__ == "__main__": + test_layout_transform_to_block_nhwc(None, "opencl -device=adreno", "float16") From 2e9a37113de76c03d0ce6f727eace5fc32c3a18a Mon Sep 17 00:00:00 2001 From: Andrey Malyshev Date: Sat, 14 Jan 2023 00:05:38 +0200 Subject: [PATCH 2/2] fix lint --- tests/python/relay/opencl_texture/test_injection_texture.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/python/relay/opencl_texture/test_injection_texture.py b/tests/python/relay/opencl_texture/test_injection_texture.py index 70e74039b725..6fdc088f9ed4 100644 --- a/tests/python/relay/opencl_texture/test_injection_texture.py +++ b/tests/python/relay/opencl_texture/test_injection_texture.py @@ -69,12 +69,13 @@ def test_layout_transform_to_block_nhwc(remote, target, dtype): """Verification of the case NHWC4c->NHWC""" input_shape = (1, 80, 80, 36, 4) A = relay.var("data", shape=input_shape, dtype=dtype) - mean = relay.mean(A, axis=[1,2], keepdims=True) + mean = relay.mean(A, axis=[1, 2], keepdims=True) cast = relay.cast(mean, "float16") lt = relay.layout_transform(cast, "NHWC4c", "NHWC") mod = relay.Function([A], lt) build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype}, target) + if __name__ == "__main__": test_layout_transform_to_block_nhwc(None, "opencl -device=adreno", "float16")