From 93428a9b17e6fa854b4a5c1e8180c110f511925a Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Fri, 20 May 2022 12:36:55 -0500 Subject: [PATCH 1/9] Implement avg_pool2d slice op --- python/tvm/topi/hexagon/slice_ops/__init__.py | 22 + .../tvm/topi/hexagon/slice_ops/avg_pool2d.py | 198 ++++++++ python/tvm/topi/hexagon/utils.py | 75 +++ .../contrib/test_hexagon/infrastructure.py | 18 +- .../test_hexagon/test_avg_pool2d_slice.py | 434 ++++++++++++++++++ 5 files changed, 745 insertions(+), 2 deletions(-) create mode 100644 python/tvm/topi/hexagon/slice_ops/__init__.py create mode 100644 python/tvm/topi/hexagon/slice_ops/avg_pool2d.py create mode 100644 python/tvm/topi/hexagon/utils.py create mode 100644 tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py diff --git a/python/tvm/topi/hexagon/slice_ops/__init__.py b/python/tvm/topi/hexagon/slice_ops/__init__.py new file mode 100644 index 000000000000..e9d4d00c4285 --- /dev/null +++ b/python/tvm/topi/hexagon/slice_ops/__init__.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" Computes and Schedules for Hexagon slice ops. """ + +# pylint: disable=wildcard-import + +from .avg_pool2d import avg_pool2d_compute, avg_pool2d_schedule, avg_pool2d_STIR_schedule diff --git a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py new file mode 100644 index 000000000000..ef87677c7e5d --- /dev/null +++ b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py @@ -0,0 +1,198 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from tvm.ir.module import IRModule +from tvm import te +from tvm import tir +from tvm.script import tir as T +from ..utils import apply_transform, get_layout_transform_fn + + +# The slice op implementation for avg_pool2d makes serveral assumptions: +# 1) Both input and output are a multiple of croutons, and the input is already +# padded for a given output shape as per any crouton and non-crouton related +# padding. +# 2) The current implementation assumes 'count_include_pad' to be 'True'. It can +# modified to support 'False' but the element count for the pooling window must +# be pre-computed and provided as an input to reduce the run-time overhead. +# 3) 'padding' is also ignored. It must be handled outside of the sliced op. +# 4) Please note that this implementation will not work if the output was padded +# for the croutons. Since we loop over the logical output shape, this can result +# into out-of-bound access for the input. + +def avg_pool2d_compute(A, out_shape, kernel, stride, dilation): + kh, kw = kernel + rh = te.reduce_axis((0, kh), name="rh") + rw = te.reduce_axis((0, kw), name="rw") + ob, oh, ow, oc = out_shape + sh, sw = stride + dh, dw = dilation + Area = float(1) / (kh * kw) + + Sum = te.compute( + out_shape, + lambda b, h, w, c: te.sum( + A[b, h * sh + dh * rh, w * sw + dw * rw, c].astype("float32"), axis=[rh, rw] + ), + name="sum", + ) + Avg = te.compute( + out_shape, lambda b, h, w, c: (Sum[b, h, w, c] * Area).astype(A.dtype), name="avg" + ) + return Avg + + +# Schedule for input and output layout nhwc-8h2w32c2w +def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: str): + func = te.create_prim_func([ins, outs]) + s = tir.Schedule(func) + Sum = s.get_block("sum") + Avg = s.get_block("avg") + + apply_transform(s, Sum, 0, "read", input_layout) + apply_transform(s, Avg, 0, "write", output_layout) + + # Schedule 'Sum' + bn, bh, bw, bc, rx, ry = s.get_loops(Sum) + bho, bhi = s.split(bh, [None, 8]) + bwo, bwi = s.split(bw, [None, 4]) + bwio, bwii = s.split(bwi, [None, 2]) # Doesn't seem to be doing anything + bco, bci = s.split(bc, [None, 32]) + s.reorder(bn, bho, bwo, bco, bhi, bwio, rx, ry, bci, bwii) # --- DOESN'T do anything + bci_wii = s.fuse(bci, bwii) # --- DOESN'T do anything + # s.vectorize(bci_wii) # --- DOESN'T WORK -- errors out + + # Schedule 'Avg' + n, h, w, c = s.get_loops(Avg) + ho, hi = s.split(h, [None, 8]) + wo, wi = s.split(w, [None, 4]) + wio, wii = s.split(wi, [None, 2]) + co, ci = s.split(c, [None, 32]) + s.reorder(n, ho, wo, co, hi, wio, ci, wii) + ci_wii = s.fuse(ci, wii) + s.vectorize(ci_wii) + + s.compute_at(Sum, hi) + return s + + +# Schedule for output layout: n11c-1024c, input layout: nhwc-8h2w32c2w +def STIR_schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str): + func = te.create_prim_func([ins, outs]) + s = tir.Schedule(func) + Sum = s.get_block("sum") + Avg = s.get_block("avg") + + apply_transform(s, Sum, 0, "read", input_layout) + apply_transform(s, Avg, 0, "write", output_layout) + + bn, bh, bw, bc, rx, ry = s.get_loops(Sum) + bco, bci = s.split(bc, [None, 1024]) + bcio, bcii = s.split(bci, [None, 64]) + s.reorder(bn, bh, bw, bco, bcio, rx, ry, bcii) # --- DOESN'T do anything + # s.vectorize(bcii) # --- DOESN'T WORK -- errors out + + n, h, w, c = s.get_loops(Avg) + co, ci = s.split(c, [None, 1024]) + cio, cii = s.split(ci, [None, 64]) + s.vectorize(cii) + + s.compute_at(Sum, cio) + return s + + +# TIR based schedule +def avg_pool2d_STIR_schedule(outs, ins, output_layout: str, input_layout: str): + output_layout += "-1d" + input_layout += "-1d" + if output_layout == "nhwc-8h2w32c2w-1d": + return STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout, input_layout) + if output_layout == "n11c-1024c-1d": + return STIR_schedule_n11c_1024c(outs, ins, output_layout, input_layout) + else: + raise RuntimeError(f"Unexpected layout '{output_layout}'") + + +# Schedule for input and output layout nhwc-8h2w32c2w +def schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: str): + A = ins + M = outs + s = te.create_schedule([M.op]) + B = s[M].op.input_tensors[0] + + # Apply layout transformation + input_layout = get_layout_transform_fn(input_layout) + output_layout = get_layout_transform_fn(output_layout) + s[A].transform_layout(input_layout) + M_axis = s[M].transform_layout(output_layout) + + # Schedule 'M' + m_inner = s[M].fuse(M_axis[7], M_axis[6]) + s[M].vectorize(m_inner) + + # Schedule 'B' + bn, bh, bw, bc = s[B].op.axis + rx, ry = s[B].op.reduce_axis + bwo, bwi = s[B].split(bw, factor=4) + bwio, bwii = s[B].split(bwi, factor=2) + bco, bci = s[B].split(bc, factor=32) + s[B].reorder(bn, bco, bh, bwo, bwio, ry, rx, bci, bwii) + b_inner = s[B].fuse(bci, bwii) + # s[B].vectorize(b_inner) # Doesn't work + + s[B].compute_at(s[M], M_axis[5]) + return s + + +# Schedule for output layout: n11c-1024c, input layout: nhwc-8h2w32c2w +def schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str): + A = ins + M = outs + s = te.create_schedule([M.op]) + B = s[M].op.input_tensors[0] + + # Apply layout transformation + input_layout = get_layout_transform_fn(input_layout) + output_layout = get_layout_transform_fn(output_layout) + s[A].transform_layout(input_layout) + M_axis = s[M].transform_layout(output_layout) + + # Schedule 'M' + mco, mci = s[M].split(M_axis[4], factor=64) + s[M].vectorize(mci) + + # Schedule 'B' + bn, bh, bw, bc = s[B].op.axis + rx, ry = s[B].op.reduce_axis + bco, bci = s[B].split(bc, factor=64) + s[B].reorder(bco, rx, ry, bci) + # s[B].vectorize(bci) # Doesn't work + + s[B].compute_at(s[M], mco) + return s + + +# te based schedule +def avg_pool2d_schedule(outs, ins, output_layout: str, input_layout: str): + output_layout += "-2d" + input_layout += "-2d" + if output_layout == "nhwc-8h2w32c2w-2d": + return schedule_nhwc_8h2w32c2w(outs, ins, output_layout, input_layout) + if output_layout == "n11c-1024c-2d": + return schedule_n11c_1024c(outs, ins, output_layout, input_layout) + else: + raise RuntimeError(f"Unexpected layout '{output_layout}'") diff --git a/python/tvm/topi/hexagon/utils.py b/python/tvm/topi/hexagon/utils.py new file mode 100644 index 000000000000..1dc84139ae70 --- /dev/null +++ b/python/tvm/topi/hexagon/utils.py @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from tvm import te + + +def n11c_1024c_2d(n, h, w, c): + return [n, h, w, c // 1024, te.AXIS_SEPARATOR, c % 1024] + + +def n11c_1024c_1d(n, h, w, c): + return [n, h, w, c // 1024, c % 1024] + + +def nhwc_8h2w32c2w_2d(n, h, w, c): + return [n, h // 8, w // 4, c // 32, te.AXIS_SEPARATOR, h % 8, (w % 4) // 2, c % 32, w % 2] + + +def nhwc_8h2w32c2w_1d(n, h, w, c): + return [n, h // 8, w // 4, c // 32, h % 8, (w % 4) // 2, c % 32, w % 2] + + +def get_layout_transform_fn(layout): + if layout == "nhwc-8h2w32c2w-2d": + return nhwc_8h2w32c2w_2d + if layout == "nhwc-8h2w32c2w-1d": + return nhwc_8h2w32c2w_1d + elif layout == "n11c-1024c-2d": + return n11c_1024c_2d + elif layout == "n11c-1024c-1d": + return n11c_1024c_1d + else: + raise RuntimeError(f"Unexpected layout '{layout}'") + + +def apply_transform(s, block, block_index: int, buffer_type: str, layout: str): + """Apply transform layout on a buffer + + Parameters + ---------- + s: Schedule + block : BlockRV + The block that accesses the target buffer + buffer_index: int + The index of the buffer in block's read or write region + buffer_type : str + Type of the buffer index, "read" or "write" + layout : str + Layout of the buffer + """ + transform_fn = get_layout_transform_fn(layout) + if layout == "nhwc-8h2w32c2w-1d": + axis_separators = [4] + elif layout == "n11c-1024c-1d": + axis_separators = [2] + else: + raise RuntimeError(f"Unexpected layout '{layout}'") + + s.transform_layout(block, block_index, buffer_type, transform_fn) + if axis_separators: + s.set_axis_separator(block, block_index, buffer_type, axis_separators) diff --git a/tests/python/contrib/test_hexagon/infrastructure.py b/tests/python/contrib/test_hexagon/infrastructure.py index 0c9a9478c870..541414176a08 100644 --- a/tests/python/contrib/test_hexagon/infrastructure.py +++ b/tests/python/contrib/test_hexagon/infrastructure.py @@ -47,8 +47,7 @@ def allocate_hexagon_array( numpy.prod(tensor_shape[dim_i:dim_f]) for dim_i, dim_f in zip(boundaries[:-1], boundaries[1:]) ] - - arr = tvm.nd.empty(physical_shape, dtype=dtype, device=dev) + arr = tvm.nd.empty(physical_shape, dtype=dtype, device=dev, mem_scope=mem_scope) if data is not None: arr.copyfrom(data.reshape(physical_shape)) @@ -228,3 +227,18 @@ def compute(n, ho, wo, ko, hi, wi, ki): ) return output_shape, compute + + +# Transpose and reshape numpy array according to the specified layout +def transform_numpy(arr_np, layout): + if layout == "nhwc": + return arr_np + elif layout == "nhwc-8h2w32c2w": + N, H, W, C = arr_np.shape + return arr_np.reshape([N, H // 8, 8, W // 4, 2, 2, C // 32, 32]).transpose(0, 1, 3, 6, 2, 4, 7, 5) + elif layout == "n11c-1024c": + N, H, W, C = arr_np.shape + assert (H == 1 and W == 1), "The size of H and W must be 1!" + return arr_np.reshape([N, C//1024, 1024]).transpose(0, 1, 2) + else: + raise RuntimeError(f"Unexpected layout '{layout}'") diff --git a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py new file mode 100644 index 000000000000..a1d135acb358 --- /dev/null +++ b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py @@ -0,0 +1,434 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +import numpy as np + +np.set_printoptions(threshold=np.inf) +from tvm import te, topi +from tvm.tir.stmt_functor import post_order_visit + +import tvm.testing +from tvm.topi import testing +from tvm.contrib.hexagon.build import HexagonLauncher +import tvm.topi.hexagon.slice_ops as sl +from .infrastructure import allocate_hexagon_array, transform_numpy + + +input_layout = tvm.testing.parameter( + "nhwc-8h2w32c2w", +) + + +@tvm.testing.fixture +def input_np(input_shape, dtype): + return np.random.random(input_shape).astype(dtype) + + +@tvm.testing.fixture +def transformed_expected_output_np(expected_output_np, output_layout): + return transform_numpy(expected_output_np, output_layout) + + +@tvm.testing.fixture +def transformed_input_np_padded(input_np_padded, input_layout): + return transform_numpy(input_np_padded, input_layout) + + +class TestAvgPool2dSlice: + # NOTE: input_layout is always assumed to be "nhwc-8h2w32c2w" + ( + output_shape, + kernel, + stride, + dilation, + padding, + ceil_mode, + count_include_pad, + output_layout, + dtype, + use_te_sched, + ) = tvm.testing.parameters( + ( + [1, 8, 8, 32], + [3, 3], + [1, 1], + [1, 1], + [0, 0, 0, 0], + False, + True, + "nhwc-8h2w32c2w", + "float16", + False, + ), + ( + [1, 16, 16, 32], + [3, 3], + [1, 1], + [1, 1], + [0, 0, 0, 0], + False, + True, + "nhwc-8h2w32c2w", + "float16", + False, + ), + ( + [1, 8, 8, 32], + [8, 8], + [1, 1], + [1, 1], + [0, 0, 0, 0], + False, + True, + "nhwc-8h2w32c2w", + "float16", + False, + ), + ( + [1, 8, 8, 32], + [1, 1], + [1, 1], + [1, 1], + [0, 0, 0, 0], + False, + True, + "nhwc-8h2w32c2w", + "float16", + False, + ), + # Test non-one stride and dilation + ( + [1, 8, 8, 32], + [3, 3], + [2, 3], + [1, 1], + [0, 0, 0, 0], + False, + True, + "nhwc-8h2w32c2w", + "float16", + False, + ), + ( + [1, 8, 8, 32], + [3, 3], + [2, 2], + [2, 2], + [0, 0, 0, 0], + False, + True, + "nhwc-8h2w32c2w", + "float16", + False, + ), + ( + [1, 8, 8, 32], + [3, 3], + [2, 2], + [2, 3], + [0, 0, 0, 0], + False, + True, + "nhwc-8h2w32c2w", + "float16", + False, + ), + # Test non-zero padding + ( + [1, 8, 8, 32], + [3, 3], + [1, 1], + [1, 1], + [1, 1, 1, 1], + False, + True, + "nhwc-8h2w32c2w", + "float16", + False, + ), + ( + [1, 8, 8, 32], + [3, 3], + [1, 1], + [1, 1], + [1, 2, 3, 4], + False, + True, + "nhwc-8h2w32c2w", + "float16", + False, + ), + ( + [1, 8, 8, 32], + [3, 3], + [1, 1], + [1, 1], + [1, 2, 3, 4], + False, + True, + "nhwc-8h2w32c2w", + "float16", + False, + ), + ( + [1, 8, 8, 32], + [3, 3], + [3, 2], + [2, 3], + [1, 2, 3, 4], + False, + True, + "nhwc-8h2w32c2w", + "float16", + False, + ), + # Test n11c-1024c layout which will require input and output to have different layout + ( + [1, 1, 1, 2048], + [8, 8], + [1, 1], + [1, 1], + [0, 0, 0, 0], + False, + True, + "n11c-1024c", + "float16", + False, + ), + ( + [1, 1, 1, 2048], + [6, 6], + [1, 1], + [1, 1], + [0, 0, 0, 0], + False, + True, + "n11c-1024c", + "float16", + False, + ), + ( + [1, 1, 1, 2048], + [3, 3], + [2, 2], + [1, 1], + [0, 0, 0, 0], + False, + True, + "n11c-1024c", + "float16", + False, + ), + ( + [1, 1, 1, 2048], + [4, 4], + [2, 2], + [2, 3], + [0, 0, 0, 0], + False, + True, + "n11c-1024c", + "float16", + False, + ), + # Use 'te' schedule + ( + [1, 8, 8, 32], + [3, 3], + [1, 1], + [1, 1], + [0, 0, 0, 0], + False, + True, + "nhwc-8h2w32c2w", + "float16", + True, + ), + ( + [1, 1, 1, 2048], + [8, 8], + [1, 1], + [1, 1], + [0, 0, 0, 0], + False, + True, + "n11c-1024c", + "float16", + True, + ), + ) + + @tvm.testing.fixture + def expected_output_np( + self, + input_np, + kernel, + stride, + dilation, + padding, + ceil_mode, + count_include_pad, + ): + pad_before = padding[:2] + pad_after = padding[2:] + ref_np = tvm.topi.testing.poolnd_python( + input_np, + kernel, + stride, + dilation, + pad_before, + pad_after, + "avg", # pool_type + count_include_pad, + False, # ceil_mode, + layout="NHWC", + ) + return ref_np + + @tvm.testing.fixture + def input_shape(self, output_shape, kernel, padding, stride, dilation, output_layout): + # Input shape without crouton padding; 'ceil' is being ignored from calculation: + o_b, o_h, o_w, o_c = output_shape + d_h, d_w = dilation + s_h, s_w = stride + k_h, k_w = kernel + pad_before_h, pad_before_w = padding[:2] + pad_after_h, pad_after_w = padding[2:] + + if output_layout == "n11c-1024c": + assert ( + pad_before_w == 0 and pad_after_w == 0 and pad_before_h == 0 and pad_after_h == 0 + ), "Padding must be zero for n11c-1024c layout!!" + assert o_h == 1 and o_w == 1, "Output height and width must be 1!" + + in_h = (o_h - 1) * s_h + d_h * (k_h - 1) + 1 - pad_before_h - pad_after_h + in_w = (o_w - 1) * s_w + d_w * (k_w - 1) + 1 - pad_before_w - pad_after_w + + return [o_b, in_h, in_w, o_c] + + @tvm.testing.fixture + def input_shape_padded(self, input_shape, padding, output_layout): + # Input shape with regular and crouton padding. + # Input width and height are padded to a multiple of croutons. + # NOTE: Input layout is always assumed to be nhwc-8h2w32c2w. Only the output layout can be + # different. + pad_before_h, pad_before_w = padding[:2] + pad_after_h, pad_after_w = padding[2:] + padded_input_height = ((input_shape[1] + pad_before_h + pad_after_h + 7) // 8) * 8 + padded_input_width = ((input_shape[2] + pad_before_w + pad_after_w + 3) // 4) * 4 + return [input_shape[0], padded_input_height, padded_input_width, input_shape[3]] + + @tvm.testing.fixture + def input_np_padded(self, input_np, input_shape, input_shape_padded, padding): + pad_before_h, pad_before_w = padding[:2] + pad_after_h = ( + input_shape_padded[1] - input_shape[1] - pad_before_h + ) # pad_after for height with crouton padding + pad_after_w = ( + input_shape_padded[2] - input_shape[2] - pad_before_w + ) # pad_after for width with crouton padding + input_padded = np.pad( + input_np, + ((0, 0), (pad_before_h, pad_after_h), (pad_before_w, pad_after_w), (0, 0)), + "constant", + ) + return input_padded + + @tvm.testing.requires_hexagon + def test_avg_pool2d_slice( + self, + use_te_sched, + stride, + kernel, + dtype, + dilation, + padding, + count_include_pad, + input_layout, + output_layout, + output_shape, + input_shape, + input_shape_padded, + input_np, + input_np_padded, + transformed_input_np_padded, + transformed_expected_output_np, + expected_output_np, + hexagon_session, + ): + + target_hexagon = tvm.target.hexagon("v69") + A = te.placeholder(input_shape_padded, name="A", dtype=dtype) + + M = sl.avg_pool2d_compute( + A, output_shape, kernel, stride, dilation) + if not use_te_sched: + # tir schedule + tir_schedule = sl.avg_pool2d_STIR_schedule(M, A, output_layout, input_layout) + sch = tir_schedule.mod + else: + # te schedule + te_s = sl.avg_pool2d_schedule(M, A, output_layout, input_layout) + sch = te_s + + input_axis_separator = [4] + if output_layout == "nhwc-8h2w32c2w": + output_axis_separator = [4] + elif output_layout == "n11c-1024c": + output_axis_separator = [2] + else: + raise RuntimeError(f"Unexpected layout '{output_layout}'") + + with tvm.transform.PassContext(opt_level=3, config={"tir.disable_assert": True}): + func = tvm.build( + sch, + [A, M], + tvm.target.Target(target_hexagon, host=target_hexagon), + name="avg_pool2d", + ) + + input_arr = allocate_hexagon_array( + hexagon_session.device, + data=transformed_input_np_padded, + axis_separators=input_axis_separator, + mem_scope="global.vtcm", + ) + output_arr = allocate_hexagon_array( + hexagon_session.device, + transformed_expected_output_np.shape, + dtype, + axis_separators=output_axis_separator, + mem_scope="global.vtcm", + ) + + mod = hexagon_session.load_module(func) + mod(input_arr, output_arr) + + b, h, w, c = output_shape + if output_layout == "nhwc-8h2w32c2w": + output_np = output_arr.numpy().reshape([b, h // 8, w // 4, c // 32, 8, 2, 32, 2]) + elif output_layout == "n11c-1024c": + output_np = output_arr.numpy().reshape([b, c // 1024, 1024]) + else: + raise RuntimeError(f"Unexpected layout '{output_layout}'") + + np.testing.assert_allclose(output_np, transformed_expected_output_np, rtol=1e-3, atol=1e-3) + + +if __name__ == "__main__": + sys.exit(pytest.main(sys.argv)) From 793287651d7ca5a575521065151019eedd57e46c Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Fri, 27 May 2022 11:21:55 -0500 Subject: [PATCH 2/9] Address review comments and fix the STIR schedule --- python/tvm/topi/hexagon/slice_ops/__init__.py | 2 +- .../tvm/topi/hexagon/slice_ops/avg_pool2d.py | 168 ++++++------------ python/tvm/topi/hexagon/utils.py | 35 +--- .../contrib/test_hexagon/infrastructure.py | 28 +-- .../test_hexagon/test_avg_pool2d_slice.py | 140 +++++---------- 5 files changed, 113 insertions(+), 260 deletions(-) diff --git a/python/tvm/topi/hexagon/slice_ops/__init__.py b/python/tvm/topi/hexagon/slice_ops/__init__.py index e9d4d00c4285..b52d410676af 100644 --- a/python/tvm/topi/hexagon/slice_ops/__init__.py +++ b/python/tvm/topi/hexagon/slice_ops/__init__.py @@ -19,4 +19,4 @@ # pylint: disable=wildcard-import -from .avg_pool2d import avg_pool2d_compute, avg_pool2d_schedule, avg_pool2d_STIR_schedule +from .avg_pool2d import avg_pool2d_compute, avg_pool2d_STIR_schedule diff --git a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py index ef87677c7e5d..3d4a8e4ac3c9 100644 --- a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py +++ b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py @@ -19,29 +19,45 @@ from tvm import te from tvm import tir from tvm.script import tir as T -from ..utils import apply_transform, get_layout_transform_fn - +from ..utils import get_layout_transform_fn + +""" +The slice op implementation for avg_pool2d makes serveral assumptions: + +1) The input must be padded in advance to account for 'padding'. In addition, + both input and output must be padded as per the physical buffer layout. +2) The current implementation assumes 'count_include_pad' to be 'True'. It can be + modified to support 'False' case but the element count for the pooling window + must be pre-computed and provided as an input to reduce the run-time overhead. +3) 'padding' is ignored. It must be handled outside of the sliced op. +4) Please note that this implementation will not work if the output includes any + physical layout related padding as it can result into out-of-bound access + for the input. +""" + +def validate_out_shape(out_shape, in_shape, kernel, stride, dilation): + ob, oh, ow, oc = out_shape + ib, ih, iw, ic = in_shape + kh, kw = kernel + sh, sw = stride + dh, dw = dilation + if (ih < (oh - 1) * sh + dh * (kh - 1) + 1 ): + raise RuntimeError(f"Output height is too large") + if (iw < (ow - 1) * sw + dw * (kw - 1) + 1 ): + raise RuntimeError(f"Output width is too large") -# The slice op implementation for avg_pool2d makes serveral assumptions: -# 1) Both input and output are a multiple of croutons, and the input is already -# padded for a given output shape as per any crouton and non-crouton related -# padding. -# 2) The current implementation assumes 'count_include_pad' to be 'True'. It can -# modified to support 'False' but the element count for the pooling window must -# be pre-computed and provided as an input to reduce the run-time overhead. -# 3) 'padding' is also ignored. It must be handled outside of the sliced op. -# 4) Please note that this implementation will not work if the output was padded -# for the croutons. Since we loop over the logical output shape, this can result -# into out-of-bound access for the input. def avg_pool2d_compute(A, out_shape, kernel, stride, dilation): kh, kw = kernel rh = te.reduce_axis((0, kh), name="rh") rw = te.reduce_axis((0, kw), name="rw") ob, oh, ow, oc = out_shape + if (isinstance(ob, int)): + validate_out_shape(out_shape, A.shape, kernel, stride, dilation) + sh, sw = stride dh, dw = dilation - Area = float(1) / (kh * kw) + InvArea = float(1) / (kh * kw) Sum = te.compute( out_shape, @@ -51,7 +67,7 @@ def avg_pool2d_compute(A, out_shape, kernel, stride, dilation): name="sum", ) Avg = te.compute( - out_shape, lambda b, h, w, c: (Sum[b, h, w, c] * Area).astype(A.dtype), name="avg" + out_shape, lambda b, h, w, c: (Sum[b, h, w, c] * InvArea).astype(A.dtype), name="avg" ) return Avg @@ -63,18 +79,10 @@ def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: st Sum = s.get_block("sum") Avg = s.get_block("avg") - apply_transform(s, Sum, 0, "read", input_layout) - apply_transform(s, Avg, 0, "write", output_layout) - - # Schedule 'Sum' - bn, bh, bw, bc, rx, ry = s.get_loops(Sum) - bho, bhi = s.split(bh, [None, 8]) - bwo, bwi = s.split(bw, [None, 4]) - bwio, bwii = s.split(bwi, [None, 2]) # Doesn't seem to be doing anything - bco, bci = s.split(bc, [None, 32]) - s.reorder(bn, bho, bwo, bco, bhi, bwio, rx, ry, bci, bwii) # --- DOESN'T do anything - bci_wii = s.fuse(bci, bwii) # --- DOESN'T do anything - # s.vectorize(bci_wii) # --- DOESN'T WORK -- errors out + input_transform_fn = get_layout_transform_fn(input_layout) + output_transform_fn = get_layout_transform_fn(output_layout) + s.transform_layout(Sum, ("read", 0), input_transform_fn) + s.transform_layout(Avg, ("write", 0), output_transform_fn) # Schedule 'Avg' n, h, w, c = s.get_loops(Avg) @@ -86,7 +94,12 @@ def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: st ci_wii = s.fuse(ci, wii) s.vectorize(ci_wii) - s.compute_at(Sum, hi) + # Schedule 'Sum' + s.compute_at(Sum, wio) + Sum_axis = s.get_loops(Sum) + s.reorder(Sum_axis[-2], Sum_axis[-1], Sum_axis[-4], Sum_axis[-3]) + ci_wii = s.fuse(Sum_axis[-4], Sum_axis[-3]) + # s.vectorize(ci_wii) # Doesn't work return s @@ -97,102 +110,29 @@ def STIR_schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str): Sum = s.get_block("sum") Avg = s.get_block("avg") - apply_transform(s, Sum, 0, "read", input_layout) - apply_transform(s, Avg, 0, "write", output_layout) - - bn, bh, bw, bc, rx, ry = s.get_loops(Sum) - bco, bci = s.split(bc, [None, 1024]) - bcio, bcii = s.split(bci, [None, 64]) - s.reorder(bn, bh, bw, bco, bcio, rx, ry, bcii) # --- DOESN'T do anything - # s.vectorize(bcii) # --- DOESN'T WORK -- errors out + input_transform_fn = get_layout_transform_fn(input_layout) + output_transform_fn = get_layout_transform_fn(output_layout) + s.transform_layout(Sum, ("read", 0), input_transform_fn) + s.transform_layout(Avg, ("write", 0), output_transform_fn) + # Schedule 'Avg' n, h, w, c = s.get_loops(Avg) co, ci = s.split(c, [None, 1024]) cio, cii = s.split(ci, [None, 64]) s.vectorize(cii) + # Schedule 'Sum' s.compute_at(Sum, cio) + Sum_axis = s.get_loops(Sum) + s.reorder(Sum_axis[-2], Sum_axis[-1], Sum_axis[-3]) # + # s.vectorize(Sum_axis[-3]) # Doesn't work return s -# TIR based schedule +# STIR based schedule def avg_pool2d_STIR_schedule(outs, ins, output_layout: str, input_layout: str): - output_layout += "-1d" - input_layout += "-1d" - if output_layout == "nhwc-8h2w32c2w-1d": - return STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout, input_layout) - if output_layout == "n11c-1024c-1d": - return STIR_schedule_n11c_1024c(outs, ins, output_layout, input_layout) - else: - raise RuntimeError(f"Unexpected layout '{output_layout}'") - - -# Schedule for input and output layout nhwc-8h2w32c2w -def schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: str): - A = ins - M = outs - s = te.create_schedule([M.op]) - B = s[M].op.input_tensors[0] - - # Apply layout transformation - input_layout = get_layout_transform_fn(input_layout) - output_layout = get_layout_transform_fn(output_layout) - s[A].transform_layout(input_layout) - M_axis = s[M].transform_layout(output_layout) - - # Schedule 'M' - m_inner = s[M].fuse(M_axis[7], M_axis[6]) - s[M].vectorize(m_inner) - - # Schedule 'B' - bn, bh, bw, bc = s[B].op.axis - rx, ry = s[B].op.reduce_axis - bwo, bwi = s[B].split(bw, factor=4) - bwio, bwii = s[B].split(bwi, factor=2) - bco, bci = s[B].split(bc, factor=32) - s[B].reorder(bn, bco, bh, bwo, bwio, ry, rx, bci, bwii) - b_inner = s[B].fuse(bci, bwii) - # s[B].vectorize(b_inner) # Doesn't work - - s[B].compute_at(s[M], M_axis[5]) - return s - - -# Schedule for output layout: n11c-1024c, input layout: nhwc-8h2w32c2w -def schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str): - A = ins - M = outs - s = te.create_schedule([M.op]) - B = s[M].op.input_tensors[0] - - # Apply layout transformation - input_layout = get_layout_transform_fn(input_layout) - output_layout = get_layout_transform_fn(output_layout) - s[A].transform_layout(input_layout) - M_axis = s[M].transform_layout(output_layout) - - # Schedule 'M' - mco, mci = s[M].split(M_axis[4], factor=64) - s[M].vectorize(mci) - - # Schedule 'B' - bn, bh, bw, bc = s[B].op.axis - rx, ry = s[B].op.reduce_axis - bco, bci = s[B].split(bc, factor=64) - s[B].reorder(bco, rx, ry, bci) - # s[B].vectorize(bci) # Doesn't work - - s[B].compute_at(s[M], mco) - return s - - -# te based schedule -def avg_pool2d_schedule(outs, ins, output_layout: str, input_layout: str): - output_layout += "-2d" - input_layout += "-2d" if output_layout == "nhwc-8h2w32c2w-2d": - return schedule_nhwc_8h2w32c2w(outs, ins, output_layout, input_layout) + return STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout, input_layout) if output_layout == "n11c-1024c-2d": - return schedule_n11c_1024c(outs, ins, output_layout, input_layout) - else: - raise RuntimeError(f"Unexpected layout '{output_layout}'") + return STIR_schedule_n11c_1024c(outs, ins, output_layout, input_layout) + raise RuntimeError(f"Unexpected layout '{output_layout}'") diff --git a/python/tvm/topi/hexagon/utils.py b/python/tvm/topi/hexagon/utils.py index 1dc84139ae70..5dc7e15801c0 100644 --- a/python/tvm/topi/hexagon/utils.py +++ b/python/tvm/topi/hexagon/utils.py @@ -39,37 +39,8 @@ def get_layout_transform_fn(layout): return nhwc_8h2w32c2w_2d if layout == "nhwc-8h2w32c2w-1d": return nhwc_8h2w32c2w_1d - elif layout == "n11c-1024c-2d": + if layout == "n11c-1024c-2d": return n11c_1024c_2d - elif layout == "n11c-1024c-1d": + if layout == "n11c-1024c-1d": return n11c_1024c_1d - else: - raise RuntimeError(f"Unexpected layout '{layout}'") - - -def apply_transform(s, block, block_index: int, buffer_type: str, layout: str): - """Apply transform layout on a buffer - - Parameters - ---------- - s: Schedule - block : BlockRV - The block that accesses the target buffer - buffer_index: int - The index of the buffer in block's read or write region - buffer_type : str - Type of the buffer index, "read" or "write" - layout : str - Layout of the buffer - """ - transform_fn = get_layout_transform_fn(layout) - if layout == "nhwc-8h2w32c2w-1d": - axis_separators = [4] - elif layout == "n11c-1024c-1d": - axis_separators = [2] - else: - raise RuntimeError(f"Unexpected layout '{layout}'") - - s.transform_layout(block, block_index, buffer_type, transform_fn) - if axis_separators: - s.set_axis_separator(block, block_index, buffer_type, axis_separators) + raise RuntimeError(f"Unexpected layout '{layout}'") diff --git a/tests/python/contrib/test_hexagon/infrastructure.py b/tests/python/contrib/test_hexagon/infrastructure.py index 541414176a08..ae9f5f947f67 100644 --- a/tests/python/contrib/test_hexagon/infrastructure.py +++ b/tests/python/contrib/test_hexagon/infrastructure.py @@ -229,16 +229,18 @@ def compute(n, ho, wo, ko, hi, wi, ki): return output_shape, compute -# Transpose and reshape numpy array according to the specified layout -def transform_numpy(arr_np, layout): - if layout == "nhwc": - return arr_np - elif layout == "nhwc-8h2w32c2w": - N, H, W, C = arr_np.shape - return arr_np.reshape([N, H // 8, 8, W // 4, 2, 2, C // 32, 32]).transpose(0, 1, 3, 6, 2, 4, 7, 5) - elif layout == "n11c-1024c": - N, H, W, C = arr_np.shape - assert (H == 1 and W == 1), "The size of H and W must be 1!" - return arr_np.reshape([N, C//1024, 1024]).transpose(0, 1, 2) - else: - raise RuntimeError(f"Unexpected layout '{layout}'") +# Reshape and transpose numpy array according to the specified layout +def transform_numpy(arr_np, current_layout: str, new_layout:str): + if current_layout == "nhwc": + if new_layout == "nhwc": + return arr_np + if new_layout in ["nhwc-8h2w32c2w-2d", "nhwc-8h2w32c2w-1d"]: + N, H, W, C = arr_np.shape + return arr_np.reshape([N, H // 8, 8, W // 4, 2, 2, C // 32, 32]).transpose(0, 1, 3, 6, 2, 4, 7, 5) + if new_layout in ["n11c-1024c-2d", "n11c-1024c-1d"]: + N, H, W, C = arr_np.shape + assert (H == 1 and W == 1), "The size of H and W must be 1" + return arr_np.reshape([N, 1, 1, C//1024, 1024]) + + raise RuntimeError(f"Unexpected new_layout '{new_layout}'") + raise RuntimeError(f"Unexpected current_layout '{current_layout}'") diff --git a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py index a1d135acb358..d46e2db68651 100644 --- a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py +++ b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py @@ -18,9 +18,7 @@ import pytest import numpy as np -np.set_printoptions(threshold=np.inf) from tvm import te, topi -from tvm.tir.stmt_functor import post_order_visit import tvm.testing from tvm.topi import testing @@ -30,7 +28,7 @@ input_layout = tvm.testing.parameter( - "nhwc-8h2w32c2w", + "nhwc-8h2w32c2w-2d", ) @@ -41,16 +39,16 @@ def input_np(input_shape, dtype): @tvm.testing.fixture def transformed_expected_output_np(expected_output_np, output_layout): - return transform_numpy(expected_output_np, output_layout) + return transform_numpy(expected_output_np, "nhwc", output_layout) @tvm.testing.fixture def transformed_input_np_padded(input_np_padded, input_layout): - return transform_numpy(input_np_padded, input_layout) + return transform_numpy(input_np_padded, "nhwc", input_layout) class TestAvgPool2dSlice: - # NOTE: input_layout is always assumed to be "nhwc-8h2w32c2w" + # NOTE: input_layout is always assumed to be "nhwc-8h2w32c2w-2d" ( output_shape, kernel, @@ -61,7 +59,6 @@ class TestAvgPool2dSlice: count_include_pad, output_layout, dtype, - use_te_sched, ) = tvm.testing.parameters( ( [1, 8, 8, 32], @@ -71,9 +68,8 @@ class TestAvgPool2dSlice: [0, 0, 0, 0], False, True, - "nhwc-8h2w32c2w", + "nhwc-8h2w32c2w-2d", "float16", - False, ), ( [1, 16, 16, 32], @@ -83,9 +79,8 @@ class TestAvgPool2dSlice: [0, 0, 0, 0], False, True, - "nhwc-8h2w32c2w", + "nhwc-8h2w32c2w-2d", "float16", - False, ), ( [1, 8, 8, 32], @@ -95,21 +90,8 @@ class TestAvgPool2dSlice: [0, 0, 0, 0], False, True, - "nhwc-8h2w32c2w", - "float16", - False, - ), - ( - [1, 8, 8, 32], - [1, 1], - [1, 1], - [1, 1], - [0, 0, 0, 0], - False, - True, - "nhwc-8h2w32c2w", + "nhwc-8h2w32c2w-2d", "float16", - False, ), # Test non-one stride and dilation ( @@ -120,9 +102,8 @@ class TestAvgPool2dSlice: [0, 0, 0, 0], False, True, - "nhwc-8h2w32c2w", + "nhwc-8h2w32c2w-2d", "float16", - False, ), ( [1, 8, 8, 32], @@ -132,9 +113,8 @@ class TestAvgPool2dSlice: [0, 0, 0, 0], False, True, - "nhwc-8h2w32c2w", + "nhwc-8h2w32c2w-2d", "float16", - False, ), ( [1, 8, 8, 32], @@ -144,9 +124,8 @@ class TestAvgPool2dSlice: [0, 0, 0, 0], False, True, - "nhwc-8h2w32c2w", + "nhwc-8h2w32c2w-2d", "float16", - False, ), # Test non-zero padding ( @@ -157,9 +136,8 @@ class TestAvgPool2dSlice: [1, 1, 1, 1], False, True, - "nhwc-8h2w32c2w", + "nhwc-8h2w32c2w-2d", "float16", - False, ), ( [1, 8, 8, 32], @@ -169,9 +147,8 @@ class TestAvgPool2dSlice: [1, 2, 3, 4], False, True, - "nhwc-8h2w32c2w", + "nhwc-8h2w32c2w-2d", "float16", - False, ), ( [1, 8, 8, 32], @@ -181,9 +158,8 @@ class TestAvgPool2dSlice: [1, 2, 3, 4], False, True, - "nhwc-8h2w32c2w", + "nhwc-8h2w32c2w-2d", "float16", - False, ), ( [1, 8, 8, 32], @@ -193,11 +169,10 @@ class TestAvgPool2dSlice: [1, 2, 3, 4], False, True, - "nhwc-8h2w32c2w", + "nhwc-8h2w32c2w-2d", "float16", - False, ), - # Test n11c-1024c layout which will require input and output to have different layout + # Test n11c-1024c-2d layout which will require input and output to have different layout ( [1, 1, 1, 2048], [8, 8], @@ -206,9 +181,8 @@ class TestAvgPool2dSlice: [0, 0, 0, 0], False, True, - "n11c-1024c", + "n11c-1024c-2d", "float16", - False, ), ( [1, 1, 1, 2048], @@ -218,9 +192,8 @@ class TestAvgPool2dSlice: [0, 0, 0, 0], False, True, - "n11c-1024c", + "n11c-1024c-2d", "float16", - False, ), ( [1, 1, 1, 2048], @@ -230,9 +203,8 @@ class TestAvgPool2dSlice: [0, 0, 0, 0], False, True, - "n11c-1024c", + "n11c-1024c-2d", "float16", - False, ), ( [1, 1, 1, 2048], @@ -242,34 +214,8 @@ class TestAvgPool2dSlice: [0, 0, 0, 0], False, True, - "n11c-1024c", + "n11c-1024c-2d", "float16", - False, - ), - # Use 'te' schedule - ( - [1, 8, 8, 32], - [3, 3], - [1, 1], - [1, 1], - [0, 0, 0, 0], - False, - True, - "nhwc-8h2w32c2w", - "float16", - True, - ), - ( - [1, 1, 1, 2048], - [8, 8], - [1, 1], - [1, 1], - [0, 0, 0, 0], - False, - True, - "n11c-1024c", - "float16", - True, ), ) @@ -302,7 +248,7 @@ def expected_output_np( @tvm.testing.fixture def input_shape(self, output_shape, kernel, padding, stride, dilation, output_layout): - # Input shape without crouton padding; 'ceil' is being ignored from calculation: + # Input shape without any padding; 'ceil' is being ignored from calculation: o_b, o_h, o_w, o_c = output_shape d_h, d_w = dilation s_h, s_w = stride @@ -310,11 +256,11 @@ def input_shape(self, output_shape, kernel, padding, stride, dilation, output_la pad_before_h, pad_before_w = padding[:2] pad_after_h, pad_after_w = padding[2:] - if output_layout == "n11c-1024c": + if output_layout == "n11c-1024c-2d": assert ( pad_before_w == 0 and pad_after_w == 0 and pad_before_h == 0 and pad_after_h == 0 - ), "Padding must be zero for n11c-1024c layout!!" - assert o_h == 1 and o_w == 1, "Output height and width must be 1!" + ), "Padding must be zero for n11c-1024c-2d layout" + assert o_h == 1 and o_w == 1, "Output height and width must be 1" in_h = (o_h - 1) * s_h + d_h * (k_h - 1) + 1 - pad_before_h - pad_after_h in_w = (o_w - 1) * s_w + d_w * (k_w - 1) + 1 - pad_before_w - pad_after_w @@ -323,10 +269,10 @@ def input_shape(self, output_shape, kernel, padding, stride, dilation, output_la @tvm.testing.fixture def input_shape_padded(self, input_shape, padding, output_layout): - # Input shape with regular and crouton padding. - # Input width and height are padded to a multiple of croutons. - # NOTE: Input layout is always assumed to be nhwc-8h2w32c2w. Only the output layout can be - # different. + # Input shape is adjusted to account for 'padding'. Also, due to the physical + # layout of the buffer, height and width are adjusted so that they are a + # multiple of 8 and 4 respectively. + # NOTE: Input layout is always assumed to be nhwc-8h2w32c2w-2d. pad_before_h, pad_before_w = padding[:2] pad_after_h, pad_after_w = padding[2:] padded_input_height = ((input_shape[1] + pad_before_h + pad_after_h + 7) // 8) * 8 @@ -338,10 +284,10 @@ def input_np_padded(self, input_np, input_shape, input_shape_padded, padding): pad_before_h, pad_before_w = padding[:2] pad_after_h = ( input_shape_padded[1] - input_shape[1] - pad_before_h - ) # pad_after for height with crouton padding + ) pad_after_w = ( input_shape_padded[2] - input_shape[2] - pad_before_w - ) # pad_after for width with crouton padding + ) input_padded = np.pad( input_np, ((0, 0), (pad_before_h, pad_after_h), (pad_before_w, pad_after_w), (0, 0)), @@ -352,7 +298,6 @@ def input_np_padded(self, input_np, input_shape, input_shape_padded, padding): @tvm.testing.requires_hexagon def test_avg_pool2d_slice( self, - use_te_sched, stride, kernel, dtype, @@ -377,24 +322,20 @@ def test_avg_pool2d_slice( M = sl.avg_pool2d_compute( A, output_shape, kernel, stride, dilation) - if not use_te_sched: - # tir schedule - tir_schedule = sl.avg_pool2d_STIR_schedule(M, A, output_layout, input_layout) - sch = tir_schedule.mod - else: - # te schedule - te_s = sl.avg_pool2d_schedule(M, A, output_layout, input_layout) - sch = te_s + + # tir schedule + tir_schedule = sl.avg_pool2d_STIR_schedule(M, A, output_layout, input_layout) + sch = tir_schedule.mod input_axis_separator = [4] - if output_layout == "nhwc-8h2w32c2w": + if output_layout == "nhwc-8h2w32c2w-2d": + output_axis_separator = [4] + elif output_layout == "n11c-1024c-2d": output_axis_separator = [4] - elif output_layout == "n11c-1024c": - output_axis_separator = [2] else: raise RuntimeError(f"Unexpected layout '{output_layout}'") - with tvm.transform.PassContext(opt_level=3, config={"tir.disable_assert": True}): + with tvm.transform.PassContext(opt_level=3): func = tvm.build( sch, [A, M], @@ -418,12 +359,11 @@ def test_avg_pool2d_slice( mod = hexagon_session.load_module(func) mod(input_arr, output_arr) - b, h, w, c = output_shape - if output_layout == "nhwc-8h2w32c2w": + if output_layout == "nhwc-8h2w32c2w-2d": output_np = output_arr.numpy().reshape([b, h // 8, w // 4, c // 32, 8, 2, 32, 2]) - elif output_layout == "n11c-1024c": - output_np = output_arr.numpy().reshape([b, c // 1024, 1024]) + elif output_layout == "n11c-1024c-2d": + output_np = output_arr.numpy().reshape([b, 1, 1, c // 1024, 1024]) else: raise RuntimeError(f"Unexpected layout '{output_layout}'") From f731032e9403d2c09e735886ec38e7857327ae75 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Tue, 31 May 2022 17:37:01 -0500 Subject: [PATCH 3/9] Fix formatting issues --- python/tvm/topi/hexagon/slice_ops/avg_pool2d.py | 9 +++++---- tests/python/contrib/test_hexagon/infrastructure.py | 10 ++++++---- .../contrib/test_hexagon/test_avg_pool2d_slice.py | 11 +++-------- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py index 3d4a8e4ac3c9..1d6330a0acee 100644 --- a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py +++ b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py @@ -35,15 +35,16 @@ for the input. """ + def validate_out_shape(out_shape, in_shape, kernel, stride, dilation): ob, oh, ow, oc = out_shape ib, ih, iw, ic = in_shape kh, kw = kernel sh, sw = stride dh, dw = dilation - if (ih < (oh - 1) * sh + dh * (kh - 1) + 1 ): + if ih < (oh - 1) * sh + dh * (kh - 1) + 1: raise RuntimeError(f"Output height is too large") - if (iw < (ow - 1) * sw + dw * (kw - 1) + 1 ): + if iw < (ow - 1) * sw + dw * (kw - 1) + 1: raise RuntimeError(f"Output width is too large") @@ -52,7 +53,7 @@ def avg_pool2d_compute(A, out_shape, kernel, stride, dilation): rh = te.reduce_axis((0, kh), name="rh") rw = te.reduce_axis((0, kw), name="rw") ob, oh, ow, oc = out_shape - if (isinstance(ob, int)): + if isinstance(ob, int): validate_out_shape(out_shape, A.shape, kernel, stride, dilation) sh, sw = stride @@ -124,7 +125,7 @@ def STIR_schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str): # Schedule 'Sum' s.compute_at(Sum, cio) Sum_axis = s.get_loops(Sum) - s.reorder(Sum_axis[-2], Sum_axis[-1], Sum_axis[-3]) # + s.reorder(Sum_axis[-2], Sum_axis[-1], Sum_axis[-3]) # s.vectorize(Sum_axis[-3]) # Doesn't work return s diff --git a/tests/python/contrib/test_hexagon/infrastructure.py b/tests/python/contrib/test_hexagon/infrastructure.py index ae9f5f947f67..607fa82565bc 100644 --- a/tests/python/contrib/test_hexagon/infrastructure.py +++ b/tests/python/contrib/test_hexagon/infrastructure.py @@ -230,17 +230,19 @@ def compute(n, ho, wo, ko, hi, wi, ki): # Reshape and transpose numpy array according to the specified layout -def transform_numpy(arr_np, current_layout: str, new_layout:str): +def transform_numpy(arr_np, current_layout: str, new_layout: str): if current_layout == "nhwc": if new_layout == "nhwc": return arr_np if new_layout in ["nhwc-8h2w32c2w-2d", "nhwc-8h2w32c2w-1d"]: N, H, W, C = arr_np.shape - return arr_np.reshape([N, H // 8, 8, W // 4, 2, 2, C // 32, 32]).transpose(0, 1, 3, 6, 2, 4, 7, 5) + return arr_np.reshape([N, H // 8, 8, W // 4, 2, 2, C // 32, 32]).transpose( + 0, 1, 3, 6, 2, 4, 7, 5 + ) if new_layout in ["n11c-1024c-2d", "n11c-1024c-1d"]: N, H, W, C = arr_np.shape - assert (H == 1 and W == 1), "The size of H and W must be 1" - return arr_np.reshape([N, 1, 1, C//1024, 1024]) + assert H == 1 and W == 1, "The size of H and W must be 1" + return arr_np.reshape([N, 1, 1, C // 1024, 1024]) raise RuntimeError(f"Unexpected new_layout '{new_layout}'") raise RuntimeError(f"Unexpected current_layout '{current_layout}'") diff --git a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py index d46e2db68651..687e88cd2466 100644 --- a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py +++ b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py @@ -282,12 +282,8 @@ def input_shape_padded(self, input_shape, padding, output_layout): @tvm.testing.fixture def input_np_padded(self, input_np, input_shape, input_shape_padded, padding): pad_before_h, pad_before_w = padding[:2] - pad_after_h = ( - input_shape_padded[1] - input_shape[1] - pad_before_h - ) - pad_after_w = ( - input_shape_padded[2] - input_shape[2] - pad_before_w - ) + pad_after_h = input_shape_padded[1] - input_shape[1] - pad_before_h + pad_after_w = input_shape_padded[2] - input_shape[2] - pad_before_w input_padded = np.pad( input_np, ((0, 0), (pad_before_h, pad_after_h), (pad_before_w, pad_after_w), (0, 0)), @@ -320,8 +316,7 @@ def test_avg_pool2d_slice( target_hexagon = tvm.target.hexagon("v69") A = te.placeholder(input_shape_padded, name="A", dtype=dtype) - M = sl.avg_pool2d_compute( - A, output_shape, kernel, stride, dilation) + M = sl.avg_pool2d_compute(A, output_shape, kernel, stride, dilation) # tir schedule tir_schedule = sl.avg_pool2d_STIR_schedule(M, A, output_layout, input_layout) From 856474cc75641f4189af0ea1e293254916e80147 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Wed, 1 Jun 2022 14:45:08 -0500 Subject: [PATCH 4/9] Address pylint errors --- .../tvm/topi/hexagon/slice_ops/avg_pool2d.py | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py index 1d6330a0acee..8ad14854e330 100644 --- a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py +++ b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py @@ -14,15 +14,11 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +# pylint: disable=invalid-name, unused-variable, unused-argument, too-many-locals -from tvm.ir.module import IRModule -from tvm import te -from tvm import tir -from tvm.script import tir as T -from ..utils import get_layout_transform_fn +""" Compute and schedule for avg_pool2d slice op -""" -The slice op implementation for avg_pool2d makes serveral assumptions: +Please note the following assumptions made by the implementation: 1) The input must be padded in advance to account for 'padding'. In addition, both input and output must be padded as per the physical buffer layout. @@ -35,20 +31,27 @@ for the input. """ +#from tvm.ir.module import IRModule +from tvm import te +from tvm import tir +from ..utils import get_layout_transform_fn + def validate_out_shape(out_shape, in_shape, kernel, stride, dilation): - ob, oh, ow, oc = out_shape - ib, ih, iw, ic = in_shape + """Validate output shape""" + _, oh, ow, _ = out_shape + _, ih, iw, _ = in_shape kh, kw = kernel sh, sw = stride dh, dw = dilation if ih < (oh - 1) * sh + dh * (kh - 1) + 1: - raise RuntimeError(f"Output height is too large") + raise RuntimeError("Output height is too large") if iw < (ow - 1) * sw + dw * (kw - 1) + 1: - raise RuntimeError(f"Output width is too large") + raise RuntimeError("Output width is too large") def avg_pool2d_compute(A, out_shape, kernel, stride, dilation): + """avg_pool2d compute""" kh, kw = kernel rh = te.reduce_axis((0, kh), name="rh") rw = te.reduce_axis((0, kw), name="rw") @@ -72,9 +75,8 @@ def avg_pool2d_compute(A, out_shape, kernel, stride, dilation): ) return Avg - -# Schedule for input and output layout nhwc-8h2w32c2w def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: str): + """Schedule for input and output layout nhwc-8h2w32c2w""" func = te.create_prim_func([ins, outs]) s = tir.Schedule(func) Sum = s.get_block("sum") @@ -103,9 +105,8 @@ def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: st # s.vectorize(ci_wii) # Doesn't work return s - -# Schedule for output layout: n11c-1024c, input layout: nhwc-8h2w32c2w def STIR_schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str): + """Schedule for output layout: n11c-1024c, input layout: nhwc-8h2w32c2w""" func = te.create_prim_func([ins, outs]) s = tir.Schedule(func) Sum = s.get_block("sum") @@ -129,9 +130,8 @@ def STIR_schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str): # s.vectorize(Sum_axis[-3]) # Doesn't work return s - -# STIR based schedule def avg_pool2d_STIR_schedule(outs, ins, output_layout: str, input_layout: str): + """STIR based schedule""" if output_layout == "nhwc-8h2w32c2w-2d": return STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout, input_layout) if output_layout == "n11c-1024c-2d": From 5910685e84294512bf6f02a7a8815f1b8600bb07 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Thu, 2 Jun 2022 10:31:53 -0500 Subject: [PATCH 5/9] Additional formatting issues --- python/tvm/topi/hexagon/slice_ops/avg_pool2d.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py index 8ad14854e330..306be543d8fb 100644 --- a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py +++ b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py @@ -31,7 +31,6 @@ for the input. """ -#from tvm.ir.module import IRModule from tvm import te from tvm import tir from ..utils import get_layout_transform_fn @@ -75,6 +74,7 @@ def avg_pool2d_compute(A, out_shape, kernel, stride, dilation): ) return Avg + def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: str): """Schedule for input and output layout nhwc-8h2w32c2w""" func = te.create_prim_func([ins, outs]) @@ -105,6 +105,7 @@ def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: st # s.vectorize(ci_wii) # Doesn't work return s + def STIR_schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str): """Schedule for output layout: n11c-1024c, input layout: nhwc-8h2w32c2w""" func = te.create_prim_func([ins, outs]) @@ -130,6 +131,7 @@ def STIR_schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str): # s.vectorize(Sum_axis[-3]) # Doesn't work return s + def avg_pool2d_STIR_schedule(outs, ins, output_layout: str, input_layout: str): """STIR based schedule""" if output_layout == "nhwc-8h2w32c2w-2d": From 7a26a5675864bd6000999eabbfe39e58eb7fed2d Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Thu, 2 Jun 2022 11:30:55 -0500 Subject: [PATCH 6/9] more pylint fixes --- python/tvm/topi/hexagon/utils.py | 8 +++++++- tests/python/contrib/test_hexagon/infrastructure.py | 13 +++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/python/tvm/topi/hexagon/utils.py b/python/tvm/topi/hexagon/utils.py index 5dc7e15801c0..af6e3de9c350 100644 --- a/python/tvm/topi/hexagon/utils.py +++ b/python/tvm/topi/hexagon/utils.py @@ -14,27 +14,33 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - +# pylint: disable=invalid-name +"""Common hexagon specific utilities""" from tvm import te def n11c_1024c_2d(n, h, w, c): + """Return index map for n11c_1024 2d layout""" return [n, h, w, c // 1024, te.AXIS_SEPARATOR, c % 1024] def n11c_1024c_1d(n, h, w, c): + """Return index map for n11c_1024 1d layout""" return [n, h, w, c // 1024, c % 1024] def nhwc_8h2w32c2w_2d(n, h, w, c): + """Return index map for nhwc_8h2w32c2w 2d layout""" return [n, h // 8, w // 4, c // 32, te.AXIS_SEPARATOR, h % 8, (w % 4) // 2, c % 32, w % 2] def nhwc_8h2w32c2w_1d(n, h, w, c): + """Return index map for nhwc_8h2w32c2w 1d layout""" return [n, h // 8, w // 4, c // 32, h % 8, (w % 4) // 2, c % 32, w % 2] def get_layout_transform_fn(layout): + """Return index map function as per the layout string""" if layout == "nhwc-8h2w32c2w-2d": return nhwc_8h2w32c2w_2d if layout == "nhwc-8h2w32c2w-1d": diff --git a/tests/python/contrib/test_hexagon/infrastructure.py b/tests/python/contrib/test_hexagon/infrastructure.py index 607fa82565bc..a6e1fa662e4a 100644 --- a/tests/python/contrib/test_hexagon/infrastructure.py +++ b/tests/python/contrib/test_hexagon/infrastructure.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +# pylint: disable=invalid-name """ Hexagon testing infrastructure """ @@ -229,20 +230,20 @@ def compute(n, ho, wo, ko, hi, wi, ki): return output_shape, compute -# Reshape and transpose numpy array according to the specified layout def transform_numpy(arr_np, current_layout: str, new_layout: str): + """Reshape and transpose numpy array according to the specified layout""" if current_layout == "nhwc": if new_layout == "nhwc": return arr_np if new_layout in ["nhwc-8h2w32c2w-2d", "nhwc-8h2w32c2w-1d"]: - N, H, W, C = arr_np.shape - return arr_np.reshape([N, H // 8, 8, W // 4, 2, 2, C // 32, 32]).transpose( + n, h, w, c = arr_np.shape + return arr_np.reshape([n, h // 8, 8, w // 4, 2, 2, c // 32, 32]).transpose( 0, 1, 3, 6, 2, 4, 7, 5 ) if new_layout in ["n11c-1024c-2d", "n11c-1024c-1d"]: - N, H, W, C = arr_np.shape - assert H == 1 and W == 1, "The size of H and W must be 1" - return arr_np.reshape([N, 1, 1, C // 1024, 1024]) + n, h, w, c = arr_np.shape + assert h == 1 and w == 1, "The size of h and w must be 1" + return arr_np.reshape([n, 1, 1, c // 1024, 1024]) raise RuntimeError(f"Unexpected new_layout '{new_layout}'") raise RuntimeError(f"Unexpected current_layout '{current_layout}'") From cb28e17850d2284c96c2ff0a8d9b573f4298420a Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Thu, 2 Jun 2022 11:32:21 -0500 Subject: [PATCH 7/9] Changed arch version to v68 for now --- tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py index 687e88cd2466..89c76ec8e996 100644 --- a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py +++ b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py @@ -313,7 +313,7 @@ def test_avg_pool2d_slice( hexagon_session, ): - target_hexagon = tvm.target.hexagon("v69") + target_hexagon = tvm.target.hexagon("v68") A = te.placeholder(input_shape_padded, name="A", dtype=dtype) M = sl.avg_pool2d_compute(A, output_shape, kernel, stride, dilation) From 8f7e4adf2abd5397d3b543724a57f98385d0674d Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Tue, 7 Jun 2022 12:09:41 -0500 Subject: [PATCH 8/9] Changing arch version back to v69 --- tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py index 89c76ec8e996..687e88cd2466 100644 --- a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py +++ b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py @@ -313,7 +313,7 @@ def test_avg_pool2d_slice( hexagon_session, ): - target_hexagon = tvm.target.hexagon("v68") + target_hexagon = tvm.target.hexagon("v69") A = te.placeholder(input_shape_padded, name="A", dtype=dtype) M = sl.avg_pool2d_compute(A, output_shape, kernel, stride, dilation) From 1f9710885ba0743dad5a88f4786b38c3ffbeaf23 Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Fri, 10 Jun 2022 16:56:38 -0500 Subject: [PATCH 9/9] Move the test to tests/python/contrib/test_hexagon/topi --- .../contrib/test_hexagon/{ => topi}/test_avg_pool2d_slice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename tests/python/contrib/test_hexagon/{ => topi}/test_avg_pool2d_slice.py (99%) diff --git a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py b/tests/python/contrib/test_hexagon/topi/test_avg_pool2d_slice.py similarity index 99% rename from tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py rename to tests/python/contrib/test_hexagon/topi/test_avg_pool2d_slice.py index 687e88cd2466..6cbd84b7ee3a 100644 --- a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py +++ b/tests/python/contrib/test_hexagon/topi/test_avg_pool2d_slice.py @@ -24,7 +24,7 @@ from tvm.topi import testing from tvm.contrib.hexagon.build import HexagonLauncher import tvm.topi.hexagon.slice_ops as sl -from .infrastructure import allocate_hexagon_array, transform_numpy +from ..infrastructure import allocate_hexagon_array, transform_numpy input_layout = tvm.testing.parameter(