From 93428a9b17e6fa854b4a5c1e8180c110f511925a Mon Sep 17 00:00:00 2001
From: Jyotsna Verma <jverma@quicinc.com>
Date: Fri, 20 May 2022 12:36:55 -0500
Subject: [PATCH 1/9] Implement avg_pool2d slice op

---
 python/tvm/topi/hexagon/slice_ops/__init__.py |  22 +
 .../tvm/topi/hexagon/slice_ops/avg_pool2d.py  | 198 ++++++++
 python/tvm/topi/hexagon/utils.py              |  75 +++
 .../contrib/test_hexagon/infrastructure.py    |  18 +-
 .../test_hexagon/test_avg_pool2d_slice.py     | 434 ++++++++++++++++++
 5 files changed, 745 insertions(+), 2 deletions(-)
 create mode 100644 python/tvm/topi/hexagon/slice_ops/__init__.py
 create mode 100644 python/tvm/topi/hexagon/slice_ops/avg_pool2d.py
 create mode 100644 python/tvm/topi/hexagon/utils.py
 create mode 100644 tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py

diff --git a/python/tvm/topi/hexagon/slice_ops/__init__.py b/python/tvm/topi/hexagon/slice_ops/__init__.py
new file mode 100644
index 000000000000..e9d4d00c4285
--- /dev/null
+++ b/python/tvm/topi/hexagon/slice_ops/__init__.py
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+""" Computes and Schedules for Hexagon slice ops. """
+
+# pylint: disable=wildcard-import
+
+from .avg_pool2d import avg_pool2d_compute, avg_pool2d_schedule, avg_pool2d_STIR_schedule
diff --git a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py
new file mode 100644
index 000000000000..ef87677c7e5d
--- /dev/null
+++ b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py
@@ -0,0 +1,198 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from tvm.ir.module import IRModule
+from tvm import te
+from tvm import tir
+from tvm.script import tir as T
+from ..utils import apply_transform, get_layout_transform_fn
+
+
+# The slice op implementation for avg_pool2d makes serveral assumptions:
+# 1) Both input and output are a multiple of croutons, and the input is already
+#    padded for a given output shape as per any crouton and non-crouton related
+#    padding.
+# 2) The current implementation assumes 'count_include_pad' to be 'True'. It can
+#    modified to support 'False' but the element count for the pooling window must
+#    be pre-computed and provided as an input to reduce the run-time overhead.
+# 3) 'padding' is also ignored. It must be handled outside of the sliced op.
+# 4) Please note that this implementation will not work if the output was padded
+#    for the croutons. Since we loop over the logical output shape, this can result
+#    into out-of-bound access for the input.
+
+def avg_pool2d_compute(A, out_shape, kernel, stride, dilation):
+    kh, kw = kernel
+    rh = te.reduce_axis((0, kh), name="rh")
+    rw = te.reduce_axis((0, kw), name="rw")
+    ob, oh, ow, oc = out_shape
+    sh, sw = stride
+    dh, dw = dilation
+    Area = float(1) / (kh * kw)
+
+    Sum = te.compute(
+        out_shape,
+        lambda b, h, w, c: te.sum(
+            A[b, h * sh + dh * rh, w * sw + dw * rw, c].astype("float32"), axis=[rh, rw]
+        ),
+        name="sum",
+    )
+    Avg = te.compute(
+        out_shape, lambda b, h, w, c: (Sum[b, h, w, c] * Area).astype(A.dtype), name="avg"
+    )
+    return Avg
+
+
+# Schedule for input and output layout nhwc-8h2w32c2w
+def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: str):
+    func = te.create_prim_func([ins, outs])
+    s = tir.Schedule(func)
+    Sum = s.get_block("sum")
+    Avg = s.get_block("avg")
+
+    apply_transform(s, Sum, 0, "read", input_layout)
+    apply_transform(s, Avg, 0, "write", output_layout)
+
+    # Schedule 'Sum'
+    bn, bh, bw, bc, rx, ry = s.get_loops(Sum)
+    bho, bhi = s.split(bh, [None, 8])
+    bwo, bwi = s.split(bw, [None, 4])
+    bwio, bwii = s.split(bwi, [None, 2])  # Doesn't seem to be doing anything
+    bco, bci = s.split(bc, [None, 32])
+    s.reorder(bn, bho, bwo, bco, bhi, bwio, rx, ry, bci, bwii)  # --- DOESN'T do anything
+    bci_wii = s.fuse(bci, bwii)  # --- DOESN'T do anything
+    # s.vectorize(bci_wii) # --- DOESN'T WORK -- errors out
+
+    # Schedule 'Avg'
+    n, h, w, c = s.get_loops(Avg)
+    ho, hi = s.split(h, [None, 8])
+    wo, wi = s.split(w, [None, 4])
+    wio, wii = s.split(wi, [None, 2])
+    co, ci = s.split(c, [None, 32])
+    s.reorder(n, ho, wo, co, hi, wio, ci, wii)
+    ci_wii = s.fuse(ci, wii)
+    s.vectorize(ci_wii)
+
+    s.compute_at(Sum, hi)
+    return s
+
+
+# Schedule for output layout: n11c-1024c, input layout: nhwc-8h2w32c2w
+def STIR_schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str):
+    func = te.create_prim_func([ins, outs])
+    s = tir.Schedule(func)
+    Sum = s.get_block("sum")
+    Avg = s.get_block("avg")
+
+    apply_transform(s, Sum, 0, "read", input_layout)
+    apply_transform(s, Avg, 0, "write", output_layout)
+
+    bn, bh, bw, bc, rx, ry = s.get_loops(Sum)
+    bco, bci = s.split(bc, [None, 1024])
+    bcio, bcii = s.split(bci, [None, 64])
+    s.reorder(bn, bh, bw, bco, bcio, rx, ry, bcii)  # --- DOESN'T do anything
+    # s.vectorize(bcii) # --- DOESN'T WORK -- errors out
+
+    n, h, w, c = s.get_loops(Avg)
+    co, ci = s.split(c, [None, 1024])
+    cio, cii = s.split(ci, [None, 64])
+    s.vectorize(cii)
+
+    s.compute_at(Sum, cio)
+    return s
+
+
+# TIR based schedule
+def avg_pool2d_STIR_schedule(outs, ins, output_layout: str, input_layout: str):
+    output_layout += "-1d"
+    input_layout += "-1d"
+    if output_layout == "nhwc-8h2w32c2w-1d":
+        return STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout, input_layout)
+    if output_layout == "n11c-1024c-1d":
+        return STIR_schedule_n11c_1024c(outs, ins, output_layout, input_layout)
+    else:
+        raise RuntimeError(f"Unexpected layout '{output_layout}'")
+
+
+# Schedule for input and output layout nhwc-8h2w32c2w
+def schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: str):
+    A = ins
+    M = outs
+    s = te.create_schedule([M.op])
+    B = s[M].op.input_tensors[0]
+
+    # Apply layout transformation
+    input_layout = get_layout_transform_fn(input_layout)
+    output_layout = get_layout_transform_fn(output_layout)
+    s[A].transform_layout(input_layout)
+    M_axis = s[M].transform_layout(output_layout)
+
+    # Schedule 'M'
+    m_inner = s[M].fuse(M_axis[7], M_axis[6])
+    s[M].vectorize(m_inner)
+
+    # Schedule 'B'
+    bn, bh, bw, bc = s[B].op.axis
+    rx, ry = s[B].op.reduce_axis
+    bwo, bwi = s[B].split(bw, factor=4)
+    bwio, bwii = s[B].split(bwi, factor=2)
+    bco, bci = s[B].split(bc, factor=32)
+    s[B].reorder(bn, bco, bh, bwo, bwio, ry, rx, bci, bwii)
+    b_inner = s[B].fuse(bci, bwii)
+    # s[B].vectorize(b_inner) # Doesn't work
+
+    s[B].compute_at(s[M], M_axis[5])
+    return s
+
+
+# Schedule for output layout: n11c-1024c, input layout: nhwc-8h2w32c2w
+def schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str):
+    A = ins
+    M = outs
+    s = te.create_schedule([M.op])
+    B = s[M].op.input_tensors[0]
+
+    # Apply layout transformation
+    input_layout = get_layout_transform_fn(input_layout)
+    output_layout = get_layout_transform_fn(output_layout)
+    s[A].transform_layout(input_layout)
+    M_axis = s[M].transform_layout(output_layout)
+
+    # Schedule 'M'
+    mco, mci = s[M].split(M_axis[4], factor=64)
+    s[M].vectorize(mci)
+
+    # Schedule 'B'
+    bn, bh, bw, bc = s[B].op.axis
+    rx, ry = s[B].op.reduce_axis
+    bco, bci = s[B].split(bc, factor=64)
+    s[B].reorder(bco, rx, ry, bci)
+    # s[B].vectorize(bci) # Doesn't work
+
+    s[B].compute_at(s[M], mco)
+    return s
+
+
+# te based schedule
+def avg_pool2d_schedule(outs, ins, output_layout: str, input_layout: str):
+    output_layout += "-2d"
+    input_layout += "-2d"
+    if output_layout == "nhwc-8h2w32c2w-2d":
+        return schedule_nhwc_8h2w32c2w(outs, ins, output_layout, input_layout)
+    if output_layout == "n11c-1024c-2d":
+        return schedule_n11c_1024c(outs, ins, output_layout, input_layout)
+    else:
+        raise RuntimeError(f"Unexpected layout '{output_layout}'")
diff --git a/python/tvm/topi/hexagon/utils.py b/python/tvm/topi/hexagon/utils.py
new file mode 100644
index 000000000000..1dc84139ae70
--- /dev/null
+++ b/python/tvm/topi/hexagon/utils.py
@@ -0,0 +1,75 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from tvm import te
+
+
+def n11c_1024c_2d(n, h, w, c):
+    return [n, h, w, c // 1024, te.AXIS_SEPARATOR, c % 1024]
+
+
+def n11c_1024c_1d(n, h, w, c):
+    return [n, h, w, c // 1024, c % 1024]
+
+
+def nhwc_8h2w32c2w_2d(n, h, w, c):
+    return [n, h // 8, w // 4, c // 32, te.AXIS_SEPARATOR, h % 8, (w % 4) // 2, c % 32, w % 2]
+
+
+def nhwc_8h2w32c2w_1d(n, h, w, c):
+    return [n, h // 8, w // 4, c // 32, h % 8, (w % 4) // 2, c % 32, w % 2]
+
+
+def get_layout_transform_fn(layout):
+    if layout == "nhwc-8h2w32c2w-2d":
+        return nhwc_8h2w32c2w_2d
+    if layout == "nhwc-8h2w32c2w-1d":
+        return nhwc_8h2w32c2w_1d
+    elif layout == "n11c-1024c-2d":
+        return n11c_1024c_2d
+    elif layout == "n11c-1024c-1d":
+        return n11c_1024c_1d
+    else:
+        raise RuntimeError(f"Unexpected layout '{layout}'")
+
+
+def apply_transform(s, block, block_index: int, buffer_type: str, layout: str):
+    """Apply transform layout on a buffer
+
+    Parameters
+    ----------
+    s: Schedule
+    block : BlockRV
+        The block that accesses the target buffer
+    buffer_index: int
+        The index of the buffer in block's read or write region
+    buffer_type : str
+        Type of the buffer index, "read" or "write"
+    layout : str
+        Layout of the buffer
+    """
+    transform_fn = get_layout_transform_fn(layout)
+    if layout == "nhwc-8h2w32c2w-1d":
+        axis_separators = [4]
+    elif layout == "n11c-1024c-1d":
+        axis_separators = [2]
+    else:
+        raise RuntimeError(f"Unexpected layout '{layout}'")
+
+    s.transform_layout(block, block_index, buffer_type, transform_fn)
+    if axis_separators:
+        s.set_axis_separator(block, block_index, buffer_type, axis_separators)
diff --git a/tests/python/contrib/test_hexagon/infrastructure.py b/tests/python/contrib/test_hexagon/infrastructure.py
index 0c9a9478c870..541414176a08 100644
--- a/tests/python/contrib/test_hexagon/infrastructure.py
+++ b/tests/python/contrib/test_hexagon/infrastructure.py
@@ -47,8 +47,7 @@ def allocate_hexagon_array(
         numpy.prod(tensor_shape[dim_i:dim_f])
         for dim_i, dim_f in zip(boundaries[:-1], boundaries[1:])
     ]
-
-    arr = tvm.nd.empty(physical_shape, dtype=dtype, device=dev)
+    arr = tvm.nd.empty(physical_shape, dtype=dtype, device=dev, mem_scope=mem_scope)
 
     if data is not None:
         arr.copyfrom(data.reshape(physical_shape))
@@ -228,3 +227,18 @@ def compute(n, ho, wo, ko, hi, wi, ki):
         )
 
     return output_shape, compute
+
+
+# Transpose and reshape numpy array according to the specified layout
+def transform_numpy(arr_np, layout):
+    if layout == "nhwc":
+        return arr_np
+    elif layout == "nhwc-8h2w32c2w":
+        N, H, W, C = arr_np.shape
+        return arr_np.reshape([N, H // 8, 8, W // 4, 2, 2, C // 32, 32]).transpose(0, 1, 3, 6, 2, 4, 7, 5)
+    elif layout == "n11c-1024c":
+        N, H, W, C = arr_np.shape
+        assert (H == 1 and W == 1), "The size of H and W must be 1!"
+        return arr_np.reshape([N, C//1024, 1024]).transpose(0, 1, 2)
+    else:
+        raise RuntimeError(f"Unexpected layout '{layout}'")
diff --git a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py
new file mode 100644
index 000000000000..a1d135acb358
--- /dev/null
+++ b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py
@@ -0,0 +1,434 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+import numpy as np
+
+np.set_printoptions(threshold=np.inf)
+from tvm import te, topi
+from tvm.tir.stmt_functor import post_order_visit
+
+import tvm.testing
+from tvm.topi import testing
+from tvm.contrib.hexagon.build import HexagonLauncher
+import tvm.topi.hexagon.slice_ops as sl
+from .infrastructure import allocate_hexagon_array, transform_numpy
+
+
+input_layout = tvm.testing.parameter(
+    "nhwc-8h2w32c2w",
+)
+
+
+@tvm.testing.fixture
+def input_np(input_shape, dtype):
+    return np.random.random(input_shape).astype(dtype)
+
+
+@tvm.testing.fixture
+def transformed_expected_output_np(expected_output_np, output_layout):
+    return transform_numpy(expected_output_np, output_layout)
+
+
+@tvm.testing.fixture
+def transformed_input_np_padded(input_np_padded, input_layout):
+    return transform_numpy(input_np_padded, input_layout)
+
+
+class TestAvgPool2dSlice:
+    # NOTE: input_layout is always assumed to be "nhwc-8h2w32c2w"
+    (
+        output_shape,
+        kernel,
+        stride,
+        dilation,
+        padding,
+        ceil_mode,
+        count_include_pad,
+        output_layout,
+        dtype,
+        use_te_sched,
+    ) = tvm.testing.parameters(
+        (
+            [1, 8, 8, 32],
+            [3, 3],
+            [1, 1],
+            [1, 1],
+            [0, 0, 0, 0],
+            False,
+            True,
+            "nhwc-8h2w32c2w",
+            "float16",
+            False,
+        ),
+        (
+            [1, 16, 16, 32],
+            [3, 3],
+            [1, 1],
+            [1, 1],
+            [0, 0, 0, 0],
+            False,
+            True,
+            "nhwc-8h2w32c2w",
+            "float16",
+            False,
+        ),
+        (
+            [1, 8, 8, 32],
+            [8, 8],
+            [1, 1],
+            [1, 1],
+            [0, 0, 0, 0],
+            False,
+            True,
+            "nhwc-8h2w32c2w",
+            "float16",
+            False,
+        ),
+        (
+            [1, 8, 8, 32],
+            [1, 1],
+            [1, 1],
+            [1, 1],
+            [0, 0, 0, 0],
+            False,
+            True,
+            "nhwc-8h2w32c2w",
+            "float16",
+            False,
+        ),
+        # Test non-one stride and dilation
+        (
+            [1, 8, 8, 32],
+            [3, 3],
+            [2, 3],
+            [1, 1],
+            [0, 0, 0, 0],
+            False,
+            True,
+            "nhwc-8h2w32c2w",
+            "float16",
+            False,
+        ),
+        (
+            [1, 8, 8, 32],
+            [3, 3],
+            [2, 2],
+            [2, 2],
+            [0, 0, 0, 0],
+            False,
+            True,
+            "nhwc-8h2w32c2w",
+            "float16",
+            False,
+        ),
+        (
+            [1, 8, 8, 32],
+            [3, 3],
+            [2, 2],
+            [2, 3],
+            [0, 0, 0, 0],
+            False,
+            True,
+            "nhwc-8h2w32c2w",
+            "float16",
+            False,
+        ),
+        # Test non-zero padding
+        (
+            [1, 8, 8, 32],
+            [3, 3],
+            [1, 1],
+            [1, 1],
+            [1, 1, 1, 1],
+            False,
+            True,
+            "nhwc-8h2w32c2w",
+            "float16",
+            False,
+        ),
+        (
+            [1, 8, 8, 32],
+            [3, 3],
+            [1, 1],
+            [1, 1],
+            [1, 2, 3, 4],
+            False,
+            True,
+            "nhwc-8h2w32c2w",
+            "float16",
+            False,
+        ),
+        (
+            [1, 8, 8, 32],
+            [3, 3],
+            [1, 1],
+            [1, 1],
+            [1, 2, 3, 4],
+            False,
+            True,
+            "nhwc-8h2w32c2w",
+            "float16",
+            False,
+        ),
+        (
+            [1, 8, 8, 32],
+            [3, 3],
+            [3, 2],
+            [2, 3],
+            [1, 2, 3, 4],
+            False,
+            True,
+            "nhwc-8h2w32c2w",
+            "float16",
+            False,
+        ),
+        # Test n11c-1024c layout which will require input and output to have different layout
+        (
+            [1, 1, 1, 2048],
+            [8, 8],
+            [1, 1],
+            [1, 1],
+            [0, 0, 0, 0],
+            False,
+            True,
+            "n11c-1024c",
+            "float16",
+            False,
+        ),
+        (
+            [1, 1, 1, 2048],
+            [6, 6],
+            [1, 1],
+            [1, 1],
+            [0, 0, 0, 0],
+            False,
+            True,
+            "n11c-1024c",
+            "float16",
+            False,
+        ),
+        (
+            [1, 1, 1, 2048],
+            [3, 3],
+            [2, 2],
+            [1, 1],
+            [0, 0, 0, 0],
+            False,
+            True,
+            "n11c-1024c",
+            "float16",
+            False,
+        ),
+        (
+            [1, 1, 1, 2048],
+            [4, 4],
+            [2, 2],
+            [2, 3],
+            [0, 0, 0, 0],
+            False,
+            True,
+            "n11c-1024c",
+            "float16",
+            False,
+        ),
+        # Use 'te' schedule
+        (
+            [1, 8, 8, 32],
+            [3, 3],
+            [1, 1],
+            [1, 1],
+            [0, 0, 0, 0],
+            False,
+            True,
+            "nhwc-8h2w32c2w",
+            "float16",
+            True,
+        ),
+        (
+            [1, 1, 1, 2048],
+            [8, 8],
+            [1, 1],
+            [1, 1],
+            [0, 0, 0, 0],
+            False,
+            True,
+            "n11c-1024c",
+            "float16",
+            True,
+        ),
+    )
+
+    @tvm.testing.fixture
+    def expected_output_np(
+        self,
+        input_np,
+        kernel,
+        stride,
+        dilation,
+        padding,
+        ceil_mode,
+        count_include_pad,
+    ):
+        pad_before = padding[:2]
+        pad_after = padding[2:]
+        ref_np = tvm.topi.testing.poolnd_python(
+            input_np,
+            kernel,
+            stride,
+            dilation,
+            pad_before,
+            pad_after,
+            "avg",  # pool_type
+            count_include_pad,
+            False,  # ceil_mode,
+            layout="NHWC",
+        )
+        return ref_np
+
+    @tvm.testing.fixture
+    def input_shape(self, output_shape, kernel, padding, stride, dilation, output_layout):
+        # Input shape without crouton padding; 'ceil' is being ignored from calculation:
+        o_b, o_h, o_w, o_c = output_shape
+        d_h, d_w = dilation
+        s_h, s_w = stride
+        k_h, k_w = kernel
+        pad_before_h, pad_before_w = padding[:2]
+        pad_after_h, pad_after_w = padding[2:]
+
+        if output_layout == "n11c-1024c":
+            assert (
+                pad_before_w == 0 and pad_after_w == 0 and pad_before_h == 0 and pad_after_h == 0
+            ), "Padding must be zero for n11c-1024c layout!!"
+            assert o_h == 1 and o_w == 1, "Output height and width must be 1!"
+
+        in_h = (o_h - 1) * s_h + d_h * (k_h - 1) + 1 - pad_before_h - pad_after_h
+        in_w = (o_w - 1) * s_w + d_w * (k_w - 1) + 1 - pad_before_w - pad_after_w
+
+        return [o_b, in_h, in_w, o_c]
+
+    @tvm.testing.fixture
+    def input_shape_padded(self, input_shape, padding, output_layout):
+        # Input shape with regular and crouton padding.
+        # Input width and height are padded to a multiple of croutons.
+        # NOTE: Input layout is always assumed to be nhwc-8h2w32c2w. Only the output layout can be
+        # different.
+        pad_before_h, pad_before_w = padding[:2]
+        pad_after_h, pad_after_w = padding[2:]
+        padded_input_height = ((input_shape[1] + pad_before_h + pad_after_h + 7) // 8) * 8
+        padded_input_width = ((input_shape[2] + pad_before_w + pad_after_w + 3) // 4) * 4
+        return [input_shape[0], padded_input_height, padded_input_width, input_shape[3]]
+
+    @tvm.testing.fixture
+    def input_np_padded(self, input_np, input_shape, input_shape_padded, padding):
+        pad_before_h, pad_before_w = padding[:2]
+        pad_after_h = (
+            input_shape_padded[1] - input_shape[1] - pad_before_h
+        )  # pad_after for height with crouton padding
+        pad_after_w = (
+            input_shape_padded[2] - input_shape[2] - pad_before_w
+        )  # pad_after for width with crouton padding
+        input_padded = np.pad(
+            input_np,
+            ((0, 0), (pad_before_h, pad_after_h), (pad_before_w, pad_after_w), (0, 0)),
+            "constant",
+        )
+        return input_padded
+
+    @tvm.testing.requires_hexagon
+    def test_avg_pool2d_slice(
+        self,
+        use_te_sched,
+        stride,
+        kernel,
+        dtype,
+        dilation,
+        padding,
+        count_include_pad,
+        input_layout,
+        output_layout,
+        output_shape,
+        input_shape,
+        input_shape_padded,
+        input_np,
+        input_np_padded,
+        transformed_input_np_padded,
+        transformed_expected_output_np,
+        expected_output_np,
+        hexagon_session,
+    ):
+
+        target_hexagon = tvm.target.hexagon("v69")
+        A = te.placeholder(input_shape_padded, name="A", dtype=dtype)
+
+        M = sl.avg_pool2d_compute(
+            A, output_shape, kernel, stride, dilation)
+        if not use_te_sched:
+            # tir schedule
+            tir_schedule = sl.avg_pool2d_STIR_schedule(M, A, output_layout, input_layout)
+            sch = tir_schedule.mod
+        else:
+            # te schedule
+            te_s = sl.avg_pool2d_schedule(M, A, output_layout, input_layout)
+            sch = te_s
+
+        input_axis_separator = [4]
+        if output_layout == "nhwc-8h2w32c2w":
+            output_axis_separator = [4]
+        elif output_layout == "n11c-1024c":
+            output_axis_separator = [2]
+        else:
+            raise RuntimeError(f"Unexpected layout '{output_layout}'")
+
+        with tvm.transform.PassContext(opt_level=3, config={"tir.disable_assert": True}):
+            func = tvm.build(
+                sch,
+                [A, M],
+                tvm.target.Target(target_hexagon, host=target_hexagon),
+                name="avg_pool2d",
+            )
+
+        input_arr = allocate_hexagon_array(
+            hexagon_session.device,
+            data=transformed_input_np_padded,
+            axis_separators=input_axis_separator,
+            mem_scope="global.vtcm",
+        )
+        output_arr = allocate_hexagon_array(
+            hexagon_session.device,
+            transformed_expected_output_np.shape,
+            dtype,
+            axis_separators=output_axis_separator,
+            mem_scope="global.vtcm",
+        )
+
+        mod = hexagon_session.load_module(func)
+        mod(input_arr, output_arr)
+
+        b, h, w, c = output_shape
+        if output_layout == "nhwc-8h2w32c2w":
+            output_np = output_arr.numpy().reshape([b, h // 8, w // 4, c // 32, 8, 2, 32, 2])
+        elif output_layout == "n11c-1024c":
+            output_np = output_arr.numpy().reshape([b, c // 1024, 1024])
+        else:
+            raise RuntimeError(f"Unexpected layout '{output_layout}'")
+
+        np.testing.assert_allclose(output_np, transformed_expected_output_np, rtol=1e-3, atol=1e-3)
+
+
+if __name__ == "__main__":
+    sys.exit(pytest.main(sys.argv))

From 793287651d7ca5a575521065151019eedd57e46c Mon Sep 17 00:00:00 2001
From: Jyotsna Verma <jverma@quicinc.com>
Date: Fri, 27 May 2022 11:21:55 -0500
Subject: [PATCH 2/9] Address review comments and fix the STIR schedule

---
 python/tvm/topi/hexagon/slice_ops/__init__.py |   2 +-
 .../tvm/topi/hexagon/slice_ops/avg_pool2d.py  | 168 ++++++------------
 python/tvm/topi/hexagon/utils.py              |  35 +---
 .../contrib/test_hexagon/infrastructure.py    |  28 +--
 .../test_hexagon/test_avg_pool2d_slice.py     | 140 +++++----------
 5 files changed, 113 insertions(+), 260 deletions(-)

diff --git a/python/tvm/topi/hexagon/slice_ops/__init__.py b/python/tvm/topi/hexagon/slice_ops/__init__.py
index e9d4d00c4285..b52d410676af 100644
--- a/python/tvm/topi/hexagon/slice_ops/__init__.py
+++ b/python/tvm/topi/hexagon/slice_ops/__init__.py
@@ -19,4 +19,4 @@
 
 # pylint: disable=wildcard-import
 
-from .avg_pool2d import avg_pool2d_compute, avg_pool2d_schedule, avg_pool2d_STIR_schedule
+from .avg_pool2d import avg_pool2d_compute, avg_pool2d_STIR_schedule
diff --git a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py
index ef87677c7e5d..3d4a8e4ac3c9 100644
--- a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py
+++ b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py
@@ -19,29 +19,45 @@
 from tvm import te
 from tvm import tir
 from tvm.script import tir as T
-from ..utils import apply_transform, get_layout_transform_fn
-
+from ..utils import get_layout_transform_fn
+
+"""
+The slice op implementation for avg_pool2d makes serveral assumptions:
+
+1) The input must be padded in advance to account for 'padding'. In addition,
+   both input and output must be padded as per the physical buffer layout.
+2) The current implementation assumes 'count_include_pad' to be 'True'. It can be
+   modified to support 'False' case but the element count for the pooling window
+   must be pre-computed and provided as an input to reduce the run-time overhead.
+3) 'padding' is ignored. It must be handled outside of the sliced op.
+4) Please note that this implementation will not work if the output includes any
+   physical layout related padding as it can result into out-of-bound access
+   for the input.
+"""
+
+def validate_out_shape(out_shape, in_shape, kernel, stride, dilation):
+    ob, oh, ow, oc = out_shape
+    ib, ih, iw, ic = in_shape
+    kh, kw = kernel
+    sh, sw = stride
+    dh, dw = dilation
+    if (ih < (oh - 1) * sh + dh * (kh - 1)  + 1 ):
+        raise RuntimeError(f"Output height is too large")
+    if (iw < (ow - 1) * sw + dw * (kw - 1)  + 1 ):
+        raise RuntimeError(f"Output width is too large")
 
-# The slice op implementation for avg_pool2d makes serveral assumptions:
-# 1) Both input and output are a multiple of croutons, and the input is already
-#    padded for a given output shape as per any crouton and non-crouton related
-#    padding.
-# 2) The current implementation assumes 'count_include_pad' to be 'True'. It can
-#    modified to support 'False' but the element count for the pooling window must
-#    be pre-computed and provided as an input to reduce the run-time overhead.
-# 3) 'padding' is also ignored. It must be handled outside of the sliced op.
-# 4) Please note that this implementation will not work if the output was padded
-#    for the croutons. Since we loop over the logical output shape, this can result
-#    into out-of-bound access for the input.
 
 def avg_pool2d_compute(A, out_shape, kernel, stride, dilation):
     kh, kw = kernel
     rh = te.reduce_axis((0, kh), name="rh")
     rw = te.reduce_axis((0, kw), name="rw")
     ob, oh, ow, oc = out_shape
+    if (isinstance(ob, int)):
+        validate_out_shape(out_shape, A.shape, kernel, stride, dilation)
+
     sh, sw = stride
     dh, dw = dilation
-    Area = float(1) / (kh * kw)
+    InvArea = float(1) / (kh * kw)
 
     Sum = te.compute(
         out_shape,
@@ -51,7 +67,7 @@ def avg_pool2d_compute(A, out_shape, kernel, stride, dilation):
         name="sum",
     )
     Avg = te.compute(
-        out_shape, lambda b, h, w, c: (Sum[b, h, w, c] * Area).astype(A.dtype), name="avg"
+        out_shape, lambda b, h, w, c: (Sum[b, h, w, c] * InvArea).astype(A.dtype), name="avg"
     )
     return Avg
 
@@ -63,18 +79,10 @@ def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: st
     Sum = s.get_block("sum")
     Avg = s.get_block("avg")
 
-    apply_transform(s, Sum, 0, "read", input_layout)
-    apply_transform(s, Avg, 0, "write", output_layout)
-
-    # Schedule 'Sum'
-    bn, bh, bw, bc, rx, ry = s.get_loops(Sum)
-    bho, bhi = s.split(bh, [None, 8])
-    bwo, bwi = s.split(bw, [None, 4])
-    bwio, bwii = s.split(bwi, [None, 2])  # Doesn't seem to be doing anything
-    bco, bci = s.split(bc, [None, 32])
-    s.reorder(bn, bho, bwo, bco, bhi, bwio, rx, ry, bci, bwii)  # --- DOESN'T do anything
-    bci_wii = s.fuse(bci, bwii)  # --- DOESN'T do anything
-    # s.vectorize(bci_wii) # --- DOESN'T WORK -- errors out
+    input_transform_fn = get_layout_transform_fn(input_layout)
+    output_transform_fn = get_layout_transform_fn(output_layout)
+    s.transform_layout(Sum, ("read", 0), input_transform_fn)
+    s.transform_layout(Avg, ("write", 0), output_transform_fn)
 
     # Schedule 'Avg'
     n, h, w, c = s.get_loops(Avg)
@@ -86,7 +94,12 @@ def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: st
     ci_wii = s.fuse(ci, wii)
     s.vectorize(ci_wii)
 
-    s.compute_at(Sum, hi)
+    # Schedule 'Sum'
+    s.compute_at(Sum, wio)
+    Sum_axis = s.get_loops(Sum)
+    s.reorder(Sum_axis[-2], Sum_axis[-1], Sum_axis[-4], Sum_axis[-3])
+    ci_wii = s.fuse(Sum_axis[-4], Sum_axis[-3])
+    # s.vectorize(ci_wii) # Doesn't work
     return s
 
 
@@ -97,102 +110,29 @@ def STIR_schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str):
     Sum = s.get_block("sum")
     Avg = s.get_block("avg")
 
-    apply_transform(s, Sum, 0, "read", input_layout)
-    apply_transform(s, Avg, 0, "write", output_layout)
-
-    bn, bh, bw, bc, rx, ry = s.get_loops(Sum)
-    bco, bci = s.split(bc, [None, 1024])
-    bcio, bcii = s.split(bci, [None, 64])
-    s.reorder(bn, bh, bw, bco, bcio, rx, ry, bcii)  # --- DOESN'T do anything
-    # s.vectorize(bcii) # --- DOESN'T WORK -- errors out
+    input_transform_fn = get_layout_transform_fn(input_layout)
+    output_transform_fn = get_layout_transform_fn(output_layout)
+    s.transform_layout(Sum, ("read", 0), input_transform_fn)
+    s.transform_layout(Avg, ("write", 0), output_transform_fn)
 
+    # Schedule 'Avg'
     n, h, w, c = s.get_loops(Avg)
     co, ci = s.split(c, [None, 1024])
     cio, cii = s.split(ci, [None, 64])
     s.vectorize(cii)
 
+    # Schedule 'Sum'
     s.compute_at(Sum, cio)
+    Sum_axis = s.get_loops(Sum)
+    s.reorder(Sum_axis[-2], Sum_axis[-1], Sum_axis[-3]) #
+    # s.vectorize(Sum_axis[-3]) # Doesn't work
     return s
 
 
-# TIR based schedule
+# STIR based schedule
 def avg_pool2d_STIR_schedule(outs, ins, output_layout: str, input_layout: str):
-    output_layout += "-1d"
-    input_layout += "-1d"
-    if output_layout == "nhwc-8h2w32c2w-1d":
-        return STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout, input_layout)
-    if output_layout == "n11c-1024c-1d":
-        return STIR_schedule_n11c_1024c(outs, ins, output_layout, input_layout)
-    else:
-        raise RuntimeError(f"Unexpected layout '{output_layout}'")
-
-
-# Schedule for input and output layout nhwc-8h2w32c2w
-def schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: str):
-    A = ins
-    M = outs
-    s = te.create_schedule([M.op])
-    B = s[M].op.input_tensors[0]
-
-    # Apply layout transformation
-    input_layout = get_layout_transform_fn(input_layout)
-    output_layout = get_layout_transform_fn(output_layout)
-    s[A].transform_layout(input_layout)
-    M_axis = s[M].transform_layout(output_layout)
-
-    # Schedule 'M'
-    m_inner = s[M].fuse(M_axis[7], M_axis[6])
-    s[M].vectorize(m_inner)
-
-    # Schedule 'B'
-    bn, bh, bw, bc = s[B].op.axis
-    rx, ry = s[B].op.reduce_axis
-    bwo, bwi = s[B].split(bw, factor=4)
-    bwio, bwii = s[B].split(bwi, factor=2)
-    bco, bci = s[B].split(bc, factor=32)
-    s[B].reorder(bn, bco, bh, bwo, bwio, ry, rx, bci, bwii)
-    b_inner = s[B].fuse(bci, bwii)
-    # s[B].vectorize(b_inner) # Doesn't work
-
-    s[B].compute_at(s[M], M_axis[5])
-    return s
-
-
-# Schedule for output layout: n11c-1024c, input layout: nhwc-8h2w32c2w
-def schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str):
-    A = ins
-    M = outs
-    s = te.create_schedule([M.op])
-    B = s[M].op.input_tensors[0]
-
-    # Apply layout transformation
-    input_layout = get_layout_transform_fn(input_layout)
-    output_layout = get_layout_transform_fn(output_layout)
-    s[A].transform_layout(input_layout)
-    M_axis = s[M].transform_layout(output_layout)
-
-    # Schedule 'M'
-    mco, mci = s[M].split(M_axis[4], factor=64)
-    s[M].vectorize(mci)
-
-    # Schedule 'B'
-    bn, bh, bw, bc = s[B].op.axis
-    rx, ry = s[B].op.reduce_axis
-    bco, bci = s[B].split(bc, factor=64)
-    s[B].reorder(bco, rx, ry, bci)
-    # s[B].vectorize(bci) # Doesn't work
-
-    s[B].compute_at(s[M], mco)
-    return s
-
-
-# te based schedule
-def avg_pool2d_schedule(outs, ins, output_layout: str, input_layout: str):
-    output_layout += "-2d"
-    input_layout += "-2d"
     if output_layout == "nhwc-8h2w32c2w-2d":
-        return schedule_nhwc_8h2w32c2w(outs, ins, output_layout, input_layout)
+        return STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout, input_layout)
     if output_layout == "n11c-1024c-2d":
-        return schedule_n11c_1024c(outs, ins, output_layout, input_layout)
-    else:
-        raise RuntimeError(f"Unexpected layout '{output_layout}'")
+        return STIR_schedule_n11c_1024c(outs, ins, output_layout, input_layout)
+    raise RuntimeError(f"Unexpected layout '{output_layout}'")
diff --git a/python/tvm/topi/hexagon/utils.py b/python/tvm/topi/hexagon/utils.py
index 1dc84139ae70..5dc7e15801c0 100644
--- a/python/tvm/topi/hexagon/utils.py
+++ b/python/tvm/topi/hexagon/utils.py
@@ -39,37 +39,8 @@ def get_layout_transform_fn(layout):
         return nhwc_8h2w32c2w_2d
     if layout == "nhwc-8h2w32c2w-1d":
         return nhwc_8h2w32c2w_1d
-    elif layout == "n11c-1024c-2d":
+    if layout == "n11c-1024c-2d":
         return n11c_1024c_2d
-    elif layout == "n11c-1024c-1d":
+    if layout == "n11c-1024c-1d":
         return n11c_1024c_1d
-    else:
-        raise RuntimeError(f"Unexpected layout '{layout}'")
-
-
-def apply_transform(s, block, block_index: int, buffer_type: str, layout: str):
-    """Apply transform layout on a buffer
-
-    Parameters
-    ----------
-    s: Schedule
-    block : BlockRV
-        The block that accesses the target buffer
-    buffer_index: int
-        The index of the buffer in block's read or write region
-    buffer_type : str
-        Type of the buffer index, "read" or "write"
-    layout : str
-        Layout of the buffer
-    """
-    transform_fn = get_layout_transform_fn(layout)
-    if layout == "nhwc-8h2w32c2w-1d":
-        axis_separators = [4]
-    elif layout == "n11c-1024c-1d":
-        axis_separators = [2]
-    else:
-        raise RuntimeError(f"Unexpected layout '{layout}'")
-
-    s.transform_layout(block, block_index, buffer_type, transform_fn)
-    if axis_separators:
-        s.set_axis_separator(block, block_index, buffer_type, axis_separators)
+    raise RuntimeError(f"Unexpected layout '{layout}'")
diff --git a/tests/python/contrib/test_hexagon/infrastructure.py b/tests/python/contrib/test_hexagon/infrastructure.py
index 541414176a08..ae9f5f947f67 100644
--- a/tests/python/contrib/test_hexagon/infrastructure.py
+++ b/tests/python/contrib/test_hexagon/infrastructure.py
@@ -229,16 +229,18 @@ def compute(n, ho, wo, ko, hi, wi, ki):
     return output_shape, compute
 
 
-# Transpose and reshape numpy array according to the specified layout
-def transform_numpy(arr_np, layout):
-    if layout == "nhwc":
-        return arr_np
-    elif layout == "nhwc-8h2w32c2w":
-        N, H, W, C = arr_np.shape
-        return arr_np.reshape([N, H // 8, 8, W // 4, 2, 2, C // 32, 32]).transpose(0, 1, 3, 6, 2, 4, 7, 5)
-    elif layout == "n11c-1024c":
-        N, H, W, C = arr_np.shape
-        assert (H == 1 and W == 1), "The size of H and W must be 1!"
-        return arr_np.reshape([N, C//1024, 1024]).transpose(0, 1, 2)
-    else:
-        raise RuntimeError(f"Unexpected layout '{layout}'")
+# Reshape and transpose numpy array according to the specified layout
+def transform_numpy(arr_np, current_layout: str, new_layout:str):
+    if current_layout == "nhwc":
+        if new_layout == "nhwc":
+            return arr_np
+        if new_layout in ["nhwc-8h2w32c2w-2d", "nhwc-8h2w32c2w-1d"]:
+            N, H, W, C = arr_np.shape
+            return arr_np.reshape([N, H // 8, 8, W // 4, 2, 2, C // 32, 32]).transpose(0, 1, 3, 6, 2, 4, 7, 5)
+        if new_layout in ["n11c-1024c-2d", "n11c-1024c-1d"]:
+            N, H, W, C = arr_np.shape
+            assert (H == 1 and W == 1), "The size of H and W must be 1"
+            return arr_np.reshape([N, 1, 1, C//1024, 1024])
+
+        raise RuntimeError(f"Unexpected new_layout '{new_layout}'")
+    raise RuntimeError(f"Unexpected current_layout '{current_layout}'")
diff --git a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py
index a1d135acb358..d46e2db68651 100644
--- a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py
+++ b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py
@@ -18,9 +18,7 @@
 import pytest
 import numpy as np
 
-np.set_printoptions(threshold=np.inf)
 from tvm import te, topi
-from tvm.tir.stmt_functor import post_order_visit
 
 import tvm.testing
 from tvm.topi import testing
@@ -30,7 +28,7 @@
 
 
 input_layout = tvm.testing.parameter(
-    "nhwc-8h2w32c2w",
+    "nhwc-8h2w32c2w-2d",
 )
 
 
@@ -41,16 +39,16 @@ def input_np(input_shape, dtype):
 
 @tvm.testing.fixture
 def transformed_expected_output_np(expected_output_np, output_layout):
-    return transform_numpy(expected_output_np, output_layout)
+    return transform_numpy(expected_output_np, "nhwc", output_layout)
 
 
 @tvm.testing.fixture
 def transformed_input_np_padded(input_np_padded, input_layout):
-    return transform_numpy(input_np_padded, input_layout)
+    return transform_numpy(input_np_padded, "nhwc", input_layout)
 
 
 class TestAvgPool2dSlice:
-    # NOTE: input_layout is always assumed to be "nhwc-8h2w32c2w"
+    # NOTE: input_layout is always assumed to be "nhwc-8h2w32c2w-2d"
     (
         output_shape,
         kernel,
@@ -61,7 +59,6 @@ class TestAvgPool2dSlice:
         count_include_pad,
         output_layout,
         dtype,
-        use_te_sched,
     ) = tvm.testing.parameters(
         (
             [1, 8, 8, 32],
@@ -71,9 +68,8 @@ class TestAvgPool2dSlice:
             [0, 0, 0, 0],
             False,
             True,
-            "nhwc-8h2w32c2w",
+            "nhwc-8h2w32c2w-2d",
             "float16",
-            False,
         ),
         (
             [1, 16, 16, 32],
@@ -83,9 +79,8 @@ class TestAvgPool2dSlice:
             [0, 0, 0, 0],
             False,
             True,
-            "nhwc-8h2w32c2w",
+            "nhwc-8h2w32c2w-2d",
             "float16",
-            False,
         ),
         (
             [1, 8, 8, 32],
@@ -95,21 +90,8 @@ class TestAvgPool2dSlice:
             [0, 0, 0, 0],
             False,
             True,
-            "nhwc-8h2w32c2w",
-            "float16",
-            False,
-        ),
-        (
-            [1, 8, 8, 32],
-            [1, 1],
-            [1, 1],
-            [1, 1],
-            [0, 0, 0, 0],
-            False,
-            True,
-            "nhwc-8h2w32c2w",
+            "nhwc-8h2w32c2w-2d",
             "float16",
-            False,
         ),
         # Test non-one stride and dilation
         (
@@ -120,9 +102,8 @@ class TestAvgPool2dSlice:
             [0, 0, 0, 0],
             False,
             True,
-            "nhwc-8h2w32c2w",
+            "nhwc-8h2w32c2w-2d",
             "float16",
-            False,
         ),
         (
             [1, 8, 8, 32],
@@ -132,9 +113,8 @@ class TestAvgPool2dSlice:
             [0, 0, 0, 0],
             False,
             True,
-            "nhwc-8h2w32c2w",
+            "nhwc-8h2w32c2w-2d",
             "float16",
-            False,
         ),
         (
             [1, 8, 8, 32],
@@ -144,9 +124,8 @@ class TestAvgPool2dSlice:
             [0, 0, 0, 0],
             False,
             True,
-            "nhwc-8h2w32c2w",
+            "nhwc-8h2w32c2w-2d",
             "float16",
-            False,
         ),
         # Test non-zero padding
         (
@@ -157,9 +136,8 @@ class TestAvgPool2dSlice:
             [1, 1, 1, 1],
             False,
             True,
-            "nhwc-8h2w32c2w",
+            "nhwc-8h2w32c2w-2d",
             "float16",
-            False,
         ),
         (
             [1, 8, 8, 32],
@@ -169,9 +147,8 @@ class TestAvgPool2dSlice:
             [1, 2, 3, 4],
             False,
             True,
-            "nhwc-8h2w32c2w",
+            "nhwc-8h2w32c2w-2d",
             "float16",
-            False,
         ),
         (
             [1, 8, 8, 32],
@@ -181,9 +158,8 @@ class TestAvgPool2dSlice:
             [1, 2, 3, 4],
             False,
             True,
-            "nhwc-8h2w32c2w",
+            "nhwc-8h2w32c2w-2d",
             "float16",
-            False,
         ),
         (
             [1, 8, 8, 32],
@@ -193,11 +169,10 @@ class TestAvgPool2dSlice:
             [1, 2, 3, 4],
             False,
             True,
-            "nhwc-8h2w32c2w",
+            "nhwc-8h2w32c2w-2d",
             "float16",
-            False,
         ),
-        # Test n11c-1024c layout which will require input and output to have different layout
+        # Test n11c-1024c-2d layout which will require input and output to have different layout
         (
             [1, 1, 1, 2048],
             [8, 8],
@@ -206,9 +181,8 @@ class TestAvgPool2dSlice:
             [0, 0, 0, 0],
             False,
             True,
-            "n11c-1024c",
+            "n11c-1024c-2d",
             "float16",
-            False,
         ),
         (
             [1, 1, 1, 2048],
@@ -218,9 +192,8 @@ class TestAvgPool2dSlice:
             [0, 0, 0, 0],
             False,
             True,
-            "n11c-1024c",
+            "n11c-1024c-2d",
             "float16",
-            False,
         ),
         (
             [1, 1, 1, 2048],
@@ -230,9 +203,8 @@ class TestAvgPool2dSlice:
             [0, 0, 0, 0],
             False,
             True,
-            "n11c-1024c",
+            "n11c-1024c-2d",
             "float16",
-            False,
         ),
         (
             [1, 1, 1, 2048],
@@ -242,34 +214,8 @@ class TestAvgPool2dSlice:
             [0, 0, 0, 0],
             False,
             True,
-            "n11c-1024c",
+            "n11c-1024c-2d",
             "float16",
-            False,
-        ),
-        # Use 'te' schedule
-        (
-            [1, 8, 8, 32],
-            [3, 3],
-            [1, 1],
-            [1, 1],
-            [0, 0, 0, 0],
-            False,
-            True,
-            "nhwc-8h2w32c2w",
-            "float16",
-            True,
-        ),
-        (
-            [1, 1, 1, 2048],
-            [8, 8],
-            [1, 1],
-            [1, 1],
-            [0, 0, 0, 0],
-            False,
-            True,
-            "n11c-1024c",
-            "float16",
-            True,
         ),
     )
 
@@ -302,7 +248,7 @@ def expected_output_np(
 
     @tvm.testing.fixture
     def input_shape(self, output_shape, kernel, padding, stride, dilation, output_layout):
-        # Input shape without crouton padding; 'ceil' is being ignored from calculation:
+        # Input shape without any padding; 'ceil' is being ignored from calculation:
         o_b, o_h, o_w, o_c = output_shape
         d_h, d_w = dilation
         s_h, s_w = stride
@@ -310,11 +256,11 @@ def input_shape(self, output_shape, kernel, padding, stride, dilation, output_la
         pad_before_h, pad_before_w = padding[:2]
         pad_after_h, pad_after_w = padding[2:]
 
-        if output_layout == "n11c-1024c":
+        if output_layout == "n11c-1024c-2d":
             assert (
                 pad_before_w == 0 and pad_after_w == 0 and pad_before_h == 0 and pad_after_h == 0
-            ), "Padding must be zero for n11c-1024c layout!!"
-            assert o_h == 1 and o_w == 1, "Output height and width must be 1!"
+            ), "Padding must be zero for n11c-1024c-2d layout"
+            assert o_h == 1 and o_w == 1, "Output height and width must be 1"
 
         in_h = (o_h - 1) * s_h + d_h * (k_h - 1) + 1 - pad_before_h - pad_after_h
         in_w = (o_w - 1) * s_w + d_w * (k_w - 1) + 1 - pad_before_w - pad_after_w
@@ -323,10 +269,10 @@ def input_shape(self, output_shape, kernel, padding, stride, dilation, output_la
 
     @tvm.testing.fixture
     def input_shape_padded(self, input_shape, padding, output_layout):
-        # Input shape with regular and crouton padding.
-        # Input width and height are padded to a multiple of croutons.
-        # NOTE: Input layout is always assumed to be nhwc-8h2w32c2w. Only the output layout can be
-        # different.
+        # Input shape is adjusted to account for 'padding'. Also, due to the physical
+        # layout of the buffer, height and width are adjusted so that they are a
+        # multiple of 8 and 4 respectively.
+        # NOTE: Input layout is always assumed to be nhwc-8h2w32c2w-2d.
         pad_before_h, pad_before_w = padding[:2]
         pad_after_h, pad_after_w = padding[2:]
         padded_input_height = ((input_shape[1] + pad_before_h + pad_after_h + 7) // 8) * 8
@@ -338,10 +284,10 @@ def input_np_padded(self, input_np, input_shape, input_shape_padded, padding):
         pad_before_h, pad_before_w = padding[:2]
         pad_after_h = (
             input_shape_padded[1] - input_shape[1] - pad_before_h
-        )  # pad_after for height with crouton padding
+        )
         pad_after_w = (
             input_shape_padded[2] - input_shape[2] - pad_before_w
-        )  # pad_after for width with crouton padding
+        )
         input_padded = np.pad(
             input_np,
             ((0, 0), (pad_before_h, pad_after_h), (pad_before_w, pad_after_w), (0, 0)),
@@ -352,7 +298,6 @@ def input_np_padded(self, input_np, input_shape, input_shape_padded, padding):
     @tvm.testing.requires_hexagon
     def test_avg_pool2d_slice(
         self,
-        use_te_sched,
         stride,
         kernel,
         dtype,
@@ -377,24 +322,20 @@ def test_avg_pool2d_slice(
 
         M = sl.avg_pool2d_compute(
             A, output_shape, kernel, stride, dilation)
-        if not use_te_sched:
-            # tir schedule
-            tir_schedule = sl.avg_pool2d_STIR_schedule(M, A, output_layout, input_layout)
-            sch = tir_schedule.mod
-        else:
-            # te schedule
-            te_s = sl.avg_pool2d_schedule(M, A, output_layout, input_layout)
-            sch = te_s
+
+        # tir schedule
+        tir_schedule = sl.avg_pool2d_STIR_schedule(M, A, output_layout, input_layout)
+        sch = tir_schedule.mod
 
         input_axis_separator = [4]
-        if output_layout == "nhwc-8h2w32c2w":
+        if output_layout == "nhwc-8h2w32c2w-2d":
+            output_axis_separator = [4]
+        elif output_layout == "n11c-1024c-2d":
             output_axis_separator = [4]
-        elif output_layout == "n11c-1024c":
-            output_axis_separator = [2]
         else:
             raise RuntimeError(f"Unexpected layout '{output_layout}'")
 
-        with tvm.transform.PassContext(opt_level=3, config={"tir.disable_assert": True}):
+        with tvm.transform.PassContext(opt_level=3):
             func = tvm.build(
                 sch,
                 [A, M],
@@ -418,12 +359,11 @@ def test_avg_pool2d_slice(
 
         mod = hexagon_session.load_module(func)
         mod(input_arr, output_arr)
-
         b, h, w, c = output_shape
-        if output_layout == "nhwc-8h2w32c2w":
+        if output_layout == "nhwc-8h2w32c2w-2d":
             output_np = output_arr.numpy().reshape([b, h // 8, w // 4, c // 32, 8, 2, 32, 2])
-        elif output_layout == "n11c-1024c":
-            output_np = output_arr.numpy().reshape([b, c // 1024, 1024])
+        elif output_layout == "n11c-1024c-2d":
+            output_np = output_arr.numpy().reshape([b, 1, 1, c // 1024, 1024])
         else:
             raise RuntimeError(f"Unexpected layout '{output_layout}'")
 

From f731032e9403d2c09e735886ec38e7857327ae75 Mon Sep 17 00:00:00 2001
From: Jyotsna Verma <jverma@quicinc.com>
Date: Tue, 31 May 2022 17:37:01 -0500
Subject: [PATCH 3/9] Fix formatting issues

---
 python/tvm/topi/hexagon/slice_ops/avg_pool2d.py       |  9 +++++----
 tests/python/contrib/test_hexagon/infrastructure.py   | 10 ++++++----
 .../contrib/test_hexagon/test_avg_pool2d_slice.py     | 11 +++--------
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py
index 3d4a8e4ac3c9..1d6330a0acee 100644
--- a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py
+++ b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py
@@ -35,15 +35,16 @@
    for the input.
 """
 
+
 def validate_out_shape(out_shape, in_shape, kernel, stride, dilation):
     ob, oh, ow, oc = out_shape
     ib, ih, iw, ic = in_shape
     kh, kw = kernel
     sh, sw = stride
     dh, dw = dilation
-    if (ih < (oh - 1) * sh + dh * (kh - 1)  + 1 ):
+    if ih < (oh - 1) * sh + dh * (kh - 1) + 1:
         raise RuntimeError(f"Output height is too large")
-    if (iw < (ow - 1) * sw + dw * (kw - 1)  + 1 ):
+    if iw < (ow - 1) * sw + dw * (kw - 1) + 1:
         raise RuntimeError(f"Output width is too large")
 
 
@@ -52,7 +53,7 @@ def avg_pool2d_compute(A, out_shape, kernel, stride, dilation):
     rh = te.reduce_axis((0, kh), name="rh")
     rw = te.reduce_axis((0, kw), name="rw")
     ob, oh, ow, oc = out_shape
-    if (isinstance(ob, int)):
+    if isinstance(ob, int):
         validate_out_shape(out_shape, A.shape, kernel, stride, dilation)
 
     sh, sw = stride
@@ -124,7 +125,7 @@ def STIR_schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str):
     # Schedule 'Sum'
     s.compute_at(Sum, cio)
     Sum_axis = s.get_loops(Sum)
-    s.reorder(Sum_axis[-2], Sum_axis[-1], Sum_axis[-3]) #
+    s.reorder(Sum_axis[-2], Sum_axis[-1], Sum_axis[-3])
     # s.vectorize(Sum_axis[-3]) # Doesn't work
     return s
 
diff --git a/tests/python/contrib/test_hexagon/infrastructure.py b/tests/python/contrib/test_hexagon/infrastructure.py
index ae9f5f947f67..607fa82565bc 100644
--- a/tests/python/contrib/test_hexagon/infrastructure.py
+++ b/tests/python/contrib/test_hexagon/infrastructure.py
@@ -230,17 +230,19 @@ def compute(n, ho, wo, ko, hi, wi, ki):
 
 
 # Reshape and transpose numpy array according to the specified layout
-def transform_numpy(arr_np, current_layout: str, new_layout:str):
+def transform_numpy(arr_np, current_layout: str, new_layout: str):
     if current_layout == "nhwc":
         if new_layout == "nhwc":
             return arr_np
         if new_layout in ["nhwc-8h2w32c2w-2d", "nhwc-8h2w32c2w-1d"]:
             N, H, W, C = arr_np.shape
-            return arr_np.reshape([N, H // 8, 8, W // 4, 2, 2, C // 32, 32]).transpose(0, 1, 3, 6, 2, 4, 7, 5)
+            return arr_np.reshape([N, H // 8, 8, W // 4, 2, 2, C // 32, 32]).transpose(
+                0, 1, 3, 6, 2, 4, 7, 5
+            )
         if new_layout in ["n11c-1024c-2d", "n11c-1024c-1d"]:
             N, H, W, C = arr_np.shape
-            assert (H == 1 and W == 1), "The size of H and W must be 1"
-            return arr_np.reshape([N, 1, 1, C//1024, 1024])
+            assert H == 1 and W == 1, "The size of H and W must be 1"
+            return arr_np.reshape([N, 1, 1, C // 1024, 1024])
 
         raise RuntimeError(f"Unexpected new_layout '{new_layout}'")
     raise RuntimeError(f"Unexpected current_layout '{current_layout}'")
diff --git a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py
index d46e2db68651..687e88cd2466 100644
--- a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py
+++ b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py
@@ -282,12 +282,8 @@ def input_shape_padded(self, input_shape, padding, output_layout):
     @tvm.testing.fixture
     def input_np_padded(self, input_np, input_shape, input_shape_padded, padding):
         pad_before_h, pad_before_w = padding[:2]
-        pad_after_h = (
-            input_shape_padded[1] - input_shape[1] - pad_before_h
-        )
-        pad_after_w = (
-            input_shape_padded[2] - input_shape[2] - pad_before_w
-        )
+        pad_after_h = input_shape_padded[1] - input_shape[1] - pad_before_h
+        pad_after_w = input_shape_padded[2] - input_shape[2] - pad_before_w
         input_padded = np.pad(
             input_np,
             ((0, 0), (pad_before_h, pad_after_h), (pad_before_w, pad_after_w), (0, 0)),
@@ -320,8 +316,7 @@ def test_avg_pool2d_slice(
         target_hexagon = tvm.target.hexagon("v69")
         A = te.placeholder(input_shape_padded, name="A", dtype=dtype)
 
-        M = sl.avg_pool2d_compute(
-            A, output_shape, kernel, stride, dilation)
+        M = sl.avg_pool2d_compute(A, output_shape, kernel, stride, dilation)
 
         # tir schedule
         tir_schedule = sl.avg_pool2d_STIR_schedule(M, A, output_layout, input_layout)

From 856474cc75641f4189af0ea1e293254916e80147 Mon Sep 17 00:00:00 2001
From: Jyotsna Verma <jverma@quicinc.com>
Date: Wed, 1 Jun 2022 14:45:08 -0500
Subject: [PATCH 4/9] Address pylint errors

---
 .../tvm/topi/hexagon/slice_ops/avg_pool2d.py  | 34 +++++++++----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py
index 1d6330a0acee..8ad14854e330 100644
--- a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py
+++ b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py
@@ -14,15 +14,11 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+# pylint: disable=invalid-name, unused-variable, unused-argument, too-many-locals
 
-from tvm.ir.module import IRModule
-from tvm import te
-from tvm import tir
-from tvm.script import tir as T
-from ..utils import get_layout_transform_fn
+""" Compute and schedule for avg_pool2d slice op
 
-"""
-The slice op implementation for avg_pool2d makes serveral assumptions:
+Please note the following assumptions made by the implementation:
 
 1) The input must be padded in advance to account for 'padding'. In addition,
    both input and output must be padded as per the physical buffer layout.
@@ -35,20 +31,27 @@
    for the input.
 """
 
+#from tvm.ir.module import IRModule
+from tvm import te
+from tvm import tir
+from ..utils import get_layout_transform_fn
+
 
 def validate_out_shape(out_shape, in_shape, kernel, stride, dilation):
-    ob, oh, ow, oc = out_shape
-    ib, ih, iw, ic = in_shape
+    """Validate output shape"""
+    _, oh, ow, _ = out_shape
+    _, ih, iw, _ = in_shape
     kh, kw = kernel
     sh, sw = stride
     dh, dw = dilation
     if ih < (oh - 1) * sh + dh * (kh - 1) + 1:
-        raise RuntimeError(f"Output height is too large")
+        raise RuntimeError("Output height is too large")
     if iw < (ow - 1) * sw + dw * (kw - 1) + 1:
-        raise RuntimeError(f"Output width is too large")
+        raise RuntimeError("Output width is too large")
 
 
 def avg_pool2d_compute(A, out_shape, kernel, stride, dilation):
+    """avg_pool2d compute"""
     kh, kw = kernel
     rh = te.reduce_axis((0, kh), name="rh")
     rw = te.reduce_axis((0, kw), name="rw")
@@ -72,9 +75,8 @@ def avg_pool2d_compute(A, out_shape, kernel, stride, dilation):
     )
     return Avg
 
-
-# Schedule for input and output layout nhwc-8h2w32c2w
 def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: str):
+    """Schedule for input and output layout nhwc-8h2w32c2w"""
     func = te.create_prim_func([ins, outs])
     s = tir.Schedule(func)
     Sum = s.get_block("sum")
@@ -103,9 +105,8 @@ def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: st
     # s.vectorize(ci_wii) # Doesn't work
     return s
 
-
-# Schedule for output layout: n11c-1024c, input layout: nhwc-8h2w32c2w
 def STIR_schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str):
+    """Schedule for output layout: n11c-1024c, input layout: nhwc-8h2w32c2w"""
     func = te.create_prim_func([ins, outs])
     s = tir.Schedule(func)
     Sum = s.get_block("sum")
@@ -129,9 +130,8 @@ def STIR_schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str):
     # s.vectorize(Sum_axis[-3]) # Doesn't work
     return s
 
-
-# STIR based schedule
 def avg_pool2d_STIR_schedule(outs, ins, output_layout: str, input_layout: str):
+    """STIR based schedule"""
     if output_layout == "nhwc-8h2w32c2w-2d":
         return STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout, input_layout)
     if output_layout == "n11c-1024c-2d":

From 5910685e84294512bf6f02a7a8815f1b8600bb07 Mon Sep 17 00:00:00 2001
From: Jyotsna Verma <jverma@quicinc.com>
Date: Thu, 2 Jun 2022 10:31:53 -0500
Subject: [PATCH 5/9] Additional formatting issues

---
 python/tvm/topi/hexagon/slice_ops/avg_pool2d.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py
index 8ad14854e330..306be543d8fb 100644
--- a/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py
+++ b/python/tvm/topi/hexagon/slice_ops/avg_pool2d.py
@@ -31,7 +31,6 @@
    for the input.
 """
 
-#from tvm.ir.module import IRModule
 from tvm import te
 from tvm import tir
 from ..utils import get_layout_transform_fn
@@ -75,6 +74,7 @@ def avg_pool2d_compute(A, out_shape, kernel, stride, dilation):
     )
     return Avg
 
+
 def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: str):
     """Schedule for input and output layout nhwc-8h2w32c2w"""
     func = te.create_prim_func([ins, outs])
@@ -105,6 +105,7 @@ def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: st
     # s.vectorize(ci_wii) # Doesn't work
     return s
 
+
 def STIR_schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str):
     """Schedule for output layout: n11c-1024c, input layout: nhwc-8h2w32c2w"""
     func = te.create_prim_func([ins, outs])
@@ -130,6 +131,7 @@ def STIR_schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str):
     # s.vectorize(Sum_axis[-3]) # Doesn't work
     return s
 
+
 def avg_pool2d_STIR_schedule(outs, ins, output_layout: str, input_layout: str):
     """STIR based schedule"""
     if output_layout == "nhwc-8h2w32c2w-2d":

From 7a26a5675864bd6000999eabbfe39e58eb7fed2d Mon Sep 17 00:00:00 2001
From: Jyotsna Verma <jverma@quicinc.com>
Date: Thu, 2 Jun 2022 11:30:55 -0500
Subject: [PATCH 6/9] more pylint fixes

---
 python/tvm/topi/hexagon/utils.py                    |  8 +++++++-
 tests/python/contrib/test_hexagon/infrastructure.py | 13 +++++++------
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/python/tvm/topi/hexagon/utils.py b/python/tvm/topi/hexagon/utils.py
index 5dc7e15801c0..af6e3de9c350 100644
--- a/python/tvm/topi/hexagon/utils.py
+++ b/python/tvm/topi/hexagon/utils.py
@@ -14,27 +14,33 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
+# pylint: disable=invalid-name
+"""Common hexagon specific utilities"""
 from tvm import te
 
 
 def n11c_1024c_2d(n, h, w, c):
+    """Return index map for n11c_1024 2d layout"""
     return [n, h, w, c // 1024, te.AXIS_SEPARATOR, c % 1024]
 
 
 def n11c_1024c_1d(n, h, w, c):
+    """Return index map for n11c_1024 1d layout"""
     return [n, h, w, c // 1024, c % 1024]
 
 
 def nhwc_8h2w32c2w_2d(n, h, w, c):
+    """Return index map for nhwc_8h2w32c2w 2d layout"""
     return [n, h // 8, w // 4, c // 32, te.AXIS_SEPARATOR, h % 8, (w % 4) // 2, c % 32, w % 2]
 
 
 def nhwc_8h2w32c2w_1d(n, h, w, c):
+    """Return index map for nhwc_8h2w32c2w 1d layout"""
     return [n, h // 8, w // 4, c // 32, h % 8, (w % 4) // 2, c % 32, w % 2]
 
 
 def get_layout_transform_fn(layout):
+    """Return index map function as per the layout string"""
     if layout == "nhwc-8h2w32c2w-2d":
         return nhwc_8h2w32c2w_2d
     if layout == "nhwc-8h2w32c2w-1d":
diff --git a/tests/python/contrib/test_hexagon/infrastructure.py b/tests/python/contrib/test_hexagon/infrastructure.py
index 607fa82565bc..a6e1fa662e4a 100644
--- a/tests/python/contrib/test_hexagon/infrastructure.py
+++ b/tests/python/contrib/test_hexagon/infrastructure.py
@@ -14,6 +14,7 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+# pylint: disable=invalid-name
 
 """ Hexagon testing infrastructure """
 
@@ -229,20 +230,20 @@ def compute(n, ho, wo, ko, hi, wi, ki):
     return output_shape, compute
 
 
-# Reshape and transpose numpy array according to the specified layout
 def transform_numpy(arr_np, current_layout: str, new_layout: str):
+    """Reshape and transpose numpy array according to the specified layout"""
     if current_layout == "nhwc":
         if new_layout == "nhwc":
             return arr_np
         if new_layout in ["nhwc-8h2w32c2w-2d", "nhwc-8h2w32c2w-1d"]:
-            N, H, W, C = arr_np.shape
-            return arr_np.reshape([N, H // 8, 8, W // 4, 2, 2, C // 32, 32]).transpose(
+            n, h, w, c = arr_np.shape
+            return arr_np.reshape([n, h // 8, 8, w // 4, 2, 2, c // 32, 32]).transpose(
                 0, 1, 3, 6, 2, 4, 7, 5
             )
         if new_layout in ["n11c-1024c-2d", "n11c-1024c-1d"]:
-            N, H, W, C = arr_np.shape
-            assert H == 1 and W == 1, "The size of H and W must be 1"
-            return arr_np.reshape([N, 1, 1, C // 1024, 1024])
+            n, h, w, c = arr_np.shape
+            assert h == 1 and w == 1, "The size of h and w must be 1"
+            return arr_np.reshape([n, 1, 1, c // 1024, 1024])
 
         raise RuntimeError(f"Unexpected new_layout '{new_layout}'")
     raise RuntimeError(f"Unexpected current_layout '{current_layout}'")

From cb28e17850d2284c96c2ff0a8d9b573f4298420a Mon Sep 17 00:00:00 2001
From: Jyotsna Verma <jverma@quicinc.com>
Date: Thu, 2 Jun 2022 11:32:21 -0500
Subject: [PATCH 7/9] Changed arch version to v68 for now

---
 tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py
index 687e88cd2466..89c76ec8e996 100644
--- a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py
+++ b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py
@@ -313,7 +313,7 @@ def test_avg_pool2d_slice(
         hexagon_session,
     ):
 
-        target_hexagon = tvm.target.hexagon("v69")
+        target_hexagon = tvm.target.hexagon("v68")
         A = te.placeholder(input_shape_padded, name="A", dtype=dtype)
 
         M = sl.avg_pool2d_compute(A, output_shape, kernel, stride, dilation)

From 8f7e4adf2abd5397d3b543724a57f98385d0674d Mon Sep 17 00:00:00 2001
From: Jyotsna Verma <jverma@quicinc.com>
Date: Tue, 7 Jun 2022 12:09:41 -0500
Subject: [PATCH 8/9] Changing arch version back to v69

---
 tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py
index 89c76ec8e996..687e88cd2466 100644
--- a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py
+++ b/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py
@@ -313,7 +313,7 @@ def test_avg_pool2d_slice(
         hexagon_session,
     ):
 
-        target_hexagon = tvm.target.hexagon("v68")
+        target_hexagon = tvm.target.hexagon("v69")
         A = te.placeholder(input_shape_padded, name="A", dtype=dtype)
 
         M = sl.avg_pool2d_compute(A, output_shape, kernel, stride, dilation)

From 1f9710885ba0743dad5a88f4786b38c3ffbeaf23 Mon Sep 17 00:00:00 2001
From: Jyotsna Verma <jverma@quicinc.com>
Date: Fri, 10 Jun 2022 16:56:38 -0500
Subject: [PATCH 9/9] Move the test to tests/python/contrib/test_hexagon/topi

---
 .../contrib/test_hexagon/{ => topi}/test_avg_pool2d_slice.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename tests/python/contrib/test_hexagon/{ => topi}/test_avg_pool2d_slice.py (99%)

diff --git a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py b/tests/python/contrib/test_hexagon/topi/test_avg_pool2d_slice.py
similarity index 99%
rename from tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py
rename to tests/python/contrib/test_hexagon/topi/test_avg_pool2d_slice.py
index 687e88cd2466..6cbd84b7ee3a 100644
--- a/tests/python/contrib/test_hexagon/test_avg_pool2d_slice.py
+++ b/tests/python/contrib/test_hexagon/topi/test_avg_pool2d_slice.py
@@ -24,7 +24,7 @@
 from tvm.topi import testing
 from tvm.contrib.hexagon.build import HexagonLauncher
 import tvm.topi.hexagon.slice_ops as sl
-from .infrastructure import allocate_hexagon_array, transform_numpy
+from ..infrastructure import allocate_hexagon_array, transform_numpy
 
 
 input_layout = tvm.testing.parameter(