From 474840d885fc78298884081d4e47125ae871a1a7 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Tue, 7 Sep 2021 16:04:29 -0700 Subject: [PATCH 1/2] Add hexagon contrib tests for blocked conv2d and maxpool2d --- tests/python/contrib/test_hexagon/__init__.py | 18 + tests/python/contrib/test_hexagon/conftest.py | 46 ++ .../contrib/test_hexagon/infrastructure.py | 57 +++ .../test_hexagon/test_conv2d_blocked.py | 474 ++++++++++++++++++ .../test_hexagon/test_maxpool2d_blocked.py | 151 ++++++ 5 files changed, 746 insertions(+) create mode 100644 tests/python/contrib/test_hexagon/__init__.py create mode 100644 tests/python/contrib/test_hexagon/conftest.py create mode 100644 tests/python/contrib/test_hexagon/infrastructure.py create mode 100644 tests/python/contrib/test_hexagon/test_conv2d_blocked.py create mode 100644 tests/python/contrib/test_hexagon/test_maxpool2d_blocked.py diff --git a/tests/python/contrib/test_hexagon/__init__.py b/tests/python/contrib/test_hexagon/__init__.py new file mode 100644 index 000000000000..58dc4cc1e03d --- /dev/null +++ b/tests/python/contrib/test_hexagon/__init__.py @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" Testing infrastructure for Hexagon """ diff --git a/tests/python/contrib/test_hexagon/conftest.py b/tests/python/contrib/test_hexagon/conftest.py new file mode 100644 index 000000000000..8213c33123d8 --- /dev/null +++ b/tests/python/contrib/test_hexagon/conftest.py @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" Hexagon testing fixtures used to deduce testing argument + values from testing parameters """ + +import tvm +from .infrastructure import ceildiv + + +@tvm.testing.fixture +def shape_nhwc(batch, in_channel, in_size): + return (batch, in_size, in_size, in_channel) + + +@tvm.testing.fixture +def shape_oihw(out_channel, in_channel, kernel): + return (out_channel, in_channel, kernel, kernel) + + +@tvm.testing.fixture +def shape_oihw8i32o4i(out_channel, in_channel, kernel): + out_factor, in_factor, in_second_factor = 32, 32, 4 + return ( + int(ceildiv(out_channel, out_factor)), + int(ceildiv(in_channel, in_factor)), + kernel, + kernel, + in_factor // in_second_factor, + out_factor, + in_second_factor, + ) diff --git a/tests/python/contrib/test_hexagon/infrastructure.py b/tests/python/contrib/test_hexagon/infrastructure.py new file mode 100644 index 000000000000..9c7ba5b1b6a3 --- /dev/null +++ b/tests/python/contrib/test_hexagon/infrastructure.py @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" Hexagon testing infrastructure """ + +import tvm +import numpy + + +def ceildiv(o, d): + return tvm.tir.floordiv(o + d - 1, d) + + +def get_packed_layout(logical_shape_nhwc, block_shape, packed_C=True): + shape = [logical_shape_nhwc[0]] + off_h, off_w, off_c = block_shape + shape.append(ceildiv(logical_shape_nhwc[1], off_h)) + shape.append(ceildiv(logical_shape_nhwc[2], off_w)) + if packed_C: + shape.append(ceildiv(logical_shape_nhwc[3], off_c)) + shape.extend(block_shape) + else: + shape.extend([off_h, off_w, logical_shape_nhwc[-1]]) + return shape + + +def build_and_run(inputs, func, target, target_host, *args, **kwargs): + s, placeholders, binds = func(*args, **kwargs) + + func = tvm.build(s, placeholders, target=target, target_host=target_host, binds=binds) + dev = tvm.device(target) + tensors = [] + for tensor in inputs: + tensors.append(tvm.nd.array(tensor, dev)) + tensors.append( + tvm.nd.array( + numpy.zeros([i.value for i in placeholders[-1].shape], dtype=placeholders[-1].dtype), + dev, + ) + ) + func(*tensors) + + return tensors[-1].asnumpy() diff --git a/tests/python/contrib/test_hexagon/test_conv2d_blocked.py b/tests/python/contrib/test_hexagon/test_conv2d_blocked.py new file mode 100644 index 000000000000..a5a766b5b5df --- /dev/null +++ b/tests/python/contrib/test_hexagon/test_conv2d_blocked.py @@ -0,0 +1,474 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sys + +import tvm +from tvm import te +from tvm import topi +from tvm.topi import testing +from .infrastructure import ceildiv +from .infrastructure import get_packed_layout +from .infrastructure import build_and_run + +import numpy as np +import pytest + + +def conv2d_logical( + shape_nhwc, + shape_kcrs, + kernel_size, + stride, + padding, + dtype, + storage_scope="global", +): + """ + Conv2d TE wherein both input activation and filter tensors + are defined with their logical NHWC/OIHW shapes, respectively. + The packed physical layout for the activation and filter are: + Activation: nhwc8h8w32c + Filter: oihw8i32o4i + """ + assert kernel_size == tuple(shape_kcrs[2:]) + + block_shape = 8, 8, 32 + block_H, block_W, block_C = block_shape + shape = get_packed_layout(shape_nhwc, block_shape) + logical_output_shape = ( + shape_nhwc[0], + (shape_nhwc[1] - kernel_size[0] + padding[0] + padding[1]) // stride[0] + 1, + (shape_nhwc[2] - kernel_size[1] + padding[2] + padding[3]) // stride[0] + 1, + shape_nhwc[3], + ) + output_shape = get_packed_layout(logical_output_shape, block_shape) + + N, H, W, C = shape_nhwc + X = te.placeholder(shape_nhwc, dtype=dtype) + # Combination of padding required by conv2d operator and padding to evenly divisible + # number of blocks. Note that this padding should be inlined in the schedule so + # as to avoid input copying. + pad_h = (block_H - ((H + padding[1]) % block_H)) % block_H + pad_w = (block_W - ((W + padding[3]) % block_W)) % block_W + X_pad = topi.nn.pad(X, [0, padding[0], padding[2], 0], [0, pad_h, pad_w, 0], pad_value=0) + # Calculate packed layout + X_packed = te.compute( + shape, + lambda n, ho, wo, co, hi, wi, ci: X_pad[ + n, ho * block_H + hi, wo * block_W + wi, co * block_C + ci + ], + ) + + # Filter shape using KCRS (OIHW) notation + K, C, R, S = shape_kcrs + filter_Ki, filter_Ci, filter_Cii = 32, 32, 4 + shape_filter = [ + ceildiv(K, filter_Ki), + ceildiv(C, filter_Ci), + R, + S, + filter_Ci // filter_Cii, + filter_Ki, + filter_Cii, + ] + filt = te.placeholder(shape_kcrs, dtype=dtype) + # Channel padding to multiples of 32 + pad_c = (filter_Ci - (C % filter_Ci)) % filter_Ci + pad_k = (filter_Ki - (K % filter_Ki)) % filter_Ki + filt_pad = topi.nn.pad( + filt, [0, 0, 0, 0], [pad_k, pad_c, R, S], pad_value=0, name="padded_filter" + ) + filt_packed = te.compute( + shape_filter, + lambda ko, co, r, s, cio, ki, cii: filt_pad[ + ko * filter_Ki + ki, co * filter_Ci + cio * filter_Cii + cii, r, s + ], + name="packed_filter", + ) + + rh = te.reduce_axis((0, kernel_size[0]), name="rh") + rw = te.reduce_axis((0, kernel_size[1]), name="rw") + rc = te.reduce_axis((0, C), name="rc") + + def compute(n, ho, wo, ko, hi, wi, ki): + # Construct blockized strided conv2d height index + h = ho * block_H + hi + h_contig = h * stride[0] + rh + h_block_id = h_contig // block_H + h_block_offset = h_contig % block_H + + # Construct blockized strided conv2d width index + w = wo * block_W + wi + w_contig = w * stride[1] + rw + w_block_id = w_contig // block_W + w_block_offset = w_contig % block_W + + # Construct blockized conv2d channel index + c_block_id = rc // block_C + c_block_offset = rc % block_C + + # Construct flat filter input channel indices + rco = rc // filter_Ci + rcio = (rc % filter_Ci) // filter_Cii + rcii = rc % filter_Cii + + return te.sum( + X_packed[ + n, + h_block_id, + w_block_id, + c_block_id, + h_block_offset, + w_block_offset, + c_block_offset, + ] + * filt_packed[ko, rco, rh, rw, rcio, ki, rcii], + axis=[rh, rw, rc], + ) + + Y = te.compute(output_shape, compute) + s = te.create_schedule(Y.op) + + # Ensure the padding and array packing is performed inline + s[X_pad].compute_inline() + s[X_packed].compute_inline() + + s[filt_pad].compute_inline() + s[filt_packed].compute_inline() + + binds = {} + if storage_scope and storage_scope != "global": + with tvm.transform.PassContext(): + Xb = tvm.tir.decl_buffer(shape, name="Xb", dtype=dtype, scope=storage_scope) + Yb = tvm.tir.decl_buffer(output_shape, name="Yb", dtype=dtype, scope=storage_scope) + binds = {X: Xb, Y: Yb} + + return (s, [X, filt, Y], binds) + + +def conv2d_packed_filter( + shape_nhwc, + shape_filter, + kernel_size, + stride, + padding, + dtype, + storage_scope="global", +): + """ + Conv2d TE wherein the input activation is defined by its + logical NHWC shape, but the filter is provided in the + packed layout oihw8i32o4i. The physical packed layout used + for the activation is: nhwc8h8w32c + """ + assert kernel_size == tuple(shape_filter[2:4]) + + block_shape = 8, 8, 32 + block_H, block_W, block_C = block_shape + shape = get_packed_layout(shape_nhwc, block_shape) + logical_output_shape = ( + shape_nhwc[0], + (shape_nhwc[1] - kernel_size[0] + padding[0] + padding[1]) // stride[0] + 1, + (shape_nhwc[2] - kernel_size[1] + padding[2] + padding[3]) // stride[0] + 1, + shape_nhwc[3], + ) + output_shape = get_packed_layout(logical_output_shape, block_shape) + + N, H, W, C = shape_nhwc + X = te.placeholder(shape_nhwc, dtype=dtype) + # Combination of padding required by conv2d operator and padding to evenly divisible + # number of blocks. Note that this padding should be inlined in the schedule so + # as to avoid input copying. + pad_h = (block_H - ((H + padding[1]) % block_H)) % block_H + pad_w = (block_W - ((W + padding[3]) % block_W)) % block_W + + X_pad = topi.nn.pad(X, [0, padding[0], padding[2], 0], [0, pad_h, pad_w, 0], pad_value=0) + # Calculate packed layout + packed_shape = get_packed_layout(X_pad.shape, block_shape) + + X_packed = te.compute( + packed_shape, + lambda n, ho, wo, co, hi, wi, ci: X_pad[ + n, ho * block_H + hi, wo * block_W + wi, co * block_C + ci + ], + ) + + # Filter shape using KCRS (OIHW) notation + filter_Ki, filter_Ci, filter_Cii = 32, 32, 4 + assert shape_filter[-1] == filter_Cii + assert shape_filter[-2] == filter_Ki + assert shape_filter[-3] == filter_Ci // filter_Cii + + filt_packed = te.placeholder(shape_filter, dtype=dtype) + + rh = te.reduce_axis((0, kernel_size[0]), name="rh") + rw = te.reduce_axis((0, kernel_size[1]), name="rw") + rc = te.reduce_axis((0, C), name="rc") + + def compute(n, ho, wo, ko, hi, wi, ki): + # Construct blockized strided conv2d height index + h = ho * block_H + hi + h_contig = h * stride[0] + rh + h_block_id = h_contig // block_H + h_block_offset = h_contig % block_H + + # Construct blockized strided conv2d width index + w = wo * block_W + wi + w_contig = w * stride[1] + rw + w_block_id = w_contig // block_W + w_block_offset = w_contig % block_W + + # Construct blockized conv2d channel index + c_block_id = rc // block_C + c_block_offset = rc % block_C + + # Construct flat filter input channel indices + rco = rc // filter_Ci + rcio = (rc % filter_Ci) // filter_Cii + rcii = rc % filter_Cii + + return te.sum( + X_packed[ + n, + h_block_id, + w_block_id, + c_block_id, + h_block_offset, + w_block_offset, + c_block_offset, + ] + * filt_packed[ko, rco, rh, rw, rcio, ki, rcii], + axis=[rh, rw, rc], + ) + + Y = te.compute(output_shape, compute) + s = te.create_schedule(Y.op) + + # Ensure the padding and array packing is performed inline + s[X_pad].compute_inline() + s[X_packed].compute_inline() + + # Perform scheduling + n, hid, wid, cid, hoff, woff, coff = s[Y].op.axis + slice = s[Y].fuse(wid, cid) + Xl = s.cache_read(X_packed, storage_scope, [Y]) + Yl = s.cache_write(Y, storage_scope) + + s[Yl].compute_at(s[Y], hid) + n, hid, slice, hoff, woff, coff = s[Yl].op.axis + s[Xl].compute_at(s[Yl], slice) + + binds = {} + if storage_scope and storage_scope != "global": + with tvm.transform.PassContext(): + Xb = tvm.tir.decl_buffer(shape, name="Xb", dtype=dtype, scope=storage_scope) + Yb = tvm.tir.decl_buffer(output_shape, name="Yb", dtype=dtype, scope=storage_scope) + binds = {X: Xb, Y: Yb} + + return (s, [X, filt_packed, Y], binds) + + +def conv2d_packed_filter_nhwhwc( + shape_nhwc, + shape_filter, + kernel_size, + stride, + padding, + dtype, + storage_scope="global", +): + """ + Conv2d TE wherein the input activation is defined by its + logical NHWC shape, but the filter is provided in the + packed layout oihw8i32o4i. The physical packed layout used + for the activation is: nhw8h8wc + + """ + assert kernel_size == tuple(shape_filter[2:4]) + + block_shape = 8, 8, 32 + block_H, block_W, _ = block_shape + shape = get_packed_layout(shape_nhwc, block_shape, packed_C=False) + logical_output_shape = ( + shape_nhwc[0], + (shape_nhwc[1] - kernel_size[0] + padding[0] + padding[1]) // stride[0] + 1, + (shape_nhwc[2] - kernel_size[1] + padding[2] + padding[3]) // stride[0] + 1, + shape_nhwc[3], + ) + output_shape = get_packed_layout(logical_output_shape, block_shape, packed_C=False) + + N, H, W, C = shape_nhwc + X = te.placeholder(shape_nhwc, dtype=dtype) + # Combination of padding required by conv2d operator and padding to evenly divisible + # number of blocks. Note that this padding should be inlined in the schedule so + # as to avoid input copying. + pad_h = (block_H - ((H + padding[1]) % block_H)) % block_H + pad_w = (block_W - ((W + padding[3]) % block_W)) % block_W + X_pad = topi.nn.pad(X, [0, padding[0], padding[2], 0], [0, pad_h, pad_w, 0], pad_value=0) + # Calculate packed layout + packed_shape = get_packed_layout(X_pad.shape, block_shape, packed_C=False) + X_packed = te.compute( + packed_shape, lambda n, ho, wo, hi, wi, c: X_pad[n, ho * block_H + hi, wo * block_W + wi, c] + ) + + # Filter shape using KCRS (OIHW) notation + filter_Ki, filter_Ci, filter_Cii = 32, 32, 4 + assert shape_filter[-1] == filter_Cii + assert shape_filter[-2] == filter_Ki + assert shape_filter[-3] == filter_Ci // filter_Cii + + filt_packed = te.placeholder(shape_filter, dtype=dtype) + + rh = te.reduce_axis((0, kernel_size[0]), name="rh") + rw = te.reduce_axis((0, kernel_size[1]), name="rw") + rc = te.reduce_axis((0, C), name="rc") + + def compute(n, ho, wo, hi, wi, k): + # Construct blockized strided conv2d height index + h = ho * block_H + hi + h_contig = h * stride[0] + rh + h_block_id = h_contig // block_H + h_block_offset = h_contig % block_H + + # Construct blockized strided conv2d width index + w = wo * block_W + wi + w_contig = w * stride[1] + rw + w_block_id = w_contig // block_W + w_block_offset = w_contig % block_W + + # Construct flat filter input channel indices + rco = rc // filter_Ci + rcio = (rc % filter_Ci) // filter_Cii + rcii = rc % filter_Cii + + # Construct split filter output channel index + ko = k // filter_Ki + ki = k % filter_Ki + + return te.sum( + X_packed[n, h_block_id, w_block_id, h_block_offset, w_block_offset, rc] + * filt_packed[ko, rco, rh, rw, rcio, ki, rcii], + axis=[rh, rw, rc], + ) + + Y = te.compute(output_shape, compute) + s = te.create_schedule(Y.op) + + # Ensure the padding and array packing is performed inline + s[X_pad].compute_inline() + s[X_packed].compute_inline() + + n, ho, wo, hi, wi, k = s[Y].op.axis + rh, rw, rc = s[Y].op.reduce_axis + + rco, rci = s[Y].split(rc, factor=32) + s[Y].reorder(n, rco, wo, ho, k, hi, wi) + Xl = s.cache_read(X_packed, storage_scope, [Y]) + s[Xl].compute_at(s[Y], rco) + + ko, ki = s[Y].split(k, factor=32) + s[Y].reorder(n, rco, wo, ho, ko, hi, wi, ki) + Fl = s.cache_read(filt_packed, storage_scope, [Y]) + s[Fl].compute_at(s[Y], ko) + + binds = {} + if storage_scope and storage_scope != "global": + with tvm.transform.PassContext(): + Xb = tvm.tir.decl_buffer(shape, name="Xb", dtype=dtype, scope=storage_scope) + Yb = tvm.tir.decl_buffer(output_shape, name="Yb", dtype=dtype, scope=storage_scope) + binds = {X: Xb, Y: Yb} + + return (s, [X, filt_packed, Y], binds) + + +class BaseConv2d: + batch = tvm.testing.parameter(1) + in_size = tvm.testing.parameter(8, 56) + in_channel = tvm.testing.parameter(64) + out_channel = tvm.testing.parameter(64) + kernel = tvm.testing.parameter(3) + stride = tvm.testing.parameter(1) + pad = tvm.testing.parameter(1) + dtype = tvm.testing.parameter("float32") + + +class TestConv2dLogical(BaseConv2d): + @tvm.testing.parametrize_targets("llvm") + def test_conv2d(self, shape_nhwc, shape_oihw, kernel, stride, pad, dtype, target): + inputs = [ + np.random.uniform(0, 255, size=shape_nhwc).astype(dtype), + np.random.uniform(0, 255, size=shape_oihw).astype(dtype), + ] + np_filter = inputs[1].transpose(2, 3, 1, 0) + ref_output = testing.conv2d_nhwc_python(inputs[0], np_filter, stride, pad) + output = build_and_run( + inputs, + conv2d_logical, + target, + target, + shape_nhwc=shape_nhwc, + shape_kcrs=shape_oihw, + kernel_size=(kernel, kernel), + stride=(stride, stride), + padding=(pad, pad, pad, pad), + dtype=dtype, + ) + return output, ref_output + + +class TestConv2dPackedFilter(BaseConv2d): + conv2d_impl = tvm.testing.parameter(conv2d_packed_filter, conv2d_packed_filter_nhwhwc) + + @tvm.testing.parametrize_targets("llvm") + def test_conv2d( + self, + conv2d_impl, + shape_nhwc, + shape_oihw, + shape_oihw8i32o4i, + kernel, + stride, + pad, + dtype, + target, + ): + inputs = [ + np.random.uniform(0, 255, size=shape_nhwc).astype(dtype), + np.random.uniform(0, 255, size=shape_oihw8i32o4i).astype(dtype), + ] + np_filter = ( + inputs[1].transpose(0, 5, 1, 4, 6, 2, 3).reshape(shape_oihw).transpose(2, 3, 1, 0) + ) + ref_output = testing.conv2d_nhwc_python(inputs[0], np_filter, stride, pad) + output = build_and_run( + inputs, + conv2d_impl, + target, + target, + shape_nhwc=shape_nhwc, + shape_filter=shape_oihw8i32o4i, + kernel_size=(kernel, kernel), + stride=(stride, stride), + padding=(pad, pad, pad, pad), + dtype=dtype, + ) + return output, ref_output + + +if __name__ == "__main__": + sys.exit(pytest.main(sys.argv)) diff --git a/tests/python/contrib/test_hexagon/test_maxpool2d_blocked.py b/tests/python/contrib/test_hexagon/test_maxpool2d_blocked.py new file mode 100644 index 000000000000..75df6504611e --- /dev/null +++ b/tests/python/contrib/test_hexagon/test_maxpool2d_blocked.py @@ -0,0 +1,151 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import sys + +import tvm +from tvm import te +from tvm import topi +from tvm.topi import testing +from .infrastructure import ceildiv +from .infrastructure import get_packed_layout +from .infrastructure import build_and_run + +import numpy as np +import pytest + +# Blocked layout: NHWC8h8w32c :: [N, H//8, W//8, C//32, 8h, 8w, 32c] +def maxpool2d_logical( + shape_nhwc, + window_shape, + stride, + padding, + dtype, + storage_scope="global", +): + """ + Maxpool2d TE wherein the input activation is defined by its + logical NHWC shape. The packed physical layout for the + activation is nhwc8h8w32c. + """ + + block_shape = 8, 8, 32 + block_H, block_W, block_C = block_shape + shape = get_packed_layout(shape_nhwc, block_shape) + logical_output_shape = ( + shape_nhwc[0], + (shape_nhwc[1] - window_shape[0] + padding[0] + padding[1]) // stride[0] + 1, + (shape_nhwc[2] - window_shape[1] + padding[2] + padding[3]) // stride[0] + 1, + shape_nhwc[3], + ) + output_shape = get_packed_layout(logical_output_shape, block_shape) + + N, H, W, C = shape_nhwc + X = te.placeholder(shape_nhwc, dtype=dtype) + + # Combination of padding required by maxpool operator and padding to evenly divisible + # number of blocks. Note that this padding should be inlined in the schedule so + # as to avoid input copying. + pad_h = (block_H - ((H + padding[1]) % block_H)) % block_H + pad_w = (block_W - ((W + padding[3]) % block_W)) % block_W + X_pad = topi.nn.pad(X, [0, padding[0], padding[2], 0], [0, pad_h, pad_w, 0], pad_value=0) + + # Calculate packed layout + X_packed = te.compute( + shape, + lambda n, ho, wo, co, hi, wi, ci: X_pad[ + n, ho * block_H + hi, wo * block_W + wi, co * block_C + ci + ], + ) + + rh = te.reduce_axis((0, window_shape[0]), name="rh") + rw = te.reduce_axis((0, window_shape[1]), name="rw") + + def compute(n, ho, wo, co, hi, wi, ci): + # Construct blockized strided maxpool height indices + h = ho * block_H + hi + h_contig = h * stride[0] + rh + h_block_id = h_contig // block_H + h_block_offset = h_contig % block_H + + # Construct blockized strided maxpool width indices + w = wo * block_W + wi + w_contig = w * stride[1] + rw + w_block_id = w_contig // block_W + w_block_offset = w_contig % block_W + + return te.max( + X_packed[n, h_block_id, w_block_id, co, h_block_offset, w_block_offset, ci], + axis=[rh, rw], + ) + + Y = te.compute(output_shape, compute) + s = te.create_schedule(Y.op) + + # Ensure the padding and array packing is performed inline + s[X_pad].compute_inline() + s[X_packed].compute_inline() + + binds = {} + if storage_scope and storage_scope != "global": + with tvm.transform.PassContext(): + Xb = tvm.tir.decl_buffer(shape, name="Xb", dtype=dtype, scope=storage_scope) + Yb = tvm.tir.decl_buffer(output_shape, name="Yb", dtype=dtype, scope=storage_scope) + binds = {X: Xb, Y: Yb} + + return (s, [X, Y], binds) + + +class BaseMaxPooling: + batch = tvm.testing.parameter(1) + in_size = tvm.testing.parameter(8, 112) + in_channel = tvm.testing.parameter(64) + window_size = tvm.testing.parameter(3) + stride = tvm.testing.parameter(2) + pad = tvm.testing.parameter(1) + dtype = tvm.testing.parameter("float32") + + +class TestMaxPooling(BaseMaxPooling): + @tvm.testing.parametrize_targets("llvm") + def test_maxpool(self, shape_nhwc, window_size, stride, pad, dtype, target): + inputs = [np.random.uniform(0, 255, size=shape_nhwc).astype(dtype)] + ref_output = testing.poolnd_python( + inputs[0], + (window_size, window_size), + strides=(stride, stride), + dilation=(1, 1), + padding_before=(pad, pad), + padding_after=(pad, pad), + pool_type="max", + ) + output = build_and_run( + inputs, + maxpool2d_logical, + target, + target, + shape_nhwc, + window_shape=(window_size, window_size), + stride=(stride, stride), + padding=(pad, pad, pad, pad), + dtype=dtype, + ) + return output, ref_output + + +if __name__ == "__main__": + sys.exit(pytest.main(sys.argv)) From ae5b095c4a887a8c3b7bfe2e3a2611f1e0c44453 Mon Sep 17 00:00:00 2001 From: Chris Sullivan Date: Thu, 9 Sep 2021 16:36:33 -0700 Subject: [PATCH 2/2] Restructure based on review comments --- tests/python/contrib/test_hexagon/conftest.py | 13 +- .../contrib/test_hexagon/infrastructure.py | 47 ++++++-- .../test_hexagon/test_conv2d_blocked.py | 111 +++++++++--------- .../test_hexagon/test_maxpool2d_blocked.py | 16 ++- 4 files changed, 106 insertions(+), 81 deletions(-) diff --git a/tests/python/contrib/test_hexagon/conftest.py b/tests/python/contrib/test_hexagon/conftest.py index 8213c33123d8..0329328de3df 100644 --- a/tests/python/contrib/test_hexagon/conftest.py +++ b/tests/python/contrib/test_hexagon/conftest.py @@ -19,7 +19,7 @@ values from testing parameters """ import tvm -from .infrastructure import ceildiv +from .infrastructure import get_packed_filter_layout @tvm.testing.fixture @@ -34,13 +34,4 @@ def shape_oihw(out_channel, in_channel, kernel): @tvm.testing.fixture def shape_oihw8i32o4i(out_channel, in_channel, kernel): - out_factor, in_factor, in_second_factor = 32, 32, 4 - return ( - int(ceildiv(out_channel, out_factor)), - int(ceildiv(in_channel, in_factor)), - kernel, - kernel, - in_factor // in_second_factor, - out_factor, - in_second_factor, - ) + return get_packed_filter_layout(out_channel, in_channel, kernel, kernel) diff --git a/tests/python/contrib/test_hexagon/infrastructure.py b/tests/python/contrib/test_hexagon/infrastructure.py index 9c7ba5b1b6a3..193a8630c3d2 100644 --- a/tests/python/contrib/test_hexagon/infrastructure.py +++ b/tests/python/contrib/test_hexagon/infrastructure.py @@ -25,23 +25,37 @@ def ceildiv(o, d): return tvm.tir.floordiv(o + d - 1, d) -def get_packed_layout(logical_shape_nhwc, block_shape, packed_C=True): - shape = [logical_shape_nhwc[0]] +def get_packed_activation_layout(shape_nhwc, block_shape, packed_C=True): + assert len(shape_nhwc) == 4 + shape = [shape_nhwc[0]] off_h, off_w, off_c = block_shape - shape.append(ceildiv(logical_shape_nhwc[1], off_h)) - shape.append(ceildiv(logical_shape_nhwc[2], off_w)) + shape.append(ceildiv(shape_nhwc[1], off_h)) + shape.append(ceildiv(shape_nhwc[2], off_w)) if packed_C: - shape.append(ceildiv(logical_shape_nhwc[3], off_c)) + shape.append(ceildiv(shape_nhwc[3], off_c)) shape.extend(block_shape) else: - shape.extend([off_h, off_w, logical_shape_nhwc[-1]]) + shape.extend([off_h, off_w, shape_nhwc[3]]) return shape +def get_packed_filter_layout(out_channel, in_channel, kernel_h, kernel_w): + out_factor, in_first_factor, in_second_factor = 32, 32, 4 + return ( + int(ceildiv(out_channel, out_factor)), + int(ceildiv(in_channel, in_first_factor)), + kernel_h, + kernel_w, + in_first_factor // in_second_factor, + out_factor, + in_second_factor, + ) + + def build_and_run(inputs, func, target, target_host, *args, **kwargs): - s, placeholders, binds = func(*args, **kwargs) + schedule, placeholders, binds = func(*args, **kwargs) - func = tvm.build(s, placeholders, target=target, target_host=target_host, binds=binds) + func = tvm.build(schedule, placeholders, target=target, target_host=target_host, binds=binds) dev = tvm.device(target) tensors = [] for tensor in inputs: @@ -55,3 +69,20 @@ def build_and_run(inputs, func, target, target_host, *args, **kwargs): func(*tensors) return tensors[-1].asnumpy() + + +def get_block_shape(): + return 8, 8, 32 + + +def get_conv2d_nhwc_shape(shape_nhwc, kernel_size, strides, padding, dilation, out_channels): + assert len(shape_nhwc) == 4 + kernel = [] + kernel.append((kernel_size[0] - 1) * dilation[0] + 1) + kernel.append((kernel_size[1] - 1) * dilation[1] + 1) + return ( + shape_nhwc[0], + (shape_nhwc[1] - kernel[0] + padding[0] + padding[1]) // strides[0] + 1, + (shape_nhwc[2] - kernel[1] + padding[2] + padding[3]) // strides[1] + 1, + out_channels, + ) diff --git a/tests/python/contrib/test_hexagon/test_conv2d_blocked.py b/tests/python/contrib/test_hexagon/test_conv2d_blocked.py index a5a766b5b5df..e0b7fb20ab8e 100644 --- a/tests/python/contrib/test_hexagon/test_conv2d_blocked.py +++ b/tests/python/contrib/test_hexagon/test_conv2d_blocked.py @@ -21,9 +21,14 @@ from tvm import te from tvm import topi from tvm.topi import testing -from .infrastructure import ceildiv -from .infrastructure import get_packed_layout -from .infrastructure import build_and_run +from .infrastructure import ( + ceildiv, + build_and_run, + get_block_shape, + get_conv2d_nhwc_shape, + get_packed_filter_layout, + get_packed_activation_layout, +) import numpy as np import pytest @@ -31,7 +36,7 @@ def conv2d_logical( shape_nhwc, - shape_kcrs, + shape_oihw, kernel_size, stride, padding, @@ -45,18 +50,15 @@ def conv2d_logical( Activation: nhwc8h8w32c Filter: oihw8i32o4i """ - assert kernel_size == tuple(shape_kcrs[2:]) + assert kernel_size == tuple(shape_oihw[2:]) - block_shape = 8, 8, 32 + block_shape = get_block_shape() block_H, block_W, block_C = block_shape - shape = get_packed_layout(shape_nhwc, block_shape) - logical_output_shape = ( - shape_nhwc[0], - (shape_nhwc[1] - kernel_size[0] + padding[0] + padding[1]) // stride[0] + 1, - (shape_nhwc[2] - kernel_size[1] + padding[2] + padding[3]) // stride[0] + 1, - shape_nhwc[3], + shape = get_packed_activation_layout(shape_nhwc, block_shape) + logical_output_shape = get_conv2d_nhwc_shape( + shape_nhwc, kernel_size, stride, padding, [1, 1], shape_oihw[0] ) - output_shape = get_packed_layout(logical_output_shape, block_shape) + output_shape = get_packed_activation_layout(logical_output_shape, block_shape) N, H, W, C = shape_nhwc X = te.placeholder(shape_nhwc, dtype=dtype) @@ -75,18 +77,10 @@ def conv2d_logical( ) # Filter shape using KCRS (OIHW) notation - K, C, R, S = shape_kcrs + K, C, R, S = shape_oihw filter_Ki, filter_Ci, filter_Cii = 32, 32, 4 - shape_filter = [ - ceildiv(K, filter_Ki), - ceildiv(C, filter_Ci), - R, - S, - filter_Ci // filter_Cii, - filter_Ki, - filter_Cii, - ] - filt = te.placeholder(shape_kcrs, dtype=dtype) + shape_filter = get_packed_filter_layout(K, C, R, S) + filt = te.placeholder(shape_oihw, dtype=dtype) # Channel padding to multiples of 32 pad_c = (filter_Ci - (C % filter_Ci)) % filter_Ci pad_k = (filter_Ki - (K % filter_Ki)) % filter_Ki @@ -163,7 +157,7 @@ def compute(n, ho, wo, ko, hi, wi, ki): def conv2d_packed_filter( shape_nhwc, - shape_filter, + shape_oihw8i32o4i, kernel_size, stride, padding, @@ -176,18 +170,21 @@ def conv2d_packed_filter( packed layout oihw8i32o4i. The physical packed layout used for the activation is: nhwc8h8w32c """ - assert kernel_size == tuple(shape_filter[2:4]) + assert kernel_size == tuple(shape_oihw8i32o4i[2:4]) - block_shape = 8, 8, 32 + block_shape = get_block_shape() block_H, block_W, block_C = block_shape - shape = get_packed_layout(shape_nhwc, block_shape) - logical_output_shape = ( - shape_nhwc[0], - (shape_nhwc[1] - kernel_size[0] + padding[0] + padding[1]) // stride[0] + 1, - (shape_nhwc[2] - kernel_size[1] + padding[2] + padding[3]) // stride[0] + 1, - shape_nhwc[3], + shape = get_packed_activation_layout(shape_nhwc, block_shape) + logical_output_shape = get_conv2d_nhwc_shape( + shape_nhwc, + kernel_size, + stride, + padding, + [1, 1], + shape_oihw8i32o4i[0] * shape_oihw8i32o4i[5], ) - output_shape = get_packed_layout(logical_output_shape, block_shape) + + output_shape = get_packed_activation_layout(logical_output_shape, block_shape) N, H, W, C = shape_nhwc X = te.placeholder(shape_nhwc, dtype=dtype) @@ -199,7 +196,7 @@ def conv2d_packed_filter( X_pad = topi.nn.pad(X, [0, padding[0], padding[2], 0], [0, pad_h, pad_w, 0], pad_value=0) # Calculate packed layout - packed_shape = get_packed_layout(X_pad.shape, block_shape) + packed_shape = get_packed_activation_layout(X_pad.shape, block_shape) X_packed = te.compute( packed_shape, @@ -210,11 +207,11 @@ def conv2d_packed_filter( # Filter shape using KCRS (OIHW) notation filter_Ki, filter_Ci, filter_Cii = 32, 32, 4 - assert shape_filter[-1] == filter_Cii - assert shape_filter[-2] == filter_Ki - assert shape_filter[-3] == filter_Ci // filter_Cii + assert shape_oihw8i32o4i[-1] == filter_Cii + assert shape_oihw8i32o4i[-2] == filter_Ki + assert shape_oihw8i32o4i[-3] == filter_Ci // filter_Cii - filt_packed = te.placeholder(shape_filter, dtype=dtype) + filt_packed = te.placeholder(shape_oihw8i32o4i, dtype=dtype) rh = te.reduce_axis((0, kernel_size[0]), name="rh") rw = te.reduce_axis((0, kernel_size[1]), name="rw") @@ -285,7 +282,7 @@ def compute(n, ho, wo, ko, hi, wi, ki): def conv2d_packed_filter_nhwhwc( shape_nhwc, - shape_filter, + shape_oihw8i32o4i, kernel_size, stride, padding, @@ -299,18 +296,20 @@ def conv2d_packed_filter_nhwhwc( for the activation is: nhw8h8wc """ - assert kernel_size == tuple(shape_filter[2:4]) + assert kernel_size == tuple(shape_oihw8i32o4i[2:4]) - block_shape = 8, 8, 32 + block_shape = get_block_shape() block_H, block_W, _ = block_shape - shape = get_packed_layout(shape_nhwc, block_shape, packed_C=False) - logical_output_shape = ( - shape_nhwc[0], - (shape_nhwc[1] - kernel_size[0] + padding[0] + padding[1]) // stride[0] + 1, - (shape_nhwc[2] - kernel_size[1] + padding[2] + padding[3]) // stride[0] + 1, - shape_nhwc[3], + shape = get_packed_activation_layout(shape_nhwc, block_shape, packed_C=False) + logical_output_shape = get_conv2d_nhwc_shape( + shape_nhwc, + kernel_size, + stride, + padding, + [1, 1], + shape_oihw8i32o4i[0] * shape_oihw8i32o4i[5], ) - output_shape = get_packed_layout(logical_output_shape, block_shape, packed_C=False) + output_shape = get_packed_activation_layout(logical_output_shape, block_shape, packed_C=False) N, H, W, C = shape_nhwc X = te.placeholder(shape_nhwc, dtype=dtype) @@ -321,18 +320,18 @@ def conv2d_packed_filter_nhwhwc( pad_w = (block_W - ((W + padding[3]) % block_W)) % block_W X_pad = topi.nn.pad(X, [0, padding[0], padding[2], 0], [0, pad_h, pad_w, 0], pad_value=0) # Calculate packed layout - packed_shape = get_packed_layout(X_pad.shape, block_shape, packed_C=False) + packed_shape = get_packed_activation_layout(X_pad.shape, block_shape, packed_C=False) X_packed = te.compute( packed_shape, lambda n, ho, wo, hi, wi, c: X_pad[n, ho * block_H + hi, wo * block_W + wi, c] ) # Filter shape using KCRS (OIHW) notation filter_Ki, filter_Ci, filter_Cii = 32, 32, 4 - assert shape_filter[-1] == filter_Cii - assert shape_filter[-2] == filter_Ki - assert shape_filter[-3] == filter_Ci // filter_Cii + assert shape_oihw8i32o4i[-1] == filter_Cii + assert shape_oihw8i32o4i[-2] == filter_Ki + assert shape_oihw8i32o4i[-3] == filter_Ci // filter_Cii - filt_packed = te.placeholder(shape_filter, dtype=dtype) + filt_packed = te.placeholder(shape_oihw8i32o4i, dtype=dtype) rh = te.reduce_axis((0, kernel_size[0]), name="rh") rw = te.reduce_axis((0, kernel_size[1]), name="rw") @@ -422,7 +421,7 @@ def test_conv2d(self, shape_nhwc, shape_oihw, kernel, stride, pad, dtype, target target, target, shape_nhwc=shape_nhwc, - shape_kcrs=shape_oihw, + shape_oihw=shape_oihw, kernel_size=(kernel, kernel), stride=(stride, stride), padding=(pad, pad, pad, pad), @@ -461,7 +460,7 @@ def test_conv2d( target, target, shape_nhwc=shape_nhwc, - shape_filter=shape_oihw8i32o4i, + shape_oihw8i32o4i=shape_oihw8i32o4i, kernel_size=(kernel, kernel), stride=(stride, stride), padding=(pad, pad, pad, pad), diff --git a/tests/python/contrib/test_hexagon/test_maxpool2d_blocked.py b/tests/python/contrib/test_hexagon/test_maxpool2d_blocked.py index 75df6504611e..67af8d87f708 100644 --- a/tests/python/contrib/test_hexagon/test_maxpool2d_blocked.py +++ b/tests/python/contrib/test_hexagon/test_maxpool2d_blocked.py @@ -21,9 +21,13 @@ from tvm import te from tvm import topi from tvm.topi import testing -from .infrastructure import ceildiv -from .infrastructure import get_packed_layout -from .infrastructure import build_and_run +from .infrastructure import ( + ceildiv, + build_and_run, + get_block_shape, + get_packed_filter_layout, + get_packed_activation_layout, +) import numpy as np import pytest @@ -43,16 +47,16 @@ def maxpool2d_logical( activation is nhwc8h8w32c. """ - block_shape = 8, 8, 32 + block_shape = get_block_shape() block_H, block_W, block_C = block_shape - shape = get_packed_layout(shape_nhwc, block_shape) + shape = get_packed_activation_layout(shape_nhwc, block_shape) logical_output_shape = ( shape_nhwc[0], (shape_nhwc[1] - window_shape[0] + padding[0] + padding[1]) // stride[0] + 1, (shape_nhwc[2] - window_shape[1] + padding[2] + padding[3]) // stride[0] + 1, shape_nhwc[3], ) - output_shape = get_packed_layout(logical_output_shape, block_shape) + output_shape = get_packed_activation_layout(logical_output_shape, block_shape) N, H, W, C = shape_nhwc X = te.placeholder(shape_nhwc, dtype=dtype)