diff --git a/python/tvm/topi/arm_cpu/injective.py b/python/tvm/topi/arm_cpu/injective.py index 7c3ea5261f5e..5c63e5a513db 100644 --- a/python/tvm/topi/arm_cpu/injective.py +++ b/python/tvm/topi/arm_cpu/injective.py @@ -68,7 +68,8 @@ def schedule_injective(outs): if list(s[x].op.axis): # do not vectorize for broadcast - (io, ii) = s[x].split(list(s[x].op.axis)[-1], 16 // np.dtype(x.dtype).itemsize) + dtype = "uint16" if x.dtype == "bfloat16" else x.dtype + (io, ii) = s[x].split(list(s[x].op.axis)[-1], 16 // np.dtype(dtype).itemsize) s[x].vectorize(ii) tvm.te.schedule.AutoInlineInjective(s) diff --git a/python/tvm/topi/nn/winograd_util.py b/python/tvm/topi/nn/winograd_util.py index c0f7097a6315..4bee06fcfaf8 100644 --- a/python/tvm/topi/nn/winograd_util.py +++ b/python/tvm/topi/nn/winograd_util.py @@ -169,6 +169,7 @@ def winograd_transform_matrices(tile_size, kernel_size, out_dtype): intp_pts = _interpolation_points(degree) A_data, B_data, G_data = _cook_toom_convolution(intp_pts, tile_size, kernel_size) + out_dtype = "uint16" if out_dtype == "bfloat16" else out_dtype return ( const_matrix(A_data.astype(out_dtype), "A"), const_matrix(B_data.astype(out_dtype), "B"), diff --git a/tests/python/integration/test_arm_aprofile.py b/tests/python/integration/test_arm_aprofile.py new file mode 100644 index 000000000000..c38217a1b1c0 --- /dev/null +++ b/tests/python/integration/test_arm_aprofile.py @@ -0,0 +1,74 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Tests for Arm(R) A-Profile Architecture."""
+import os
+import numpy as np
+import pytest
+import tvm
+import tvm.testing
+from tvm import relay
+from tvm.relay.transform import ToMixedPrecision, FoldConstant
+from tvm.relay.build_module import bind_params_by_name
+
+
+def get_mattr(dtype):
+    """Return the LLVM ``-mattr`` feature string used when compiling for ``dtype``."""
+    extensions = {"float16": ",+fullfp16", "bfloat16": ",+bf16"}
+    return "+v8.2a,+neon" + extensions.get(dtype, "")
+
+
+@tvm.testing.skip_if_32bit(reason="skipping test for i386.")
+@pytest.mark.parametrize("dtype", ["float32", "float16", "bfloat16"])
+def test_conv2d(dtype):
+    """Test if Conv2d cross compiles with TVM schedules."""
+    # The graph is always authored in float32; the parametrized ``dtype`` must
+    # not be overwritten, because it selects the -mattr features, the artifact
+    # name and whether ToMixedPrecision is applied below.
+    data_dtype = "float32"
+    ishape = [1, 28, 28, 3]  # NHWC
+    kernel_size = (3, 3)
+    wshape = (kernel_size[0], kernel_size[1], ishape[-1], 2)  # HWIO
+    weight_data = np.random.uniform(-128, 127, wshape).astype(data_dtype)
+    invar = relay.var("data", relay.TensorType(ishape, data_dtype))
+    weight = relay.const(weight_data, data_dtype)
+    out = relay.op.nn.conv2d(
+        invar,
+        weight,
+        kernel_size=kernel_size,
+        strides=(1, 1),
+        padding=(0, 0),
+        dilation=(1, 1),
+        data_layout="NHWC",
+        kernel_layout="HWIO",
+        out_dtype=data_dtype,
+        out_layout="NHWC",
+    )
+    mod = tvm.IRModule.from_expr(relay.Function([invar], out))
+    params = {}
+
+    prefixed_network_name = dtype + ".conv2d"
+    lib_path = os.getcwd() + "/" + prefixed_network_name + ".mod.so"
+    target = "llvm -mtriple=aarch64-linux-gnu -mattr=" + get_mattr(dtype)
+
+    mod["main"] = bind_params_by_name(mod["main"], params)
+    if dtype in ["float16", "bfloat16"]:
+        mod = ToMixedPrecision(dtype)(mod)
+    mod = FoldConstant()(mod)
+
+    with tvm.transform.PassContext(opt_level=3):
+        lib = tvm.relay.build(mod, target=target, params=params)
+        lib.export_library(lib_path, cc="aarch64-linux-gnu-gcc")