From f2ef782239822088086a9f5f6c3ef88cd8198f7d Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Fri, 19 Mar 2021 14:18:57 -0700 Subject: [PATCH 01/17] make cumbinop, refactor cumsum, add cumprod --- python/tvm/topi/cumprod.py | 67 +++++++++++++++++++++++++++ python/tvm/topi/cumsum.py | 93 ++++++++++++++++++++++++-------------- 2 files changed, 126 insertions(+), 34 deletions(-) create mode 100644 python/tvm/topi/cumprod.py diff --git a/python/tvm/topi/cumprod.py b/python/tvm/topi/cumprod.py new file mode 100644 index 000000000000..b3ce51265af4 --- /dev/null +++ b/python/tvm/topi/cumprod.py @@ -0,0 +1,67 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name +"""Cumprod operator""" +from typing import Callable, Optional + +import tvm + +from ..tir import generic +from .cumsum import cumbinop + + +def cumprod( + data: tvm.te.Tensor, + axis: Optional[int] = None, + dtype: Optional[int] = None, + exclusive: Optional[bool] = None, +): + """Numpy style cumprod op. Return the cumulative sum of the elements along a given axis. + + Parameters + ---------- + data : tvm.te.Tensor + The input data to the operator. + + axis : int, optional + Axis along which the cumulative product is computed. The default (None) is to compute + the cumsum over the flattened array. + + dtype : string, optional + Type of the returned array and of the accumulator in which the elements are summed. + If dtype is not specified, it defaults to the dtype of data. + + exclusive : bool, optional + If True, will return exclusive sum in which the first element is not + included. In other terms, if True, the j-th output element would be + the product of the first (j-1) elements. Otherwise, it would be the product of + the first j elements. + + Returns + ------- + result : tvm.te.Tensor + The result has the same size as data, and the same shape as data if axis is not None. + If axis is None, the result is a 1-d array. + """ + cumbinop( + data=data, + axis=axis, + dtype=dtype, + exclusive=exclusive, + binop=generic.multiply, + identity_value=1, + ) diff --git a/python/tvm/topi/cumsum.py b/python/tvm/topi/cumsum.py index 2013a352874d..0b064021c36e 100644 --- a/python/tvm/topi/cumsum.py +++ b/python/tvm/topi/cumsum.py @@ -16,39 +16,26 @@ # under the License. # pylint: disable=invalid-name """Cumsum operator""" -from ..tir import decl_buffer, ir_builder -from ..te import extern -from .utils import prod, get_const_int -from .math import cast - - -def cumsum(data, axis=None, dtype=None, exclusive=None): - """Numpy style cumsum op. Return the cumulative sum of the elements along a given axis. - - Parameters - ---------- - data : tvm.te.Tensor - The input data to the operator. 
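For reviewers: the inclusive/exclusive semantics described in the cumprod docstring above map onto NumPy as follows (a rough sketch for review, not part of the patch itself):

    import numpy as np

    data = np.array([1, 2, 3, 4])

    # Inclusive cumprod: the j-th element is the product of the first j elements.
    np.cumprod(data)                          # -> [1, 2, 6, 24]

    # Exclusive cumprod: the j-th element is the product of the first (j - 1)
    # elements; the empty product is the identity value 1.
    exclusive = np.roll(np.cumprod(data), 1)
    exclusive[0] = 1                          # -> [1, 1, 2, 6]

The same roll-and-reseed construction reappears later in the series as the test oracle for the exclusive mode.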
+from typing import Callable, Optional - axis : int, optional - Axis along which the cumulative sum is computed. The default (None) is to compute - the cumsum over the flattened array. +import tvm - dtype : string, optional - Type of the returned array and of the accumulator in which the elements are summed. - If dtype is not specified, it defaults to the dtype of data. +from ..te import extern +from ..tir import decl_buffer, generic, ir_builder +from .math import cast +from .utils import get_const_int, prod - exclusive : int, optional - If set to 1 will return exclusive sum in which the first element is not - included. In other terms, if set to 1, the j-th output element would be - the sum of the first (j-1) elements. Otherwise, it would be the sum of - the first j elements. - Returns - ------- - result : tvm.te.Tensor - The result has the same size as data, and the same shape as data if axis is not None. - If axis is None, the result is a 1-d array. +def cumbinop( + data: tvm.te.Tensor, + axis: Optional[int] = None, + dtype: Optional[str] = None, + exclusive: Optional[bool] = None, + binop: Callable[["tvm.Expr", "tvm.Expr"], "tvm.Expr"] = generic.add, + identity_value: "tvm.Expr" = 0, +) -> tvm.te.Tensor: + """ + TODO """ if dtype is None or dtype == "": dtype = data.dtype @@ -82,7 +69,7 @@ def maybe_cast(x): axis_mul_after *= value if exclusive is None: - exclusive = 0 + exclusive = False def gen_ir(data_buf, out_buf): ib = ir_builder.create() @@ -93,18 +80,18 @@ def gen_ir(data_buf, out_buf): i = fused // axis_mul_after j = fused % axis_mul_after base_idx = i * cumsum_axis_len * axis_mul_after + j - if exclusive == 0: + if exclusive: out_buf[base_idx] = maybe_cast(data_buf[base_idx]) else: - out_buf[base_idx] = cast(0, dtype) + out_buf[base_idx] = cast(identity_value, dtype) with ib.for_range(0, cumsum_axis_len - 1, "_k") as _k: k = _k + 1 cur_idx = base_idx + k * axis_mul_after prev_idx = base_idx + (k - 1) * axis_mul_after if exclusive == 0: - out_buf[cur_idx] = out_buf[prev_idx] + maybe_cast(data_buf[cur_idx]) + out_buf[cur_idx] = binop(out_buf[prev_idx], maybe_cast(data_buf[cur_idx])) else: - out_buf[cur_idx] = out_buf[prev_idx] + maybe_cast(data_buf[prev_idx]) + out_buf[cur_idx] = binop(out_buf[prev_idx], maybe_cast(data_buf[prev_idx])) return ib.get() @@ -119,3 +106,41 @@ def gen_ir(data_buf, out_buf): name="cumsum_generic", tag="cumsum_generic", ) + + +def cumsum( + data: tvm.te.Tensor, + axis: Optional[int] = None, + dtype: Optional[int] = None, + exclusive: Optional[bool] = None, +): + """Numpy style cumsum op. Return the cumulative sum of the elements along a given axis. + + Parameters + ---------- + data : tvm.te.Tensor + The input data to the operator. + + axis : int, optional + Axis along which the cumulative sum is computed. The default (None) is to compute + the cumsum over the flattened array. + + dtype : string, optional + Type of the returned array and of the accumulator in which the elements are summed. + If dtype is not specified, it defaults to the dtype of data. + + exclusive : bool, optional + If True, will return exclusive sum in which the first element is not + included. In other terms, if True, the j-th output element would be + the sum of the first (j-1) elements. Otherwise, it would be the sum of + the first j elements. + + Returns + ------- + result : tvm.te.Tensor + The result has the same size as data, and the same shape as data if axis is not None. + If axis is None, the result is a 1-d array. 
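As a sketch of what the generic IR above computes, here is a hypothetical NumPy reference (cumbinop_ref is an illustrative name, not part of the patch); it mirrors gen_ir's decomposition into (axis_mul_before, cumsum_axis_len, axis_mul_after) by scanning sequentially along the chosen axis:

    import numpy as np

    def cumbinop_ref(data, binop, identity_value, axis=0, exclusive=False):
        # Move the scan axis to the front; writing through the moveaxis views
        # updates `out` in place, as gen_ir does via its base_idx strides.
        out = np.empty_like(data)
        d = np.moveaxis(data, axis, 0)
        o = np.moveaxis(out, axis, 0)
        o[0] = identity_value if exclusive else d[0]
        for k in range(1, d.shape[0]):
            o[k] = binop(o[k - 1], d[k - 1] if exclusive else d[k])
        return out

    cumbinop_ref(np.array([1, 2, 3, 4]), lambda a, b: a + b, 0)                  # [1, 3, 6, 10]
    cumbinop_ref(np.array([1, 2, 3, 4]), lambda a, b: a * b, 1, exclusive=True)  # [1, 1, 2, 6]

Note that as of this patch the first-element branch in gen_ir is inverted relative to this reference (the exclusive case should seed with the identity); the next patch in the series swaps it back.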
+ """ + cumbinop( + data=data, axis=axis, dtype=dtype, exclusive=exclusive, binop=generic.add, identity_value=0 + ) From 2dc85a0bd7109bbe49695a04ec5106e90508633c Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Fri, 19 Mar 2021 17:28:28 -0700 Subject: [PATCH 02/17] cumsum exclusive test --- python/tvm/topi/cumsum.py | 8 +-- tests/python/topi/python/test_topi_cumsum.py | 51 +++++++++++++++----- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/python/tvm/topi/cumsum.py b/python/tvm/topi/cumsum.py index 0b064021c36e..271bb151372e 100644 --- a/python/tvm/topi/cumsum.py +++ b/python/tvm/topi/cumsum.py @@ -81,9 +81,9 @@ def gen_ir(data_buf, out_buf): j = fused % axis_mul_after base_idx = i * cumsum_axis_len * axis_mul_after + j if exclusive: - out_buf[base_idx] = maybe_cast(data_buf[base_idx]) - else: out_buf[base_idx] = cast(identity_value, dtype) + else: + out_buf[base_idx] = maybe_cast(data_buf[base_idx]) with ib.for_range(0, cumsum_axis_len - 1, "_k") as _k: k = _k + 1 cur_idx = base_idx + k * axis_mul_after @@ -113,7 +113,7 @@ def cumsum( axis: Optional[int] = None, dtype: Optional[int] = None, exclusive: Optional[bool] = None, -): +) -> tvm.te.Tensor: """Numpy style cumsum op. Return the cumulative sum of the elements along a given axis. Parameters @@ -141,6 +141,6 @@ def cumsum( The result has the same size as data, and the same shape as data if axis is not None. If axis is None, the result is a 1-d array. """ - cumbinop( + return cumbinop( data=data, axis=axis, dtype=dtype, exclusive=exclusive, binop=generic.add, identity_value=0 ) diff --git a/tests/python/topi/python/test_topi_cumsum.py b/tests/python/topi/python/test_topi_cumsum.py index cfe5130643c5..4afe1f3926f0 100644 --- a/tests/python/topi/python/test_topi_cumsum.py +++ b/tests/python/topi/python/test_topi_cumsum.py @@ -17,19 +17,34 @@ import numpy as np import tvm import tvm.testing -from tvm import topi import tvm.topi.testing +from tvm import topi @tvm.testing.parametrize_targets def test_cumsum(ctx, target): - def check_cumsum(np_ref, data, axis=None, dtype=None): + def check_cumsum(np_ref, data, axis=None, dtype=None, exclusive=False): implementations = { - "generic": (lambda x: topi.cumsum(x, axis, dtype), topi.generic.schedule_extern), - "cuda": (lambda x: topi.cuda.cumsum(x, axis, dtype), topi.cuda.schedule_scan), - "nvptx": (lambda x: topi.cuda.cumsum(x, axis, dtype), topi.cuda.schedule_scan), - "vulkan": (lambda x: topi.cuda.cumsum(x, axis, dtype), topi.cuda.schedule_scan), - "metal": (lambda x: topi.cuda.cumsum(x, axis, dtype), topi.cuda.schedule_scan), + "generic": ( + lambda x: topi.cumsum(x, axis, dtype, exclusive=exclusive), + topi.generic.schedule_extern, + ), + "cuda": ( + lambda x: topi.cuda.cumsum(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "nvptx": ( + lambda x: topi.cuda.cumsum(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "vulkan": ( + lambda x: topi.cuda.cumsum(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "metal": ( + lambda x: topi.cuda.cumsum(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), } fcompute, fschedule = tvm.topi.testing.dispatch(target, implementations) tvm.topi.testing.compare_numpy_tvm([data], np_ref, target, ctx, fcompute, fschedule) @@ -70,10 +85,24 @@ def check_cumsum(np_ref, data, axis=None, dtype=None): data = np.random.randint(1 << 30, (1 << 31) - 1, size=(100)).astype(in_dtype) check_cumsum(np.cumsum(data), data, dtype="int64") + data = np.random.randint(-100, 100, 
size=(100, 100)).astype("int64")
+
+    expected_result = np.roll(np.cumsum(data), 1)
+    expected_result[0] = 0
+    check_cumsum(expected_result, data, dtype="int64", exclusive=True)
+
+    expected_result = np.roll(np.cumsum(data, axis=0, dtype="int64"), 1, axis=0)
+    expected_result[0, :] = 0
+    check_cumsum(expected_result, data, axis=0, exclusive=True)
+
+    expected_result = np.roll(np.cumsum(data, axis=1, dtype="int64"), 1, axis=1)
+    expected_result[:, 0] = 0
+    check_cumsum(expected_result, data, axis=1, exclusive=True)
+
 if __name__ == "__main__":
     test_cumsum(tvm.context("cpu"), tvm.target.Target("llvm"))
-    test_cumsum(tvm.context("cuda"), tvm.target.Target("cuda"))
-    test_cumsum(tvm.context("nvptx"), tvm.target.Target("nvptx"))
-    test_cumsum(tvm.context("vulkan"), tvm.target.Target("vulkan"))
-    test_cumsum(tvm.context("metal"), tvm.target.Target("metal"))
+    # test_cumsum(tvm.context("cuda"), tvm.target.Target("cuda"))
+    # test_cumsum(tvm.context("nvptx"), tvm.target.Target("nvptx"))
+    # test_cumsum(tvm.context("vulkan"), tvm.target.Target("vulkan"))
+    # test_cumsum(tvm.context("metal"), tvm.target.Target("metal"))

From 94061501629dbd28ea18edd3ef2c457871c08efc Mon Sep 17 00:00:00 2001
From: Andrew Zhao Luo
Date: Fri, 19 Mar 2021 17:35:32 -0700
Subject: [PATCH 03/17] Add cumprod + flesh out cumsum tests

add cumprod and tests

reinstate tests

rethink
---
 python/tvm/topi/__init__.py                   |   1 +
 python/tvm/topi/cumprod.py                    |   7 +-
 python/tvm/topi/cumsum.py                     |  17 ++-
 tests/python/topi/python/test_topi_cumprod.py | 108 ++++++++++++++++++
 tests/python/topi/python/test_topi_cumsum.py  |   8 +-
 5 files changed, 129 insertions(+), 12 deletions(-)
 create mode 100644 tests/python/topi/python/test_topi_cumprod.py

diff --git a/python/tvm/topi/__init__.py b/python/tvm/topi/__init__.py
index c196b33cf880..16a109f908be 100644
--- a/python/tvm/topi/__init__.py
+++ b/python/tvm/topi/__init__.py
@@ -43,6 +43,7 @@
 from .scatter_add import *
 from .argwhere import *
 from .cumsum import *
+from .cumprod import *
 from .einsum import *
 from .unique import *
 from . import generic

diff --git a/python/tvm/topi/cumprod.py b/python/tvm/topi/cumprod.py
index b3ce51265af4..3245184fbe61 100644
--- a/python/tvm/topi/cumprod.py
+++ b/python/tvm/topi/cumprod.py
@@ -57,11 +57,12 @@ def cumprod(
         The result has the same size as data, and the same shape as data if axis is not None.
         If axis is None, the result is a 1-d array.
     """
-    cumbinop(
+    return cumbinop(
         data=data,
+        binop=generic.multiply,
+        identity_value=1,
+        op_name="cumprod_generic",
         axis=axis,
         dtype=dtype,
         exclusive=exclusive,
-        binop=generic.multiply,
-        identity_value=1,
     )

diff --git a/python/tvm/topi/cumsum.py b/python/tvm/topi/cumsum.py
index 271bb151372e..e5e7a7036ae1 100644
--- a/python/tvm/topi/cumsum.py
+++ b/python/tvm/topi/cumsum.py
@@ -28,11 +28,12 @@ def cumbinop(
     data: tvm.te.Tensor,
+    binop: Callable[["tvm.Expr", "tvm.Expr"], "tvm.Expr"],
+    identity_value: "tvm.Expr",
+    op_name: str,
     axis: Optional[int] = None,
     dtype: Optional[str] = None,
     exclusive: Optional[bool] = None,
-    binop: Callable[["tvm.Expr", "tvm.Expr"], "tvm.Expr"] = generic.add,
-    identity_value: "tvm.Expr" = 0,
 ) -> tvm.te.Tensor:
     """
     TODO
     """
     if dtype is None or dtype == "":
         dtype = data.dtype
@@ -103,8 +104,8 @@ def gen_ir(data_buf, out_buf):
         lambda ins, outs: gen_ir(ins[0], outs[0]),
         dtype=dtype,
         out_buffers=[out_buf],
-        name="cumsum_generic",
-        tag="cumsum_generic",
+        name=op_name,
+        tag=op_name,
     )
@@ -142,5 +143,11 @@ def cumsum(
         If axis is None, the result is a 1-d array.
""" return cumbinop( - data=data, axis=axis, dtype=dtype, exclusive=exclusive, binop=generic.add, identity_value=0 + data=data, + binop=generic.add, + identity_value=0, + op_name="cumsum_generic", + axis=axis, + dtype=dtype, + exclusive=exclusive, ) diff --git a/tests/python/topi/python/test_topi_cumprod.py b/tests/python/topi/python/test_topi_cumprod.py new file mode 100644 index 000000000000..0c75308ddcea --- /dev/null +++ b/tests/python/topi/python/test_topi_cumprod.py @@ -0,0 +1,108 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import numpy as np +import tvm +import tvm.testing +import tvm.topi.testing +from tvm import topi + + +@tvm.testing.parametrize_targets +def test_cumprod(ctx, target): + def check_cumprod(np_ref, data, axis=None, dtype=None, exclusive=False): + implementations = { + "generic": ( + lambda x: topi.cumprod(x, axis, dtype, exclusive=exclusive), + topi.generic.schedule_extern, + ), + "cuda": ( + lambda x: topi.cuda.cumprod(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "nvptx": ( + lambda x: topi.cuda.cumprod(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "vulkan": ( + lambda x: topi.cuda.cumprod(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "metal": ( + lambda x: topi.cuda.cumprod(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + } + fcompute, fschedule = tvm.topi.testing.dispatch(target, implementations) + tvm.topi.testing.compare_numpy_tvm([data], np_ref, target, ctx, fcompute, fschedule) + + data = np.array([2, 3, 0]) + check_cumprod(np.cumprod(data), data) + + data = np.random.rand(10) > 0.5 + data = data.astype(np.int32) + check_cumprod(np.cumprod(data, dtype=np.int32), data) + check_cumprod(np.cumprod(data), data, dtype="int64") + + data = np.random.rand(10) > 0.5 + check_cumprod(np.cumprod(data, dtype=np.int32), data, dtype="int32") + + for in_dtype in ["float32", "float64"]: + if target == "metal" and in_dtype == "float64": + # float64 is not supported in metal + continue + data = np.random.randn(10, 10).astype(in_dtype) + check_cumprod(np.cumprod(data), data) + check_cumprod(np.cumprod(data, axis=0), data, axis=0) + check_cumprod(np.cumprod(data, axis=1), data, axis=1) + + data = np.random.randn(10, 5, 10).astype(in_dtype) + check_cumprod(np.cumprod(data), data) + check_cumprod(np.cumprod(data, axis=0), data, axis=0) + check_cumprod(np.cumprod(data, axis=1), data, axis=1) + check_cumprod(np.cumprod(data, axis=-1), data, axis=-1) + + for in_dtype in ["int32", "int64"]: + data = np.random.randint(-100, 100, size=(100, 100)).astype(in_dtype) + check_cumprod(np.cumprod(data, dtype=in_dtype), data) + check_cumprod(np.cumprod(data), data, dtype="int64") + check_cumprod(np.cumprod(data, axis=0, dtype=in_dtype), data, axis=0) + 
check_cumprod(np.cumprod(data, axis=1, dtype=in_dtype), data, axis=1)
+
+        data = np.random.randint(1 << 30, (1 << 31) - 1, size=(100)).astype(in_dtype)
+        check_cumprod(np.cumprod(data), data, dtype="int64")
+
+    data = np.random.randint(-100, 100, size=(100, 100)).astype("int64")
+
+    expected_result = np.roll(np.cumprod(data), 1)
+    expected_result[0] = 1
+    check_cumprod(expected_result, data, dtype="int64", exclusive=True)
+
+    expected_result = np.roll(np.cumprod(data, axis=0, dtype="int64"), 1, axis=0)
+    expected_result[0, :] = 1
+    check_cumprod(expected_result, data, axis=0, exclusive=True)
+
+    expected_result = np.roll(np.cumprod(data, axis=1, dtype="int64"), 1, axis=1)
+    expected_result[:, 0] = 1
+    check_cumprod(expected_result, data, axis=1, exclusive=True)
+
+
+if __name__ == "__main__":
+    test_cumprod(tvm.context("cpu"), tvm.target.Target("llvm"))
+    test_cumprod(tvm.context("cuda"), tvm.target.Target("cuda"))
+    test_cumprod(tvm.context("nvptx"), tvm.target.Target("nvptx"))
+    test_cumprod(tvm.context("vulkan"), tvm.target.Target("vulkan"))
+    test_cumprod(tvm.context("metal"), tvm.target.Target("metal"))
diff --git a/tests/python/topi/python/test_topi_cumsum.py b/tests/python/topi/python/test_topi_cumsum.py
index 4afe1f3926f0..0591609b6f3f 100644
--- a/tests/python/topi/python/test_topi_cumsum.py
+++ b/tests/python/topi/python/test_topi_cumsum.py
@@ -102,7 +102,7 @@ def check_cumsum(np_ref, data, axis=None, dtype=None, exclusive=False):
 if __name__ == "__main__":
     test_cumsum(tvm.context("cpu"), tvm.target.Target("llvm"))
-    # test_cumsum(tvm.context("cuda"), tvm.target.Target("cuda"))
-    # test_cumsum(tvm.context("nvptx"), tvm.target.Target("nvptx"))
-    # test_cumsum(tvm.context("vulkan"), tvm.target.Target("vulkan"))
-    # test_cumsum(tvm.context("metal"), tvm.target.Target("metal"))
+    test_cumsum(tvm.context("cuda"), tvm.target.Target("cuda"))
+    test_cumsum(tvm.context("nvptx"), tvm.target.Target("nvptx"))
+    test_cumsum(tvm.context("vulkan"), tvm.target.Target("vulkan"))
+    test_cumsum(tvm.context("metal"), tvm.target.Target("metal"))

From b908bdf2c629e9af259a00f0b41f786e548c6e3f Mon Sep 17 00:00:00 2001
From: Andrew Zhao Luo
Date: Mon, 22 Mar 2021 01:37:14 -0700
Subject: [PATCH 04/17] add rudimentary scan implementation

---
 python/tvm/topi/cuda/scan.py | 87 ++++++++++++++++++++++++++++------
 1 file changed, 74 insertions(+), 13 deletions(-)

diff --git a/python/tvm/topi/cuda/scan.py b/python/tvm/topi/cuda/scan.py
index 84ab5dcf9756..76f19e2ef939 100644
--- a/python/tvm/topi/cuda/scan.py
+++ b/python/tvm/topi/cuda/scan.py
@@ -16,13 +16,16 @@
 # under the License.
 # pylint: disable=invalid-name, too-many-locals, too-many-statements
 "Scan related operators"
+from typing import Callable, Optional
+
 import tvm
 from tvm import te
-from tvm.contrib.thrust import can_use_thrust, can_use_rocthrust
-from ..transform import expand_dims, squeeze, transpose, reshape
-from ..utils import ceil_div, swap, prod, get_const_int
-from ..math import cast
+from tvm.contrib.thrust import can_use_rocthrust, can_use_thrust
+
+from ..
import tag +from ..math import cast +from ..transform import expand_dims, reshape, squeeze, transpose +from ..utils import ceil_div, get_const_int, prod, swap from .injective import schedule_injective_from_existing @@ -347,7 +350,9 @@ def exclusive_scan( def do_scan(data, output_dtype): target = tvm.target.Target.current() - if target and ( + + # TODO: add support for a prod_scan + if target and binop == tvm.generic.add and ( can_use_thrust(target, "tvm.contrib.thrust.sum_scan") or can_use_rocthrust(target, "tvm.contrib.thrust.sum_scan") ): @@ -486,7 +491,29 @@ def traverse(op): return s -def cumsum(data, axis=None, dtype=None, exclusive=None): +def cumbinop( + data: tvm.te.Tensor, + binop: Callable[["tvm.Expr", "tvm.Expr"], "tvm.Expr"], + axis: Optional[int] = None, + dtype: Optional[str] = None, + exclusive: Optional[bool] = None, +) -> tvm.te.Tensor: + """TODO""" + if axis is None: + axis = 0 + data = reshape(data, (prod(data.shape),)) + axis = get_const_int(axis) + if exclusive is not None and exclusive != 0: + return exclusive_scan(data, axis, output_dtype=dtype, binop=binop) + return inclusive_scan(data, axis, output_dtype=dtype, binop=binop) + + +def cumsum( + data: tvm.te.Tensor, + axis: Optional[int] = None, + dtype: Optional[int] = None, + exclusive: Optional[bool] = None, +) -> tvm.te.Tensor: """Numpy style cumsum op. Return the cumulative sum of the elements along a given axis. Parameters @@ -514,10 +541,44 @@ def cumsum(data, axis=None, dtype=None, exclusive=None): The result has the same size as data, and the same shape as data if axis is not None. If axis is None, the result is a 1-d array. """ - if axis is None: - axis = 0 - data = reshape(data, (prod(data.shape),)) - axis = get_const_int(axis) - if exclusive is not None and exclusive != 0: - return exclusive_scan(data, axis, output_dtype=dtype, binop=tvm.tir.generic.add) - return inclusive_scan(data, axis, output_dtype=dtype, binop=tvm.tir.generic.add) + return cumbinop( + data=data, binop=tvm.tir.generic.add, axis=axis, dtype=dtype, exclusive=exclusive + ) + + +def cumprod( + data: tvm.te.Tensor, + axis: Optional[int] = None, + dtype: Optional[int] = None, + exclusive: Optional[bool] = None, +): + """Numpy style cumprod op. Return the cumulative sum of the elements along a given axis. + + Parameters + ---------- + data : tvm.te.Tensor + The input data to the operator. + + axis : int, optional + Axis along which the cumulative product is computed. The default (None) is to compute + the cumsum over the flattened array. + + dtype : string, optional + Type of the returned array and of the accumulator in which the elements are summed. + If dtype is not specified, it defaults to the dtype of data. + + exclusive : bool, optional + If True, will return exclusive sum in which the first element is not + included. In other terms, if True, the j-th output element would be + the product of the first (j-1) elements. Otherwise, it would be the product of + the first j elements. + + Returns + ------- + result : tvm.te.Tensor + The result has the same size as data, and the same shape as data if axis is not None. + If axis is None, the result is a 1-d array. 
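The cumbinop wrapper above follows the NumPy axis convention: axis=None flattens the input before scanning, otherwise the scan runs along the given axis. A quick NumPy illustration of the two behaviors (sketch only, for review):

    import numpy as np

    x = np.arange(1, 7).reshape(2, 3)

    np.cumsum(x)           # axis=None flattens first -> [1, 3, 6, 10, 15, 21]
    np.cumsum(x, axis=1)   # scan along rows          -> [[1, 3, 6], [4, 9, 15]]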
+ """ + return cumbinop( + data=data, binop=tvm.tir.generic.multiply, axis=axis, dtype=dtype, exclusive=exclusive + ) From cdedbb352da26edf3e5516be719bb482a81c2196 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 01:39:04 -0700 Subject: [PATCH 05/17] add attributes of cumprod node --- include/tvm/relay/attrs/transform.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index ff344f5e1a85..a0abf906b4e0 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -452,6 +452,20 @@ struct CumsumAttrs : public tvm::AttrsNode { } }; +/*! \brief Attributes used in cumprod operator */ +struct CumprodAttrs : public tvm::AttrsNode { + Integer axis; + DataType dtype; + Integer exclusive; + TVM_DECLARE_ATTRS(CumprodAttrs, "relay.attrs.CumprodAttrs") { + TVM_ATTR_FIELD(axis).describe("The axis to multiply over").set_default(NullValue()); + TVM_ATTR_FIELD(dtype).describe("Output data type").set_default(NullValue()); + TVM_ATTR_FIELD(exclusive) + .describe("The first element is not included") + .set_default(NullValue()); + } +}; + /*! \brief Attributes used in unique operator */ struct UniqueAttrs : public tvm::AttrsNode { bool sorted; From 0de8e21a1a7fbd840d7df807e4a5520f362e4dd0 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 01:43:59 -0700 Subject: [PATCH 06/17] add cumprod strategy --- python/tvm/relay/op/_transform.py | 19 +++++++++++++---- python/tvm/relay/op/strategy/generic.py | 27 ++++++++++++++++++------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index e90263d794bc..57f87421ed96 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -19,16 +19,17 @@ # pylint: disable=too-many-local-variables, too-many-arguments, no-else-return from __future__ import absolute_import + import tvm -from tvm import te -from tvm.te.hybrid import script +from tvm import te, topi from tvm.runtime import convert -from tvm import topi +from tvm.te.hybrid import script from tvm.topi.utils import get_const_int, get_const_tuple + from . import op as _reg from . 
import strategy -from .op import OpPattern from ._tensor import elemwise_shape_func +from .op import OpPattern _reg.register_broadcast_schedule("broadcast_to") _reg.register_broadcast_schedule("broadcast_to_like") @@ -159,6 +160,16 @@ def compute_cumsum(attrs, inputs, output_type): _reg.register_strategy("cumsum", strategy.cumsum_strategy) _reg.register_shape_func("cumsum", False, elemwise_shape_func) +# cumprod +@_reg.register_compute("cumprod") +def compute_cumprod(attrs, inputs, output_type): + """Compute definition of cumprod""" + return [topi.cumsum(inputs[0], attrs.axis, attrs.dtype, attrs.exclusive)] + + +_reg.register_strategy("cumprod", strategy.cumprod_strategy) +_reg.register_shape_func("cumprod", False, elemwise_shape_func) + @_reg.register_compute("unique") def compute_unique(attrs, inputs, output_type): diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index 04f25640574a..1d973ac41980 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -17,11 +17,12 @@ """Definition of generic operator strategy.""" # pylint: disable=invalid-name,unused-argument import logging - import re -from tvm import topi, _ffi, te, ir -from tvm.topi.utils import get_const_int, get_const_float, get_const_tuple, get_float_tuple + +from tvm import _ffi, ir, te, topi from tvm.target import generic_func, override_native_generic_func +from tvm.topi.utils import get_const_float, get_const_int, get_const_tuple, get_float_tuple + from .. import op as _op logger = logging.getLogger("strategy") @@ -1463,13 +1464,13 @@ def threefry_split_strategy(attrs, inputs, out_type, target): return strategy -def wrap_compute_cumsum(topi_compute): +def wrap_compute_cumbinop(topi_compute): """Wrap cumsum topi compute""" - def _compute_cumsum(attrs, inputs, _): + def _compute_cumbinop(attrs, inputs, _): return [topi_compute(inputs[0], attrs.axis, attrs.dtype, attrs.exclusive)] - return _compute_cumsum + return _compute_cumbinop @override_native_generic_func("cumsum_strategy") @@ -1477,13 +1478,25 @@ def cumsum_strategy(attrs, inputs, out_type, target): """cumsum generic strategy""" strategy = _op.OpStrategy() strategy.add_implementation( - wrap_compute_cumsum(topi.cumsum), + wrap_compute_cumbinop(topi.cumsum), wrap_topi_schedule(topi.generic.schedule_extern), name="cumsum.generic", ) return strategy +@override_native_generic_func("cumprod_strategy") +def cumprod_strategy(attrs, inputs, out_type, target): + """cumprod generic strategy""" + strategy = _op.OpStrategy() + strategy.add_implementation( + wrap_compute_cumbinop(topi.cumprod), + wrap_topi_schedule(topi.generic.schedule_extern), + name="cumprod.generic", + ) + return strategy + + def wrap_compute_unique(topi_compute): """Wrap unique topi compute""" From 4b48ec94c053cba3b1253526bdaff5bdefb82a3c Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 01:49:04 -0700 Subject: [PATCH 07/17] add cuda strategy --- python/tvm/relay/op/strategy/cuda.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index e0d0f165219e..0da2d831c510 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ b/python/tvm/relay/op/strategy/cuda.py @@ -18,11 +18,12 @@ # pylint: disable=invalid-name,unused-argument,wildcard-import,unused-wildcard-import from tvm import topi from tvm.auto_scheduler import is_auto_scheduler_enabled -from tvm.te import SpecializedCondition from tvm.contrib 
import nvcc from tvm.contrib.thrust import can_use_thrust -from .generic import * +from tvm.te import SpecializedCondition + from .. import op as _op +from .generic import * @schedule_injective.register(["cuda", "gpu"]) @@ -1017,13 +1018,25 @@ def cumsum_strategy_cuda(attrs, inputs, out_type, target): """cumsum cuda strategy""" strategy = _op.OpStrategy() strategy.add_implementation( - wrap_compute_cumsum(topi.cuda.cumsum), + wrap_compute_cumbinop(topi.cuda.cumsum), wrap_topi_schedule(topi.cuda.schedule_scan), name="cumsum.cuda", ) return strategy +@cumprod_strategy.register(["cuda", "gpu"]) +def cumprod_strategy_cuda(attrs, inputs, out_type, target): + """cumprod cuda strategy""" + strategy = _op.OpStrategy() + strategy.add_implementation( + wrap_compute_cumbinop(topi.cuda.cumprod), + wrap_topi_schedule(topi.cuda.schedule_scan), + name="cumprod.cuda", + ) + return strategy + + @unique_strategy.register(["cuda", "gpu"]) def unique_strategy_cuda(attrs, inputs, out_type, target): """unique cuda strategy""" From 7b3e372f5ebfb4bc4ee0398fc809fc2ae39ab48f Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 10:28:08 -0700 Subject: [PATCH 08/17] python relay node construction --- python/tvm/relay/op/transform.py | 59 ++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index df0ae767460a..189017f6c255 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py @@ -18,11 +18,11 @@ # pylint: disable=import-outside-toplevel """Transform operators.""" +from ...tir import expr as _expr +from ..expr import Constant, Expr, Tuple, TupleWrapper, const from . import _make from .dyn import _make as _dyn_make from .tensor import shape_of -from ..expr import TupleWrapper, const, Constant, Expr, Tuple -from ...tir import expr as _expr def cast(data, dtype): @@ -1577,6 +1577,61 @@ def cumsum(data, axis=None, dtype=None, exclusive=None): return _make.cumsum(data, axis, dtype, exclusive) +def cumprod(data, axis=None, dtype=None, exclusive=None): + """Numpy style cumprod op. Return the cumulative inclusive product of the elements along + a given axis. + + Parameters + ---------- + data : relay.Expr + The input data to the operator. + + axis : int, optional + Axis along which the cumulative product is computed. The default (None) is to compute + the cumprod over the flattened array. + + dtype : string, optional + Type of the returned array and of the accumulator in which the elements are multiplied. + If dtype is not specified, it defaults to the dtype of data. + + exclusive : int, optional + If set to 1 will return exclusive product in which the first element is not + included. In other terms, if set to 1, the j-th output element would be + the product of the first (j-1) elements. Otherwise, it would be the product of + the first j elements. The product of zero elements will be 1. + + Returns + ------- + result : relay.Expr + The result has the same size as data, and the same shape as data if axis is not None. + If axis is None, the result is a 1-d array. + + Examples + -------- + .. code-block:: python + a = [[1,2,3], [4,5,6]] + + cumprod(a) # if axis is not provided, cumprod is done over the flattened input. + -> [ 1, 2, 6, 24, 120, 720] + + cumprod(a, dtype="float32") + -> [ 1., 2., 6., 24., 120., 720.] 
+ + cumprod(a, axis=0) # multiply over rows for each of the 3 columns + -> [[1, 2, 3], + [4, 10, 18]] + + cumprod(a, axis=1) + -> [[ 1, 2, 6], + [ 4, 20, 120]] + + a = [1, 1, 1, 0, 1, 1, 0] # a is a boolean array + cumprod(a, dtype=int32) # dtype should be provided to get the expected results + -> [1, 1, 1, 0, 0, 0, 0] + """ + return _make.cumprod(data, axis, dtype, exclusive) + + def unique(data, is_sorted=True, return_counts=False): """ Find the unique elements of a 1-D tensor. Please note `output` and `counts` are all padded to From 5062a3c8374a90bbdff57cb7ec864a292ac9fea9 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 13:14:58 -0700 Subject: [PATCH 09/17] change attrs to be reusuable --- include/tvm/relay/attrs/transform.h | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index a0abf906b4e0..d70f289001d4 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -438,27 +438,13 @@ struct MatrixSetDiagAttrs : public tvm::AttrsNode { } }; // struct MatrixSetDiagAttrs -/*! \brief Attributes used in cumsum operator */ -struct CumsumAttrs : public tvm::AttrsNode { +/*! \brief Attributes used in cumsum and cumprod operator */ +struct CumbinopAttrs : public tvm::AttrsNode { Integer axis; DataType dtype; Integer exclusive; - TVM_DECLARE_ATTRS(CumsumAttrs, "relay.attrs.CumsumAttrs") { - TVM_ATTR_FIELD(axis).describe("The axis to sum over").set_default(NullValue()); - TVM_ATTR_FIELD(dtype).describe("Output data type").set_default(NullValue()); - TVM_ATTR_FIELD(exclusive) - .describe("The first element is not included") - .set_default(NullValue()); - } -}; - -/*! \brief Attributes used in cumprod operator */ -struct CumprodAttrs : public tvm::AttrsNode { - Integer axis; - DataType dtype; - Integer exclusive; - TVM_DECLARE_ATTRS(CumprodAttrs, "relay.attrs.CumprodAttrs") { - TVM_ATTR_FIELD(axis).describe("The axis to multiply over").set_default(NullValue()); + TVM_DECLARE_ATTRS(CumbinopAttrs, "relay.attrs.CumbinopAttrs") { + TVM_ATTR_FIELD(axis).describe("The axis to operate over").set_default(NullValue()); TVM_ATTR_FIELD(dtype).describe("Output data type").set_default(NullValue()); TVM_ATTR_FIELD(exclusive) .describe("The first element is not included") From 67d9441b80179677edd96a4155355b0d082486ec Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 13:19:27 -0700 Subject: [PATCH 10/17] add cumprod nodes --- src/relay/op/tensor/transform.cc | 34 +++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index b65068bd0506..2e6f76929eb3 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -3772,20 +3772,20 @@ RELAY_REGISTER_OP("adv_index") .set_attr("TOpPattern", kInjective) .set_attr("FTVMCompute", AdvIndexCompute); -TVM_REGISTER_NODE_TYPE(CumsumAttrs); +TVM_REGISTER_NODE_TYPE(CumbinopAttrs); -bool CumsumRel(const Array& types, int num_inputs, const Attrs& attrs, - const TypeReporter& reporter) { +bool CumbinopRel(const Array& types, int num_inputs, const Attrs& attrs, + const TypeReporter& reporter) { // types: [data, output] ICHECK_EQ(types.size(), 2) << "Expects two types, one for the input and another for the output"; const auto* data = types[0].as(); if (data == nullptr) { ICHECK(types[0].as()) - << "cumsum: expect input type to be TensorType but get " << types[0]; + << 
"cumbinop: expect input type to be TensorType but get " << types[0]; return false; } - const auto* param = attrs.as(); + const auto* param = attrs.as(); auto dtype = param->dtype; if (dtype.is_void()) { @@ -3806,7 +3806,7 @@ bool CumsumRel(const Array& types, int num_inputs, const Attrs& attrs, } Expr MakeCumsum(Expr data, Integer axis, DataType dtype, Integer exclusive) { - auto attrs = make_object(); + auto attrs = make_object(); attrs->dtype = dtype; attrs->axis = axis; attrs->exclusive = exclusive; @@ -3822,7 +3822,27 @@ RELAY_REGISTER_OP("cumsum") .set_num_inputs(1) .add_argument("data", "Tensor", "The input tensor.") .set_support_level(3) - .add_type_rel("Cumsum", CumsumRel) + .add_type_rel("Cumsum", CumbinopRel) + .set_attr("TOpPattern", kOpaque); + +Expr MakeCumprod(Expr data, Integer axis, DataType dtype, Integer exclusive) { + auto attrs = make_object(); + attrs->dtype = dtype; + attrs->axis = axis; + attrs->exclusive = exclusive; + static const Op& op = Op::Get("cumprod"); + return Call(op, {data}, Attrs(attrs), {}); +} + +TVM_REGISTER_GLOBAL("relay.op._make.cumprod").set_body_typed(MakeCumprod); + +RELAY_REGISTER_OP("cumprod") + .describe( + R"doc(Return the cumulative product of the elements along a given axis.)doc" TVM_ADD_FILELINE) + .set_num_inputs(1) + .add_argument("data", "Tensor", "The input tensor.") + .set_support_level(3) + .add_type_rel("Cumprod", CumbinopRel) .set_attr("TOpPattern", kOpaque); TVM_REGISTER_NODE_TYPE(UniqueAttrs); From f75d64cd3d02e35149ab3f6a9310484fca79a39b Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 14:18:22 -0700 Subject: [PATCH 11/17] complete tests --- tests/python/relay/test_op_level3.py | 62 +++++++++++++++++++--------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index d2a5090943c3..2e411f3f273c 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -16,16 +16,16 @@ # under the License. """ Support level3 operator test cases. """ +from typing import Callable, Optional + import numpy as np import pytest import tvm -from tvm import te -from tvm import relay +import tvm.testing +from tvm import relay, te from tvm.error import TVMError from tvm.relay import create_executor, transform from tvm.relay.testing import check_grad, run_infer_type -from typing import Optional -import tvm.testing def test_zeros_ones(): @@ -1758,12 +1758,26 @@ def verify_adv_index(data_shape, index_shapes): verify_adv_index((10, 5, 15), [(1, 2, 1), (1, 2, 7)]) -@tvm.testing.parametrize_targets -def test_cumsum(target, ctx): - def verify_cumsum(data_np, np_out, axis=None, out_dtype=None, rtol=1e-5, atol=1e-5): +# Helper for testing binop functions +cumbinops_supported = {"cumsum": relay.op.cumsum, "cumprod": relay.op.cumprod} + + +def run_binop_tests(target, ctx, binop_type: str, gt_func: Callable[..., np.array]): + def assert_relay_cumbinop( + data_np: np.array, + np_out: np.array, + axis: int = None, + out_dtype: str = None, + rtol: float = 1e-5, + atol: float = 1e-5, + ): inp = relay.var("data", relay.TensorType(data_np.shape, str(data_np.dtype))) - out = relay.op.cumsum(inp, axis, out_dtype) + if binop_type not in cumbinops_supported.keys(): + raise ValueError( + f"Unknown function {binop_type}. 
Options: {cumbinops_supported.keys()}" + ) + out = cumbinops_supported[binop_type](inp, axis, out_dtype) func = relay.Function([inp], out) for kind in ["graph", "debug"]: @@ -1772,24 +1786,34 @@ def verify_cumsum(data_np, np_out, axis=None, out_dtype=None, rtol=1e-5, atol=1e tvm.testing.assert_allclose(op_res.asnumpy(), np_out, rtol=rtol, atol=atol) data = np.array([2, 3, 0]) - verify_cumsum(data, np.cumsum(data)) - verify_cumsum(data, np.cumsum(data), out_dtype="int64") + assert_relay_cumbinop(data, gt_func(data)) + assert_relay_cumbinop(data, gt_func(data), out_dtype="int64") data = np.random.randn(10, 10) - verify_cumsum(data, np.cumsum(data)) - verify_cumsum(data, np.cumsum(data, axis=0), axis=0) - verify_cumsum(data, np.cumsum(data, axis=1), axis=1) + assert_relay_cumbinop(data, gt_func(data)) + assert_relay_cumbinop(data, gt_func(data, axis=0), axis=0) + assert_relay_cumbinop(data, gt_func(data, axis=1), axis=1) data = np.random.randn(10, 5, 10).astype("float32") - verify_cumsum(data, np.cumsum(data), rtol=1e-4, atol=1e-4) - verify_cumsum(data, np.cumsum(data, axis=0), axis=0, rtol=1e-4, atol=1e-4) - verify_cumsum(data, np.cumsum(data, axis=1), axis=1, rtol=1e-4, atol=1e-4) - verify_cumsum(data, np.cumsum(data, axis=-1), axis=-1, rtol=1e-4, atol=1e-4) + assert_relay_cumbinop(data, gt_func(data), rtol=1e-4, atol=1e-4) + assert_relay_cumbinop(data, gt_func(data, axis=0), axis=0, rtol=1e-4, atol=1e-4) + assert_relay_cumbinop(data, gt_func(data, axis=1), axis=1, rtol=1e-4, atol=1e-4) + assert_relay_cumbinop(data, gt_func(data, axis=-1), axis=-1, rtol=1e-4, atol=1e-4) data = np.random.rand(10) > 0.5 data = data.astype(np.int32) - verify_cumsum(data, np.cumsum(data, dtype=np.int32)) - verify_cumsum(data, np.cumsum(data, dtype="int64"), out_dtype="int64") + assert_relay_cumbinop(data, gt_func(data, dtype=np.int32)) + assert_relay_cumbinop(data, gt_func(data, dtype="int64"), out_dtype="int64") + + +@tvm.testing.parametrize_targets +def test_cumsum(target, ctx): + run_binop_tests(target, ctx, binop_type="cumsum", gt_func=np.cumsum) + + +@tvm.testing.parametrize_targets +def test_cumprod(target, ctx): + run_binop_tests(target, ctx, binop_type="cumprod", gt_func=np.cumproduct) @tvm.testing.parametrize_targets From b3290bde1ac9a1b3506255b11b510e3f38fdd485 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 14:43:12 -0700 Subject: [PATCH 12/17] Fix some typos about sum --> prod typos fix sum -> prod more typos more typo fixes more typos add doc strings --- python/tvm/relay/op/_transform.py | 4 +- python/tvm/relay/op/strategy/generic.py | 2 +- python/tvm/topi/cuda/scan.py | 57 +++++++++++++++++++++---- python/tvm/topi/cumprod.py | 8 ++-- python/tvm/topi/cumsum.py | 43 ++++++++++++++++++- 5 files changed, 97 insertions(+), 17 deletions(-) diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index 57f87421ed96..fe2b2f91a2dd 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -162,9 +162,9 @@ def compute_cumsum(attrs, inputs, output_type): # cumprod @_reg.register_compute("cumprod") -def compute_cumprod(attrs, inputs, output_type): +def compute_cumprod(attrs, inputs, output_type) """Compute definition of cumprod""" - return [topi.cumsum(inputs[0], attrs.axis, attrs.dtype, attrs.exclusive)] + return [topi.cumprod(inputs[0], attrs.axis, attrs.dtype, attrs.exclusive)] _reg.register_strategy("cumprod", strategy.cumprod_strategy) diff --git a/python/tvm/relay/op/strategy/generic.py 
b/python/tvm/relay/op/strategy/generic.py index 1d973ac41980..3864d33a7c31 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -1465,7 +1465,7 @@ def threefry_split_strategy(attrs, inputs, out_type, target): def wrap_compute_cumbinop(topi_compute): - """Wrap cumsum topi compute""" + """Wrap cumbinop style topi compute""" def _compute_cumbinop(attrs, inputs, _): return [topi_compute(inputs[0], attrs.axis, attrs.dtype, attrs.exclusive)] diff --git a/python/tvm/topi/cuda/scan.py b/python/tvm/topi/cuda/scan.py index 76f19e2ef939..928c6257a916 100644 --- a/python/tvm/topi/cuda/scan.py +++ b/python/tvm/topi/cuda/scan.py @@ -352,9 +352,13 @@ def do_scan(data, output_dtype): target = tvm.target.Target.current() # TODO: add support for a prod_scan - if target and binop == tvm.generic.add and ( - can_use_thrust(target, "tvm.contrib.thrust.sum_scan") - or can_use_rocthrust(target, "tvm.contrib.thrust.sum_scan") + if ( + target + and binop == tvm.generic.add + and ( + can_use_thrust(target, "tvm.contrib.thrust.sum_scan") + or can_use_rocthrust(target, "tvm.contrib.thrust.sum_scan") + ) ): return scan_thrust( data, output_dtype, exclusive=True, return_reduction=return_reduction, binop=binop @@ -498,7 +502,44 @@ def cumbinop( dtype: Optional[str] = None, exclusive: Optional[bool] = None, ) -> tvm.te.Tensor: - """TODO""" + """Cumulative binary operator with similar axis behavior as np.cumsum and np.cumprod. + + See cumprod and cumsum for an example of use. + + E.g. if * is your binary operator and the input tensor is [1, 2, 3, 4] the output may be + [1, 1 * 2, 1 * 2 * 3, 1 * 2 * 3 * 4] + + Parameters + ---------- + data : tvm.te.Tensor + The input data to the operator. + + binop: Callable (tvm.Expr, tvm.Expr) -> tvm.Expr + A binary operator which should be associative and commutative. E.g. if * is your + operator then a * (b * c) = (a * b) * c and a * b = b * a + + axis : int, optional + Axis along which the operation is computed. The default (None) is to compute + the cumulative operation over the flattened array. + + dtype : string, optional + Type of the returned array and of the accumulator in which the elements are computed. + If dtype is not specified, it defaults to the dtype of data. + + exclusive : int, optional + If set to 1 will return exclusive cumulative operation in which the first element is not + included. In other terms, if set to 1, the j-th output element would be + the cumulative operation of the first (j-1) elements. Otherwise, it would be the + cumulative operation of the first j elements. + + TODO: what happens to the identity element? + + Returns + ------- + result : tvm.te.Tensor + The result has the same size as data, and the same shape as data if axis is not None. + If axis is None, the result is a 1-d array. + """ if axis is None: axis = 0 data = reshape(data, (prod(data.shape),)) @@ -552,7 +593,7 @@ def cumprod( dtype: Optional[int] = None, exclusive: Optional[bool] = None, ): - """Numpy style cumprod op. Return the cumulative sum of the elements along a given axis. + """Numpy style cumprod op. Return the cumulative product of the elements along a given axis. Parameters ---------- @@ -561,14 +602,14 @@ def cumprod( axis : int, optional Axis along which the cumulative product is computed. The default (None) is to compute - the cumsum over the flattened array. + the cumproduct over the flattened array. dtype : string, optional - Type of the returned array and of the accumulator in which the elements are summed. 
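The docstring requirement above that binop be associative (and commutative) is load-bearing: a parallel scan regroups operands freely, so a non-associative op silently produces wrong results. A minimal illustration of why subtraction, for instance, cannot be a scan binop:

    # Addition regroups safely; subtraction does not.
    a, b, c = 5, 3, 2
    assert (a + b) + c == a + (b + c)
    assert (a - b) - c != a - (b - c)   # 0 != 4, so "-" is not a valid binop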
+ Type of the returned array and of the accumulator in which the elements are multiplied. If dtype is not specified, it defaults to the dtype of data. exclusive : bool, optional - If True, will return exclusive sum in which the first element is not + If True, will return exclusive product in which the first element is not included. In other terms, if True, the j-th output element would be the product of the first (j-1) elements. Otherwise, it would be the product of the first j elements. diff --git a/python/tvm/topi/cumprod.py b/python/tvm/topi/cumprod.py index 3245184fbe61..93aa033ac621 100644 --- a/python/tvm/topi/cumprod.py +++ b/python/tvm/topi/cumprod.py @@ -30,7 +30,7 @@ def cumprod( dtype: Optional[int] = None, exclusive: Optional[bool] = None, ): - """Numpy style cumprod op. Return the cumulative sum of the elements along a given axis. + """Numpy style cumprod op. Return the cumulative product of the elements along a given axis. Parameters ---------- @@ -39,14 +39,14 @@ def cumprod( axis : int, optional Axis along which the cumulative product is computed. The default (None) is to compute - the cumsum over the flattened array. + the cumprod over the flattened array. dtype : string, optional - Type of the returned array and of the accumulator in which the elements are summed. + Type of the returned array and of the accumulator in which the elements are multiplied. If dtype is not specified, it defaults to the dtype of data. exclusive : bool, optional - If True, will return exclusive sum in which the first element is not + If True, will return exclusive product in which the first element is not included. In other terms, if True, the j-th output element would be the product of the first (j-1) elements. Otherwise, it would be the product of the first j elements. diff --git a/python/tvm/topi/cumsum.py b/python/tvm/topi/cumsum.py index e5e7a7036ae1..355e1510a4f3 100644 --- a/python/tvm/topi/cumsum.py +++ b/python/tvm/topi/cumsum.py @@ -35,8 +35,47 @@ def cumbinop( dtype: Optional[str] = None, exclusive: Optional[bool] = None, ) -> tvm.te.Tensor: - """ - TODO + """Cumulative binary operator with similar axis behavior as np.cumsum and np.cumprod. + + See cumprod and cumsum for an example of use. + + E.g. if * is your binary operator and the input tensor is [1, 2, 3, 4] the output may be + [1, 1 * 2, 1 * 2 * 3, 1 * 2 * 3 * 4] + + Parameters + ---------- + data : tvm.te.Tensor + The input data to the operator. + + binop: Callable (tvm.Expr, tvm.Expr) -> tvm.Expr + A binary operator which should be associative and commutative. E.g. if * is your + operator then a * (b * c) = (a * b) * c and a * b = b * a + + identity_value: tvm.Expr + A value for the binary operation which provides the identity property. E.g. if * is + your operator and i is the identity_value then a * i = a for all a in the domain of + your operation. + + axis : int, optional + Axis along which the operation is computed. The default (None) is to compute + the cumulative operation over the flattened array. + + dtype : string, optional + Type of the returned array and of the accumulator in which the elements are computed. + If dtype is not specified, it defaults to the dtype of data. + + exclusive : int, optional + If set to 1 will return exclusive cumulative operation in which the first element is not + included. In other terms, if set to 1, the j-th output element would be + the cumulative operation of the first (j-1) elements. Otherwise, it would be the + cumulative operation of the first j elements. 
The cumulative operation of zero elements + is assumed to be the identity_value. + + Returns + ------- + result : tvm.te.Tensor + The result has the same size as data, and the same shape as data if axis is not None. + If axis is None, the result is a 1-d array. """ if dtype is None or dtype == "": dtype = data.dtype From 6f944d1454f93c0a3ed56399a11f4009466b7f76 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 16:44:04 -0700 Subject: [PATCH 13/17] Use Bool instead of int to represent exclusive make exclusive a bool up and down stack fix x fix bool err it is a bool now fix fix thing formatting to pass linter lint python cumprod pylint fix attribute fix ordering add exclusivity tests for end to end fix things cuda identity_value --- include/tvm/relay/attrs/transform.h | 6 +- python/tvm/relay/op/_transform.py | 2 +- python/tvm/topi/cuda/scan.py | 90 +++++++++++++++++++++------- python/tvm/topi/cumprod.py | 2 +- python/tvm/topi/cumsum.py | 30 +++++----- src/relay/op/tensor/transform.cc | 4 +- tests/python/relay/test_op_level3.py | 25 ++++++-- 7 files changed, 111 insertions(+), 48 deletions(-) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index d70f289001d4..4827579d2211 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -442,13 +442,15 @@ struct MatrixSetDiagAttrs : public tvm::AttrsNode { struct CumbinopAttrs : public tvm::AttrsNode { Integer axis; DataType dtype; - Integer exclusive; + Bool exclusive = Bool(false); TVM_DECLARE_ATTRS(CumbinopAttrs, "relay.attrs.CumbinopAttrs") { TVM_ATTR_FIELD(axis).describe("The axis to operate over").set_default(NullValue()); TVM_ATTR_FIELD(dtype).describe("Output data type").set_default(NullValue()); + + // Default is 0 which is "false" TVM_ATTR_FIELD(exclusive) .describe("The first element is not included") - .set_default(NullValue()); + .set_default(Bool(false)); } }; diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index fe2b2f91a2dd..16262833d1bf 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -162,7 +162,7 @@ def compute_cumsum(attrs, inputs, output_type): # cumprod @_reg.register_compute("cumprod") -def compute_cumprod(attrs, inputs, output_type) +def compute_cumprod(attrs, inputs, output_type): """Compute definition of cumprod""" return [topi.cumprod(inputs[0], attrs.axis, attrs.dtype, attrs.exclusive)] diff --git a/python/tvm/topi/cuda/scan.py b/python/tvm/topi/cuda/scan.py index 928c6257a916..b6452417ba79 100644 --- a/python/tvm/topi/cuda/scan.py +++ b/python/tvm/topi/cuda/scan.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name, too-many-locals, too-many-statements "Scan related operators" -from typing import Callable, Optional +from typing import Callable, Optional, Union import tvm from tvm import te @@ -35,7 +35,7 @@ def _get_thrust_func_name(tvmop): return tvmop_to_thrust_func_name[tvmop] -def exclusive_scan_ir(data, output, reduction=None, binop=tvm.tir.generic.add): +def exclusive_scan_ir(data, output, reduction=None, binop=tvm.tir.generic.add, identity_value=0): """Low level IR to do exclusive sum scan along rows of 2D input. Parameters @@ -53,6 +53,11 @@ def exclusive_scan_ir(data, output, reduction=None, binop=tvm.tir.generic.add): A binary associative op to use for scan. The function takes two TIR expressions and produce a new TIR expression. By default it uses tvm.tir.generic.add to compute prefix sum. 
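Threading identity_value through the IR is the crux of this patch: the hunk just below replaces the hard-coded cast(0, out_dtype) seed with cast(identity_value, out_dtype), since 0 is only the identity for add. With multiply, a zero seed would propagate through every slot of an exclusive scan. In NumPy terms (a sketch):

    import numpy as np

    x = np.array([2, 3, 4])

    # Correct exclusive product, seeded with the multiplicative identity 1:
    np.concatenate(([1], np.cumprod(x)[:-1]))   # -> [1, 2, 6]

    # Seeding with 0 instead would collapse the result to [0, 0, 0].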
+ + identity_value: int or float + A value for the binary operation which provides the identity property. E.g. if * is + your operator and i is the identity_value then a * i = a for all a in the domain of + your operation. """ batch_size = prod(data.shape[:-1]) @@ -137,7 +142,7 @@ def exclusive_scan_ir(data, output, reduction=None, binop=tvm.tir.generic.add): with ib.if_scope(bx < batch_size): if reduction is not None: reduction[bx] = output[(bx + 1) * scan_axis_size - 1] - output[(bx + 1) * scan_axis_size - 1] = cast(0, out_dtype) + output[(bx + 1) * scan_axis_size - 1] = cast(identity_value, out_dtype) with ib.for_range(0, lim, dtype="int64") as l2_width: width = 2 << (lim - l2_width - 1) @@ -312,7 +317,12 @@ def scan_thrust( def exclusive_scan( - data, axis=-1, return_reduction=False, output_dtype=None, binop=tvm.tir.generic.add + data, + axis=-1, + return_reduction=False, + output_dtype=None, + binop=tvm.tir.generic.add, + identity_value=0, ): """Do exclusive scan on 1D or multidimensional input. @@ -338,6 +348,11 @@ def exclusive_scan( and produce a new TIR expression. By default it uses tvm.tir.generic.add to compute prefix sum. + identity_value: int or float + A value for the binary operation which provides the identity property. E.g. if * is + your operator and i is the identity_value then a * i = a for all a in the domain of + your operation. + Returns ------- output : tvm.te.Tensor @@ -354,7 +369,7 @@ def do_scan(data, output_dtype): # TODO: add support for a prod_scan if ( target - and binop == tvm.generic.add + and binop == tvm.tir.generic.add and ( can_use_thrust(target, "tvm.contrib.thrust.sum_scan") or can_use_rocthrust(target, "tvm.contrib.thrust.sum_scan") @@ -375,7 +390,9 @@ def do_scan(data, output_dtype): output, reduction = te.extern( [data.shape, data.shape[:-1]], [data], - lambda ins, outs: exclusive_scan_ir(ins[0], outs[0], outs[1], binop=binop), + lambda ins, outs: exclusive_scan_ir( + ins[0], outs[0], outs[1], binop=binop, identity_value=identity_value + ), dtype=[data.dtype, output_dtype], in_buffers=[data_buf], name="exclusive_scan", @@ -385,7 +402,9 @@ def do_scan(data, output_dtype): output = te.extern( [data.shape], [data], - lambda ins, outs: exclusive_scan_ir(ins[0], outs[0], binop=binop), + lambda ins, outs: exclusive_scan_ir( + ins[0], outs[0], binop=binop, identity_value=identity_value + ), dtype=[output_dtype], in_buffers=[data_buf], out_buffers=[output_buf], @@ -432,7 +451,7 @@ def do_scan(data, output_dtype): return output -def inclusive_scan(data, axis=-1, output_dtype=None, binop=tvm.tir.generic.add): +def inclusive_scan(data, axis=-1, output_dtype=None, binop=tvm.tir.generic.add, identity_value=0): """Do inclusive scan on 1D or multidimensional input. Parameters @@ -451,12 +470,19 @@ def inclusive_scan(data, axis=-1, output_dtype=None, binop=tvm.tir.generic.add): and produce a new TIR expression. By default it uses tvm.tir.generic.add to compute prefix sum. + identity_value: int or float + A value for the binary operation which provides the identity property. E.g. if * is + your operator and i is the identity_value then a * i = a for all a in the domain of + your operation. + Returns ------- output : tvm.te.Tensor A N-D tensor of the same rank N as the input data. 
""" - ex_scan = exclusive_scan(data, axis, output_dtype=output_dtype, binop=binop) + ex_scan = exclusive_scan( + data, axis, output_dtype=output_dtype, binop=binop, identity_value=identity_value + ) if output_dtype is not None and data.dtype != output_dtype and output_dtype != "": data = cast(data, output_dtype) @@ -498,26 +524,32 @@ def traverse(op): def cumbinop( data: tvm.te.Tensor, binop: Callable[["tvm.Expr", "tvm.Expr"], "tvm.Expr"], + identity_value: Union[float, int], axis: Optional[int] = None, dtype: Optional[str] = None, exclusive: Optional[bool] = None, ) -> tvm.te.Tensor: """Cumulative binary operator with similar axis behavior as np.cumsum and np.cumprod. - - See cumprod and cumsum for an example of use. - - E.g. if * is your binary operator and the input tensor is [1, 2, 3, 4] the output may be + + See cumprod and cumsum for an example of use. + + E.g. if * is your binary operator and the input tensor is [1, 2, 3, 4] the output may be [1, 1 * 2, 1 * 2 * 3, 1 * 2 * 3 * 4] - + Parameters ---------- data : tvm.te.Tensor The input data to the operator. binop: Callable (tvm.Expr, tvm.Expr) -> tvm.Expr - A binary operator which should be associative and commutative. E.g. if * is your + A binary operator which should be associative and commutative. E.g. if * is your operator then a * (b * c) = (a * b) * c and a * b = b * a + identity_value: int or float + A value for the binary operation which provides the identity property. E.g. if * is + your operator and i is the identity_value then a * i = a for all a in the domain of + your operation. + axis : int, optional Axis along which the operation is computed. The default (None) is to compute the cumulative operation over the flattened array. @@ -529,11 +561,9 @@ def cumbinop( exclusive : int, optional If set to 1 will return exclusive cumulative operation in which the first element is not included. In other terms, if set to 1, the j-th output element would be - the cumulative operation of the first (j-1) elements. Otherwise, it would be the + the cumulative operation of the first (j-1) elements. Otherwise, it would be the cumulative operation of the first j elements. - TODO: what happens to the identity element? - Returns ------- result : tvm.te.Tensor @@ -544,9 +574,13 @@ def cumbinop( axis = 0 data = reshape(data, (prod(data.shape),)) axis = get_const_int(axis) - if exclusive is not None and exclusive != 0: - return exclusive_scan(data, axis, output_dtype=dtype, binop=binop) - return inclusive_scan(data, axis, output_dtype=dtype, binop=binop) + if exclusive is not None and exclusive: + return exclusive_scan( + data, axis, output_dtype=dtype, binop=binop, identity_value=identity_value + ) + return inclusive_scan( + data, axis, output_dtype=dtype, binop=binop, identity_value=identity_value + ) def cumsum( @@ -583,7 +617,12 @@ def cumsum( If axis is None, the result is a 1-d array. """ return cumbinop( - data=data, binop=tvm.tir.generic.add, axis=axis, dtype=dtype, exclusive=exclusive + data=data, + binop=tvm.tir.generic.add, + identity_value=0, + axis=axis, + dtype=dtype, + exclusive=exclusive, ) @@ -621,5 +660,10 @@ def cumprod( If axis is None, the result is a 1-d array. 
""" return cumbinop( - data=data, binop=tvm.tir.generic.multiply, axis=axis, dtype=dtype, exclusive=exclusive + data=data, + binop=tvm.tir.generic.multiply, + identity_value=1, + axis=axis, + dtype=dtype, + exclusive=exclusive, ) diff --git a/python/tvm/topi/cumprod.py b/python/tvm/topi/cumprod.py index 93aa033ac621..6108212c8a51 100644 --- a/python/tvm/topi/cumprod.py +++ b/python/tvm/topi/cumprod.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name """Cumprod operator""" -from typing import Callable, Optional +from typing import Optional import tvm diff --git a/python/tvm/topi/cumsum.py b/python/tvm/topi/cumsum.py index 355e1510a4f3..709155ed79bc 100644 --- a/python/tvm/topi/cumsum.py +++ b/python/tvm/topi/cumsum.py @@ -36,24 +36,24 @@ def cumbinop( exclusive: Optional[bool] = None, ) -> tvm.te.Tensor: """Cumulative binary operator with similar axis behavior as np.cumsum and np.cumprod. - - See cumprod and cumsum for an example of use. - - E.g. if * is your binary operator and the input tensor is [1, 2, 3, 4] the output may be + + See cumprod and cumsum for an example of use. + + E.g. if * is your binary operator and the input tensor is [1, 2, 3, 4] the output may be [1, 1 * 2, 1 * 2 * 3, 1 * 2 * 3 * 4] - + Parameters ---------- data : tvm.te.Tensor The input data to the operator. binop: Callable (tvm.Expr, tvm.Expr) -> tvm.Expr - A binary operator which should be associative and commutative. E.g. if * is your + A binary operator which should be associative and commutative. E.g. if * is your operator then a * (b * c) = (a * b) * c and a * b = b * a identity_value: tvm.Expr - A value for the binary operation which provides the identity property. E.g. if * is - your operator and i is the identity_value then a * i = a for all a in the domain of + A value for the binary operation which provides the identity property. E.g. if * is + your operator and i is the identity_value then a * i = a for all a in the domain of your operation. axis : int, optional @@ -67,7 +67,7 @@ def cumbinop( exclusive : int, optional If set to 1 will return exclusive cumulative operation in which the first element is not included. In other terms, if set to 1, the j-th output element would be - the cumulative operation of the first (j-1) elements. Otherwise, it would be the + the cumulative operation of the first (j-1) elements. Otherwise, it would be the cumulative operation of the first j elements. The cumulative operation of zero elements is assumed to be the identity_value. 
@@ -80,6 +80,9 @@ def cumbinop( if dtype is None or dtype == "": dtype = data.dtype + if exclusive is None: + exclusive = False + def maybe_cast(x): if dtype != data.dtype: return cast(x, dtype)
@@ -108,9 +111,6 @@ def maybe_cast(x): elif i > axis: axis_mul_after *= value - if exclusive is None: - exclusive = False - def gen_ir(data_buf, out_buf): ib = ir_builder.create() data_buf = ib.buffer_ptr(data_buf)
@@ -128,10 +128,10 @@ def gen_ir(data_buf, out_buf): k = _k + 1 cur_idx = base_idx + k * axis_mul_after prev_idx = base_idx + (k - 1) * axis_mul_after - if exclusive == 0: - out_buf[cur_idx] = binop(out_buf[prev_idx], maybe_cast(data_buf[cur_idx])) - else: + if exclusive: out_buf[cur_idx] = binop(out_buf[prev_idx], maybe_cast(data_buf[prev_idx])) + else: + out_buf[cur_idx] = binop(out_buf[prev_idx], maybe_cast(data_buf[cur_idx])) return ib.get()
diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index 2e6f76929eb3..96078da72a12 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc
@@ -3805,7 +3805,7 @@ bool CumbinopRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, return true; } -Expr MakeCumsum(Expr data, Integer axis, DataType dtype, Integer exclusive) { +Expr MakeCumsum(Expr data, Integer axis, DataType dtype, Bool exclusive) { auto attrs = make_object<CumbinopAttrs>(); attrs->dtype = dtype; attrs->axis = axis;
@@ -3825,7 +3825,7 @@ RELAY_REGISTER_OP("cumsum") .add_type_rel("Cumsum", CumbinopRel) .set_attr<TOpPattern>("TOpPattern", kOpaque); -Expr MakeCumprod(Expr data, Integer axis, DataType dtype, Integer exclusive) { +Expr MakeCumprod(Expr data, Integer axis, DataType dtype, Bool exclusive) { auto attrs = make_object<CumbinopAttrs>(); attrs->dtype = dtype; attrs->axis = axis;
diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 2e411f3f273c..f94e3ac86cb2 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py
@@ -1762,7 +1762,9 @@ def verify_adv_index(data_shape, index_shapes): cumbinops_supported = {"cumsum": relay.op.cumsum, "cumprod": relay.op.cumprod} -def run_binop_tests(target, ctx, binop_type: str, gt_func: Callable[..., np.array]): +def run_binop_tests( + target, ctx, binop_type: str, gt_func: Callable[..., np.array], identity_value: int +): def assert_relay_cumbinop( data_np: np.array, np_out: np.array,
@@ -1770,6 +1772,7 @@ def assert_relay_cumbinop( out_dtype: str = None, rtol: float = 1e-5, atol: float = 1e-5, + exclusive: bool = False, ): inp = relay.var("data", relay.TensorType(data_np.shape, str(data_np.dtype)))
@@ -1777,7 +1780,7 @@ def assert_relay_cumbinop( raise ValueError( f"Unknown function {binop_type}.
Options: {cumbinops_supported.keys()}" ) - out = cumbinops_supported[binop_type](inp, axis, out_dtype) + out = cumbinops_supported[binop_type](inp, axis, out_dtype, exclusive=exclusive) func = relay.Function([inp], out) for kind in ["graph", "debug"]:
@@ -1805,15 +1808,29 @@ def assert_relay_cumbinop( assert_relay_cumbinop(data, gt_func(data, dtype=np.int32)) assert_relay_cumbinop(data, gt_func(data, dtype="int64"), out_dtype="int64") + # Test exclusivity operations + data = np.random.randint(-100, 100, size=(2, 2)).astype("int64") + expected_result = np.roll(gt_func(data), 1) + expected_result[0] = identity_value + assert_relay_cumbinop(data, expected_result, exclusive=True) + + expected_result = np.roll(gt_func(data, axis=0), 1, axis=0) + expected_result[0, :] = identity_value + assert_relay_cumbinop(data, expected_result, exclusive=True, axis=0) + + expected_result = np.roll(gt_func(data, axis=1), 1, axis=1) + expected_result[:, 0] = identity_value + assert_relay_cumbinop(data, expected_result, exclusive=True, axis=1) + @tvm.testing.parametrize_targets def test_cumsum(target, ctx): - run_binop_tests(target, ctx, binop_type="cumsum", gt_func=np.cumsum) + run_binop_tests(target, ctx, binop_type="cumsum", gt_func=np.cumsum, identity_value=0) @tvm.testing.parametrize_targets def test_cumprod(target, ctx): - run_binop_tests(target, ctx, binop_type="cumprod", gt_func=np.cumproduct) + run_binop_tests(target, ctx, binop_type="cumprod", gt_func=np.cumprod, identity_value=1) @tvm.testing.parametrize_targets
From 43df67ed121e8026ce6f0a2ba355087402958613 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Tue, 23 Mar 2021 14:14:27 -0700 Subject: [PATCH 14/17] Overall improve formatting, add doc message corrections simplify construction clang-format more tests undo simpler construction due to function passing stuff fix docs more exclusive doc changes more fixins --- python/tvm/relay/op/transform.py | 12 ++++++------ python/tvm/topi/cuda/scan.py | 12 ++++++------ python/tvm/topi/cumsum.py | 6 +++--- tests/python/relay/test_op_level3.py | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index 189017f6c255..f94a00db2fb1 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py
@@ -1539,9 +1539,9 @@ def cumsum(data, axis=None, dtype=None, exclusive=None): Type of the returned array and of the accumulator in which the elements are summed. If dtype is not specified, it defaults to the dtype of data. - exclusive : int, optional - If set to 1 will return exclusive sum in which the first element is not - included. In other terms, if set to 1, the j-th output element would be + exclusive : bool, optional + If true will return exclusive sum in which the first element is not + included. In other terms, if true, the j-th output element would be the sum of the first (j-1) elements. Otherwise, it would be the sum of the first j elements.
@@ -1594,9 +1594,9 @@ def cumprod(data, axis=None, dtype=None, exclusive=None): Type of the returned array and of the accumulator in which the elements are multiplied. If dtype is not specified, it defaults to the dtype of data. - exclusive : int, optional - If set to 1 will return exclusive product in which the first element is not - included. In other terms, if set to 1, the j-th output element would be + exclusive : bool, optional + If true will return exclusive product in which the first element is not + included.
In other terms, if true, the j-th output element would be the product of the first (j-1) elements. Otherwise, it would be the product of the first j elements. The product of zero elements will be 1. diff --git a/python/tvm/topi/cuda/scan.py b/python/tvm/topi/cuda/scan.py index b6452417ba79..3741c478430e 100644 --- a/python/tvm/topi/cuda/scan.py +++ b/python/tvm/topi/cuda/scan.py @@ -558,9 +558,9 @@ def cumbinop( Type of the returned array and of the accumulator in which the elements are computed. If dtype is not specified, it defaults to the dtype of data. - exclusive : int, optional - If set to 1 will return exclusive cumulative operation in which the first element is not - included. In other terms, if set to 1, the j-th output element would be + exclusive : bool, optional + If true will return exclusive cumulative operation in which the first element is not + included. In other terms, if true, the j-th output element would be the cumulative operation of the first (j-1) elements. Otherwise, it would be the cumulative operation of the first j elements. @@ -604,9 +604,9 @@ def cumsum( Type of the returned array and of the accumulator in which the elements are summed. If dtype is not specified, it defaults to the dtype of data. - exclusive : int, optional - If set to 1 will return exclusive sum in which the first element is not - included. In other terms, if set to 1, the j-th output element would be + exclusive : bool, optional + If true will return exclusive sum in which the first element is not + included. In other terms, if true, the j-th output element would be the sum of the first (j-1) elements. Otherwise, it would be the sum of the first j elements. diff --git a/python/tvm/topi/cumsum.py b/python/tvm/topi/cumsum.py index 709155ed79bc..7136d11c919f 100644 --- a/python/tvm/topi/cumsum.py +++ b/python/tvm/topi/cumsum.py @@ -64,9 +64,9 @@ def cumbinop( Type of the returned array and of the accumulator in which the elements are computed. If dtype is not specified, it defaults to the dtype of data. - exclusive : int, optional - If set to 1 will return exclusive cumulative operation in which the first element is not - included. In other terms, if set to 1, the j-th output element would be + exclusive : bool, optional + If True will return exclusive cumulative operation in which the first element is not + included. In other terms, if True, the j-th output element would be the cumulative operation of the first (j-1) elements. Otherwise, it would be the cumulative operation of the first j elements. The cumulative operation of zero elements is assumed to be the identity_value. 
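For reference, a short sketch of how the relay-level ops described above are invoked now that exclusive is a bool (graph construction only; executor plumbing is omitted and the variable names are illustrative):

    from tvm import relay

    x = relay.var("x", shape=(4,), dtype="int32")
    # Inclusive cumsum of [a0, a1, a2, a3] is [a0, a0+a1, a0+a1+a2, a0+a1+a2+a3];
    # the exclusive variant shifts right and starts from the identity, 0 for a sum.
    inclusive = relay.op.cumsum(x, axis=0)
    exclusive = relay.op.cumsum(x, axis=0, exclusive=True)
    func = relay.Function([x], relay.Tuple([inclusive, exclusive]))

The same pattern applies to relay.op.cumprod, whose exclusive form starts from 1.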
diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index f94e3ac86cb2..15bdd7ebab02 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -1809,7 +1809,7 @@ def assert_relay_cumbinop( assert_relay_cumbinop(data, gt_func(data, dtype="int64"), out_dtype="int64") # Test exclusivity operations - data = np.random.randint(-100, 100, size=(2, 2)).astype("int64") + data = np.random.randint(-100, 100, size=(10, 10)).astype("int64") expected_result = np.roll(gt_func(data), 1) expected_result[0] = identity_value assert_relay_cumbinop(data, expected_result, exclusive=True) From 8ffff272336d550a021cc369f706b64603dc4dc6 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Wed, 24 Mar 2021 10:49:42 -0700 Subject: [PATCH 15/17] merge cumsum and cumprod to scan, merge tests fix stuff --- python/tvm/relay/op/strategy/generic.py | 12 +- python/tvm/topi/__init__.py | 3 +- python/tvm/topi/cuda/scan.py | 8 +- python/tvm/topi/cumprod.py | 68 --------- python/tvm/topi/{cumsum.py => scan.py} | 52 ++++++- python/tvm/topi/unique.py | 2 +- tests/python/relay/test_op_level3.py | 40 +++-- tests/python/topi/python/test_topi_cumprod.py | 108 ------------- tests/python/topi/python/test_topi_cumsum.py | 108 ------------- tests/python/topi/python/test_topi_scan.py | 144 ++++++++++++++++++ 10 files changed, 223 insertions(+), 322 deletions(-) delete mode 100644 python/tvm/topi/cumprod.py rename python/tvm/topi/{cumsum.py => scan.py} (80%) delete mode 100644 tests/python/topi/python/test_topi_cumprod.py delete mode 100644 tests/python/topi/python/test_topi_cumsum.py create mode 100644 tests/python/topi/python/test_topi_scan.py diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index 3864d33a7c31..322a3607904f 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -1464,13 +1464,13 @@ def threefry_split_strategy(attrs, inputs, out_type, target): return strategy -def wrap_compute_cumbinop(topi_compute): - """Wrap cumbinop style topi compute""" +def wrap_compute_scanop(topi_compute): + """Wrap scanop style topi compute""" - def _compute_cumbinop(attrs, inputs, _): + def _compute_scanop(attrs, inputs, _): return [topi_compute(inputs[0], attrs.axis, attrs.dtype, attrs.exclusive)] - return _compute_cumbinop + return _compute_scanop @override_native_generic_func("cumsum_strategy") @@ -1478,7 +1478,7 @@ def cumsum_strategy(attrs, inputs, out_type, target): """cumsum generic strategy""" strategy = _op.OpStrategy() strategy.add_implementation( - wrap_compute_cumbinop(topi.cumsum), + wrap_compute_scanop(topi.cumsum), wrap_topi_schedule(topi.generic.schedule_extern), name="cumsum.generic", ) @@ -1490,7 +1490,7 @@ def cumprod_strategy(attrs, inputs, out_type, target): """cumprod generic strategy""" strategy = _op.OpStrategy() strategy.add_implementation( - wrap_compute_cumbinop(topi.cumprod), + wrap_compute_scanop(topi.cumprod), wrap_topi_schedule(topi.generic.schedule_extern), name="cumprod.generic", ) diff --git a/python/tvm/topi/__init__.py b/python/tvm/topi/__init__.py index 16a109f908be..90383ccf3d5b 100644 --- a/python/tvm/topi/__init__.py +++ b/python/tvm/topi/__init__.py @@ -42,8 +42,7 @@ from .sparse_reshape import * from .scatter_add import * from .argwhere import * -from .cumsum import * -from .cumprod import * +from .scan import * from .einsum import * from .unique import * from . 
import generic diff --git a/python/tvm/topi/cuda/scan.py b/python/tvm/topi/cuda/scan.py index 3741c478430e..3240ebcd515c 100644 --- a/python/tvm/topi/cuda/scan.py +++ b/python/tvm/topi/cuda/scan.py @@ -521,7 +521,7 @@ def traverse(op): return s -def cumbinop( +def scanop( data: tvm.te.Tensor, binop: Callable[["tvm.Expr", "tvm.Expr"], "tvm.Expr"], identity_value: Union[float, int], @@ -529,7 +529,7 @@ def cumbinop( dtype: Optional[str] = None, exclusive: Optional[bool] = None, ) -> tvm.te.Tensor: - """Cumulative binary operator with similar axis behavior as np.cumsum and np.cumprod. + """Cumulative binary operator (scan) with similar axis behavior as np.cumsum and np.cumprod. See cumprod and cumsum for an example of use. @@ -616,7 +616,7 @@ def cumsum( The result has the same size as data, and the same shape as data if axis is not None. If axis is None, the result is a 1-d array. """ - return cumbinop( + return scanop( data=data, binop=tvm.tir.generic.add, identity_value=0, @@ -659,7 +659,7 @@ def cumprod( The result has the same size as data, and the same shape as data if axis is not None. If axis is None, the result is a 1-d array. """ - return cumbinop( + return scanop( data=data, binop=tvm.tir.generic.multiply, identity_value=1, diff --git a/python/tvm/topi/cumprod.py b/python/tvm/topi/cumprod.py deleted file mode 100644 index 6108212c8a51..000000000000 --- a/python/tvm/topi/cumprod.py +++ /dev/null @@ -1,68 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name -"""Cumprod operator""" -from typing import Optional - -import tvm - -from ..tir import generic -from .cumsum import cumbinop - - -def cumprod( - data: tvm.te.Tensor, - axis: Optional[int] = None, - dtype: Optional[int] = None, - exclusive: Optional[bool] = None, -): - """Numpy style cumprod op. Return the cumulative product of the elements along a given axis. - - Parameters - ---------- - data : tvm.te.Tensor - The input data to the operator. - - axis : int, optional - Axis along which the cumulative product is computed. The default (None) is to compute - the cumprod over the flattened array. - - dtype : string, optional - Type of the returned array and of the accumulator in which the elements are multiplied. - If dtype is not specified, it defaults to the dtype of data. - - exclusive : bool, optional - If True, will return exclusive product in which the first element is not - included. In other terms, if True, the j-th output element would be - the product of the first (j-1) elements. Otherwise, it would be the product of - the first j elements. - - Returns - ------- - result : tvm.te.Tensor - The result has the same size as data, and the same shape as data if axis is not None. - If axis is None, the result is a 1-d array. 
- """ - return cumbinop( - data=data, - binop=generic.multiply, - identity_value=1, - op_name="cumprod_generic", - axis=axis, - dtype=dtype, - exclusive=exclusive, - ) diff --git a/python/tvm/topi/cumsum.py b/python/tvm/topi/scan.py similarity index 80% rename from python/tvm/topi/cumsum.py rename to python/tvm/topi/scan.py index 7136d11c919f..f5796730f762 100644 --- a/python/tvm/topi/cumsum.py +++ b/python/tvm/topi/scan.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. # pylint: disable=invalid-name -"""Cumsum operator""" +"""Scan (cumulative binary) operators""" from typing import Callable, Optional import tvm @@ -26,7 +26,7 @@ from .utils import get_const_int, prod -def cumbinop( +def scanop( data: tvm.te.Tensor, binop: Callable[["tvm.Expr", "tvm.Expr"], "tvm.Expr"], identity_value: "tvm.Expr", @@ -35,7 +35,7 @@ def cumbinop( dtype: Optional[str] = None, exclusive: Optional[bool] = None, ) -> tvm.te.Tensor: - """Cumulative binary operator with similar axis behavior as np.cumsum and np.cumprod. + """Cumulative binary operator (scan) with similar axis behavior as np.cumsum and np.cumprod. See cumprod and cumsum for an example of use. @@ -181,7 +181,7 @@ def cumsum( The result has the same size as data, and the same shape as data if axis is not None. If axis is None, the result is a 1-d array. """ - return cumbinop( + return scanop( data=data, binop=generic.add, identity_value=0, @@ -190,3 +190,47 @@ def cumsum( dtype=dtype, exclusive=exclusive, ) + + +def cumprod( + data: tvm.te.Tensor, + axis: Optional[int] = None, + dtype: Optional[int] = None, + exclusive: Optional[bool] = None, +) -> tvm.te.Tensor: + """Numpy style cumprod op. Return the cumulative product of the elements along a given axis. + + Parameters + ---------- + data : tvm.te.Tensor + The input data to the operator. + + axis : int, optional + Axis along which the cumulative product is computed. The default (None) is to compute + the cumproduct over the flattened array. + + dtype : string, optional + Type of the returned array and of the accumulator in which the elements are multiplied. + If dtype is not specified, it defaults to the dtype of data. + + exclusive : bool, optional + If True, will return exclusive product in which the first element is not + included. In other terms, if True, the j-th output element would be + the product of the first (j-1) elements. Otherwise, it would be the product of + the first j elements. + + Returns + ------- + result : tvm.te.Tensor + The result has the same size as data, and the same shape as data if axis is not None. + If axis is None, the result is a 1-d array. 
+ """ + return scanop( + data=data, + binop=generic.multiply, + identity_value=1, + op_name="cumprod_generic", + axis=axis, + dtype=dtype, + exclusive=exclusive, + ) diff --git a/python/tvm/topi/unique.py b/python/tvm/topi/unique.py index b4f27b38f65f..e7256551d7b6 100644 --- a/python/tvm/topi/unique.py +++ b/python/tvm/topi/unique.py @@ -18,7 +18,7 @@ """Unique operator""" from tvm import te, tir from ..te import hybrid -from .cumsum import cumsum +from .scan import cumsum from .sort import sort, argsort diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 15bdd7ebab02..7e443aa62ee8 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -1759,13 +1759,13 @@ def verify_adv_index(data_shape, index_shapes): # Helper for testing binop functions -cumbinops_supported = {"cumsum": relay.op.cumsum, "cumprod": relay.op.cumprod} +scanops_supported = {"cumsum": relay.op.cumsum, "cumprod": relay.op.cumprod} def run_binop_tests( target, ctx, binop_type: str, gt_func: Callable[..., np.array], identity_value: int ): - def assert_relay_cumbinop( + def assert_relay_scanop( data_np: np.array, np_out: np.array, axis: int = None, @@ -1776,11 +1776,9 @@ def assert_relay_cumbinop( ): inp = relay.var("data", relay.TensorType(data_np.shape, str(data_np.dtype))) - if binop_type not in cumbinops_supported.keys(): - raise ValueError( - f"Unknown function {binop_type}. Options: {cumbinops_supported.keys()}" - ) - out = cumbinops_supported[binop_type](inp, axis, out_dtype, exclusive=exclusive) + if binop_type not in scanops_supported.keys(): + raise ValueError(f"Unknown function {binop_type}. Options: {scanops_supported.keys()}") + out = scanops_supported[binop_type](inp, axis, out_dtype, exclusive=exclusive) func = relay.Function([inp], out) for kind in ["graph", "debug"]: @@ -1789,38 +1787,38 @@ def assert_relay_cumbinop( tvm.testing.assert_allclose(op_res.asnumpy(), np_out, rtol=rtol, atol=atol) data = np.array([2, 3, 0]) - assert_relay_cumbinop(data, gt_func(data)) - assert_relay_cumbinop(data, gt_func(data), out_dtype="int64") + assert_relay_scanop(data, gt_func(data)) + assert_relay_scanop(data, gt_func(data), out_dtype="int64") data = np.random.randn(10, 10) - assert_relay_cumbinop(data, gt_func(data)) - assert_relay_cumbinop(data, gt_func(data, axis=0), axis=0) - assert_relay_cumbinop(data, gt_func(data, axis=1), axis=1) + assert_relay_scanop(data, gt_func(data)) + assert_relay_scanop(data, gt_func(data, axis=0), axis=0) + assert_relay_scanop(data, gt_func(data, axis=1), axis=1) data = np.random.randn(10, 5, 10).astype("float32") - assert_relay_cumbinop(data, gt_func(data), rtol=1e-4, atol=1e-4) - assert_relay_cumbinop(data, gt_func(data, axis=0), axis=0, rtol=1e-4, atol=1e-4) - assert_relay_cumbinop(data, gt_func(data, axis=1), axis=1, rtol=1e-4, atol=1e-4) - assert_relay_cumbinop(data, gt_func(data, axis=-1), axis=-1, rtol=1e-4, atol=1e-4) + assert_relay_scanop(data, gt_func(data), rtol=1e-4, atol=1e-4) + assert_relay_scanop(data, gt_func(data, axis=0), axis=0, rtol=1e-4, atol=1e-4) + assert_relay_scanop(data, gt_func(data, axis=1), axis=1, rtol=1e-4, atol=1e-4) + assert_relay_scanop(data, gt_func(data, axis=-1), axis=-1, rtol=1e-4, atol=1e-4) data = np.random.rand(10) > 0.5 data = data.astype(np.int32) - assert_relay_cumbinop(data, gt_func(data, dtype=np.int32)) - assert_relay_cumbinop(data, gt_func(data, dtype="int64"), out_dtype="int64") + assert_relay_scanop(data, gt_func(data, dtype=np.int32)) + assert_relay_scanop(data, 
gt_func(data, dtype="int64"), out_dtype="int64") # Test exclusivity operations data = np.random.randint(-100, 100, size=(10, 10)).astype("int64") expected_result = np.roll(gt_func(data), 1) expected_result[0] = identity_value - assert_relay_cumbinop(data, expected_result, exclusive=True) + assert_relay_scanop(data, expected_result, exclusive=True) expected_result = np.roll(gt_func(data, axis=0), 1, axis=0) expected_result[0, :] = identity_value - assert_relay_cumbinop(data, expected_result, exclusive=True, axis=0) + assert_relay_scanop(data, expected_result, exclusive=True, axis=0) expected_result = np.roll(gt_func(data, axis=1), 1, axis=1) expected_result[:, 0] = identity_value - assert_relay_cumbinop(data, expected_result, exclusive=True, axis=1) + assert_relay_scanop(data, expected_result, exclusive=True, axis=1) @tvm.testing.parametrize_targets diff --git a/tests/python/topi/python/test_topi_cumprod.py b/tests/python/topi/python/test_topi_cumprod.py deleted file mode 100644 index 0c75308ddcea..000000000000 --- a/tests/python/topi/python/test_topi_cumprod.py +++ /dev/null @@ -1,108 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import numpy as np -import tvm -import tvm.testing -import tvm.topi.testing -from tvm import topi - - -@tvm.testing.parametrize_targets -def test_cumprod(ctx, target): - def check_cumprod(np_ref, data, axis=None, dtype=None, exclusive=False): - implementations = { - "generic": ( - lambda x: topi.cumprod(x, axis, dtype, exclusive=exclusive), - topi.generic.schedule_extern, - ), - "cuda": ( - lambda x: topi.cuda.cumprod(x, axis, dtype, exclusive=exclusive), - topi.cuda.schedule_scan, - ), - "nvptx": ( - lambda x: topi.cuda.cumprod(x, axis, dtype, exclusive=exclusive), - topi.cuda.schedule_scan, - ), - "vulkan": ( - lambda x: topi.cuda.cumprod(x, axis, dtype, exclusive=exclusive), - topi.cuda.schedule_scan, - ), - "metal": ( - lambda x: topi.cuda.cumprod(x, axis, dtype, exclusive=exclusive), - topi.cuda.schedule_scan, - ), - } - fcompute, fschedule = tvm.topi.testing.dispatch(target, implementations) - tvm.topi.testing.compare_numpy_tvm([data], np_ref, target, ctx, fcompute, fschedule) - - data = np.array([2, 3, 0]) - check_cumprod(np.cumprod(data), data) - - data = np.random.rand(10) > 0.5 - data = data.astype(np.int32) - check_cumprod(np.cumprod(data, dtype=np.int32), data) - check_cumprod(np.cumprod(data), data, dtype="int64") - - data = np.random.rand(10) > 0.5 - check_cumprod(np.cumprod(data, dtype=np.int32), data, dtype="int32") - - for in_dtype in ["float32", "float64"]: - if target == "metal" and in_dtype == "float64": - # float64 is not supported in metal - continue - data = np.random.randn(10, 10).astype(in_dtype) - check_cumprod(np.cumprod(data), data) - check_cumprod(np.cumprod(data, axis=0), data, axis=0) - check_cumprod(np.cumprod(data, axis=1), data, axis=1) - - data = np.random.randn(10, 5, 10).astype(in_dtype) - check_cumprod(np.cumprod(data), data) - check_cumprod(np.cumprod(data, axis=0), data, axis=0) - check_cumprod(np.cumprod(data, axis=1), data, axis=1) - check_cumprod(np.cumprod(data, axis=-1), data, axis=-1) - - for in_dtype in ["int32", "int64"]: - data = np.random.randint(-100, 100, size=(100, 100)).astype(in_dtype) - check_cumprod(np.cumprod(data, dtype=in_dtype), data) - check_cumprod(np.cumprod(data), data, dtype="int64") - check_cumprod(np.cumprod(data, axis=0, dtype=in_dtype), data, axis=0) - check_cumprod(np.cumprod(data, axis=1, dtype=in_dtype), data, axis=1) - - data = np.random.randint(1 << 30, (1 << 31) - 1, size=(100)).astype(in_dtype) - check_cumprod(np.cumprod(data), data, dtype="int64") - - data = np.random.randint(-100, 100, size=(100, 100)).astype("int64") - - expected_result = np.roll(np.cumprod(data), 1) - expected_result[0] = 1 - check_cumprod(expected_result, data, dtype="int64", exclusive=True) - - expected_result = np.roll(np.cumprod(data, axis=0, dtype=in_dtype), 1, axis=0) - expected_result[0, :] = 1 - check_cumprod(expected_result, data, axis=0, exclusive=True) - - expected_result = np.roll(np.cumprod(data, axis=1, dtype=in_dtype), 1, axis=1) - expected_result[:, 0] = 1 - check_cumprod(np.cumprod(data, axis=1, dtype=in_dtype), data, axis=1) - - -if __name__ == "__main__": - test_cumprod(tvm.context("cpu"), tvm.target.Target("llvm")) - test_cumprod(tvm.context("cuda"), tvm.target.Target("cuda")) - test_cumprod(tvm.context("nvptx"), tvm.target.Target("nvptx")) - test_cumprod(tvm.context("vulkan"), tvm.target.Target("vulkan")) - test_cumprod(tvm.context("metal"), tvm.target.Target("metal")) diff --git a/tests/python/topi/python/test_topi_cumsum.py b/tests/python/topi/python/test_topi_cumsum.py deleted file mode 100644 index 
0591609b6f3f..000000000000 --- a/tests/python/topi/python/test_topi_cumsum.py +++ /dev/null @@ -1,108 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import numpy as np -import tvm -import tvm.testing -import tvm.topi.testing -from tvm import topi - - -@tvm.testing.parametrize_targets -def test_cumsum(ctx, target): - def check_cumsum(np_ref, data, axis=None, dtype=None, exclusive=False): - implementations = { - "generic": ( - lambda x: topi.cumsum(x, axis, dtype, exclusive=exclusive), - topi.generic.schedule_extern, - ), - "cuda": ( - lambda x: topi.cuda.cumsum(x, axis, dtype, exclusive=exclusive), - topi.cuda.schedule_scan, - ), - "nvptx": ( - lambda x: topi.cuda.cumsum(x, axis, dtype, exclusive=exclusive), - topi.cuda.schedule_scan, - ), - "vulkan": ( - lambda x: topi.cuda.cumsum(x, axis, dtype, exclusive=exclusive), - topi.cuda.schedule_scan, - ), - "metal": ( - lambda x: topi.cuda.cumsum(x, axis, dtype, exclusive=exclusive), - topi.cuda.schedule_scan, - ), - } - fcompute, fschedule = tvm.topi.testing.dispatch(target, implementations) - tvm.topi.testing.compare_numpy_tvm([data], np_ref, target, ctx, fcompute, fschedule) - - data = np.array([2, 3, 0]) - check_cumsum(np.cumsum(data), data) - - data = np.random.rand(10) > 0.5 - data = data.astype(np.int32) - check_cumsum(np.cumsum(data, dtype=np.int32), data) - check_cumsum(np.cumsum(data), data, dtype="int64") - - data = np.random.rand(10) > 0.5 - check_cumsum(np.cumsum(data, dtype=np.int32), data, dtype="int32") - - for in_dtype in ["float32", "float64"]: - if target == "metal" and in_dtype == "float64": - # float64 is not supported in metal - continue - data = np.random.randn(10, 10).astype(in_dtype) - check_cumsum(np.cumsum(data), data) - check_cumsum(np.cumsum(data, axis=0), data, axis=0) - check_cumsum(np.cumsum(data, axis=1), data, axis=1) - - data = np.random.randn(10, 5, 10).astype(in_dtype) - check_cumsum(np.cumsum(data), data) - check_cumsum(np.cumsum(data, axis=0), data, axis=0) - check_cumsum(np.cumsum(data, axis=1), data, axis=1) - check_cumsum(np.cumsum(data, axis=-1), data, axis=-1) - - for in_dtype in ["int32", "int64"]: - data = np.random.randint(-100, 100, size=(100, 100)).astype(in_dtype) - check_cumsum(np.cumsum(data, dtype=in_dtype), data) - check_cumsum(np.cumsum(data), data, dtype="int64") - check_cumsum(np.cumsum(data, axis=0, dtype=in_dtype), data, axis=0) - check_cumsum(np.cumsum(data, axis=1, dtype=in_dtype), data, axis=1) - - data = np.random.randint(1 << 30, (1 << 31) - 1, size=(100)).astype(in_dtype) - check_cumsum(np.cumsum(data), data, dtype="int64") - - data = np.random.randint(-100, 100, size=(100, 100)).astype("int64") - - expected_result = np.roll(np.cumsum(data), 1) - expected_result[0] = 0 - check_cumsum(expected_result, data, dtype="int64", exclusive=True) - 
- expected_result = np.roll(np.cumsum(data, axis=0, dtype=in_dtype), 1, axis=0) - expected_result[0, :] = 0 - check_cumsum(expected_result, data, axis=0, exclusive=True) - - expected_result = np.roll(np.cumsum(data, axis=1, dtype=in_dtype), 1, axis=1) - expected_result[:, 0] = 0 - check_cumsum(np.cumsum(data, axis=1, dtype=in_dtype), data, axis=1) - - -if __name__ == "__main__": - test_cumsum(tvm.context("cpu"), tvm.target.Target("llvm")) - test_cumsum(tvm.context("cuda"), tvm.target.Target("cuda")) - test_cumsum(tvm.context("nvptx"), tvm.target.Target("nvptx")) - test_cumsum(tvm.context("vulkan"), tvm.target.Target("vulkan")) - test_cumsum(tvm.context("metal"), tvm.target.Target("metal")) diff --git a/tests/python/topi/python/test_topi_scan.py b/tests/python/topi/python/test_topi_scan.py new file mode 100644 index 000000000000..020fde51659f --- /dev/null +++ b/tests/python/topi/python/test_topi_scan.py @@ -0,0 +1,144 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from typing import Callable + +import numpy as np +import tvm +import tvm.testing +import tvm.topi.testing +from tvm import topi + +topi_funcs = { + "cumsum": {"generic": topi.cumsum, "cuda": topi.cuda.cumsum}, + "cumprod": {"generic": topi.cumprod, "cuda": topi.cuda.cumprod}, +} + +identity_value = {"cumsum": 0, "cumprod": 1} + + +def get_implementations(name, axis, dtype, exclusive): + topi_func_generic = topi_funcs[name]["generic"] + topi_func_cuda = topi_funcs[name]["cuda"] + + return { + "generic": ( + lambda x: topi_func_generic(x, axis, dtype, exclusive=exclusive), + topi.generic.schedule_extern, + ), + "cuda": ( + lambda x: topi_func_cuda(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "nvptx": ( + lambda x: topi_func_cuda(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "vulkan": ( + lambda x: topi_func_cuda(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "metal": ( + lambda x: topi_func_cuda(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + } + + +def _run_tests( + ctx, + target, + op_name: str = "cumsum", + gt_func: Callable[..., np.array] = np.cumsum, +): + def check_scan(np_ref, data, axis=None, dtype=None, exclusive=False): + implementations = get_implementations(op_name, axis, dtype, exclusive) + fcompute, fschedule = tvm.topi.testing.dispatch(target, implementations) + tvm.topi.testing.compare_numpy_tvm([data], np_ref, target, ctx, fcompute, fschedule) + + data = np.array([2, 3, 0]) + check_scan(gt_func(data), data) + + data = np.random.rand(10) > 0.5 + data = data.astype(np.int32) + check_scan(gt_func(data, dtype=np.int32), data) + check_scan(gt_func(data), data, dtype="int64") + + data = np.random.rand(10) > 0.5 + check_scan(gt_func(data, dtype=np.int32), data, dtype="int32") + + for 
in_dtype in ["float32", "float64"]: + if target == "metal" and in_dtype == "float64": + # float64 is not supported in metal + continue + data = np.random.randn(10, 10).astype(in_dtype) + check_scan(gt_func(data), data) + check_scan(gt_func(data, axis=0), data, axis=0) + check_scan(gt_func(data, axis=1), data, axis=1) + + data = np.random.randn(10, 5, 10).astype(in_dtype) + check_scan(gt_func(data), data) + check_scan(gt_func(data, axis=0), data, axis=0) + check_scan(gt_func(data, axis=1), data, axis=1) + check_scan(gt_func(data, axis=-1), data, axis=-1) + + for in_dtype in ["int32", "int64"]: + data = np.random.randint(-100, 100, size=(100, 100)).astype(in_dtype) + check_scan(gt_func(data, dtype=in_dtype), data) + check_scan(gt_func(data), data, dtype="int64") + check_scan(gt_func(data, axis=0, dtype=in_dtype), data, axis=0) + check_scan(gt_func(data, axis=1, dtype=in_dtype), data, axis=1) + + data = np.random.randint(1 << 30, (1 << 31) - 1, size=(100)).astype(in_dtype) + check_scan(gt_func(data), data, dtype="int64") + + data = np.random.randint(-100, 100, size=(100, 100)).astype("int64") + + expected_result = np.roll(gt_func(data), 1) + expected_result[0] = identity_value[op_name] + check_scan(expected_result, data, dtype="int64", exclusive=True) + + expected_result = np.roll(gt_func(data, axis=0, dtype=in_dtype), 1, axis=0) + expected_result[0, :] = identity_value[op_name] + check_scan(expected_result, data, axis=0, exclusive=True) + + expected_result = np.roll(gt_func(data, axis=1, dtype=in_dtype), 1, axis=1) + expected_result[:, 0] = identity_value[op_name] + check_scan(expected_result, data, axis=1, exclusive=True) + + +@tvm.testing.parametrize_targets +def test_cumsum(ctx, target): + _run_tests(ctx, target, op_name="cumsum", gt_func=np.cumsum) + + +@tvm.testing.parametrize_targets +def test_cumprod(ctx, target): + _run_tests(ctx, target, op_name="cumprod", gt_func=np.cumprod) + + +if __name__ == "__main__": + test_cumsum(tvm.context("cpu"), tvm.target.Target("llvm")) + test_cumsum(tvm.context("cuda"), tvm.target.Target("cuda")) + test_cumsum(tvm.context("nvptx"), tvm.target.Target("nvptx")) + test_cumsum(tvm.context("vulkan"), tvm.target.Target("vulkan")) + test_cumsum(tvm.context("metal"), tvm.target.Target("metal")) + + test_cumprod(tvm.context("cpu"), tvm.target.Target("llvm")) + test_cumprod(tvm.context("cuda"), tvm.target.Target("cuda")) + test_cumprod(tvm.context("nvptx"), tvm.target.Target("nvptx")) + test_cumprod(tvm.context("vulkan"), tvm.target.Target("vulkan")) + test_cumprod(tvm.context("metal"), tvm.target.Target("metal"))
From b9fb77950abcff36d3e74e2df9fe6acc80a6febf Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Wed, 24 Mar 2021 11:43:25 -0700 Subject: [PATCH 16/17] remove other mentions of cumbinop -> scanop --- include/tvm/relay/attrs/transform.h | 4 ++-- python/tvm/relay/op/strategy/cuda.py | 4 ++-- src/relay/op/tensor/transform.cc | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index 4827579d2211..a5544c8a8799 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h
@@ -439,11 +439,11 @@ struct MatrixSetDiagAttrs : public tvm::AttrsNode<MatrixSetDiagAttrs> { }; // struct MatrixSetDiagAttrs /*!
\brief Attributes used in cumsum and cumprod operator */ -struct CumbinopAttrs : public tvm::AttrsNode<CumbinopAttrs> { +struct ScanopAttrs : public tvm::AttrsNode<ScanopAttrs> { Integer axis; DataType dtype; Bool exclusive = Bool(false); - TVM_DECLARE_ATTRS(CumbinopAttrs, "relay.attrs.CumbinopAttrs") { + TVM_DECLARE_ATTRS(ScanopAttrs, "relay.attrs.ScanopAttrs") { TVM_ATTR_FIELD(axis).describe("The axis to operate over").set_default(NullValue<Integer>()); TVM_ATTR_FIELD(dtype).describe("Output data type").set_default(NullValue<DataType>());
diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index 0da2d831c510..1a6742526607 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ b/python/tvm/relay/op/strategy/cuda.py
@@ -1018,7 +1018,7 @@ def cumsum_strategy_cuda(attrs, inputs, out_type, target): """cumsum cuda strategy""" strategy = _op.OpStrategy() strategy.add_implementation( - wrap_compute_cumbinop(topi.cuda.cumsum), + wrap_compute_scanop(topi.cuda.cumsum), wrap_topi_schedule(topi.cuda.schedule_scan), name="cumsum.cuda", )
@@ -1030,7 +1030,7 @@ def cumprod_strategy_cuda(attrs, inputs, out_type, target): """cumprod cuda strategy""" strategy = _op.OpStrategy() strategy.add_implementation( - wrap_compute_cumbinop(topi.cuda.cumprod), + wrap_compute_scanop(topi.cuda.cumprod), wrap_topi_schedule(topi.cuda.schedule_scan), name="cumprod.cuda", )
diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index 96078da72a12..b6c578fa5053 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc
@@ -3772,20 +3772,20 @@ RELAY_REGISTER_OP("adv_index") .set_attr<TOpPattern>("TOpPattern", kInjective) .set_attr<FTVMCompute>("FTVMCompute", AdvIndexCompute); -TVM_REGISTER_NODE_TYPE(CumbinopAttrs); +TVM_REGISTER_NODE_TYPE(ScanopAttrs); -bool CumbinopRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, +bool ScanopRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, const TypeReporter& reporter) { // types: [data, output] ICHECK_EQ(types.size(), 2) << "Expects two types, one for the input and another for the output"; const auto* data = types[0].as<TensorTypeNode>(); if (data == nullptr) { ICHECK(types[0].as<IncompleteTypeNode>()) - << "cumbinop: expect input type to be TensorType but get " << types[0]; + << "Scanop: expect input type to be TensorType but get " << types[0]; return false; } - const auto* param = attrs.as<CumbinopAttrs>(); + const auto* param = attrs.as<ScanopAttrs>(); auto dtype = param->dtype; if (dtype.is_void()) {
@@ -3806,7 +3806,7 @@ bool CumbinopRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, } Expr MakeCumsum(Expr data, Integer axis, DataType dtype, Bool exclusive) { - auto attrs = make_object<CumbinopAttrs>(); + auto attrs = make_object<ScanopAttrs>(); attrs->dtype = dtype; attrs->axis = axis; attrs->exclusive = exclusive;
@@ -3822,11 +3822,11 @@ RELAY_REGISTER_OP("cumsum") .set_num_inputs(1) .add_argument("data", "Tensor", "The input tensor.") .set_support_level(3) - .add_type_rel("Cumsum", CumbinopRel) + .add_type_rel("Cumsum", ScanopRel) .set_attr<TOpPattern>("TOpPattern", kOpaque); Expr MakeCumprod(Expr data, Integer axis, DataType dtype, Bool exclusive) { - auto attrs = make_object<CumbinopAttrs>(); + auto attrs = make_object<ScanopAttrs>(); attrs->dtype = dtype; attrs->axis = axis; attrs->exclusive = exclusive;
@@ -3842,7 +3842,7 @@ RELAY_REGISTER_OP("cumprod") .set_num_inputs(1) .add_argument("data", "Tensor", "The input tensor.") .set_support_level(3) - .add_type_rel("Cumprod", CumbinopRel) + .add_type_rel("Cumprod", ScanopRel) .set_attr<TOpPattern>("TOpPattern", kOpaque); TVM_REGISTER_NODE_TYPE(UniqueAttrs);
From dbea713157698805853fca44c987dd5073487f7d Mon Sep 17 00:00:00 2001
From: Andrew Zhao Luo Date: Wed, 24 Mar 2021 12:42:22 -0700 Subject: [PATCH 17/17] lint formatting --- python/tvm/topi/__init__.py | 2 +- src/relay/op/tensor/transform.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/python/tvm/topi/__init__.py b/python/tvm/topi/__init__.py index 90383ccf3d5b..ef2c5c1ea4b5 100644 --- a/python/tvm/topi/__init__.py +++ b/python/tvm/topi/__init__.py
@@ -42,7 +42,7 @@ from .sparse_reshape import * from .scatter_add import * from .argwhere import * -from .scan import * +from .scan import * from .einsum import * from .unique import * from . import generic
diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index b6c578fa5053..6fb9f77f99ea 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc
@@ -3775,7 +3775,7 @@ RELAY_REGISTER_OP("adv_index") TVM_REGISTER_NODE_TYPE(ScanopAttrs); bool ScanopRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, - const TypeReporter& reporter) { + const TypeReporter& reporter) { // types: [data, output] ICHECK_EQ(types.size(), 2) << "Expects two types, one for the input and another for the output"; const auto* data = types[0].as<TensorTypeNode>();