From f2ef782239822088086a9f5f6c3ef88cd8198f7d Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Fri, 19 Mar 2021 14:18:57 -0700 Subject: [PATCH 01/17] make cumbinop, refactor cumsum, add cumprod --- python/tvm/topi/cumprod.py | 67 +++++++++++++++++++++++++++ python/tvm/topi/cumsum.py | 93 ++++++++++++++++++++++++-------------- 2 files changed, 126 insertions(+), 34 deletions(-) create mode 100644 python/tvm/topi/cumprod.py diff --git a/python/tvm/topi/cumprod.py b/python/tvm/topi/cumprod.py new file mode 100644 index 000000000000..b3ce51265af4 --- /dev/null +++ b/python/tvm/topi/cumprod.py @@ -0,0 +1,67 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name +"""Cumprod operator""" +from typing import Callable, Optional + +import tvm + +from ..tir import generic +from .cumsum import cumbinop + + +def cumprod( + data: tvm.te.Tensor, + axis: Optional[int] = None, + dtype: Optional[int] = None, + exclusive: Optional[bool] = None, +): + """Numpy style cumprod op. Return the cumulative sum of the elements along a given axis. + + Parameters + ---------- + data : tvm.te.Tensor + The input data to the operator. + + axis : int, optional + Axis along which the cumulative product is computed. The default (None) is to compute + the cumsum over the flattened array. + + dtype : string, optional + Type of the returned array and of the accumulator in which the elements are summed. + If dtype is not specified, it defaults to the dtype of data. + + exclusive : bool, optional + If True, will return exclusive sum in which the first element is not + included. In other terms, if True, the j-th output element would be + the product of the first (j-1) elements. Otherwise, it would be the product of + the first j elements. + + Returns + ------- + result : tvm.te.Tensor + The result has the same size as data, and the same shape as data if axis is not None. + If axis is None, the result is a 1-d array. + """ + cumbinop( + data=data, + axis=axis, + dtype=dtype, + exclusive=exclusive, + binop=generic.multiply, + identity_value=1, + ) diff --git a/python/tvm/topi/cumsum.py b/python/tvm/topi/cumsum.py index 2013a352874d..0b064021c36e 100644 --- a/python/tvm/topi/cumsum.py +++ b/python/tvm/topi/cumsum.py @@ -16,39 +16,26 @@ # under the License. # pylint: disable=invalid-name """Cumsum operator""" -from ..tir import decl_buffer, ir_builder -from ..te import extern -from .utils import prod, get_const_int -from .math import cast - - -def cumsum(data, axis=None, dtype=None, exclusive=None): - """Numpy style cumsum op. Return the cumulative sum of the elements along a given axis. - - Parameters - ---------- - data : tvm.te.Tensor - The input data to the operator. 
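For reviewers: the inclusive/exclusive semantics described in the cumprod docstring above map onto NumPy as follows (a rough sketch for review, not part of the patch itself):

    import numpy as np

    data = np.array([1, 2, 3, 4])

    # Inclusive cumprod: the j-th element is the product of the first j elements.
    np.cumprod(data)                          # -> [1, 2, 6, 24]

    # Exclusive cumprod: the j-th element is the product of the first (j - 1)
    # elements; the empty product is the identity value 1.
    exclusive = np.roll(np.cumprod(data), 1)
    exclusive[0] = 1                          # -> [1, 1, 2, 6]

The same roll-and-reseed construction reappears later in the series as the test oracle for the exclusive mode.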
+from typing import Callable, Optional - axis : int, optional - Axis along which the cumulative sum is computed. The default (None) is to compute - the cumsum over the flattened array. +import tvm - dtype : string, optional - Type of the returned array and of the accumulator in which the elements are summed. - If dtype is not specified, it defaults to the dtype of data. +from ..te import extern +from ..tir import decl_buffer, generic, ir_builder +from .math import cast +from .utils import get_const_int, prod - exclusive : int, optional - If set to 1 will return exclusive sum in which the first element is not - included. In other terms, if set to 1, the j-th output element would be - the sum of the first (j-1) elements. Otherwise, it would be the sum of - the first j elements. - Returns - ------- - result : tvm.te.Tensor - The result has the same size as data, and the same shape as data if axis is not None. - If axis is None, the result is a 1-d array. +def cumbinop( + data: tvm.te.Tensor, + axis: Optional[int] = None, + dtype: Optional[str] = None, + exclusive: Optional[bool] = None, + binop: Callable[["tvm.Expr", "tvm.Expr"], "tvm.Expr"] = generic.add, + identity_value: "tvm.Expr" = 0, +) -> tvm.te.Tensor: + """ + TODO """ if dtype is None or dtype == "": dtype = data.dtype @@ -82,7 +69,7 @@ def maybe_cast(x): axis_mul_after *= value if exclusive is None: - exclusive = 0 + exclusive = False def gen_ir(data_buf, out_buf): ib = ir_builder.create() @@ -93,18 +80,18 @@ def gen_ir(data_buf, out_buf): i = fused // axis_mul_after j = fused % axis_mul_after base_idx = i * cumsum_axis_len * axis_mul_after + j - if exclusive == 0: + if exclusive: out_buf[base_idx] = maybe_cast(data_buf[base_idx]) else: - out_buf[base_idx] = cast(0, dtype) + out_buf[base_idx] = cast(identity_value, dtype) with ib.for_range(0, cumsum_axis_len - 1, "_k") as _k: k = _k + 1 cur_idx = base_idx + k * axis_mul_after prev_idx = base_idx + (k - 1) * axis_mul_after if exclusive == 0: - out_buf[cur_idx] = out_buf[prev_idx] + maybe_cast(data_buf[cur_idx]) + out_buf[cur_idx] = binop(out_buf[prev_idx], maybe_cast(data_buf[cur_idx])) else: - out_buf[cur_idx] = out_buf[prev_idx] + maybe_cast(data_buf[prev_idx]) + out_buf[cur_idx] = binop(out_buf[prev_idx], maybe_cast(data_buf[prev_idx])) return ib.get() @@ -119,3 +106,41 @@ def gen_ir(data_buf, out_buf): name="cumsum_generic", tag="cumsum_generic", ) + + +def cumsum( + data: tvm.te.Tensor, + axis: Optional[int] = None, + dtype: Optional[int] = None, + exclusive: Optional[bool] = None, +): + """Numpy style cumsum op. Return the cumulative sum of the elements along a given axis. + + Parameters + ---------- + data : tvm.te.Tensor + The input data to the operator. + + axis : int, optional + Axis along which the cumulative sum is computed. The default (None) is to compute + the cumsum over the flattened array. + + dtype : string, optional + Type of the returned array and of the accumulator in which the elements are summed. + If dtype is not specified, it defaults to the dtype of data. + + exclusive : bool, optional + If True, will return exclusive sum in which the first element is not + included. In other terms, if True, the j-th output element would be + the sum of the first (j-1) elements. Otherwise, it would be the sum of + the first j elements. + + Returns + ------- + result : tvm.te.Tensor + The result has the same size as data, and the same shape as data if axis is not None. + If axis is None, the result is a 1-d array. 
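As a sketch of what the generic IR above computes, here is a hypothetical NumPy reference (cumbinop_ref is an illustrative name, not part of the patch); it mirrors gen_ir's decomposition into (axis_mul_before, cumsum_axis_len, axis_mul_after) by scanning sequentially along the chosen axis:

    import numpy as np

    def cumbinop_ref(data, binop, identity_value, axis=0, exclusive=False):
        # Move the scan axis to the front; writing through the moveaxis views
        # updates `out` in place, as gen_ir does via its base_idx strides.
        out = np.empty_like(data)
        d = np.moveaxis(data, axis, 0)
        o = np.moveaxis(out, axis, 0)
        o[0] = identity_value if exclusive else d[0]
        for k in range(1, d.shape[0]):
            o[k] = binop(o[k - 1], d[k - 1] if exclusive else d[k])
        return out

    cumbinop_ref(np.array([1, 2, 3, 4]), lambda a, b: a + b, 0)                  # [1, 3, 6, 10]
    cumbinop_ref(np.array([1, 2, 3, 4]), lambda a, b: a * b, 1, exclusive=True)  # [1, 1, 2, 6]

Note that as of this patch the first-element branch in gen_ir is inverted relative to this reference (the exclusive case should seed with the identity); the next patch in the series swaps it back.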
+ """ + cumbinop( + data=data, axis=axis, dtype=dtype, exclusive=exclusive, binop=generic.add, identity_value=0 + ) From 2dc85a0bd7109bbe49695a04ec5106e90508633c Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Fri, 19 Mar 2021 17:28:28 -0700 Subject: [PATCH 02/17] cumsum exclusive test --- python/tvm/topi/cumsum.py | 8 +-- tests/python/topi/python/test_topi_cumsum.py | 51 +++++++++++++++----- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/python/tvm/topi/cumsum.py b/python/tvm/topi/cumsum.py index 0b064021c36e..271bb151372e 100644 --- a/python/tvm/topi/cumsum.py +++ b/python/tvm/topi/cumsum.py @@ -81,9 +81,9 @@ def gen_ir(data_buf, out_buf): j = fused % axis_mul_after base_idx = i * cumsum_axis_len * axis_mul_after + j if exclusive: - out_buf[base_idx] = maybe_cast(data_buf[base_idx]) - else: out_buf[base_idx] = cast(identity_value, dtype) + else: + out_buf[base_idx] = maybe_cast(data_buf[base_idx]) with ib.for_range(0, cumsum_axis_len - 1, "_k") as _k: k = _k + 1 cur_idx = base_idx + k * axis_mul_after @@ -113,7 +113,7 @@ def cumsum( axis: Optional[int] = None, dtype: Optional[int] = None, exclusive: Optional[bool] = None, -): +) -> tvm.te.Tensor: """Numpy style cumsum op. Return the cumulative sum of the elements along a given axis. Parameters @@ -141,6 +141,6 @@ def cumsum( The result has the same size as data, and the same shape as data if axis is not None. If axis is None, the result is a 1-d array. """ - cumbinop( + return cumbinop( data=data, axis=axis, dtype=dtype, exclusive=exclusive, binop=generic.add, identity_value=0 ) diff --git a/tests/python/topi/python/test_topi_cumsum.py b/tests/python/topi/python/test_topi_cumsum.py index cfe5130643c5..4afe1f3926f0 100644 --- a/tests/python/topi/python/test_topi_cumsum.py +++ b/tests/python/topi/python/test_topi_cumsum.py @@ -17,19 +17,34 @@ import numpy as np import tvm import tvm.testing -from tvm import topi import tvm.topi.testing +from tvm import topi @tvm.testing.parametrize_targets def test_cumsum(ctx, target): - def check_cumsum(np_ref, data, axis=None, dtype=None): + def check_cumsum(np_ref, data, axis=None, dtype=None, exclusive=False): implementations = { - "generic": (lambda x: topi.cumsum(x, axis, dtype), topi.generic.schedule_extern), - "cuda": (lambda x: topi.cuda.cumsum(x, axis, dtype), topi.cuda.schedule_scan), - "nvptx": (lambda x: topi.cuda.cumsum(x, axis, dtype), topi.cuda.schedule_scan), - "vulkan": (lambda x: topi.cuda.cumsum(x, axis, dtype), topi.cuda.schedule_scan), - "metal": (lambda x: topi.cuda.cumsum(x, axis, dtype), topi.cuda.schedule_scan), + "generic": ( + lambda x: topi.cumsum(x, axis, dtype, exclusive=exclusive), + topi.generic.schedule_extern, + ), + "cuda": ( + lambda x: topi.cuda.cumsum(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "nvptx": ( + lambda x: topi.cuda.cumsum(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "vulkan": ( + lambda x: topi.cuda.cumsum(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "metal": ( + lambda x: topi.cuda.cumsum(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), } fcompute, fschedule = tvm.topi.testing.dispatch(target, implementations) tvm.topi.testing.compare_numpy_tvm([data], np_ref, target, ctx, fcompute, fschedule) @@ -70,10 +85,24 @@ def check_cumsum(np_ref, data, axis=None, dtype=None): data = np.random.randint(1 << 30, (1 << 31) - 1, size=(100)).astype(in_dtype) check_cumsum(np.cumsum(data), data, dtype="int64") + data = np.random.randint(-100, 100, 
size=(100, 100)).astype("int64")
+
+    expected_result = np.roll(np.cumsum(data), 1)
+    expected_result[0] = 0
+    check_cumsum(expected_result, data, dtype="int64", exclusive=True)
+
+    expected_result = np.roll(np.cumsum(data, axis=0, dtype="int64"), 1, axis=0)
+    expected_result[0, :] = 0
+    check_cumsum(expected_result, data, axis=0, exclusive=True)
+
+    expected_result = np.roll(np.cumsum(data, axis=1, dtype="int64"), 1, axis=1)
+    expected_result[:, 0] = 0
+    check_cumsum(expected_result, data, axis=1, exclusive=True)
+
 if __name__ == "__main__":
     test_cumsum(tvm.context("cpu"), tvm.target.Target("llvm"))
-    test_cumsum(tvm.context("cuda"), tvm.target.Target("cuda"))
-    test_cumsum(tvm.context("nvptx"), tvm.target.Target("nvptx"))
-    test_cumsum(tvm.context("vulkan"), tvm.target.Target("vulkan"))
-    test_cumsum(tvm.context("metal"), tvm.target.Target("metal"))
+    # test_cumsum(tvm.context("cuda"), tvm.target.Target("cuda"))
+    # test_cumsum(tvm.context("nvptx"), tvm.target.Target("nvptx"))
+    # test_cumsum(tvm.context("vulkan"), tvm.target.Target("vulkan"))
+    # test_cumsum(tvm.context("metal"), tvm.target.Target("metal"))

From 94061501629dbd28ea18edd3ef2c457871c08efc Mon Sep 17 00:00:00 2001
From: Andrew Zhao Luo
Date: Fri, 19 Mar 2021 17:35:32 -0700
Subject: [PATCH 03/17] Add cumprod + flesh out cumsum tests

add cumprod and tests

reinstate tests

rethink
---
 python/tvm/topi/__init__.py                   |   1 +
 python/tvm/topi/cumprod.py                    |   7 +-
 python/tvm/topi/cumsum.py                     |  17 ++-
 tests/python/topi/python/test_topi_cumprod.py | 108 ++++++++++++++++++
 tests/python/topi/python/test_topi_cumsum.py  |   8 +-
 5 files changed, 129 insertions(+), 12 deletions(-)
 create mode 100644 tests/python/topi/python/test_topi_cumprod.py

diff --git a/python/tvm/topi/__init__.py b/python/tvm/topi/__init__.py
index c196b33cf880..16a109f908be 100644
--- a/python/tvm/topi/__init__.py
+++ b/python/tvm/topi/__init__.py
@@ -43,6 +43,7 @@
 from .scatter_add import *
 from .argwhere import *
 from .cumsum import *
+from .cumprod import *
 from .einsum import *
 from .unique import *
 from . import generic

diff --git a/python/tvm/topi/cumprod.py b/python/tvm/topi/cumprod.py
index b3ce51265af4..3245184fbe61 100644
--- a/python/tvm/topi/cumprod.py
+++ b/python/tvm/topi/cumprod.py
@@ -57,11 +57,12 @@ def cumprod(
         The result has the same size as data, and the same shape as data if axis is not None.
         If axis is None, the result is a 1-d array.
     """
-    cumbinop(
+    return cumbinop(
         data=data,
+        binop=generic.multiply,
+        identity_value=1,
+        op_name="cumprod_generic",
         axis=axis,
         dtype=dtype,
         exclusive=exclusive,
-        binop=generic.multiply,
-        identity_value=1,
     )

diff --git a/python/tvm/topi/cumsum.py b/python/tvm/topi/cumsum.py
index 271bb151372e..e5e7a7036ae1 100644
--- a/python/tvm/topi/cumsum.py
+++ b/python/tvm/topi/cumsum.py
@@ -28,11 +28,12 @@ def cumbinop(
     data: tvm.te.Tensor,
+    binop: Callable[["tvm.Expr", "tvm.Expr"], "tvm.Expr"],
+    identity_value: "tvm.Expr",
+    op_name: str,
     axis: Optional[int] = None,
     dtype: Optional[str] = None,
     exclusive: Optional[bool] = None,
-    binop: Callable[["tvm.Expr", "tvm.Expr"], "tvm.Expr"] = generic.add,
-    identity_value: "tvm.Expr" = 0,
 ) -> tvm.te.Tensor:
     """
     TODO
     """
     if dtype is None or dtype == "":
         dtype = data.dtype
@@ -103,8 +104,8 @@ def gen_ir(data_buf, out_buf):
         lambda ins, outs: gen_ir(ins[0], outs[0]),
         dtype=dtype,
         out_buffers=[out_buf],
-        name="cumsum_generic",
-        tag="cumsum_generic",
+        name=op_name,
+        tag=op_name,
     )
@@ -142,5 +143,11 @@ def cumsum(
         If axis is None, the result is a 1-d array.
""" return cumbinop( - data=data, axis=axis, dtype=dtype, exclusive=exclusive, binop=generic.add, identity_value=0 + data=data, + binop=generic.add, + identity_value=0, + op_name="cumsum_generic", + axis=axis, + dtype=dtype, + exclusive=exclusive, ) diff --git a/tests/python/topi/python/test_topi_cumprod.py b/tests/python/topi/python/test_topi_cumprod.py new file mode 100644 index 000000000000..0c75308ddcea --- /dev/null +++ b/tests/python/topi/python/test_topi_cumprod.py @@ -0,0 +1,108 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import numpy as np +import tvm +import tvm.testing +import tvm.topi.testing +from tvm import topi + + +@tvm.testing.parametrize_targets +def test_cumprod(ctx, target): + def check_cumprod(np_ref, data, axis=None, dtype=None, exclusive=False): + implementations = { + "generic": ( + lambda x: topi.cumprod(x, axis, dtype, exclusive=exclusive), + topi.generic.schedule_extern, + ), + "cuda": ( + lambda x: topi.cuda.cumprod(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "nvptx": ( + lambda x: topi.cuda.cumprod(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "vulkan": ( + lambda x: topi.cuda.cumprod(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "metal": ( + lambda x: topi.cuda.cumprod(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + } + fcompute, fschedule = tvm.topi.testing.dispatch(target, implementations) + tvm.topi.testing.compare_numpy_tvm([data], np_ref, target, ctx, fcompute, fschedule) + + data = np.array([2, 3, 0]) + check_cumprod(np.cumprod(data), data) + + data = np.random.rand(10) > 0.5 + data = data.astype(np.int32) + check_cumprod(np.cumprod(data, dtype=np.int32), data) + check_cumprod(np.cumprod(data), data, dtype="int64") + + data = np.random.rand(10) > 0.5 + check_cumprod(np.cumprod(data, dtype=np.int32), data, dtype="int32") + + for in_dtype in ["float32", "float64"]: + if target == "metal" and in_dtype == "float64": + # float64 is not supported in metal + continue + data = np.random.randn(10, 10).astype(in_dtype) + check_cumprod(np.cumprod(data), data) + check_cumprod(np.cumprod(data, axis=0), data, axis=0) + check_cumprod(np.cumprod(data, axis=1), data, axis=1) + + data = np.random.randn(10, 5, 10).astype(in_dtype) + check_cumprod(np.cumprod(data), data) + check_cumprod(np.cumprod(data, axis=0), data, axis=0) + check_cumprod(np.cumprod(data, axis=1), data, axis=1) + check_cumprod(np.cumprod(data, axis=-1), data, axis=-1) + + for in_dtype in ["int32", "int64"]: + data = np.random.randint(-100, 100, size=(100, 100)).astype(in_dtype) + check_cumprod(np.cumprod(data, dtype=in_dtype), data) + check_cumprod(np.cumprod(data), data, dtype="int64") + check_cumprod(np.cumprod(data, axis=0, dtype=in_dtype), data, axis=0) + 
check_cumprod(np.cumprod(data, axis=1, dtype=in_dtype), data, axis=1)
+
+        data = np.random.randint(1 << 30, (1 << 31) - 1, size=(100)).astype(in_dtype)
+        check_cumprod(np.cumprod(data), data, dtype="int64")
+
+    data = np.random.randint(-100, 100, size=(100, 100)).astype("int64")
+
+    expected_result = np.roll(np.cumprod(data), 1)
+    expected_result[0] = 1
+    check_cumprod(expected_result, data, dtype="int64", exclusive=True)
+
+    expected_result = np.roll(np.cumprod(data, axis=0, dtype="int64"), 1, axis=0)
+    expected_result[0, :] = 1
+    check_cumprod(expected_result, data, axis=0, exclusive=True)
+
+    expected_result = np.roll(np.cumprod(data, axis=1, dtype="int64"), 1, axis=1)
+    expected_result[:, 0] = 1
+    check_cumprod(expected_result, data, axis=1, exclusive=True)
+
+
+if __name__ == "__main__":
+    test_cumprod(tvm.context("cpu"), tvm.target.Target("llvm"))
+    test_cumprod(tvm.context("cuda"), tvm.target.Target("cuda"))
+    test_cumprod(tvm.context("nvptx"), tvm.target.Target("nvptx"))
+    test_cumprod(tvm.context("vulkan"), tvm.target.Target("vulkan"))
+    test_cumprod(tvm.context("metal"), tvm.target.Target("metal"))
diff --git a/tests/python/topi/python/test_topi_cumsum.py b/tests/python/topi/python/test_topi_cumsum.py
index 4afe1f3926f0..0591609b6f3f 100644
--- a/tests/python/topi/python/test_topi_cumsum.py
+++ b/tests/python/topi/python/test_topi_cumsum.py
@@ -102,7 +102,7 @@ def check_cumsum(np_ref, data, axis=None, dtype=None, exclusive=False):
 if __name__ == "__main__":
     test_cumsum(tvm.context("cpu"), tvm.target.Target("llvm"))
-    # test_cumsum(tvm.context("cuda"), tvm.target.Target("cuda"))
-    # test_cumsum(tvm.context("nvptx"), tvm.target.Target("nvptx"))
-    # test_cumsum(tvm.context("vulkan"), tvm.target.Target("vulkan"))
-    # test_cumsum(tvm.context("metal"), tvm.target.Target("metal"))
+    test_cumsum(tvm.context("cuda"), tvm.target.Target("cuda"))
+    test_cumsum(tvm.context("nvptx"), tvm.target.Target("nvptx"))
+    test_cumsum(tvm.context("vulkan"), tvm.target.Target("vulkan"))
+    test_cumsum(tvm.context("metal"), tvm.target.Target("metal"))

From b908bdf2c629e9af259a00f0b41f786e548c6e3f Mon Sep 17 00:00:00 2001
From: Andrew Zhao Luo
Date: Mon, 22 Mar 2021 01:37:14 -0700
Subject: [PATCH 04/17] add rudimentary scan implementation

---
 python/tvm/topi/cuda/scan.py | 87 ++++++++++++++++++++++++++++------
 1 file changed, 74 insertions(+), 13 deletions(-)

diff --git a/python/tvm/topi/cuda/scan.py b/python/tvm/topi/cuda/scan.py
index 84ab5dcf9756..76f19e2ef939 100644
--- a/python/tvm/topi/cuda/scan.py
+++ b/python/tvm/topi/cuda/scan.py
@@ -16,13 +16,16 @@
 # under the License.
 # pylint: disable=invalid-name, too-many-locals, too-many-statements
 "Scan related operators"
+from typing import Callable, Optional
+
 import tvm
 from tvm import te
-from tvm.contrib.thrust import can_use_thrust, can_use_rocthrust
-from ..transform import expand_dims, squeeze, transpose, reshape
-from ..utils import ceil_div, swap, prod, get_const_int
-from ..math import cast
+from tvm.contrib.thrust import can_use_rocthrust, can_use_thrust
+
+from ..
import tag +from ..math import cast +from ..transform import expand_dims, reshape, squeeze, transpose +from ..utils import ceil_div, get_const_int, prod, swap from .injective import schedule_injective_from_existing @@ -347,7 +350,9 @@ def exclusive_scan( def do_scan(data, output_dtype): target = tvm.target.Target.current() - if target and ( + + # TODO: add support for a prod_scan + if target and binop == tvm.generic.add and ( can_use_thrust(target, "tvm.contrib.thrust.sum_scan") or can_use_rocthrust(target, "tvm.contrib.thrust.sum_scan") ): @@ -486,7 +491,29 @@ def traverse(op): return s -def cumsum(data, axis=None, dtype=None, exclusive=None): +def cumbinop( + data: tvm.te.Tensor, + binop: Callable[["tvm.Expr", "tvm.Expr"], "tvm.Expr"], + axis: Optional[int] = None, + dtype: Optional[str] = None, + exclusive: Optional[bool] = None, +) -> tvm.te.Tensor: + """TODO""" + if axis is None: + axis = 0 + data = reshape(data, (prod(data.shape),)) + axis = get_const_int(axis) + if exclusive is not None and exclusive != 0: + return exclusive_scan(data, axis, output_dtype=dtype, binop=binop) + return inclusive_scan(data, axis, output_dtype=dtype, binop=binop) + + +def cumsum( + data: tvm.te.Tensor, + axis: Optional[int] = None, + dtype: Optional[int] = None, + exclusive: Optional[bool] = None, +) -> tvm.te.Tensor: """Numpy style cumsum op. Return the cumulative sum of the elements along a given axis. Parameters @@ -514,10 +541,44 @@ def cumsum(data, axis=None, dtype=None, exclusive=None): The result has the same size as data, and the same shape as data if axis is not None. If axis is None, the result is a 1-d array. """ - if axis is None: - axis = 0 - data = reshape(data, (prod(data.shape),)) - axis = get_const_int(axis) - if exclusive is not None and exclusive != 0: - return exclusive_scan(data, axis, output_dtype=dtype, binop=tvm.tir.generic.add) - return inclusive_scan(data, axis, output_dtype=dtype, binop=tvm.tir.generic.add) + return cumbinop( + data=data, binop=tvm.tir.generic.add, axis=axis, dtype=dtype, exclusive=exclusive + ) + + +def cumprod( + data: tvm.te.Tensor, + axis: Optional[int] = None, + dtype: Optional[int] = None, + exclusive: Optional[bool] = None, +): + """Numpy style cumprod op. Return the cumulative sum of the elements along a given axis. + + Parameters + ---------- + data : tvm.te.Tensor + The input data to the operator. + + axis : int, optional + Axis along which the cumulative product is computed. The default (None) is to compute + the cumsum over the flattened array. + + dtype : string, optional + Type of the returned array and of the accumulator in which the elements are summed. + If dtype is not specified, it defaults to the dtype of data. + + exclusive : bool, optional + If True, will return exclusive sum in which the first element is not + included. In other terms, if True, the j-th output element would be + the product of the first (j-1) elements. Otherwise, it would be the product of + the first j elements. + + Returns + ------- + result : tvm.te.Tensor + The result has the same size as data, and the same shape as data if axis is not None. + If axis is None, the result is a 1-d array. 
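The cumbinop wrapper above follows the NumPy axis convention: axis=None flattens the input before scanning, otherwise the scan runs along the given axis. A quick NumPy illustration of the two behaviors (sketch only, for review):

    import numpy as np

    x = np.arange(1, 7).reshape(2, 3)

    np.cumsum(x)           # axis=None flattens first -> [1, 3, 6, 10, 15, 21]
    np.cumsum(x, axis=1)   # scan along rows          -> [[1, 3, 6], [4, 9, 15]]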
+ """ + return cumbinop( + data=data, binop=tvm.tir.generic.multiply, axis=axis, dtype=dtype, exclusive=exclusive + ) From cdedbb352da26edf3e5516be719bb482a81c2196 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 01:39:04 -0700 Subject: [PATCH 05/17] add attributes of cumprod node --- include/tvm/relay/attrs/transform.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index ff344f5e1a85..a0abf906b4e0 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -452,6 +452,20 @@ struct CumsumAttrs : public tvm::AttrsNode { } }; +/*! \brief Attributes used in cumprod operator */ +struct CumprodAttrs : public tvm::AttrsNode { + Integer axis; + DataType dtype; + Integer exclusive; + TVM_DECLARE_ATTRS(CumprodAttrs, "relay.attrs.CumprodAttrs") { + TVM_ATTR_FIELD(axis).describe("The axis to multiply over").set_default(NullValue()); + TVM_ATTR_FIELD(dtype).describe("Output data type").set_default(NullValue()); + TVM_ATTR_FIELD(exclusive) + .describe("The first element is not included") + .set_default(NullValue()); + } +}; + /*! \brief Attributes used in unique operator */ struct UniqueAttrs : public tvm::AttrsNode { bool sorted; From 0de8e21a1a7fbd840d7df807e4a5520f362e4dd0 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 01:43:59 -0700 Subject: [PATCH 06/17] add cumprod strategy --- python/tvm/relay/op/_transform.py | 19 +++++++++++++---- python/tvm/relay/op/strategy/generic.py | 27 ++++++++++++++++++------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index e90263d794bc..57f87421ed96 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -19,16 +19,17 @@ # pylint: disable=too-many-local-variables, too-many-arguments, no-else-return from __future__ import absolute_import + import tvm -from tvm import te -from tvm.te.hybrid import script +from tvm import te, topi from tvm.runtime import convert -from tvm import topi +from tvm.te.hybrid import script from tvm.topi.utils import get_const_int, get_const_tuple + from . import op as _reg from . 
import strategy -from .op import OpPattern from ._tensor import elemwise_shape_func +from .op import OpPattern _reg.register_broadcast_schedule("broadcast_to") _reg.register_broadcast_schedule("broadcast_to_like") @@ -159,6 +160,16 @@ def compute_cumsum(attrs, inputs, output_type): _reg.register_strategy("cumsum", strategy.cumsum_strategy) _reg.register_shape_func("cumsum", False, elemwise_shape_func) +# cumprod +@_reg.register_compute("cumprod") +def compute_cumprod(attrs, inputs, output_type): + """Compute definition of cumprod""" + return [topi.cumsum(inputs[0], attrs.axis, attrs.dtype, attrs.exclusive)] + + +_reg.register_strategy("cumprod", strategy.cumprod_strategy) +_reg.register_shape_func("cumprod", False, elemwise_shape_func) + @_reg.register_compute("unique") def compute_unique(attrs, inputs, output_type): diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index 04f25640574a..1d973ac41980 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -17,11 +17,12 @@ """Definition of generic operator strategy.""" # pylint: disable=invalid-name,unused-argument import logging - import re -from tvm import topi, _ffi, te, ir -from tvm.topi.utils import get_const_int, get_const_float, get_const_tuple, get_float_tuple + +from tvm import _ffi, ir, te, topi from tvm.target import generic_func, override_native_generic_func +from tvm.topi.utils import get_const_float, get_const_int, get_const_tuple, get_float_tuple + from .. import op as _op logger = logging.getLogger("strategy") @@ -1463,13 +1464,13 @@ def threefry_split_strategy(attrs, inputs, out_type, target): return strategy -def wrap_compute_cumsum(topi_compute): +def wrap_compute_cumbinop(topi_compute): """Wrap cumsum topi compute""" - def _compute_cumsum(attrs, inputs, _): + def _compute_cumbinop(attrs, inputs, _): return [topi_compute(inputs[0], attrs.axis, attrs.dtype, attrs.exclusive)] - return _compute_cumsum + return _compute_cumbinop @override_native_generic_func("cumsum_strategy") @@ -1477,13 +1478,25 @@ def cumsum_strategy(attrs, inputs, out_type, target): """cumsum generic strategy""" strategy = _op.OpStrategy() strategy.add_implementation( - wrap_compute_cumsum(topi.cumsum), + wrap_compute_cumbinop(topi.cumsum), wrap_topi_schedule(topi.generic.schedule_extern), name="cumsum.generic", ) return strategy +@override_native_generic_func("cumprod_strategy") +def cumprod_strategy(attrs, inputs, out_type, target): + """cumprod generic strategy""" + strategy = _op.OpStrategy() + strategy.add_implementation( + wrap_compute_cumbinop(topi.cumprod), + wrap_topi_schedule(topi.generic.schedule_extern), + name="cumprod.generic", + ) + return strategy + + def wrap_compute_unique(topi_compute): """Wrap unique topi compute""" From 4b48ec94c053cba3b1253526bdaff5bdefb82a3c Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 01:49:04 -0700 Subject: [PATCH 07/17] add cuda strategy --- python/tvm/relay/op/strategy/cuda.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index e0d0f165219e..0da2d831c510 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ b/python/tvm/relay/op/strategy/cuda.py @@ -18,11 +18,12 @@ # pylint: disable=invalid-name,unused-argument,wildcard-import,unused-wildcard-import from tvm import topi from tvm.auto_scheduler import is_auto_scheduler_enabled -from tvm.te import SpecializedCondition from tvm.contrib 
import nvcc from tvm.contrib.thrust import can_use_thrust -from .generic import * +from tvm.te import SpecializedCondition + from .. import op as _op +from .generic import * @schedule_injective.register(["cuda", "gpu"]) @@ -1017,13 +1018,25 @@ def cumsum_strategy_cuda(attrs, inputs, out_type, target): """cumsum cuda strategy""" strategy = _op.OpStrategy() strategy.add_implementation( - wrap_compute_cumsum(topi.cuda.cumsum), + wrap_compute_cumbinop(topi.cuda.cumsum), wrap_topi_schedule(topi.cuda.schedule_scan), name="cumsum.cuda", ) return strategy +@cumprod_strategy.register(["cuda", "gpu"]) +def cumprod_strategy_cuda(attrs, inputs, out_type, target): + """cumprod cuda strategy""" + strategy = _op.OpStrategy() + strategy.add_implementation( + wrap_compute_cumbinop(topi.cuda.cumprod), + wrap_topi_schedule(topi.cuda.schedule_scan), + name="cumprod.cuda", + ) + return strategy + + @unique_strategy.register(["cuda", "gpu"]) def unique_strategy_cuda(attrs, inputs, out_type, target): """unique cuda strategy""" From 7b3e372f5ebfb4bc4ee0398fc809fc2ae39ab48f Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 10:28:08 -0700 Subject: [PATCH 08/17] python relay node construction --- python/tvm/relay/op/transform.py | 59 ++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index df0ae767460a..189017f6c255 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py @@ -18,11 +18,11 @@ # pylint: disable=import-outside-toplevel """Transform operators.""" +from ...tir import expr as _expr +from ..expr import Constant, Expr, Tuple, TupleWrapper, const from . import _make from .dyn import _make as _dyn_make from .tensor import shape_of -from ..expr import TupleWrapper, const, Constant, Expr, Tuple -from ...tir import expr as _expr def cast(data, dtype): @@ -1577,6 +1577,61 @@ def cumsum(data, axis=None, dtype=None, exclusive=None): return _make.cumsum(data, axis, dtype, exclusive) +def cumprod(data, axis=None, dtype=None, exclusive=None): + """Numpy style cumprod op. Return the cumulative inclusive product of the elements along + a given axis. + + Parameters + ---------- + data : relay.Expr + The input data to the operator. + + axis : int, optional + Axis along which the cumulative product is computed. The default (None) is to compute + the cumprod over the flattened array. + + dtype : string, optional + Type of the returned array and of the accumulator in which the elements are multiplied. + If dtype is not specified, it defaults to the dtype of data. + + exclusive : int, optional + If set to 1 will return exclusive product in which the first element is not + included. In other terms, if set to 1, the j-th output element would be + the product of the first (j-1) elements. Otherwise, it would be the product of + the first j elements. The product of zero elements will be 1. + + Returns + ------- + result : relay.Expr + The result has the same size as data, and the same shape as data if axis is not None. + If axis is None, the result is a 1-d array. + + Examples + -------- + .. code-block:: python + a = [[1,2,3], [4,5,6]] + + cumprod(a) # if axis is not provided, cumprod is done over the flattened input. + -> [ 1, 2, 6, 24, 120, 720] + + cumprod(a, dtype="float32") + -> [ 1., 2., 6., 24., 120., 720.] 
+ + cumprod(a, axis=0) # multiply over rows for each of the 3 columns + -> [[1, 2, 3], + [4, 10, 18]] + + cumprod(a, axis=1) + -> [[ 1, 2, 6], + [ 4, 20, 120]] + + a = [1, 1, 1, 0, 1, 1, 0] # a is a boolean array + cumprod(a, dtype=int32) # dtype should be provided to get the expected results + -> [1, 1, 1, 0, 0, 0, 0] + """ + return _make.cumprod(data, axis, dtype, exclusive) + + def unique(data, is_sorted=True, return_counts=False): """ Find the unique elements of a 1-D tensor. Please note `output` and `counts` are all padded to From 5062a3c8374a90bbdff57cb7ec864a292ac9fea9 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 13:14:58 -0700 Subject: [PATCH 09/17] change attrs to be reusuable --- include/tvm/relay/attrs/transform.h | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index a0abf906b4e0..d70f289001d4 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -438,27 +438,13 @@ struct MatrixSetDiagAttrs : public tvm::AttrsNode { } }; // struct MatrixSetDiagAttrs -/*! \brief Attributes used in cumsum operator */ -struct CumsumAttrs : public tvm::AttrsNode { +/*! \brief Attributes used in cumsum and cumprod operator */ +struct CumbinopAttrs : public tvm::AttrsNode { Integer axis; DataType dtype; Integer exclusive; - TVM_DECLARE_ATTRS(CumsumAttrs, "relay.attrs.CumsumAttrs") { - TVM_ATTR_FIELD(axis).describe("The axis to sum over").set_default(NullValue()); - TVM_ATTR_FIELD(dtype).describe("Output data type").set_default(NullValue()); - TVM_ATTR_FIELD(exclusive) - .describe("The first element is not included") - .set_default(NullValue()); - } -}; - -/*! \brief Attributes used in cumprod operator */ -struct CumprodAttrs : public tvm::AttrsNode { - Integer axis; - DataType dtype; - Integer exclusive; - TVM_DECLARE_ATTRS(CumprodAttrs, "relay.attrs.CumprodAttrs") { - TVM_ATTR_FIELD(axis).describe("The axis to multiply over").set_default(NullValue()); + TVM_DECLARE_ATTRS(CumbinopAttrs, "relay.attrs.CumbinopAttrs") { + TVM_ATTR_FIELD(axis).describe("The axis to operate over").set_default(NullValue()); TVM_ATTR_FIELD(dtype).describe("Output data type").set_default(NullValue()); TVM_ATTR_FIELD(exclusive) .describe("The first element is not included") From 67d9441b80179677edd96a4155355b0d082486ec Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 13:19:27 -0700 Subject: [PATCH 10/17] add cumprod nodes --- src/relay/op/tensor/transform.cc | 34 +++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index b65068bd0506..2e6f76929eb3 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -3772,20 +3772,20 @@ RELAY_REGISTER_OP("adv_index") .set_attr("TOpPattern", kInjective) .set_attr("FTVMCompute", AdvIndexCompute); -TVM_REGISTER_NODE_TYPE(CumsumAttrs); +TVM_REGISTER_NODE_TYPE(CumbinopAttrs); -bool CumsumRel(const Array& types, int num_inputs, const Attrs& attrs, - const TypeReporter& reporter) { +bool CumbinopRel(const Array& types, int num_inputs, const Attrs& attrs, + const TypeReporter& reporter) { // types: [data, output] ICHECK_EQ(types.size(), 2) << "Expects two types, one for the input and another for the output"; const auto* data = types[0].as(); if (data == nullptr) { ICHECK(types[0].as()) - << "cumsum: expect input type to be TensorType but get " << types[0]; + << 
"cumbinop: expect input type to be TensorType but get " << types[0]; return false; } - const auto* param = attrs.as(); + const auto* param = attrs.as(); auto dtype = param->dtype; if (dtype.is_void()) { @@ -3806,7 +3806,7 @@ bool CumsumRel(const Array& types, int num_inputs, const Attrs& attrs, } Expr MakeCumsum(Expr data, Integer axis, DataType dtype, Integer exclusive) { - auto attrs = make_object(); + auto attrs = make_object(); attrs->dtype = dtype; attrs->axis = axis; attrs->exclusive = exclusive; @@ -3822,7 +3822,27 @@ RELAY_REGISTER_OP("cumsum") .set_num_inputs(1) .add_argument("data", "Tensor", "The input tensor.") .set_support_level(3) - .add_type_rel("Cumsum", CumsumRel) + .add_type_rel("Cumsum", CumbinopRel) + .set_attr("TOpPattern", kOpaque); + +Expr MakeCumprod(Expr data, Integer axis, DataType dtype, Integer exclusive) { + auto attrs = make_object(); + attrs->dtype = dtype; + attrs->axis = axis; + attrs->exclusive = exclusive; + static const Op& op = Op::Get("cumprod"); + return Call(op, {data}, Attrs(attrs), {}); +} + +TVM_REGISTER_GLOBAL("relay.op._make.cumprod").set_body_typed(MakeCumprod); + +RELAY_REGISTER_OP("cumprod") + .describe( + R"doc(Return the cumulative product of the elements along a given axis.)doc" TVM_ADD_FILELINE) + .set_num_inputs(1) + .add_argument("data", "Tensor", "The input tensor.") + .set_support_level(3) + .add_type_rel("Cumprod", CumbinopRel) .set_attr("TOpPattern", kOpaque); TVM_REGISTER_NODE_TYPE(UniqueAttrs); From f75d64cd3d02e35149ab3f6a9310484fca79a39b Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 14:18:22 -0700 Subject: [PATCH 11/17] complete tests --- tests/python/relay/test_op_level3.py | 62 +++++++++++++++++++--------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index d2a5090943c3..2e411f3f273c 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -16,16 +16,16 @@ # under the License. """ Support level3 operator test cases. """ +from typing import Callable, Optional + import numpy as np import pytest import tvm -from tvm import te -from tvm import relay +import tvm.testing +from tvm import relay, te from tvm.error import TVMError from tvm.relay import create_executor, transform from tvm.relay.testing import check_grad, run_infer_type -from typing import Optional -import tvm.testing def test_zeros_ones(): @@ -1758,12 +1758,26 @@ def verify_adv_index(data_shape, index_shapes): verify_adv_index((10, 5, 15), [(1, 2, 1), (1, 2, 7)]) -@tvm.testing.parametrize_targets -def test_cumsum(target, ctx): - def verify_cumsum(data_np, np_out, axis=None, out_dtype=None, rtol=1e-5, atol=1e-5): +# Helper for testing binop functions +cumbinops_supported = {"cumsum": relay.op.cumsum, "cumprod": relay.op.cumprod} + + +def run_binop_tests(target, ctx, binop_type: str, gt_func: Callable[..., np.array]): + def assert_relay_cumbinop( + data_np: np.array, + np_out: np.array, + axis: int = None, + out_dtype: str = None, + rtol: float = 1e-5, + atol: float = 1e-5, + ): inp = relay.var("data", relay.TensorType(data_np.shape, str(data_np.dtype))) - out = relay.op.cumsum(inp, axis, out_dtype) + if binop_type not in cumbinops_supported.keys(): + raise ValueError( + f"Unknown function {binop_type}. 
Options: {cumbinops_supported.keys()}" + ) + out = cumbinops_supported[binop_type](inp, axis, out_dtype) func = relay.Function([inp], out) for kind in ["graph", "debug"]: @@ -1772,24 +1786,34 @@ def verify_cumsum(data_np, np_out, axis=None, out_dtype=None, rtol=1e-5, atol=1e tvm.testing.assert_allclose(op_res.asnumpy(), np_out, rtol=rtol, atol=atol) data = np.array([2, 3, 0]) - verify_cumsum(data, np.cumsum(data)) - verify_cumsum(data, np.cumsum(data), out_dtype="int64") + assert_relay_cumbinop(data, gt_func(data)) + assert_relay_cumbinop(data, gt_func(data), out_dtype="int64") data = np.random.randn(10, 10) - verify_cumsum(data, np.cumsum(data)) - verify_cumsum(data, np.cumsum(data, axis=0), axis=0) - verify_cumsum(data, np.cumsum(data, axis=1), axis=1) + assert_relay_cumbinop(data, gt_func(data)) + assert_relay_cumbinop(data, gt_func(data, axis=0), axis=0) + assert_relay_cumbinop(data, gt_func(data, axis=1), axis=1) data = np.random.randn(10, 5, 10).astype("float32") - verify_cumsum(data, np.cumsum(data), rtol=1e-4, atol=1e-4) - verify_cumsum(data, np.cumsum(data, axis=0), axis=0, rtol=1e-4, atol=1e-4) - verify_cumsum(data, np.cumsum(data, axis=1), axis=1, rtol=1e-4, atol=1e-4) - verify_cumsum(data, np.cumsum(data, axis=-1), axis=-1, rtol=1e-4, atol=1e-4) + assert_relay_cumbinop(data, gt_func(data), rtol=1e-4, atol=1e-4) + assert_relay_cumbinop(data, gt_func(data, axis=0), axis=0, rtol=1e-4, atol=1e-4) + assert_relay_cumbinop(data, gt_func(data, axis=1), axis=1, rtol=1e-4, atol=1e-4) + assert_relay_cumbinop(data, gt_func(data, axis=-1), axis=-1, rtol=1e-4, atol=1e-4) data = np.random.rand(10) > 0.5 data = data.astype(np.int32) - verify_cumsum(data, np.cumsum(data, dtype=np.int32)) - verify_cumsum(data, np.cumsum(data, dtype="int64"), out_dtype="int64") + assert_relay_cumbinop(data, gt_func(data, dtype=np.int32)) + assert_relay_cumbinop(data, gt_func(data, dtype="int64"), out_dtype="int64") + + +@tvm.testing.parametrize_targets +def test_cumsum(target, ctx): + run_binop_tests(target, ctx, binop_type="cumsum", gt_func=np.cumsum) + + +@tvm.testing.parametrize_targets +def test_cumprod(target, ctx): + run_binop_tests(target, ctx, binop_type="cumprod", gt_func=np.cumproduct) @tvm.testing.parametrize_targets From b3290bde1ac9a1b3506255b11b510e3f38fdd485 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 14:43:12 -0700 Subject: [PATCH 12/17] Fix some typos about sum --> prod typos fix sum -> prod more typos more typo fixes more typos add doc strings --- python/tvm/relay/op/_transform.py | 4 +- python/tvm/relay/op/strategy/generic.py | 2 +- python/tvm/topi/cuda/scan.py | 57 +++++++++++++++++++++---- python/tvm/topi/cumprod.py | 8 ++-- python/tvm/topi/cumsum.py | 43 ++++++++++++++++++- 5 files changed, 97 insertions(+), 17 deletions(-) diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index 57f87421ed96..fe2b2f91a2dd 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -162,9 +162,9 @@ def compute_cumsum(attrs, inputs, output_type): # cumprod @_reg.register_compute("cumprod") -def compute_cumprod(attrs, inputs, output_type): +def compute_cumprod(attrs, inputs, output_type) """Compute definition of cumprod""" - return [topi.cumsum(inputs[0], attrs.axis, attrs.dtype, attrs.exclusive)] + return [topi.cumprod(inputs[0], attrs.axis, attrs.dtype, attrs.exclusive)] _reg.register_strategy("cumprod", strategy.cumprod_strategy) diff --git a/python/tvm/relay/op/strategy/generic.py 
b/python/tvm/relay/op/strategy/generic.py index 1d973ac41980..3864d33a7c31 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -1465,7 +1465,7 @@ def threefry_split_strategy(attrs, inputs, out_type, target): def wrap_compute_cumbinop(topi_compute): - """Wrap cumsum topi compute""" + """Wrap cumbinop style topi compute""" def _compute_cumbinop(attrs, inputs, _): return [topi_compute(inputs[0], attrs.axis, attrs.dtype, attrs.exclusive)] diff --git a/python/tvm/topi/cuda/scan.py b/python/tvm/topi/cuda/scan.py index 76f19e2ef939..928c6257a916 100644 --- a/python/tvm/topi/cuda/scan.py +++ b/python/tvm/topi/cuda/scan.py @@ -352,9 +352,13 @@ def do_scan(data, output_dtype): target = tvm.target.Target.current() # TODO: add support for a prod_scan - if target and binop == tvm.generic.add and ( - can_use_thrust(target, "tvm.contrib.thrust.sum_scan") - or can_use_rocthrust(target, "tvm.contrib.thrust.sum_scan") + if ( + target + and binop == tvm.generic.add + and ( + can_use_thrust(target, "tvm.contrib.thrust.sum_scan") + or can_use_rocthrust(target, "tvm.contrib.thrust.sum_scan") + ) ): return scan_thrust( data, output_dtype, exclusive=True, return_reduction=return_reduction, binop=binop @@ -498,7 +502,44 @@ def cumbinop( dtype: Optional[str] = None, exclusive: Optional[bool] = None, ) -> tvm.te.Tensor: - """TODO""" + """Cumulative binary operator with similar axis behavior as np.cumsum and np.cumprod. + + See cumprod and cumsum for an example of use. + + E.g. if * is your binary operator and the input tensor is [1, 2, 3, 4] the output may be + [1, 1 * 2, 1 * 2 * 3, 1 * 2 * 3 * 4] + + Parameters + ---------- + data : tvm.te.Tensor + The input data to the operator. + + binop: Callable (tvm.Expr, tvm.Expr) -> tvm.Expr + A binary operator which should be associative and commutative. E.g. if * is your + operator then a * (b * c) = (a * b) * c and a * b = b * a + + axis : int, optional + Axis along which the operation is computed. The default (None) is to compute + the cumulative operation over the flattened array. + + dtype : string, optional + Type of the returned array and of the accumulator in which the elements are computed. + If dtype is not specified, it defaults to the dtype of data. + + exclusive : int, optional + If set to 1 will return exclusive cumulative operation in which the first element is not + included. In other terms, if set to 1, the j-th output element would be + the cumulative operation of the first (j-1) elements. Otherwise, it would be the + cumulative operation of the first j elements. + + TODO: what happens to the identity element? + + Returns + ------- + result : tvm.te.Tensor + The result has the same size as data, and the same shape as data if axis is not None. + If axis is None, the result is a 1-d array. + """ if axis is None: axis = 0 data = reshape(data, (prod(data.shape),)) @@ -552,7 +593,7 @@ def cumprod( dtype: Optional[int] = None, exclusive: Optional[bool] = None, ): - """Numpy style cumprod op. Return the cumulative sum of the elements along a given axis. + """Numpy style cumprod op. Return the cumulative product of the elements along a given axis. Parameters ---------- @@ -561,14 +602,14 @@ def cumprod( axis : int, optional Axis along which the cumulative product is computed. The default (None) is to compute - the cumsum over the flattened array. + the cumproduct over the flattened array. dtype : string, optional - Type of the returned array and of the accumulator in which the elements are summed. 
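The docstring requirement above that binop be associative (and commutative) is load-bearing: a parallel scan regroups operands freely, so a non-associative op silently produces wrong results. A minimal illustration of why subtraction, for instance, cannot be a scan binop:

    # Addition regroups safely; subtraction does not.
    a, b, c = 5, 3, 2
    assert (a + b) + c == a + (b + c)
    assert (a - b) - c != a - (b - c)   # 0 != 4, so "-" is not a valid binop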
+ Type of the returned array and of the accumulator in which the elements are multiplied. If dtype is not specified, it defaults to the dtype of data. exclusive : bool, optional - If True, will return exclusive sum in which the first element is not + If True, will return exclusive product in which the first element is not included. In other terms, if True, the j-th output element would be the product of the first (j-1) elements. Otherwise, it would be the product of the first j elements. diff --git a/python/tvm/topi/cumprod.py b/python/tvm/topi/cumprod.py index 3245184fbe61..93aa033ac621 100644 --- a/python/tvm/topi/cumprod.py +++ b/python/tvm/topi/cumprod.py @@ -30,7 +30,7 @@ def cumprod( dtype: Optional[int] = None, exclusive: Optional[bool] = None, ): - """Numpy style cumprod op. Return the cumulative sum of the elements along a given axis. + """Numpy style cumprod op. Return the cumulative product of the elements along a given axis. Parameters ---------- @@ -39,14 +39,14 @@ def cumprod( axis : int, optional Axis along which the cumulative product is computed. The default (None) is to compute - the cumsum over the flattened array. + the cumprod over the flattened array. dtype : string, optional - Type of the returned array and of the accumulator in which the elements are summed. + Type of the returned array and of the accumulator in which the elements are multiplied. If dtype is not specified, it defaults to the dtype of data. exclusive : bool, optional - If True, will return exclusive sum in which the first element is not + If True, will return exclusive product in which the first element is not included. In other terms, if True, the j-th output element would be the product of the first (j-1) elements. Otherwise, it would be the product of the first j elements. diff --git a/python/tvm/topi/cumsum.py b/python/tvm/topi/cumsum.py index e5e7a7036ae1..355e1510a4f3 100644 --- a/python/tvm/topi/cumsum.py +++ b/python/tvm/topi/cumsum.py @@ -35,8 +35,47 @@ def cumbinop( dtype: Optional[str] = None, exclusive: Optional[bool] = None, ) -> tvm.te.Tensor: - """ - TODO + """Cumulative binary operator with similar axis behavior as np.cumsum and np.cumprod. + + See cumprod and cumsum for an example of use. + + E.g. if * is your binary operator and the input tensor is [1, 2, 3, 4] the output may be + [1, 1 * 2, 1 * 2 * 3, 1 * 2 * 3 * 4] + + Parameters + ---------- + data : tvm.te.Tensor + The input data to the operator. + + binop: Callable (tvm.Expr, tvm.Expr) -> tvm.Expr + A binary operator which should be associative and commutative. E.g. if * is your + operator then a * (b * c) = (a * b) * c and a * b = b * a + + identity_value: tvm.Expr + A value for the binary operation which provides the identity property. E.g. if * is + your operator and i is the identity_value then a * i = a for all a in the domain of + your operation. + + axis : int, optional + Axis along which the operation is computed. The default (None) is to compute + the cumulative operation over the flattened array. + + dtype : string, optional + Type of the returned array and of the accumulator in which the elements are computed. + If dtype is not specified, it defaults to the dtype of data. + + exclusive : int, optional + If set to 1 will return exclusive cumulative operation in which the first element is not + included. In other terms, if set to 1, the j-th output element would be + the cumulative operation of the first (j-1) elements. Otherwise, it would be the + cumulative operation of the first j elements. 
The cumulative operation of zero elements + is assumed to be the identity_value. + + Returns + ------- + result : tvm.te.Tensor + The result has the same size as data, and the same shape as data if axis is not None. + If axis is None, the result is a 1-d array. """ if dtype is None or dtype == "": dtype = data.dtype From 6f944d1454f93c0a3ed56399a11f4009466b7f76 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Mon, 22 Mar 2021 16:44:04 -0700 Subject: [PATCH 13/17] Use Bool instead of int to represent exclusive make exclusive a bool up and down stack fix x fix bool err it is a bool now fix fix thing formatting to pass linter lint python cumprod pylint fix attribute fix ordering add exclusivity tests for end to end fix things cuda identity_value --- include/tvm/relay/attrs/transform.h | 6 +- python/tvm/relay/op/_transform.py | 2 +- python/tvm/topi/cuda/scan.py | 90 +++++++++++++++++++++------- python/tvm/topi/cumprod.py | 2 +- python/tvm/topi/cumsum.py | 30 +++++----- src/relay/op/tensor/transform.cc | 4 +- tests/python/relay/test_op_level3.py | 25 ++++++-- 7 files changed, 111 insertions(+), 48 deletions(-) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index d70f289001d4..4827579d2211 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -442,13 +442,15 @@ struct MatrixSetDiagAttrs : public tvm::AttrsNode { struct CumbinopAttrs : public tvm::AttrsNode { Integer axis; DataType dtype; - Integer exclusive; + Bool exclusive = Bool(false); TVM_DECLARE_ATTRS(CumbinopAttrs, "relay.attrs.CumbinopAttrs") { TVM_ATTR_FIELD(axis).describe("The axis to operate over").set_default(NullValue()); TVM_ATTR_FIELD(dtype).describe("Output data type").set_default(NullValue()); + + // Default is 0 which is "false" TVM_ATTR_FIELD(exclusive) .describe("The first element is not included") - .set_default(NullValue()); + .set_default(Bool(false)); } }; diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index fe2b2f91a2dd..16262833d1bf 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -162,7 +162,7 @@ def compute_cumsum(attrs, inputs, output_type): # cumprod @_reg.register_compute("cumprod") -def compute_cumprod(attrs, inputs, output_type) +def compute_cumprod(attrs, inputs, output_type): """Compute definition of cumprod""" return [topi.cumprod(inputs[0], attrs.axis, attrs.dtype, attrs.exclusive)] diff --git a/python/tvm/topi/cuda/scan.py b/python/tvm/topi/cuda/scan.py index 928c6257a916..b6452417ba79 100644 --- a/python/tvm/topi/cuda/scan.py +++ b/python/tvm/topi/cuda/scan.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name, too-many-locals, too-many-statements "Scan related operators" -from typing import Callable, Optional +from typing import Callable, Optional, Union import tvm from tvm import te @@ -35,7 +35,7 @@ def _get_thrust_func_name(tvmop): return tvmop_to_thrust_func_name[tvmop] -def exclusive_scan_ir(data, output, reduction=None, binop=tvm.tir.generic.add): +def exclusive_scan_ir(data, output, reduction=None, binop=tvm.tir.generic.add, identity_value=0): """Low level IR to do exclusive sum scan along rows of 2D input. Parameters @@ -53,6 +53,11 @@ def exclusive_scan_ir(data, output, reduction=None, binop=tvm.tir.generic.add): A binary associative op to use for scan. The function takes two TIR expressions and produce a new TIR expression. By default it uses tvm.tir.generic.add to compute prefix sum. 
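Threading identity_value through the IR is the crux of this patch: the hunk just below replaces the hard-coded cast(0, out_dtype) seed with cast(identity_value, out_dtype), since 0 is only the identity for add. With multiply, a zero seed would propagate through every slot of an exclusive scan. In NumPy terms (a sketch):

    import numpy as np

    x = np.array([2, 3, 4])

    # Correct exclusive product, seeded with the multiplicative identity 1:
    np.concatenate(([1], np.cumprod(x)[:-1]))   # -> [1, 2, 6]

    # Seeding with 0 instead would collapse the result to [0, 0, 0].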
+ + identity_value: int or float + A value for the binary operation which provides the identity property. E.g. if * is + your operator and i is the identity_value then a * i = a for all a in the domain of + your operation. """ batch_size = prod(data.shape[:-1]) @@ -137,7 +142,7 @@ def exclusive_scan_ir(data, output, reduction=None, binop=tvm.tir.generic.add): with ib.if_scope(bx < batch_size): if reduction is not None: reduction[bx] = output[(bx + 1) * scan_axis_size - 1] - output[(bx + 1) * scan_axis_size - 1] = cast(0, out_dtype) + output[(bx + 1) * scan_axis_size - 1] = cast(identity_value, out_dtype) with ib.for_range(0, lim, dtype="int64") as l2_width: width = 2 << (lim - l2_width - 1) @@ -312,7 +317,12 @@ def scan_thrust( def exclusive_scan( - data, axis=-1, return_reduction=False, output_dtype=None, binop=tvm.tir.generic.add + data, + axis=-1, + return_reduction=False, + output_dtype=None, + binop=tvm.tir.generic.add, + identity_value=0, ): """Do exclusive scan on 1D or multidimensional input. @@ -338,6 +348,11 @@ def exclusive_scan( and produce a new TIR expression. By default it uses tvm.tir.generic.add to compute prefix sum. + identity_value: int or float + A value for the binary operation which provides the identity property. E.g. if * is + your operator and i is the identity_value then a * i = a for all a in the domain of + your operation. + Returns ------- output : tvm.te.Tensor @@ -354,7 +369,7 @@ def do_scan(data, output_dtype): # TODO: add support for a prod_scan if ( target - and binop == tvm.generic.add + and binop == tvm.tir.generic.add and ( can_use_thrust(target, "tvm.contrib.thrust.sum_scan") or can_use_rocthrust(target, "tvm.contrib.thrust.sum_scan") @@ -375,7 +390,9 @@ def do_scan(data, output_dtype): output, reduction = te.extern( [data.shape, data.shape[:-1]], [data], - lambda ins, outs: exclusive_scan_ir(ins[0], outs[0], outs[1], binop=binop), + lambda ins, outs: exclusive_scan_ir( + ins[0], outs[0], outs[1], binop=binop, identity_value=identity_value + ), dtype=[data.dtype, output_dtype], in_buffers=[data_buf], name="exclusive_scan", @@ -385,7 +402,9 @@ def do_scan(data, output_dtype): output = te.extern( [data.shape], [data], - lambda ins, outs: exclusive_scan_ir(ins[0], outs[0], binop=binop), + lambda ins, outs: exclusive_scan_ir( + ins[0], outs[0], binop=binop, identity_value=identity_value + ), dtype=[output_dtype], in_buffers=[data_buf], out_buffers=[output_buf], @@ -432,7 +451,7 @@ def do_scan(data, output_dtype): return output -def inclusive_scan(data, axis=-1, output_dtype=None, binop=tvm.tir.generic.add): +def inclusive_scan(data, axis=-1, output_dtype=None, binop=tvm.tir.generic.add, identity_value=0): """Do inclusive scan on 1D or multidimensional input. Parameters @@ -451,12 +470,19 @@ def inclusive_scan(data, axis=-1, output_dtype=None, binop=tvm.tir.generic.add): and produce a new TIR expression. By default it uses tvm.tir.generic.add to compute prefix sum. + identity_value: int or float + A value for the binary operation which provides the identity property. E.g. if * is + your operator and i is the identity_value then a * i = a for all a in the domain of + your operation. + Returns ------- output : tvm.te.Tensor A N-D tensor of the same rank N as the input data. 
""" - ex_scan = exclusive_scan(data, axis, output_dtype=output_dtype, binop=binop) + ex_scan = exclusive_scan( + data, axis, output_dtype=output_dtype, binop=binop, identity_value=identity_value + ) if output_dtype is not None and data.dtype != output_dtype and output_dtype != "": data = cast(data, output_dtype) @@ -498,26 +524,32 @@ def traverse(op): def cumbinop( data: tvm.te.Tensor, binop: Callable[["tvm.Expr", "tvm.Expr"], "tvm.Expr"], + identity_value: Union[float, int], axis: Optional[int] = None, dtype: Optional[str] = None, exclusive: Optional[bool] = None, ) -> tvm.te.Tensor: """Cumulative binary operator with similar axis behavior as np.cumsum and np.cumprod. - - See cumprod and cumsum for an example of use. - - E.g. if * is your binary operator and the input tensor is [1, 2, 3, 4] the output may be + + See cumprod and cumsum for an example of use. + + E.g. if * is your binary operator and the input tensor is [1, 2, 3, 4] the output may be [1, 1 * 2, 1 * 2 * 3, 1 * 2 * 3 * 4] - + Parameters ---------- data : tvm.te.Tensor The input data to the operator. binop: Callable (tvm.Expr, tvm.Expr) -> tvm.Expr - A binary operator which should be associative and commutative. E.g. if * is your + A binary operator which should be associative and commutative. E.g. if * is your operator then a * (b * c) = (a * b) * c and a * b = b * a + identity_value: int or float + A value for the binary operation which provides the identity property. E.g. if * is + your operator and i is the identity_value then a * i = a for all a in the domain of + your operation. + axis : int, optional Axis along which the operation is computed. The default (None) is to compute the cumulative operation over the flattened array. @@ -529,11 +561,9 @@ def cumbinop( exclusive : int, optional If set to 1 will return exclusive cumulative operation in which the first element is not included. In other terms, if set to 1, the j-th output element would be - the cumulative operation of the first (j-1) elements. Otherwise, it would be the + the cumulative operation of the first (j-1) elements. Otherwise, it would be the cumulative operation of the first j elements. - TODO: what happens to the identity element? - Returns ------- result : tvm.te.Tensor @@ -544,9 +574,13 @@ def cumbinop( axis = 0 data = reshape(data, (prod(data.shape),)) axis = get_const_int(axis) - if exclusive is not None and exclusive != 0: - return exclusive_scan(data, axis, output_dtype=dtype, binop=binop) - return inclusive_scan(data, axis, output_dtype=dtype, binop=binop) + if exclusive is not None and exclusive: + return exclusive_scan( + data, axis, output_dtype=dtype, binop=binop, identity_value=identity_value + ) + return inclusive_scan( + data, axis, output_dtype=dtype, binop=binop, identity_value=identity_value + ) def cumsum( @@ -583,7 +617,12 @@ def cumsum( If axis is None, the result is a 1-d array. """ return cumbinop( - data=data, binop=tvm.tir.generic.add, axis=axis, dtype=dtype, exclusive=exclusive + data=data, + binop=tvm.tir.generic.add, + identity_value=0, + axis=axis, + dtype=dtype, + exclusive=exclusive, ) @@ -621,5 +660,10 @@ def cumprod( If axis is None, the result is a 1-d array. 
""" return cumbinop( - data=data, binop=tvm.tir.generic.multiply, axis=axis, dtype=dtype, exclusive=exclusive + data=data, + binop=tvm.tir.generic.multiply, + identity_value=1, + axis=axis, + dtype=dtype, + exclusive=exclusive, ) diff --git a/python/tvm/topi/cumprod.py b/python/tvm/topi/cumprod.py index 93aa033ac621..6108212c8a51 100644 --- a/python/tvm/topi/cumprod.py +++ b/python/tvm/topi/cumprod.py @@ -16,7 +16,7 @@ # under the License. # pylint: disable=invalid-name """Cumprod operator""" -from typing import Callable, Optional +from typing import Optional import tvm diff --git a/python/tvm/topi/cumsum.py b/python/tvm/topi/cumsum.py index 355e1510a4f3..709155ed79bc 100644 --- a/python/tvm/topi/cumsum.py +++ b/python/tvm/topi/cumsum.py @@ -36,24 +36,24 @@ def cumbinop( exclusive: Optional[bool] = None, ) -> tvm.te.Tensor: """Cumulative binary operator with similar axis behavior as np.cumsum and np.cumprod. - - See cumprod and cumsum for an example of use. - - E.g. if * is your binary operator and the input tensor is [1, 2, 3, 4] the output may be + + See cumprod and cumsum for an example of use. + + E.g. if * is your binary operator and the input tensor is [1, 2, 3, 4] the output may be [1, 1 * 2, 1 * 2 * 3, 1 * 2 * 3 * 4] - + Parameters ---------- data : tvm.te.Tensor The input data to the operator. binop: Callable (tvm.Expr, tvm.Expr) -> tvm.Expr - A binary operator which should be associative and commutative. E.g. if * is your + A binary operator which should be associative and commutative. E.g. if * is your operator then a * (b * c) = (a * b) * c and a * b = b * a identity_value: tvm.Expr - A value for the binary operation which provides the identity property. E.g. if * is - your operator and i is the identity_value then a * i = a for all a in the domain of + A value for the binary operation which provides the identity property. E.g. if * is + your operator and i is the identity_value then a * i = a for all a in the domain of your operation. axis : int, optional @@ -67,7 +67,7 @@ def cumbinop( exclusive : int, optional If set to 1 will return exclusive cumulative operation in which the first element is not included. In other terms, if set to 1, the j-th output element would be - the cumulative operation of the first (j-1) elements. Otherwise, it would be the + the cumulative operation of the first (j-1) elements. Otherwise, it would be the cumulative operation of the first j elements. The cumulative operation of zero elements is assumed to be the identity_value. 
@@ -80,6 +80,9 @@ def cumbinop( if dtype is None or dtype == "": dtype = data.dtype + if exclusive is None: + exclusive = False + def maybe_cast(x): if dtype != data.dtype: return cast(x, dtype)
@@ -108,9 +111,6 @@ def maybe_cast(x): elif i > axis: axis_mul_after *= value - if exclusive is None: - exclusive = False - def gen_ir(data_buf, out_buf): ib = ir_builder.create() data_buf = ib.buffer_ptr(data_buf)
@@ -128,10 +128,10 @@ def gen_ir(data_buf, out_buf): k = _k + 1 cur_idx = base_idx + k * axis_mul_after prev_idx = base_idx + (k - 1) * axis_mul_after - if exclusive == 0: - out_buf[cur_idx] = binop(out_buf[prev_idx], maybe_cast(data_buf[cur_idx])) - else: + if exclusive: out_buf[cur_idx] = binop(out_buf[prev_idx], maybe_cast(data_buf[prev_idx])) + else: + out_buf[cur_idx] = binop(out_buf[prev_idx], maybe_cast(data_buf[cur_idx])) return ib.get()
diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index 2e6f76929eb3..96078da72a12 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc
@@ -3805,7 +3805,7 @@ bool CumbinopRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, return true; } -Expr MakeCumsum(Expr data, Integer axis, DataType dtype, Integer exclusive) { +Expr MakeCumsum(Expr data, Integer axis, DataType dtype, Bool exclusive) { auto attrs = make_object<CumbinopAttrs>(); attrs->dtype = dtype; attrs->axis = axis;
@@ -3825,7 +3825,7 @@ RELAY_REGISTER_OP("cumsum") .add_type_rel("Cumsum", CumbinopRel) .set_attr<TOpPattern>("TOpPattern", kOpaque); -Expr MakeCumprod(Expr data, Integer axis, DataType dtype, Integer exclusive) { +Expr MakeCumprod(Expr data, Integer axis, DataType dtype, Bool exclusive) { auto attrs = make_object<CumbinopAttrs>(); attrs->dtype = dtype; attrs->axis = axis;
diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 2e411f3f273c..f94e3ac86cb2 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py
@@ -1762,7 +1762,9 @@ def verify_adv_index(data_shape, index_shapes): cumbinops_supported = {"cumsum": relay.op.cumsum, "cumprod": relay.op.cumprod} -def run_binop_tests(target, ctx, binop_type: str, gt_func: Callable[..., np.array]): +def run_binop_tests( + target, ctx, binop_type: str, gt_func: Callable[..., np.array], identity_value: int +): def assert_relay_cumbinop( data_np: np.array, np_out: np.array,
@@ -1770,6 +1772,7 @@ def assert_relay_cumbinop( out_dtype: str = None, rtol: float = 1e-5, atol: float = 1e-5, + exclusive: bool = False, ): inp = relay.var("data", relay.TensorType(data_np.shape, str(data_np.dtype)))
@@ -1777,7 +1780,7 @@ def assert_relay_cumbinop( raise ValueError( f"Unknown function {binop_type}.
Options: {cumbinops_supported.keys()}" ) - out = cumbinops_supported[binop_type](inp, axis, out_dtype) + out = cumbinops_supported[binop_type](inp, axis, out_dtype, exclusive=exclusive) func = relay.Function([inp], out) for kind in ["graph", "debug"]:
@@ -1805,15 +1808,29 @@ def assert_relay_cumbinop( assert_relay_cumbinop(data, gt_func(data, dtype=np.int32)) assert_relay_cumbinop(data, gt_func(data, dtype="int64"), out_dtype="int64") + # Test exclusivity operations + data = np.random.randint(-100, 100, size=(2, 2)).astype("int64") + expected_result = np.roll(gt_func(data), 1) + expected_result[0] = identity_value + assert_relay_cumbinop(data, expected_result, exclusive=True) + + expected_result = np.roll(gt_func(data, axis=0), 1, axis=0) + expected_result[0, :] = identity_value + assert_relay_cumbinop(data, expected_result, exclusive=True, axis=0) + + expected_result = np.roll(gt_func(data, axis=1), 1, axis=1) + expected_result[:, 0] = identity_value + assert_relay_cumbinop(data, expected_result, exclusive=True, axis=1) + @tvm.testing.parametrize_targets def test_cumsum(target, ctx): - run_binop_tests(target, ctx, binop_type="cumsum", gt_func=np.cumsum) + run_binop_tests(target, ctx, binop_type="cumsum", gt_func=np.cumsum, identity_value=0) @tvm.testing.parametrize_targets def test_cumprod(target, ctx): - run_binop_tests(target, ctx, binop_type="cumprod", gt_func=np.cumproduct) + run_binop_tests(target, ctx, binop_type="cumprod", gt_func=np.cumprod, identity_value=1) @tvm.testing.parametrize_targets
From 43df67ed121e8026ce6f0a2ba355087402958613 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Tue, 23 Mar 2021 14:14:27 -0700 Subject: [PATCH 14/17] Overall improve formatting, add doc message corrections simplify construction clang-format more tests undo simpler construction due to function passing stuff fix docs more exclusive doc changes more fixins --- python/tvm/relay/op/transform.py | 12 ++++++------ python/tvm/topi/cuda/scan.py | 12 ++++++------ python/tvm/topi/cumsum.py | 6 +++--- tests/python/relay/test_op_level3.py | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index 189017f6c255..f94a00db2fb1 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py
@@ -1539,9 +1539,9 @@ def cumsum(data, axis=None, dtype=None, exclusive=None): Type of the returned array and of the accumulator in which the elements are summed. If dtype is not specified, it defaults to the dtype of data. - exclusive : int, optional - If set to 1 will return exclusive sum in which the first element is not - included. In other terms, if set to 1, the j-th output element would be + exclusive : bool, optional + If true will return exclusive sum in which the first element is not + included. In other terms, if true, the j-th output element would be the sum of the first (j-1) elements. Otherwise, it would be the sum of the first j elements.
@@ -1594,9 +1594,9 @@ def cumprod(data, axis=None, dtype=None, exclusive=None): Type of the returned array and of the accumulator in which the elements are multiplied. If dtype is not specified, it defaults to the dtype of data. - exclusive : int, optional - If set to 1 will return exclusive product in which the first element is not - included. In other terms, if set to 1, the j-th output element would be + exclusive : bool, optional + If true will return exclusive product in which the first element is not + included.
In other terms, if true, the j-th output element would be the product of the first (j-1) elements. Otherwise, it would be the product of the first j elements. The product of zero elements will be 1. diff --git a/python/tvm/topi/cuda/scan.py b/python/tvm/topi/cuda/scan.py index b6452417ba79..3741c478430e 100644 --- a/python/tvm/topi/cuda/scan.py +++ b/python/tvm/topi/cuda/scan.py @@ -558,9 +558,9 @@ def cumbinop( Type of the returned array and of the accumulator in which the elements are computed. If dtype is not specified, it defaults to the dtype of data. - exclusive : int, optional - If set to 1 will return exclusive cumulative operation in which the first element is not - included. In other terms, if set to 1, the j-th output element would be + exclusive : bool, optional + If true will return exclusive cumulative operation in which the first element is not + included. In other terms, if true, the j-th output element would be the cumulative operation of the first (j-1) elements. Otherwise, it would be the cumulative operation of the first j elements. @@ -604,9 +604,9 @@ def cumsum( Type of the returned array and of the accumulator in which the elements are summed. If dtype is not specified, it defaults to the dtype of data. - exclusive : int, optional - If set to 1 will return exclusive sum in which the first element is not - included. In other terms, if set to 1, the j-th output element would be + exclusive : bool, optional + If true will return exclusive sum in which the first element is not + included. In other terms, if true, the j-th output element would be the sum of the first (j-1) elements. Otherwise, it would be the sum of the first j elements. diff --git a/python/tvm/topi/cumsum.py b/python/tvm/topi/cumsum.py index 709155ed79bc..7136d11c919f 100644 --- a/python/tvm/topi/cumsum.py +++ b/python/tvm/topi/cumsum.py @@ -64,9 +64,9 @@ def cumbinop( Type of the returned array and of the accumulator in which the elements are computed. If dtype is not specified, it defaults to the dtype of data. - exclusive : int, optional - If set to 1 will return exclusive cumulative operation in which the first element is not - included. In other terms, if set to 1, the j-th output element would be + exclusive : bool, optional + If True will return exclusive cumulative operation in which the first element is not + included. In other terms, if True, the j-th output element would be the cumulative operation of the first (j-1) elements. Otherwise, it would be the cumulative operation of the first j elements. The cumulative operation of zero elements is assumed to be the identity_value. 
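For reference, a short sketch of how the relay-level ops described above are invoked now that exclusive is a bool (graph construction only; executor plumbing is omitted and the variable names are illustrative):

    from tvm import relay

    x = relay.var("x", shape=(4,), dtype="int32")
    # Inclusive cumsum of [a0, a1, a2, a3] is [a0, a0+a1, a0+a1+a2, a0+a1+a2+a3];
    # the exclusive variant shifts right and starts from the identity, 0 for a sum.
    inclusive = relay.op.cumsum(x, axis=0)
    exclusive = relay.op.cumsum(x, axis=0, exclusive=True)
    func = relay.Function([x], relay.Tuple([inclusive, exclusive]))

The same pattern applies to relay.op.cumprod, whose exclusive form starts from 1.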
diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index f94e3ac86cb2..15bdd7ebab02 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -1809,7 +1809,7 @@ def assert_relay_cumbinop( assert_relay_cumbinop(data, gt_func(data, dtype="int64"), out_dtype="int64") # Test exclusivity operations - data = np.random.randint(-100, 100, size=(2, 2)).astype("int64") + data = np.random.randint(-100, 100, size=(10, 10)).astype("int64") expected_result = np.roll(gt_func(data), 1) expected_result[0] = identity_value assert_relay_cumbinop(data, expected_result, exclusive=True) From 8ffff272336d550a021cc369f706b64603dc4dc6 Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Wed, 24 Mar 2021 10:49:42 -0700 Subject: [PATCH 15/17] merge cumsum and cumprod to scan, merge tests fix stuff --- python/tvm/relay/op/strategy/generic.py | 12 +- python/tvm/topi/__init__.py | 3 +- python/tvm/topi/cuda/scan.py | 8 +- python/tvm/topi/cumprod.py | 68 --------- python/tvm/topi/{cumsum.py => scan.py} | 52 ++++++- python/tvm/topi/unique.py | 2 +- tests/python/relay/test_op_level3.py | 40 +++-- tests/python/topi/python/test_topi_cumprod.py | 108 ------------- tests/python/topi/python/test_topi_cumsum.py | 108 ------------- tests/python/topi/python/test_topi_scan.py | 144 ++++++++++++++++++ 10 files changed, 223 insertions(+), 322 deletions(-) delete mode 100644 python/tvm/topi/cumprod.py rename python/tvm/topi/{cumsum.py => scan.py} (80%) delete mode 100644 tests/python/topi/python/test_topi_cumprod.py delete mode 100644 tests/python/topi/python/test_topi_cumsum.py create mode 100644 tests/python/topi/python/test_topi_scan.py diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index 3864d33a7c31..322a3607904f 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -1464,13 +1464,13 @@ def threefry_split_strategy(attrs, inputs, out_type, target): return strategy -def wrap_compute_cumbinop(topi_compute): - """Wrap cumbinop style topi compute""" +def wrap_compute_scanop(topi_compute): + """Wrap scanop style topi compute""" - def _compute_cumbinop(attrs, inputs, _): + def _compute_scanop(attrs, inputs, _): return [topi_compute(inputs[0], attrs.axis, attrs.dtype, attrs.exclusive)] - return _compute_cumbinop + return _compute_scanop @override_native_generic_func("cumsum_strategy") @@ -1478,7 +1478,7 @@ def cumsum_strategy(attrs, inputs, out_type, target): """cumsum generic strategy""" strategy = _op.OpStrategy() strategy.add_implementation( - wrap_compute_cumbinop(topi.cumsum), + wrap_compute_scanop(topi.cumsum), wrap_topi_schedule(topi.generic.schedule_extern), name="cumsum.generic", ) @@ -1490,7 +1490,7 @@ def cumprod_strategy(attrs, inputs, out_type, target): """cumprod generic strategy""" strategy = _op.OpStrategy() strategy.add_implementation( - wrap_compute_cumbinop(topi.cumprod), + wrap_compute_scanop(topi.cumprod), wrap_topi_schedule(topi.generic.schedule_extern), name="cumprod.generic", ) diff --git a/python/tvm/topi/__init__.py b/python/tvm/topi/__init__.py index 16a109f908be..90383ccf3d5b 100644 --- a/python/tvm/topi/__init__.py +++ b/python/tvm/topi/__init__.py @@ -42,8 +42,7 @@ from .sparse_reshape import * from .scatter_add import * from .argwhere import * -from .cumsum import * -from .cumprod import * +from .scan import * from .einsum import * from .unique import * from . 
import generic diff --git a/python/tvm/topi/cuda/scan.py b/python/tvm/topi/cuda/scan.py index 3741c478430e..3240ebcd515c 100644 --- a/python/tvm/topi/cuda/scan.py +++ b/python/tvm/topi/cuda/scan.py @@ -521,7 +521,7 @@ def traverse(op): return s -def cumbinop( +def scanop( data: tvm.te.Tensor, binop: Callable[["tvm.Expr", "tvm.Expr"], "tvm.Expr"], identity_value: Union[float, int], @@ -529,7 +529,7 @@ def cumbinop( dtype: Optional[str] = None, exclusive: Optional[bool] = None, ) -> tvm.te.Tensor: - """Cumulative binary operator with similar axis behavior as np.cumsum and np.cumprod. + """Cumulative binary operator (scan) with similar axis behavior as np.cumsum and np.cumprod. See cumprod and cumsum for an example of use. @@ -616,7 +616,7 @@ def cumsum( The result has the same size as data, and the same shape as data if axis is not None. If axis is None, the result is a 1-d array. """ - return cumbinop( + return scanop( data=data, binop=tvm.tir.generic.add, identity_value=0, @@ -659,7 +659,7 @@ def cumprod( The result has the same size as data, and the same shape as data if axis is not None. If axis is None, the result is a 1-d array. """ - return cumbinop( + return scanop( data=data, binop=tvm.tir.generic.multiply, identity_value=1, diff --git a/python/tvm/topi/cumprod.py b/python/tvm/topi/cumprod.py deleted file mode 100644 index 6108212c8a51..000000000000 --- a/python/tvm/topi/cumprod.py +++ /dev/null @@ -1,68 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name -"""Cumprod operator""" -from typing import Optional - -import tvm - -from ..tir import generic -from .cumsum import cumbinop - - -def cumprod( - data: tvm.te.Tensor, - axis: Optional[int] = None, - dtype: Optional[int] = None, - exclusive: Optional[bool] = None, -): - """Numpy style cumprod op. Return the cumulative product of the elements along a given axis. - - Parameters - ---------- - data : tvm.te.Tensor - The input data to the operator. - - axis : int, optional - Axis along which the cumulative product is computed. The default (None) is to compute - the cumprod over the flattened array. - - dtype : string, optional - Type of the returned array and of the accumulator in which the elements are multiplied. - If dtype is not specified, it defaults to the dtype of data. - - exclusive : bool, optional - If True, will return exclusive product in which the first element is not - included. In other terms, if True, the j-th output element would be - the product of the first (j-1) elements. Otherwise, it would be the product of - the first j elements. - - Returns - ------- - result : tvm.te.Tensor - The result has the same size as data, and the same shape as data if axis is not None. - If axis is None, the result is a 1-d array. 
- """ - return cumbinop( - data=data, - binop=generic.multiply, - identity_value=1, - op_name="cumprod_generic", - axis=axis, - dtype=dtype, - exclusive=exclusive, - ) diff --git a/python/tvm/topi/cumsum.py b/python/tvm/topi/scan.py similarity index 80% rename from python/tvm/topi/cumsum.py rename to python/tvm/topi/scan.py index 7136d11c919f..f5796730f762 100644 --- a/python/tvm/topi/cumsum.py +++ b/python/tvm/topi/scan.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. # pylint: disable=invalid-name -"""Cumsum operator""" +"""Scan (cumulative binary) operators""" from typing import Callable, Optional import tvm @@ -26,7 +26,7 @@ from .utils import get_const_int, prod -def cumbinop( +def scanop( data: tvm.te.Tensor, binop: Callable[["tvm.Expr", "tvm.Expr"], "tvm.Expr"], identity_value: "tvm.Expr", @@ -35,7 +35,7 @@ def cumbinop( dtype: Optional[str] = None, exclusive: Optional[bool] = None, ) -> tvm.te.Tensor: - """Cumulative binary operator with similar axis behavior as np.cumsum and np.cumprod. + """Cumulative binary operator (scan) with similar axis behavior as np.cumsum and np.cumprod. See cumprod and cumsum for an example of use. @@ -181,7 +181,7 @@ def cumsum( The result has the same size as data, and the same shape as data if axis is not None. If axis is None, the result is a 1-d array. """ - return cumbinop( + return scanop( data=data, binop=generic.add, identity_value=0, @@ -190,3 +190,47 @@ def cumsum( dtype=dtype, exclusive=exclusive, ) + + +def cumprod( + data: tvm.te.Tensor, + axis: Optional[int] = None, + dtype: Optional[int] = None, + exclusive: Optional[bool] = None, +) -> tvm.te.Tensor: + """Numpy style cumprod op. Return the cumulative product of the elements along a given axis. + + Parameters + ---------- + data : tvm.te.Tensor + The input data to the operator. + + axis : int, optional + Axis along which the cumulative product is computed. The default (None) is to compute + the cumproduct over the flattened array. + + dtype : string, optional + Type of the returned array and of the accumulator in which the elements are multiplied. + If dtype is not specified, it defaults to the dtype of data. + + exclusive : bool, optional + If True, will return exclusive product in which the first element is not + included. In other terms, if True, the j-th output element would be + the product of the first (j-1) elements. Otherwise, it would be the product of + the first j elements. + + Returns + ------- + result : tvm.te.Tensor + The result has the same size as data, and the same shape as data if axis is not None. + If axis is None, the result is a 1-d array. 
+ """ + return scanop( + data=data, + binop=generic.multiply, + identity_value=1, + op_name="cumprod_generic", + axis=axis, + dtype=dtype, + exclusive=exclusive, + ) diff --git a/python/tvm/topi/unique.py b/python/tvm/topi/unique.py index b4f27b38f65f..e7256551d7b6 100644 --- a/python/tvm/topi/unique.py +++ b/python/tvm/topi/unique.py @@ -18,7 +18,7 @@ """Unique operator""" from tvm import te, tir from ..te import hybrid -from .cumsum import cumsum +from .scan import cumsum from .sort import sort, argsort diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 15bdd7ebab02..7e443aa62ee8 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -1759,13 +1759,13 @@ def verify_adv_index(data_shape, index_shapes): # Helper for testing binop functions -cumbinops_supported = {"cumsum": relay.op.cumsum, "cumprod": relay.op.cumprod} +scanops_supported = {"cumsum": relay.op.cumsum, "cumprod": relay.op.cumprod} def run_binop_tests( target, ctx, binop_type: str, gt_func: Callable[..., np.array], identity_value: int ): - def assert_relay_cumbinop( + def assert_relay_scanop( data_np: np.array, np_out: np.array, axis: int = None, @@ -1776,11 +1776,9 @@ def assert_relay_cumbinop( ): inp = relay.var("data", relay.TensorType(data_np.shape, str(data_np.dtype))) - if binop_type not in cumbinops_supported.keys(): - raise ValueError( - f"Unknown function {binop_type}. Options: {cumbinops_supported.keys()}" - ) - out = cumbinops_supported[binop_type](inp, axis, out_dtype, exclusive=exclusive) + if binop_type not in scanops_supported.keys(): + raise ValueError(f"Unknown function {binop_type}. Options: {scanops_supported.keys()}") + out = scanops_supported[binop_type](inp, axis, out_dtype, exclusive=exclusive) func = relay.Function([inp], out) for kind in ["graph", "debug"]: @@ -1789,38 +1787,38 @@ def assert_relay_cumbinop( tvm.testing.assert_allclose(op_res.asnumpy(), np_out, rtol=rtol, atol=atol) data = np.array([2, 3, 0]) - assert_relay_cumbinop(data, gt_func(data)) - assert_relay_cumbinop(data, gt_func(data), out_dtype="int64") + assert_relay_scanop(data, gt_func(data)) + assert_relay_scanop(data, gt_func(data), out_dtype="int64") data = np.random.randn(10, 10) - assert_relay_cumbinop(data, gt_func(data)) - assert_relay_cumbinop(data, gt_func(data, axis=0), axis=0) - assert_relay_cumbinop(data, gt_func(data, axis=1), axis=1) + assert_relay_scanop(data, gt_func(data)) + assert_relay_scanop(data, gt_func(data, axis=0), axis=0) + assert_relay_scanop(data, gt_func(data, axis=1), axis=1) data = np.random.randn(10, 5, 10).astype("float32") - assert_relay_cumbinop(data, gt_func(data), rtol=1e-4, atol=1e-4) - assert_relay_cumbinop(data, gt_func(data, axis=0), axis=0, rtol=1e-4, atol=1e-4) - assert_relay_cumbinop(data, gt_func(data, axis=1), axis=1, rtol=1e-4, atol=1e-4) - assert_relay_cumbinop(data, gt_func(data, axis=-1), axis=-1, rtol=1e-4, atol=1e-4) + assert_relay_scanop(data, gt_func(data), rtol=1e-4, atol=1e-4) + assert_relay_scanop(data, gt_func(data, axis=0), axis=0, rtol=1e-4, atol=1e-4) + assert_relay_scanop(data, gt_func(data, axis=1), axis=1, rtol=1e-4, atol=1e-4) + assert_relay_scanop(data, gt_func(data, axis=-1), axis=-1, rtol=1e-4, atol=1e-4) data = np.random.rand(10) > 0.5 data = data.astype(np.int32) - assert_relay_cumbinop(data, gt_func(data, dtype=np.int32)) - assert_relay_cumbinop(data, gt_func(data, dtype="int64"), out_dtype="int64") + assert_relay_scanop(data, gt_func(data, dtype=np.int32)) + assert_relay_scanop(data, 
gt_func(data, dtype="int64"), out_dtype="int64") # Test exclusivity operations data = np.random.randint(-100, 100, size=(10, 10)).astype("int64") expected_result = np.roll(gt_func(data), 1) expected_result[0] = identity_value - assert_relay_cumbinop(data, expected_result, exclusive=True) + assert_relay_scanop(data, expected_result, exclusive=True) expected_result = np.roll(gt_func(data, axis=0), 1, axis=0) expected_result[0, :] = identity_value - assert_relay_cumbinop(data, expected_result, exclusive=True, axis=0) + assert_relay_scanop(data, expected_result, exclusive=True, axis=0) expected_result = np.roll(gt_func(data, axis=1), 1, axis=1) expected_result[:, 0] = identity_value - assert_relay_cumbinop(data, expected_result, exclusive=True, axis=1) + assert_relay_scanop(data, expected_result, exclusive=True, axis=1) @tvm.testing.parametrize_targets diff --git a/tests/python/topi/python/test_topi_cumprod.py b/tests/python/topi/python/test_topi_cumprod.py deleted file mode 100644 index 0c75308ddcea..000000000000 --- a/tests/python/topi/python/test_topi_cumprod.py +++ /dev/null @@ -1,108 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-import numpy as np -import tvm -import tvm.testing -import tvm.topi.testing -from tvm import topi - - -@tvm.testing.parametrize_targets -def test_cumprod(ctx, target): - def check_cumprod(np_ref, data, axis=None, dtype=None, exclusive=False): - implementations = { - "generic": ( - lambda x: topi.cumprod(x, axis, dtype, exclusive=exclusive), - topi.generic.schedule_extern, - ), - "cuda": ( - lambda x: topi.cuda.cumprod(x, axis, dtype, exclusive=exclusive), - topi.cuda.schedule_scan, - ), - "nvptx": ( - lambda x: topi.cuda.cumprod(x, axis, dtype, exclusive=exclusive), - topi.cuda.schedule_scan, - ), - "vulkan": ( - lambda x: topi.cuda.cumprod(x, axis, dtype, exclusive=exclusive), - topi.cuda.schedule_scan, - ), - "metal": ( - lambda x: topi.cuda.cumprod(x, axis, dtype, exclusive=exclusive), - topi.cuda.schedule_scan, - ), - } - fcompute, fschedule = tvm.topi.testing.dispatch(target, implementations) - tvm.topi.testing.compare_numpy_tvm([data], np_ref, target, ctx, fcompute, fschedule) - - data = np.array([2, 3, 0]) - check_cumprod(np.cumprod(data), data) - - data = np.random.rand(10) > 0.5 - data = data.astype(np.int32) - check_cumprod(np.cumprod(data, dtype=np.int32), data) - check_cumprod(np.cumprod(data), data, dtype="int64") - - data = np.random.rand(10) > 0.5 - check_cumprod(np.cumprod(data, dtype=np.int32), data, dtype="int32") - - for in_dtype in ["float32", "float64"]: - if target == "metal" and in_dtype == "float64": - # float64 is not supported in metal - continue - data = np.random.randn(10, 10).astype(in_dtype) - check_cumprod(np.cumprod(data), data) - check_cumprod(np.cumprod(data, axis=0), data, axis=0) - check_cumprod(np.cumprod(data, axis=1), data, axis=1) - - data = np.random.randn(10, 5, 10).astype(in_dtype) - check_cumprod(np.cumprod(data), data) - check_cumprod(np.cumprod(data, axis=0), data, axis=0) - check_cumprod(np.cumprod(data, axis=1), data, axis=1) - check_cumprod(np.cumprod(data, axis=-1), data, axis=-1) - - for in_dtype in ["int32", "int64"]: - data = np.random.randint(-100, 100, size=(100, 100)).astype(in_dtype) - check_cumprod(np.cumprod(data, dtype=in_dtype), data) - check_cumprod(np.cumprod(data), data, dtype="int64") - check_cumprod(np.cumprod(data, axis=0, dtype=in_dtype), data, axis=0) - check_cumprod(np.cumprod(data, axis=1, dtype=in_dtype), data, axis=1) - - data = np.random.randint(1 << 30, (1 << 31) - 1, size=(100)).astype(in_dtype) - check_cumprod(np.cumprod(data), data, dtype="int64") - - data = np.random.randint(-100, 100, size=(100, 100)).astype("int64") - - expected_result = np.roll(np.cumprod(data), 1) - expected_result[0] = 1 - check_cumprod(expected_result, data, dtype="int64", exclusive=True) - - expected_result = np.roll(np.cumprod(data, axis=0, dtype=in_dtype), 1, axis=0) - expected_result[0, :] = 1 - check_cumprod(expected_result, data, axis=0, exclusive=True) - - expected_result = np.roll(np.cumprod(data, axis=1, dtype=in_dtype), 1, axis=1) - expected_result[:, 0] = 1 - check_cumprod(np.cumprod(data, axis=1, dtype=in_dtype), data, axis=1) - - -if __name__ == "__main__": - test_cumprod(tvm.context("cpu"), tvm.target.Target("llvm")) - test_cumprod(tvm.context("cuda"), tvm.target.Target("cuda")) - test_cumprod(tvm.context("nvptx"), tvm.target.Target("nvptx")) - test_cumprod(tvm.context("vulkan"), tvm.target.Target("vulkan")) - test_cumprod(tvm.context("metal"), tvm.target.Target("metal")) diff --git a/tests/python/topi/python/test_topi_cumsum.py b/tests/python/topi/python/test_topi_cumsum.py deleted file mode 100644 index 
0591609b6f3f..000000000000 --- a/tests/python/topi/python/test_topi_cumsum.py +++ /dev/null @@ -1,108 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -import numpy as np -import tvm -import tvm.testing -import tvm.topi.testing -from tvm import topi - - -@tvm.testing.parametrize_targets -def test_cumsum(ctx, target): - def check_cumsum(np_ref, data, axis=None, dtype=None, exclusive=False): - implementations = { - "generic": ( - lambda x: topi.cumsum(x, axis, dtype, exclusive=exclusive), - topi.generic.schedule_extern, - ), - "cuda": ( - lambda x: topi.cuda.cumsum(x, axis, dtype, exclusive=exclusive), - topi.cuda.schedule_scan, - ), - "nvptx": ( - lambda x: topi.cuda.cumsum(x, axis, dtype, exclusive=exclusive), - topi.cuda.schedule_scan, - ), - "vulkan": ( - lambda x: topi.cuda.cumsum(x, axis, dtype, exclusive=exclusive), - topi.cuda.schedule_scan, - ), - "metal": ( - lambda x: topi.cuda.cumsum(x, axis, dtype, exclusive=exclusive), - topi.cuda.schedule_scan, - ), - } - fcompute, fschedule = tvm.topi.testing.dispatch(target, implementations) - tvm.topi.testing.compare_numpy_tvm([data], np_ref, target, ctx, fcompute, fschedule) - - data = np.array([2, 3, 0]) - check_cumsum(np.cumsum(data), data) - - data = np.random.rand(10) > 0.5 - data = data.astype(np.int32) - check_cumsum(np.cumsum(data, dtype=np.int32), data) - check_cumsum(np.cumsum(data), data, dtype="int64") - - data = np.random.rand(10) > 0.5 - check_cumsum(np.cumsum(data, dtype=np.int32), data, dtype="int32") - - for in_dtype in ["float32", "float64"]: - if target == "metal" and in_dtype == "float64": - # float64 is not supported in metal - continue - data = np.random.randn(10, 10).astype(in_dtype) - check_cumsum(np.cumsum(data), data) - check_cumsum(np.cumsum(data, axis=0), data, axis=0) - check_cumsum(np.cumsum(data, axis=1), data, axis=1) - - data = np.random.randn(10, 5, 10).astype(in_dtype) - check_cumsum(np.cumsum(data), data) - check_cumsum(np.cumsum(data, axis=0), data, axis=0) - check_cumsum(np.cumsum(data, axis=1), data, axis=1) - check_cumsum(np.cumsum(data, axis=-1), data, axis=-1) - - for in_dtype in ["int32", "int64"]: - data = np.random.randint(-100, 100, size=(100, 100)).astype(in_dtype) - check_cumsum(np.cumsum(data, dtype=in_dtype), data) - check_cumsum(np.cumsum(data), data, dtype="int64") - check_cumsum(np.cumsum(data, axis=0, dtype=in_dtype), data, axis=0) - check_cumsum(np.cumsum(data, axis=1, dtype=in_dtype), data, axis=1) - - data = np.random.randint(1 << 30, (1 << 31) - 1, size=(100)).astype(in_dtype) - check_cumsum(np.cumsum(data), data, dtype="int64") - - data = np.random.randint(-100, 100, size=(100, 100)).astype("int64") - - expected_result = np.roll(np.cumsum(data), 1) - expected_result[0] = 0 - check_cumsum(expected_result, data, dtype="int64", exclusive=True) - 
- expected_result = np.roll(np.cumsum(data, axis=0, dtype=in_dtype), 1, axis=0) - expected_result[0, :] = 0 - check_cumsum(expected_result, data, axis=0, exclusive=True) - - expected_result = np.roll(np.cumsum(data, axis=1, dtype=in_dtype), 1, axis=1) - expected_result[:, 0] = 0 - check_cumsum(np.cumsum(data, axis=1, dtype=in_dtype), data, axis=1) - - -if __name__ == "__main__": - test_cumsum(tvm.context("cpu"), tvm.target.Target("llvm")) - test_cumsum(tvm.context("cuda"), tvm.target.Target("cuda")) - test_cumsum(tvm.context("nvptx"), tvm.target.Target("nvptx")) - test_cumsum(tvm.context("vulkan"), tvm.target.Target("vulkan")) - test_cumsum(tvm.context("metal"), tvm.target.Target("metal")) diff --git a/tests/python/topi/python/test_topi_scan.py b/tests/python/topi/python/test_topi_scan.py new file mode 100644 index 000000000000..020fde51659f --- /dev/null +++ b/tests/python/topi/python/test_topi_scan.py @@ -0,0 +1,144 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from typing import Callable + +import numpy as np +import tvm +import tvm.testing +import tvm.topi.testing +from tvm import topi + +topi_funcs = { + "cumsum": {"generic": topi.cumsum, "cuda": topi.cuda.cumsum}, + "cumprod": {"generic": topi.cumprod, "cuda": topi.cuda.cumprod}, +} + +identity_value = {"cumsum": 0, "cumprod": 1} + + +def get_implementations(name, axis, dtype, exclusive): + topi_func_generic = topi_funcs[name]["generic"] + topi_func_cuda = topi_funcs[name]["cuda"] + + return { + "generic": ( + lambda x: topi_func_generic(x, axis, dtype, exclusive=exclusive), + topi.generic.schedule_extern, + ), + "cuda": ( + lambda x: topi_func_cuda(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "nvptx": ( + lambda x: topi_func_cuda(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "vulkan": ( + lambda x: topi_func_cuda(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + "metal": ( + lambda x: topi_func_cuda(x, axis, dtype, exclusive=exclusive), + topi.cuda.schedule_scan, + ), + } + + +def _run_tests( + ctx, + target, + op_name: str = "cumsum", + gt_func: Callable[..., np.array] = np.cumsum, +): + def check_scan(np_ref, data, axis=None, dtype=None, exclusive=False): + implementations = get_implementations(op_name, axis, dtype, exclusive) + fcompute, fschedule = tvm.topi.testing.dispatch(target, implementations) + tvm.topi.testing.compare_numpy_tvm([data], np_ref, target, ctx, fcompute, fschedule) + + data = np.array([2, 3, 0]) + check_scan(gt_func(data), data) + + data = np.random.rand(10) > 0.5 + data = data.astype(np.int32) + check_scan(gt_func(data, dtype=np.int32), data) + check_scan(gt_func(data), data, dtype="int64") + + data = np.random.rand(10) > 0.5 + check_scan(gt_func(data, dtype=np.int32), data, dtype="int32") + + for 
in_dtype in ["float32", "float64"]: + if target == "metal" and in_dtype == "float64": + # float64 is not supported in metal + continue + data = np.random.randn(10, 10).astype(in_dtype) + check_scan(gt_func(data), data) + check_scan(gt_func(data, axis=0), data, axis=0) + check_scan(gt_func(data, axis=1), data, axis=1) + + data = np.random.randn(10, 5, 10).astype(in_dtype) + check_scan(gt_func(data), data) + check_scan(gt_func(data, axis=0), data, axis=0) + check_scan(gt_func(data, axis=1), data, axis=1) + check_scan(gt_func(data, axis=-1), data, axis=-1) + + for in_dtype in ["int32", "int64"]: + data = np.random.randint(-100, 100, size=(100, 100)).astype(in_dtype) + check_scan(gt_func(data, dtype=in_dtype), data) + check_scan(gt_func(data), data, dtype="int64") + check_scan(gt_func(data, axis=0, dtype=in_dtype), data, axis=0) + check_scan(gt_func(data, axis=1, dtype=in_dtype), data, axis=1) + + data = np.random.randint(1 << 30, (1 << 31) - 1, size=(100)).astype(in_dtype) + check_scan(gt_func(data), data, dtype="int64") + + data = np.random.randint(-100, 100, size=(100, 100)).astype("int64") + + expected_result = np.roll(gt_func(data), 1) + expected_result[0] = identity_value[op_name] + check_scan(expected_result, data, dtype="int64", exclusive=True) + + expected_result = np.roll(gt_func(data, axis=0, dtype=in_dtype), 1, axis=0) + expected_result[0, :] = identity_value[op_name] + check_scan(expected_result, data, axis=0, exclusive=True) + + expected_result = np.roll(gt_func(data, axis=1, dtype=in_dtype), 1, axis=1) + expected_result[:, 0] = identity_value[op_name] + check_scan(expected_result, data, axis=1, exclusive=True) + + +@tvm.testing.parametrize_targets +def test_cumsum(ctx, target): + _run_tests(ctx, target, op_name="cumsum", gt_func=np.cumsum) + + +@tvm.testing.parametrize_targets +def test_cumprod(ctx, target): + _run_tests(ctx, target, op_name="cumprod", gt_func=np.cumprod) + + +if __name__ == "__main__": + test_cumsum(tvm.context("cpu"), tvm.target.Target("llvm")) + test_cumsum(tvm.context("cuda"), tvm.target.Target("cuda")) + test_cumsum(tvm.context("nvptx"), tvm.target.Target("nvptx")) + test_cumsum(tvm.context("vulkan"), tvm.target.Target("vulkan")) + test_cumsum(tvm.context("metal"), tvm.target.Target("metal")) + + test_cumprod(tvm.context("cpu"), tvm.target.Target("llvm")) + test_cumprod(tvm.context("cuda"), tvm.target.Target("cuda")) + test_cumprod(tvm.context("nvptx"), tvm.target.Target("nvptx")) + test_cumprod(tvm.context("vulkan"), tvm.target.Target("vulkan")) + test_cumprod(tvm.context("metal"), tvm.target.Target("metal"))
From b9fb77950abcff36d3e74e2df9fe6acc80a6febf Mon Sep 17 00:00:00 2001 From: Andrew Zhao Luo Date: Wed, 24 Mar 2021 11:43:25 -0700 Subject: [PATCH 16/17] remove other mentions of cumbinop -> scanop --- include/tvm/relay/attrs/transform.h | 4 ++-- python/tvm/relay/op/strategy/cuda.py | 4 ++-- src/relay/op/tensor/transform.cc | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index 4827579d2211..a5544c8a8799 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h
@@ -439,11 +439,11 @@ struct MatrixSetDiagAttrs : public tvm::AttrsNode<MatrixSetDiagAttrs> { }; // struct MatrixSetDiagAttrs /*!
\brief Attributes used in cumsum and cumprod operator */ -struct CumbinopAttrs : public tvm::AttrsNode<CumbinopAttrs> { +struct ScanopAttrs : public tvm::AttrsNode<ScanopAttrs> { Integer axis; DataType dtype; Bool exclusive = Bool(false); - TVM_DECLARE_ATTRS(CumbinopAttrs, "relay.attrs.CumbinopAttrs") { + TVM_DECLARE_ATTRS(ScanopAttrs, "relay.attrs.ScanopAttrs") { TVM_ATTR_FIELD(axis).describe("The axis to operate over").set_default(NullValue<Integer>()); TVM_ATTR_FIELD(dtype).describe("Output data type").set_default(NullValue<DataType>());
diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index 0da2d831c510..1a6742526607 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ b/python/tvm/relay/op/strategy/cuda.py
@@ -1018,7 +1018,7 @@ def cumsum_strategy_cuda(attrs, inputs, out_type, target): """cumsum cuda strategy""" strategy = _op.OpStrategy() strategy.add_implementation( - wrap_compute_cumbinop(topi.cuda.cumsum), + wrap_compute_scanop(topi.cuda.cumsum), wrap_topi_schedule(topi.cuda.schedule_scan), name="cumsum.cuda", )
@@ -1030,7 +1030,7 @@ def cumprod_strategy_cuda(attrs, inputs, out_type, target): """cumprod cuda strategy""" strategy = _op.OpStrategy() strategy.add_implementation( - wrap_compute_cumbinop(topi.cuda.cumprod), + wrap_compute_scanop(topi.cuda.cumprod), wrap_topi_schedule(topi.cuda.schedule_scan), name="cumprod.cuda", )
diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index 96078da72a12..b6c578fa5053 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc
@@ -3772,20 +3772,20 @@ RELAY_REGISTER_OP("adv_index") .set_attr<TOpPattern>("TOpPattern", kInjective) .set_attr<FTVMCompute>("FTVMCompute", AdvIndexCompute); -TVM_REGISTER_NODE_TYPE(CumbinopAttrs); +TVM_REGISTER_NODE_TYPE(ScanopAttrs); -bool CumbinopRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, +bool ScanopRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, const TypeReporter& reporter) { // types: [data, output] ICHECK_EQ(types.size(), 2) << "Expects two types, one for the input and another for the output"; const auto* data = types[0].as<TensorTypeNode>(); if (data == nullptr) { ICHECK(types[0].as<IncompleteTypeNode>()) - << "cumbinop: expect input type to be TensorType but get " << types[0]; + << "Scanop: expect input type to be TensorType but get " << types[0]; return false; } - const auto* param = attrs.as<CumbinopAttrs>(); + const auto* param = attrs.as<ScanopAttrs>(); auto dtype = param->dtype; if (dtype.is_void()) {
@@ -3806,7 +3806,7 @@ bool CumbinopRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, } Expr MakeCumsum(Expr data, Integer axis, DataType dtype, Bool exclusive) { - auto attrs = make_object<CumbinopAttrs>(); + auto attrs = make_object<ScanopAttrs>(); attrs->dtype = dtype; attrs->axis = axis; attrs->exclusive = exclusive;
@@ -3822,11 +3822,11 @@ RELAY_REGISTER_OP("cumsum") .set_num_inputs(1) .add_argument("data", "Tensor", "The input tensor.") .set_support_level(3) - .add_type_rel("Cumsum", CumbinopRel) + .add_type_rel("Cumsum", ScanopRel) .set_attr<TOpPattern>("TOpPattern", kOpaque); Expr MakeCumprod(Expr data, Integer axis, DataType dtype, Bool exclusive) { - auto attrs = make_object<CumbinopAttrs>(); + auto attrs = make_object<ScanopAttrs>(); attrs->dtype = dtype; attrs->axis = axis; attrs->exclusive = exclusive;
@@ -3842,7 +3842,7 @@ RELAY_REGISTER_OP("cumprod") .set_num_inputs(1) .add_argument("data", "Tensor", "The input tensor.") .set_support_level(3) - .add_type_rel("Cumprod", CumbinopRel) + .add_type_rel("Cumprod", ScanopRel) .set_attr<TOpPattern>("TOpPattern", kOpaque); TVM_REGISTER_NODE_TYPE(UniqueAttrs);
From dbea713157698805853fca44c987dd5073487f7d Mon Sep 17 00:00:00 2001
From: Andrew Zhao Luo Date: Wed, 24 Mar 2021 12:42:22 -0700 Subject: [PATCH 17/17] lint formatting --- python/tvm/topi/__init__.py | 2 +- src/relay/op/tensor/transform.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/python/tvm/topi/__init__.py b/python/tvm/topi/__init__.py index 90383ccf3d5b..ef2c5c1ea4b5 100644 --- a/python/tvm/topi/__init__.py +++ b/python/tvm/topi/__init__.py
@@ -42,7 +42,7 @@ from .sparse_reshape import * from .scatter_add import * from .argwhere import * -from .scan import * +from .scan import * from .einsum import * from .unique import * from . import generic
diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index b6c578fa5053..6fb9f77f99ea 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc
@@ -3775,7 +3775,7 @@ RELAY_REGISTER_OP("adv_index") TVM_REGISTER_NODE_TYPE(ScanopAttrs); bool ScanopRel(const Array<Type>& types, int num_inputs, const Attrs& attrs, - const TypeReporter& reporter) { + const TypeReporter& reporter) { // types: [data, output] ICHECK_EQ(types.size(), 2) << "Expects two types, one for the input and another for the output"; const auto* data = types[0].as<TensorTypeNode>();