From a03d300f307120afc9f78f19cf5ee850612a61bd Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 27 Dec 2018 18:59:27 -0800 Subject: [PATCH 1/4] [RELAY][EXPR] make const consistent with numpy --- python/tvm/api.py | 49 +++++++++++++++---- python/tvm/hybrid/calls.py | 2 +- python/tvm/hybrid/parser.py | 23 ++++++--- python/tvm/relay/expr.py | 6 --- .../python/relay/test_backend_interpreter.py | 11 +++-- tests/python/relay/test_debug.py | 5 +- tests/python/relay/test_op_level3.py | 4 +- .../python/relay/test_pass_alter_op_layout.py | 5 +- tests/python/unittest/test_arith_simplify.py | 15 +++--- tests/python/unittest/test_lang_basic.py | 6 +-- tests/python/unittest/test_lang_operator.py | 2 +- tests/python/unittest/test_lang_reflection.py | 8 +-- tests/python/unittest/test_pass_simplify.py | 4 +- .../unittest/test_pass_storage_rewrite.py | 3 +- tests/python/unittest/test_pass_unroll.py | 4 +- .../unittest/test_schedule_schedule_ops.py | 3 +- topi/python/topi/nn/bitserial_conv2d.py | 2 +- topi/tests/python/test_topi_basic.py | 2 +- 18 files changed, 97 insertions(+), 57 deletions(-) diff --git a/python/tvm/api.py b/python/tvm/api.py index e275c1122c36..34b71d0132ba 100644 --- a/python/tvm/api.py +++ b/python/tvm/api.py @@ -26,22 +26,53 @@ def min_value(dtype): - """minimum value of dtype""" + """minimum value of dtype + + Parameters + ---------- + dtype : str + The data type. + + Returns + ------- + value : tvm.Expr + The minimum value of dtype. + """ return _api_internal._min_value(dtype) def max_value(dtype): - """maximum value of dtype""" + """maximum value of dtype + + Parameters + ---------- + dtype : str + The data type. + + Returns + ------- + value : tvm.Expr + The maximum value of dtype. + """ return _api_internal._max_value(dtype) -def const(value, dtype=None): - """construct a constant""" - if dtype is None: - if isinstance(value, _Integral): - dtype = 'int32' - else: - dtype = 'float32' +def const(value, dtype): + """construct a constant + + Parameters + ---------- + value : number + The content of the constant number. + + dtype : str + The data type. + + Returns + ------- + const_val : tvm.Expr + The result expression. + """ return _api_internal._const(value, dtype) diff --git a/python/tvm/hybrid/calls.py b/python/tvm/hybrid/calls.py index 730b56f58bd2..da58280701a5 100644 --- a/python/tvm/hybrid/calls.py +++ b/python/tvm/hybrid/calls.py @@ -43,7 +43,7 @@ def bind(func_id, args): _internal_assert(isinstance(args[0], str), \ "A loop bind's first argument should be a string!") iter_var = _api.thread_axis(args[0]) - low, ext = _api.const(0), args[1] + low, ext = _api.const(0, "int32"), args[1] for_type = None return iter_var, low, ext, for_type diff --git a/python/tvm/hybrid/parser.py b/python/tvm/hybrid/parser.py index 26b0e141d0db..b3a5e1351eda 100644 --- a/python/tvm/hybrid/parser.py +++ b/python/tvm/hybrid/parser.py @@ -4,6 +4,8 @@ import operator import logging import sys +from numbers import Integral + from .util import _internal_assert from . import calls from . 
import util @@ -137,6 +139,15 @@ def _get_buffer_from_id(self, s, for_provide=False): return self._args[s] return self.alloc_buffers[s][0] + def _const(self, value, dtype=None): + if dtype is None: + if isinstance(value, bool): + dtype = "bool" + elif isinstance(value, Integral): + dtype = "int32" + else: + dtype = "float32" + return _api.const(value, dtype) #pylint: disable=invalid-name, missing-docstring def visit_Module(self, node): @@ -172,9 +183,9 @@ def visit_Name(self, node): if isinstance(res, tuple): buf = res[0] if isinstance(node.ctx, ast.Load): - return _make.Call(buf.dtype, buf.name, [_api.const(0)], \ + return _make.Call(buf.dtype, buf.name, [self._const(0)], \ _expr.Call.Halide, buf.op, buf.value_index) - return buf, [_api.const(0)] + return buf, [self._const(0)] if isinstance(node.ctx, ast.Load): return res return None @@ -183,7 +194,7 @@ def visit_Name(self, node): def visit_Num(self, node): - return _api.const(node.n) + return self._const(node.n) def visit_AugAssign(self, node): @@ -193,7 +204,7 @@ def visit_AugAssign(self, node): _internal_assert(len(buf) == 2, "LHS is supposed to be (buf, args)!") buf, args = buf else: - args = [_api.const(0)] + args = [self._const(0)] _internal_assert(isinstance(buf, Tensor), "LHS is supposed to be Tensor!") read = _make.Call(buf.dtype, buf.name, args, _expr.Call.Halide, buf.op, buf.value_index) @@ -378,7 +389,7 @@ def visit_For(self, node): if iter_var is None: _internal_assert(for_type is not None, "The loop bind function parse error!") offset = iter_var = _api.var(_name) - if not _ir_pass.Equal(low, _api.const(0)): + if not _ir_pass.Equal(low, self._const(0)): offset = iter_var + low self.loops_above[_name] = offset else: @@ -389,7 +400,7 @@ def visit_For(self, node): if for_type is None: res = _make.AttrStmt(iter_var, 'thread_extent', ext, _body) else: - res = _make.For(iter_var, _api.const(0), ext, for_type, 0, _body) + res = _make.For(iter_var, self._const(0), ext, for_type, 0, _body) self.loops_above.pop(_name) return res diff --git a/python/tvm/relay/expr.py b/python/tvm/relay/expr.py index b96111083bce..9de0344bf6b9 100644 --- a/python/tvm/relay/expr.py +++ b/python/tvm/relay/expr.py @@ -465,12 +465,6 @@ def const(value, dtype=None): """ if isinstance(value, (_base.numeric_types, (bool, list))): value = _np.array(value, dtype=dtype) - # convert default to int32 and float32 - if dtype is None: - if value.dtype == "float64": - value = value.astype("float32") - elif value.dtype == "int64": - value = value.astype("int32") if isinstance(value, (_np.ndarray, _np.generic)): value = _nd.array(value) diff --git a/tests/python/relay/test_backend_interpreter.py b/tests/python/relay/test_backend_interpreter.py index f53f27192b9e..3eda33f603fd 100644 --- a/tests/python/relay/test_backend_interpreter.py +++ b/tests/python/relay/test_backend_interpreter.py @@ -37,7 +37,8 @@ def test_tuple_value(): def test_id(): x = relay.var('x', 'float32') ident = relay.Function([x], x) - check_eval(ident, [1.0], 1.0) + one = np.array(1.0, 'float32') + check_eval(ident, [one], one) def test_add_const(): @@ -60,8 +61,8 @@ def test_equal(): j = relay.var('i', shape=[], dtype='int32') z = relay.equal(i, j) func = relay.Function([i, j], z, ret_type=relay.TensorType([], 'bool')) - i_data = relay.const(0) - j_data = relay.const(0) + i_data = relay.const(0, 'int32') + j_data = relay.const(0, 'int32') check_eval(func, [i_data, j_data], True) @@ -96,10 +97,10 @@ def test_loop(): i = relay.var('i', shape=[], dtype='int32') accum = relay.var('accum', shape=[], 
dtype='int32') sb = ScopeBuilder() - with sb.if_scope(relay.equal(i, relay.const(0))): + with sb.if_scope(relay.equal(i, relay.const(0, 'int32'))): sb.ret(accum) with sb.else_scope(): - one_less = relay.subtract(i, relay.const(1)) + one_less = relay.subtract(i, relay.const(1, 'int32')) new_accum = relay.add(accum, i) sb.ret(relay.Call(sum_up, [one_less, new_accum])) func = relay.Function([i, accum], sb.get()) diff --git a/tests/python/relay/test_debug.py b/tests/python/relay/test_debug.py index 3463e2916147..d62e3026ad46 100644 --- a/tests/python/relay/test_debug.py +++ b/tests/python/relay/test_debug.py @@ -13,10 +13,11 @@ def did_exec(x): global _test_debug_hit _test_debug_hit = True prog = debug(x, debug_func=did_exec) - result = ex.evaluate(prog, { x: const(1) }) + result = ex.evaluate(prog, { x: const(1, 'int32') }) assert _test_debug_hit assert result.asnumpy() == 1 + def test_debug_with_expr(): global _test_debug_hit _test_debug_hit = False @@ -27,6 +28,6 @@ def did_exec(x): global _test_debug_hit _test_debug_hit = True prog = debug(x + x * x, debug_func=did_exec) - result = ex.evaluate(prog, { x: const(2) }) + result = ex.evaluate(prog, { x: const(2, 'int32') }) assert _test_debug_hit assert result.asnumpy() == 6 diff --git a/tests/python/relay/test_op_level3.py b/tests/python/relay/test_op_level3.py index 31e87ef04856..e66a73cf84cb 100644 --- a/tests/python/relay/test_op_level3.py +++ b/tests/python/relay/test_op_level3.py @@ -329,7 +329,7 @@ def verify_full(fill_value, src_shape, dtype): for target, ctx in ctx_list(): for kind in ["graph", "debug"]: intrp = relay.create_executor(kind, ctx=ctx, target=target) - op_res = intrp.evaluate(func)(fill_value) + op_res = intrp.evaluate(func)(np.array(fill_value, dtype)) tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5) verify_full(4, (1, 3, 4, 4), "int32") verify_full(4.0, (1, 4), "float32") @@ -365,7 +365,7 @@ def verify_full_like(base, fill_value, dtype): for target, ctx in ctx_list(): for kind in ["graph", "debug"]: intrp = relay.create_executor(kind, ctx=ctx, target=target) - op_res = intrp.evaluate(func)(x_data, fill_value) + op_res = intrp.evaluate(func)(x_data, np.array(fill_value, dtype)) tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5) verify_full_like((1, 3, 4, 4), 4, "int32") verify_full_like((1, 1), 44.0, "float32") diff --git a/tests/python/relay/test_pass_alter_op_layout.py b/tests/python/relay/test_pass_alter_op_layout.py index 6a8be7ea847e..0fa1f1d692d5 100644 --- a/tests/python/relay/test_pass_alter_op_layout.py +++ b/tests/python/relay/test_pass_alter_op_layout.py @@ -20,13 +20,13 @@ def before(): @register_alter_op_layout("nn.conv2d", level=100) def alter_conv2d(attrs, inputs, tinfos): data, weight = inputs - weight = relay.multiply(weight, relay.const(2.0)) + weight = relay.multiply(weight, relay.const(2.0, "float32")) return relay.nn.conv2d(data, weight, **attrs) def expected(): x = relay.var("x", shape=(1, 64, 56, 56)) weight = relay.var('weight', shape=(64, 64, 3, 3)) - y = relay.nn.conv2d(x, relay.multiply(weight, relay.const(2.0)), + y = relay.nn.conv2d(x, relay.multiply(weight, relay.const(2.0, "float32")), channels=64, kernel_size=(3, 3), padding=(1, 1)) @@ -313,4 +313,3 @@ def expected(): test_alter_layout_dual_path() test_alter_layout_resnet() test_alter_layout_broadcast_op() - diff --git a/tests/python/unittest/test_arith_simplify.py b/tests/python/unittest/test_arith_simplify.py index f6a78b6e3770..edb9b09f0388 100644 --- a/tests/python/unittest/test_arith_simplify.py +++ 
b/tests/python/unittest/test_arith_simplify.py @@ -21,8 +21,8 @@ def test_simplify(): assert zz.a == x and zz.b.value == 4 n = tvm.var('n') - assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(n % (-1)), tvm.const(0)) - assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(n % 1), tvm.const(0)) + assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(n % (-1)), tvm.const(0, "int32")) + assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(n % 1), tvm.const(0, "int32")) assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(n / 1), n) tvm.ir_pass.CanonicalSimplify(n / (-1)) # This is not true in the current implementation @@ -67,10 +67,11 @@ def test_modular(): ry = tvm.var("ry") y = tvm.var("y") x = tvm.var("x") - vmap = {rx: tvm.Range(tvm.const(0), tvm.const(3)), - ry: tvm.Range(tvm.const(0), tvm.const(3)), - y: tvm.Range(tvm.const(0), tvm.const(2)), - x: tvm.Range(tvm.const(0), tvm.const(14))} + i32_const = lambda x: tvm.const(x, "int32") + vmap = {rx: tvm.Range(i32_const(0), i32_const(3)), + ry: tvm.Range(i32_const(0), i32_const(3)), + y: tvm.Range(i32_const(0), i32_const(2)), + x: tvm.Range(i32_const(0), i32_const(14))} idx = ry * 16 + rx + y * 16 + x z1 = tvm.ir_pass.CanonicalSimplify(idx // 16, vmap) z2 = tvm.ir_pass.CanonicalSimplify(idx % 16, vmap) @@ -82,4 +83,4 @@ def test_modular(): test_modular() test_simplify() test_mul() - test_simplify_minmax() \ No newline at end of file + test_simplify_minmax() diff --git a/tests/python/unittest/test_lang_basic.py b/tests/python/unittest/test_lang_basic.py index 079123d96ca0..09567c06fe41 100644 --- a/tests/python/unittest/test_lang_basic.py +++ b/tests/python/unittest/test_lang_basic.py @@ -1,20 +1,20 @@ import tvm def test_const(): - x = tvm.const(1) + x = tvm.const(1, "int32") print(x.dtype) assert x.dtype == tvm.int32 assert isinstance(x, tvm.expr.IntImm) def test_make(): - x = tvm.const(1) + x = tvm.const(1, "int32") y = tvm.var("x") z = x + y assert isinstance(tvm.max(x, y), tvm.expr.Max) assert isinstance(tvm.min(x, y), tvm.expr.Min) def test_ir(): - x = tvm.const(1) + x = tvm.const(1, "int32") y = tvm.make.IntImm('int32', 1) z = x + y stmt = tvm.make.Evaluate(z) diff --git a/tests/python/unittest/test_lang_operator.py b/tests/python/unittest/test_lang_operator.py index af7d9fd5544a..371f276cb025 100644 --- a/tests/python/unittest/test_lang_operator.py +++ b/tests/python/unittest/test_lang_operator.py @@ -2,7 +2,7 @@ def test_const_fold(): def check(f, *args): - x = f(*[tvm.const(x) for x in args]) + x = f(*[tvm.const(x, "int32") for x in args]) y = f(*args) if not isinstance(x, (tvm.expr.IntImm, tvm.expr.UIntImm)) or x.value != int(y): raise ValueError("check error: %s vs %s " % (x, y)) diff --git a/tests/python/unittest/test_lang_reflection.py b/tests/python/unittest/test_lang_reflection.py index 3ec760f20c76..103cd6c5f219 100644 --- a/tests/python/unittest/test_lang_reflection.py +++ b/tests/python/unittest/test_lang_reflection.py @@ -2,8 +2,8 @@ def test_const_saveload_json(): # save load json - x = tvm.const(1) - y = tvm.const(10) + x = tvm.const(1, "int32") + y = tvm.const(10, "int32") z = x + y z = z + z json_str = tvm.save_json(z) @@ -13,8 +13,8 @@ def test_const_saveload_json(): def test_make_smap(): # save load json - x = tvm.const(1) - y = tvm.const(10) + x = tvm.const(1, "int32") + y = tvm.const(10, "int32") z = tvm.expr.Add(x, y) smap = tvm.convert({"z": z, "x": x}) json_str = tvm.save_json(tvm.convert([smap])) diff --git a/tests/python/unittest/test_pass_simplify.py b/tests/python/unittest/test_pass_simplify.py 
index fce6eaed5a1f..a42230df815d 100644 --- a/tests/python/unittest/test_pass_simplify.py +++ b/tests/python/unittest/test_pass_simplify.py @@ -29,13 +29,13 @@ def test_basic(): def test_bound(): m = tvm.var('m') - vrange = tvm.convert({m: tvm.Range(tvm.const(0), tvm.const(10))}) + vrange = tvm.convert({m: tvm.Range(tvm.const(0, "int32"), tvm.const(10, "int32"))}) ret = tvm.ir_pass.Simplify(m % 10, vrange) assert ret == m def test_canonical(): x = tvm.var("x") - z = tvm.const(3) + z = tvm.const(3, "int32") ret = tvm.ir_pass.CanonicalSimplify(x / (z*z) - x / (z*z)) assert(tvm.ir_pass.Equal(ret, 0)) diff --git a/tests/python/unittest/test_pass_storage_rewrite.py b/tests/python/unittest/test_pass_storage_rewrite.py index 3c07a1f26aff..faf70204c29e 100644 --- a/tests/python/unittest/test_pass_storage_rewrite.py +++ b/tests/python/unittest/test_pass_storage_rewrite.py @@ -238,7 +238,8 @@ def test_parallel_alloc(): n = tvm.var("n") with ib.for_range(0, n, name="t") as i: ib.scope_attr( - tvm.const(1) , "pragma_scope", tvm.make.StringImm("parallel_launch_point")) + tvm.const(1, "int32") , "pragma_scope", + tvm.make.StringImm("parallel_launch_point")) with ib.for_range(0, n, name="i", for_type="parallel") as i: with ib.for_range(0, 10, name="j") as j: A = ib.allocate("float32", n, name="A", scope="global") diff --git a/tests/python/unittest/test_pass_unroll.py b/tests/python/unittest/test_pass_unroll.py index 68467b0c04c6..c88a019a8bce 100644 --- a/tests/python/unittest/test_pass_unroll.py +++ b/tests/python/unittest/test_pass_unroll.py @@ -24,7 +24,7 @@ def test_unroll_loop(): assert ret.for_type == tvm.stmt.For.Unrolled ib = tvm.ir_builder.create() - ib.scope_attr(tvm.const(0), "pragma_auto_unroll_max_step", 16) + ib.scope_attr(tvm.const(0, "int32"), "pragma_auto_unroll_max_step", 16) ib.emit(stmt) wrapped = ib.get() wrapped = tvm.make.Block(wrapped, stmt) @@ -54,4 +54,4 @@ def test_unroll_fake_loop(): if __name__ == "__main__": test_unroll_loop() - test_unroll_fake_loop() \ No newline at end of file + test_unroll_fake_loop() diff --git a/tests/python/unittest/test_schedule_schedule_ops.py b/tests/python/unittest/test_schedule_schedule_ops.py index e59a73529d24..053b621aae3a 100644 --- a/tests/python/unittest/test_schedule_schedule_ops.py +++ b/tests/python/unittest/test_schedule_schedule_ops.py @@ -272,7 +272,8 @@ def _compute(*indice): def test_schedule_bound_condition(): A = tvm.placeholder((64,), name='A', dtype="float32") - Apad = tvm.compute((66,), lambda i: tvm.select(tvm.all(i>0, i < 65), A[i-1], tvm.const(0.)), name='Apad') + Apad = tvm.compute((66,), lambda i: tvm.select( + tvm.all(i>0, i < 65), A[i-1], tvm.const(0., "float32")), name='Apad') Apad2 = tvm.compute((66,), lambda i: Apad[i]*2, name='Apad2') s = tvm.create_schedule(Apad2.op) AL1 = s.cache_read(A,"local",[Apad]) diff --git a/topi/python/topi/nn/bitserial_conv2d.py b/topi/python/topi/nn/bitserial_conv2d.py index ca2efb0820c1..545ad2f38ae5 100644 --- a/topi/python/topi/nn/bitserial_conv2d.py +++ b/topi/python/topi/nn/bitserial_conv2d.py @@ -320,7 +320,7 @@ def _bitpack(*indices): element = data(*idx) for b in range(bits): - extracted_bit = ((element & tvm.const(masks[b])) >> b).astype(pack_type) + extracted_bit = ((element & tvm.const(masks[b], "int32")) >> b).astype(pack_type) packed_data[b] = (packed_data[b] | extracted_bit) if k < data_width - 1: packed_data[b] = packed_data[b] << 1 diff --git a/topi/tests/python/test_topi_basic.py b/topi/tests/python/test_topi_basic.py index 067bc5adab41..077331cd6d1a 100644 --- 
a/topi/tests/python/test_topi_basic.py +++ b/topi/tests/python/test_topi_basic.py @@ -4,7 +4,7 @@ def test_util(): - x = tvm.const(100) + x = tvm.const(100, "int32") assert util.get_const_int(x) == 100 assert util.get_const_tuple((x, x)) == (100, 100) From 2a269fc84a0c86c3a3c415c6dba7d32c5f51ee12 Mon Sep 17 00:00:00 2001 From: tqchen Date: Thu, 27 Dec 2018 19:52:14 -0800 Subject: [PATCH 2/4] fix vta --- vta/python/vta/ir_pass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vta/python/vta/ir_pass.py b/vta/python/vta/ir_pass.py index 3efef7135edb..c21ca6ed5bf4 100644 --- a/vta/python/vta/ir_pass.py +++ b/vta/python/vta/ir_pass.py @@ -700,7 +700,7 @@ def _flatten_loop(src_coeff, dst_coeff, extents): elif isinstance(loop_body.value, tvm.expr.Load): alu_opcode = env.dev.ALU_OPCODE_SHR lhs = loop_body.value - rhs = tvm.const(0) + rhs = tvm.const(0, "int32") else: raise RuntimeError( "Expression not recognized %s, %s, %s" % ( From 0714e47c95e4af93de03b64fd9a9e85a9d12edcf Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 28 Dec 2018 10:38:15 -0800 Subject: [PATCH 3/4] fix tutorial --- topi/tests/python/test_topi_math.py | 2 +- tutorials/dev/low_level_custom_pass.py | 2 +- tutorials/optimize/opt_conv_cuda.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/topi/tests/python/test_topi_math.py b/topi/tests/python/test_topi_math.py index 22713aa6cfdd..a7f36592cd6c 100644 --- a/topi/tests/python/test_topi_math.py +++ b/topi/tests/python/test_topi_math.py @@ -6,7 +6,7 @@ def test_util(): - x = tvm.const(100) + x = tvm.const(100, "int32") assert util.get_const_int(x) == 100 assert util.get_const_tuple((x, x)) == (100, 100) diff --git a/tutorials/dev/low_level_custom_pass.py b/tutorials/dev/low_level_custom_pass.py index f2f38207931f..bbe826db0e9f 100644 --- a/tutorials/dev/low_level_custom_pass.py +++ b/tutorials/dev/low_level_custom_pass.py @@ -34,7 +34,7 @@ # our customized lowering pass to manipulate the IR directly instead of using schedule premitives. # -n = tvm.const(128) +n = tvm.const(128, "int32") a = tvm.placeholder((n, ), name="a") b = tvm.placeholder((n, ), name="b") c = tvm.compute((n, ), lambda i: a[i] + b[i], name='c') diff --git a/tutorials/optimize/opt_conv_cuda.py b/tutorials/optimize/opt_conv_cuda.py index 52e0417f20ca..7cb087f43f51 100644 --- a/tutorials/optimize/opt_conv_cuda.py +++ b/tutorials/optimize/opt_conv_cuda.py @@ -46,7 +46,7 @@ lambda yy, xx, cc, nn: tvm.select( tvm.all(yy >= pad, yy - pad < in_size, xx >= pad, xx - pad < in_size), - A[yy - pad, xx - pad, cc, nn], tvm.const(0.)), + A[yy - pad, xx - pad, cc, nn], tvm.const(0., "float32")), name='Apad') # Create reduction variables rc = tvm.reduce_axis((0, in_channel), name='rc') @@ -64,7 +64,7 @@ ############################################################################### # Memory Hierarchy # ---------------- -# +# # We first specify the memory hierarchy for buffers. The figure below shows the # GPU memory hierarchy. 
One important difference from CPU memory hierarchy is # that GPU provides a cache buffer called shared memory, which is managed by From 54210ad97362eb22176623fa297ed3d7f947bf25 Mon Sep 17 00:00:00 2001 From: tqchen Date: Fri, 28 Dec 2018 16:22:13 -0800 Subject: [PATCH 4/4] Remove cpp test because most of them are already in pytest --- topi/tests/python_cpp/test_topi_basic.py | 36 -- topi/tests/python_cpp/test_topi_bnn.py | 55 --- topi/tests/python_cpp/test_topi_clip.py | 44 --- topi/tests/python_cpp/test_topi_dense.py | 61 --- topi/tests/python_cpp/test_topi_dilate.py | 35 -- topi/tests/python_cpp/test_topi_l2norm.py | 48 --- topi/tests/python_cpp/test_topi_lrn.py | 44 --- topi/tests/python_cpp/test_topi_pooling.py | 132 ------- topi/tests/python_cpp/test_topi_reduce.py | 147 ------- topi/tests/python_cpp/test_topi_region.py | 52 --- topi/tests/python_cpp/test_topi_relu.py | 90 ----- topi/tests/python_cpp/test_topi_reorg.py | 52 --- topi/tests/python_cpp/test_topi_softmax.py | 82 ---- topi/tests/python_cpp/test_topi_tensor.py | 81 ---- topi/tests/python_cpp/test_topi_transform.py | 384 ------------------- 15 files changed, 1343 deletions(-) delete mode 100644 topi/tests/python_cpp/test_topi_basic.py delete mode 100644 topi/tests/python_cpp/test_topi_bnn.py delete mode 100644 topi/tests/python_cpp/test_topi_clip.py delete mode 100644 topi/tests/python_cpp/test_topi_dense.py delete mode 100644 topi/tests/python_cpp/test_topi_dilate.py delete mode 100644 topi/tests/python_cpp/test_topi_l2norm.py delete mode 100644 topi/tests/python_cpp/test_topi_lrn.py delete mode 100644 topi/tests/python_cpp/test_topi_pooling.py delete mode 100644 topi/tests/python_cpp/test_topi_reduce.py delete mode 100644 topi/tests/python_cpp/test_topi_region.py delete mode 100644 topi/tests/python_cpp/test_topi_relu.py delete mode 100644 topi/tests/python_cpp/test_topi_reorg.py delete mode 100644 topi/tests/python_cpp/test_topi_softmax.py delete mode 100644 topi/tests/python_cpp/test_topi_tensor.py delete mode 100644 topi/tests/python_cpp/test_topi_transform.py diff --git a/topi/tests/python_cpp/test_topi_basic.py b/topi/tests/python_cpp/test_topi_basic.py deleted file mode 100644 index 1057f746b004..000000000000 --- a/topi/tests/python_cpp/test_topi_basic.py +++ /dev/null @@ -1,36 +0,0 @@ -import tvm -import topi -from topi import util - - -def test_util(): - x = tvm.const(100) - assert util.get_const_int(x) == 100 - assert util.get_const_tuple((x, x)) == (100, 100) - - -def test_ewise(): - m = tvm.var('m') - l = tvm.var('l') - A = tvm.placeholder((m, l), name='A') - - def test_apply(func, name): - B = func(A) - assert tuple(B.shape) == tuple(A.shape) - assert B.op.body[0].name == name - - test_apply(topi.cpp.exp, "exp") - test_apply(topi.cpp.tanh, "tanh") - test_apply(topi.cpp.sigmoid, "sigmoid") - test_apply(topi.cpp.log, "log") - test_apply(topi.cpp.sqrt, "sqrt") - -def test_flatten_tag(): - A = tvm.placeholder((3, 4), name='A') - B = topi.cpp.nn.flatten(A) - assert B.op.tag == topi.tag.INJECTIVE - -if __name__ == "__main__": - test_util() - test_ewise() - test_flatten_tag() diff --git a/topi/tests/python_cpp/test_topi_bnn.py b/topi/tests/python_cpp/test_topi_bnn.py deleted file mode 100644 index 83d880311eff..000000000000 --- a/topi/tests/python_cpp/test_topi_bnn.py +++ /dev/null @@ -1,55 +0,0 @@ -"""Test code for binary neural network operators.""" -import numpy as np -import tvm -import topi -from topi.util import get_const_tuple -from tvm.contrib.pickle_memoize import memoize - - -def verify_binary_dense(batch, 
in_dim, out_dim): - A = tvm.placeholder((batch, in_dim), name='A') - B = tvm.placeholder((out_dim, in_dim), name='B') - bnn_A = topi.cpp.nn.binarize_pack(A, 1) - bnn_B = topi.cpp.nn.binarize_pack(B, 1) - # binary dense - bnn_A1 = tvm.placeholder(bnn_A.shape, dtype=bnn_A.dtype) - bnn_B1 = tvm.placeholder(bnn_B.shape, dtype=bnn_B.dtype) - bnn_C = topi.cpp.nn.binary_dense(bnn_A1, bnn_B1) - # schedule - target = topi.cpp.TEST_create_target("llvm") - s1 = topi.cpp.x86.schedule_binarize_pack(target, [bnn_A]) - s2 = topi.cpp.x86.schedule_binarize_pack(target, [bnn_B]) - s3 = topi.cpp.x86.schedule_binary_dense(target, [bnn_C]) - - dtype = A.dtype - @memoize("topi.tests.test_topi_binary_dense") - def get_ref_data(): - # generate random matrix of +1 or -1 value - a_np = (np.random.randint(2, size=(batch, in_dim)) * 2 - 1).astype(dtype) - b_np = (np.random.randint(2, size=(out_dim, in_dim)) * 2 - 1).astype(dtype) - c_np = np.dot(a_np, b_np.T) - return (a_np, b_np, c_np) - - a_np, b_np, c_np = get_ref_data() - - ctx = tvm.cpu(0) - a = tvm.nd.array(a_np, ctx) - b = tvm.nd.array(b_np, ctx) - bnn_a = tvm.nd.array(np.zeros(get_const_tuple(bnn_A.shape), dtype=bnn_A.dtype), ctx) - bnn_b = tvm.nd.array(np.zeros(get_const_tuple(bnn_B.shape), dtype=bnn_B.dtype), ctx) - bnn_c = tvm.nd.array(np.zeros(get_const_tuple(bnn_C.shape), dtype=bnn_C.dtype), ctx) - f1 = tvm.build(s1, [A, bnn_A], 'llvm') - f2 = tvm.build(s2, [B, bnn_B], 'llvm') - f3 = tvm.build(s3, [bnn_A1, bnn_B1, bnn_C], 'llvm') - f1(a, bnn_a) - f2(b, bnn_b) - f3(bnn_a, bnn_b, bnn_c) - tvm.testing.assert_allclose(bnn_c.asnumpy(), c_np, rtol=1e-5) - -def test_binary_dense(): - verify_binary_dense(1, 4096, 1024) - verify_binary_dense(1, 1024, 1000) - - -if __name__ == "__main__": - test_binary_dense() diff --git a/topi/tests/python_cpp/test_topi_clip.py b/topi/tests/python_cpp/test_topi_clip.py deleted file mode 100644 index d1aca4cb904c..000000000000 --- a/topi/tests/python_cpp/test_topi_clip.py +++ /dev/null @@ -1,44 +0,0 @@ -"""Test code for clip operator""" -import numpy as np -import tvm -import topi -from topi.util import get_const_tuple -from tvm.contrib.pickle_memoize import memoize - - -def verify_clip(N, a_min, a_max, dtype): - A = tvm.placeholder((N, N), dtype=dtype, name='A') - B = topi.cpp.clip(A, a_min, a_max) - - # use memoize to pickle the test data for next time use - @memoize("topi.tests.test_topi_clip") - def get_ref_data(): - a_np = np.random.uniform(a_min*2, a_max*2, size=(N, N)).astype(dtype) - b_np = np.clip(a_np, a_min, a_max) - return a_np, b_np - a_np, b_np = get_ref_data() - - def check_device(device): - if not tvm.module.enabled(device): - print("Skip because %s is not enabled" % device) - return - target = topi.cpp.TEST_create_target(device) - s = topi.cpp.generic.default_schedule(target, [B], False) - ctx = tvm.cpu(0) if device == "llvm" else tvm.gpu(0) - a = tvm.nd.array(a_np, ctx) - b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx) - f = tvm.build(s, [A, B], device, name="clip") - f(a, b) - tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) - - for device in ['llvm']: - check_device(device) - -def test_clip(): - verify_clip(1024, -127, 127, 'int8') - verify_clip(1024, -127, 127, 'int16') - verify_clip(1024, -127, 127, 'float32') - - -if __name__ == "__main__": - test_clip() diff --git a/topi/tests/python_cpp/test_topi_dense.py b/topi/tests/python_cpp/test_topi_dense.py deleted file mode 100644 index 636257de7919..000000000000 --- a/topi/tests/python_cpp/test_topi_dense.py +++ /dev/null @@ -1,61 
+0,0 @@ -"""Test code for dense operator""" -import numpy as np -import tvm -import topi -from topi.util import get_const_tuple -from tvm.contrib.pickle_memoize import memoize - - -def verify_dense(batch, in_dim, out_dim, use_bias=True): - A = tvm.placeholder((batch, in_dim), name='A') - B = tvm.placeholder((out_dim, in_dim), name='B') - C = tvm.placeholder((out_dim,), name='C') - D = topi.cpp.nn.dense(A, B, C if use_bias else None) - D = topi.cpp.nn.relu(D) - dtype = A.dtype - - # use memoize to pickle the test data for next time use - @memoize("topi.tests.test_topi_dense") - def get_ref_data(): - a_np = np.random.uniform(size=(batch, in_dim)).astype(dtype) - b_np = np.random.uniform(size=(out_dim, in_dim)).astype(dtype) - c_np = np.random.uniform(size=(out_dim,)).astype(dtype) - if use_bias: - d_np = np.maximum(np.dot(a_np, b_np.T) + c_np, 0.0) - else: - d_np = np.maximum(np.dot(a_np, b_np.T), 0.0) - return (a_np, b_np, c_np, d_np) - # get the test data - a_np, b_np, c_np, d_np = get_ref_data() - - def check_device(device): - ctx = tvm.context(device, 0) - if not ctx.exist: - print("Skip because %s is not enabled" % device) - return - print("Running on target: %s" % device) - target = topi.cpp.TEST_create_target(device) - if device == "llvm": - s = topi.cpp.generic.schedule_dense(target, [D]) - elif device == "rocm": - s = topi.cpp.rocm.schedule_dense(target, [D]) - else: - s = topi.cpp.cuda.schedule_dense(target, [D]) - a = tvm.nd.array(a_np, ctx) - b = tvm.nd.array(b_np, ctx) - c = tvm.nd.array(c_np, ctx) - d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx) - f = tvm.build(s, [A, B, C, D], device, name="dense") - f(a, b, c, d) - tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5) - - for device in ['cuda', 'opencl', 'metal', 'rocm']: - check_device(device) - -def test_dense(): - verify_dense(1, 1024, 1000, use_bias=True) - verify_dense(1, 1024, 1000, use_bias=False) - - -if __name__ == "__main__": - test_dense() diff --git a/topi/tests/python_cpp/test_topi_dilate.py b/topi/tests/python_cpp/test_topi_dilate.py deleted file mode 100644 index 1f7f1d8bceeb..000000000000 --- a/topi/tests/python_cpp/test_topi_dilate.py +++ /dev/null @@ -1,35 +0,0 @@ -import tvm -import topi -import topi.testing -import numpy as np - -def test_dilate(): - target = 'llvm' - ctx = tvm.cpu(0) - - def _test_dilate(input_size, strides): - Input = tvm.placeholder((input_size)) - Output = topi.cpp.nn.dilate(Input, strides) - tgt = topi.cpp.TEST_create_target(target) - schedule = topi.cpp.generic.default_schedule(tgt, [Output], True) - input_np = np.random.uniform(size=input_size).astype(Input.dtype) - output_np = topi.testing.dilate_python(input_np, strides) - input_tvm = tvm.nd.array(input_np, ctx=ctx) - output_size = topi.util.get_const_tuple(Output.shape) - output_tvm = tvm.nd.array(np.zeros(shape=output_size).astype(Output.dtype), ctx=ctx) - f = tvm.build(schedule, [Input, Output], target) - f(input_tvm, output_tvm) - tvm.testing.assert_allclose(output_tvm.asnumpy(), output_np, rtol=1e-5) - - _test_dilate((32,), (2,)) - _test_dilate((32,32), (2,2)) - _test_dilate((1,3,32,32), (1,1,1,1)) - _test_dilate((1,3,32,32), (2,2,2,2)) - _test_dilate((1,32,32,3,3), (1,1,1,1,1)) - _test_dilate((1,32,32,3,3), (2,2,2,2,2)) - _test_dilate((1,32,32,32,3,3), (1,1,1,2,2,2)) - _test_dilate((1,32,32,32,3,3), (2,2,2,1,1,1)) - - -if __name__ == "__main__": - test_dilate() diff --git a/topi/tests/python_cpp/test_topi_l2norm.py b/topi/tests/python_cpp/test_topi_l2norm.py deleted file mode 100644 index 
fef2710b8d79..000000000000 --- a/topi/tests/python_cpp/test_topi_l2norm.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Test code for l2 normalization""" -import numpy as np -import tvm -import topi -import logging -from topi.util import get_const_tuple -import topi.testing - -def verify_l2_normalize(shape, eps, axis=None): - '''Verify l2 normalization operator by comparing outputs from tvm and numpy implementation''' - A = tvm.placeholder(shape, name='A') - B = topi.cpp.nn.l2_normalize(A, eps, axis) - dtype = A.dtype - - a_np = np.random.uniform(size=shape).astype(dtype) - b_np = topi.testing.l2_normalize_python(a_np, eps, axis) - - def check_device(device): - if not tvm.module.enabled(device): - print("Skip because %s is not enabled" % device) - return - print("Running on target: %s" % device) - target = topi.cpp.TEST_create_target(device) - if device == "llvm": - s = topi.cpp.generic.default_schedule(target, [B], False) - else: - s = topi.cpp.cuda.schedule_l2_normalize(target, [B]) - ctx = tvm.context(device, 0) - a = tvm.nd.array(a_np, ctx) - b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) - func = tvm.build(s, [A, B], device, name="l2_normalize") - func(a, b) - tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) - - for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm']: - check_device(device) - -def test_l2_normalize(): - verify_l2_normalize((1, 3, 20, 20), 0.001) - verify_l2_normalize((1, 3, 20, 20), 0.001, (1,)) - verify_l2_normalize((1, 3, 20, 20), 0.001, (1, 2)) - verify_l2_normalize((1, 3, 20, 20), 0.001, (2, 3)) - verify_l2_normalize((1, 3, 20, 20), 0.001, (0, 3)) - verify_l2_normalize((1, 3, 20, 20), 0.001, (0, 2, 3)) - -if __name__ == "__main__": - logging.basicConfig(level=logging.DEBUG) - test_l2_normalize() diff --git a/topi/tests/python_cpp/test_topi_lrn.py b/topi/tests/python_cpp/test_topi_lrn.py deleted file mode 100644 index 14a0eaa27781..000000000000 --- a/topi/tests/python_cpp/test_topi_lrn.py +++ /dev/null @@ -1,44 +0,0 @@ -"""Test code for LRN""" -import numpy as np -import tvm -import topi -import logging -from topi.util import get_const_tuple -import topi.testing - -def verify_lrn(shape, size, axis, bias, alpha, beta): - '''Verify Local response normalization operator by comparing outputs from tvm and numpy implementation''' - A = tvm.placeholder(shape, name='A') - B = topi.cpp.nn.lrn(A, size, axis, alpha, beta, bias) - dtype = A.dtype - - a_np = np.random.uniform(size=shape).astype(dtype) - b_np = topi.testing.lrn_python(a_np, size, axis, bias, alpha, beta) - def check_device(device): - if not tvm.module.enabled(device): - print("Skip because %s is not enabled" % device) - return - print("Running on target: %s" % device) - target = topi.cpp.TEST_create_target(device) - if device == "llvm": - s = topi.cpp.generic.default_schedule(target, [B], False) - else: - s = topi.cpp.cuda.schedule_lrn(target, [B]) - ctx = tvm.context(device, 0) - a = tvm.nd.array(a_np, ctx) - b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx) - f = tvm.build(s, [A, B], device) - f(a, b) - tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-1) - - for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm']: - check_device(device) - -def test_lrn(): - verify_lrn((1, 3, 5, 5), 3, 3, 1.0, 1.0, 0.5) - verify_lrn((1, 3, 5, 5), 3, 3, 1.0, 1.0, 0.5) - verify_lrn((1, 3, 20, 20), 3, 1, 2.0, 1.0, 0.75) - -if __name__ == "__main__": - logging.basicConfig(level=logging.DEBUG) - test_lrn() diff --git a/topi/tests/python_cpp/test_topi_pooling.py 
b/topi/tests/python_cpp/test_topi_pooling.py deleted file mode 100644 index 9997fb6738c2..000000000000 --- a/topi/tests/python_cpp/test_topi_pooling.py +++ /dev/null @@ -1,132 +0,0 @@ -"""Test code for pooling""" -import numpy as np -import tvm -import topi -import math -from topi.util import get_const_tuple - -pool_code = { - "avg": 0, - "max": 1 -} -def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_include_pad=True): - iw = ih - kw = kh - sw = sh - pt, pl, pb, pr = padding - A = tvm.placeholder((n, ic, ih, iw), name='A') - B = topi.cpp.nn.pool(A, [kh, kw], [sh, sw], padding, - pool_code[pool_type], ceil_mode, "NCHW", count_include_pad) - B = topi.cpp.nn.relu(B) - dtype = A.dtype - - bshape = get_const_tuple(B.shape) - ashape = get_const_tuple(A.shape) - if ceil_mode: - assert bshape[2] == int(math.ceil(float(ashape[2] - kh + pt + pb) / sh) + 1) - assert bshape[3] == int(math.ceil(float(ashape[3] - kw + pl + pr) / sw) + 1) - else: - assert bshape[2] == int(math.floor(float(ashape[2] - kh + pt + pb) / sh) + 1) - assert bshape[3] == int(math.floor(float(ashape[3] - kw + pl + pr) / sw) + 1) - - - a_np = np.random.uniform(size=(n, ic, ih, iw)).astype(dtype) - pad_np = np.zeros(shape=(n, ic, ih+pt+pb, iw+pl+pr)).astype(dtype) - no_zero = (range(n), range(ic), (range(pt, ih+pt)), (range(pl, iw+pl))) - pad_np[np.ix_(*no_zero)] = a_np - _, oc, oh, ow = get_const_tuple(B.shape) - b_np = np.zeros(shape=(n, oc, oh, ow)).astype(dtype) - - if pool_type == 'avg': - for i in range(oh): - for j in range(ow): - if count_include_pad: - b_np[:,:,i,j] = np.mean(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], axis=(2,3)) - else: - pad_count = np.sum(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw] > 0, axis=(2,3)) - b_np[:,:,i,j] = np.sum(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], axis=(2,3)) / np.maximum(pad_count, 1) - - elif pool_type =='max': - for i in range(oh): - for j in range(ow): - b_np[:,:,i,j] = np.max(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], axis=(2,3)) - b_np = np.maximum(b_np, 0.0) - - def check_device(device): - ctx = tvm.context(device, 0) - if not ctx.exist: - print("Skip because %s is not enabled" % device) - return - print("Running on target: %s" % device) - target = topi.cpp.TEST_create_target(device) - if device == "llvm": - s = topi.cpp.generic.default_schedule(target, [B], False) - else: - s = topi.cpp.cuda.schedule_pool(target, [B]) - a = tvm.nd.array(a_np, ctx) - b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx) - f = tvm.build(s, [A, B], device) - f(a, b) - tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) - - for device in ['cuda', 'opencl', 'metal', 'rocm']: - check_device(device) - -def test_pool(): - verify_pool(1, 256, 32, 2, 2, [0, 0, 0, 0], 'avg', False, True) - verify_pool(1, 256, 31, 3, 3, [1, 2, 1, 2], 'avg', False, True) - verify_pool(1, 256, 32, 2, 2, [1, 2, 1, 2], 'avg', False, False) - verify_pool(1, 256, 31, 4, 4, [3, 3, 3, 3], 'avg', False, False) - verify_pool(1, 256, 31, 4, 4, [0, 0, 0, 0], 'avg', False, False) - verify_pool(1, 256, 32, 2, 2, [0, 0, 0, 0], 'max', False) - verify_pool(1, 256, 31, 3, 3, [2, 1, 2, 1], 'max', False) - verify_pool(1, 256, 31, 3, 3, [2, 1, 2, 1], 'max', True) - - verify_pool(1, 256, 31, 3, 3, [2, 1, 0, 3], 'avg', False, True) - verify_pool(1, 256, 32, 2, 2, [0, 3, 2, 1], 'avg', False, False) - verify_pool(1, 256, 31, 3, 3, [1, 0, 3, 2], 'max', False) - verify_pool(1, 256, 31, 3, 3, [3, 2, 1, 0], 'max', True) - - -def verify_global_pool(n, c, h, w, pool_type): - A = tvm.placeholder((n, c, h, w), 
name='A') - B = topi.cpp.nn.global_pool(A, pool_code[pool_type]) - B = topi.cpp.nn.relu(B) - - a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype) - if pool_type == 'avg': - b_np = np.mean(a_np, axis=(2,3), keepdims=True) - elif pool_type =='max': - b_np = np.max(a_np, axis=(2,3), keepdims=True) - b_np = np.maximum(b_np, 0.0) - - def check_device(device): - if not tvm.module.enabled(device): - print("Skip because %s is not enabled" % device) - return - print("Running on target: %s" % device) - target = topi.cpp.TEST_create_target(device) - if device == "llvm": - s = topi.cpp.generic.default_schedule(target, [B], False) - else: - s = topi.cpp.cuda.schedule_global_pool(target, [B]) - ctx = tvm.context(device, 0) - a = tvm.nd.array(a_np, ctx) - b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) - f = tvm.build(s, [A, B], device) - f(a, b) - tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) - - for device in ['cuda', 'opencl', 'metal', 'rocm']: - check_device(device) - -def test_global_pool(): - verify_global_pool(1, 1024, 7, 7, 'avg') - verify_global_pool(4, 1024, 7, 7, 'avg') - verify_global_pool(1, 1024, 7, 7, 'max') - verify_global_pool(4, 1024, 7, 7, 'max') - - -if __name__ == "__main__": - test_pool() - test_global_pool() diff --git a/topi/tests/python_cpp/test_topi_reduce.py b/topi/tests/python_cpp/test_topi_reduce.py deleted file mode 100644 index dbfa3683fa66..000000000000 --- a/topi/tests/python_cpp/test_topi_reduce.py +++ /dev/null @@ -1,147 +0,0 @@ -"""Test code for reduce.""" -import os -import numpy as np -import tvm -import topi - -def _my_npy_argmax(arr, axis, keepdims): - if not keepdims: - return arr.argmax(axis=axis) - else: - if axis is not None: - out_shape = list(arr.shape) - out_shape[axis] = 1 - else: - out_shape = [1 for _ in range(len(arr.shape))] - return arr.argmax(axis=axis).reshape(out_shape) - - -def _my_npy_argmin(arr, axis, keepdims): - if not keepdims: - return arr.argmin(axis=axis) - else: - out_shape = list(arr.shape) - out_shape[axis] = 1 - return arr.argmin(axis=axis).reshape(out_shape) - -def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum"): - # Build the logic and compile the function - dat_dtype = "float32" - A = tvm.placeholder(shape=in_shape, name="A", dtype=dat_dtype) - A1 = topi.cpp.sqrt(topi.cpp.exp(A)) - out_dtype = "float32" - if type == "sum": - B = topi.cpp.sum(A1, axis, keepdims) - elif type == "max": - B = topi.cpp.max(A1, axis, keepdims) - elif type == "min": - B = topi.cpp.min(A1, axis, keepdims) - elif type == "argmax": - B = topi.cpp.argmax(A1, axis, keepdims) - out_dtype = "int32" - elif type == "argmin": - B = topi.cpp.argmin(A1, axis, keepdims) - out_dtype = "int32" - elif type == "prod": - B = topi.cpp.prod(A1, axis, keepdims) - else: - raise NotImplementedError - - def check_device(device): - ctx = tvm.context(device, 0) - if not ctx.exist: - print("Skip because %s is not enabled" % device) - return - print("Running on target: %s" % device) - target = topi.cpp.TEST_create_target(device) - if device == "llvm": - s = topi.cpp.generic.default_schedule(target, [B], True) - else: - s = topi.cpp.cuda.schedule_reduce(target, [B]) - - foo = tvm.build(s, [A, B], device, name=type) - # Test - in_npy = np.random.uniform(size=in_shape).astype(np.float32) - in_npy_map = np.sqrt(np.exp(in_npy)).astype(np.float32) - if type == "sum": - out_npy = in_npy_map.sum(axis=axis, keepdims=keepdims) - elif type == "max": - out_npy = in_npy_map.max(axis=axis, keepdims=keepdims) - elif type == "min": 
- out_npy = in_npy_map.min(axis=axis, keepdims=keepdims) - elif type == "argmax": - out_npy = _my_npy_argmax(in_npy_map, axis=axis, keepdims=keepdims) - elif type == "argmin": - out_npy = _my_npy_argmin(in_npy_map, axis=axis, keepdims=keepdims) - elif type == "prod": - out_npy = in_npy_map.prod(axis=axis, keepdims=keepdims) - else: - raise NotImplementedError - data_tvm = tvm.nd.array(in_npy, ctx=ctx) - out_tvm = tvm.nd.empty(shape=out_npy.shape, ctx=ctx, dtype=out_dtype) - for _ in range(1): - foo(data_tvm, out_tvm) - if type == "argmax" or type == "argmin": - out_tvm_indices = out_tvm.asnumpy() - if keepdims: - out_tvm_indices = np.take(out_tvm_indices, indices=0, axis=axis) - if axis is None: - out_tvm_val = in_npy_map.ravel()[out_tvm_indices] - else: - other_indices = tuple(np.indices(in_shape[0:axis] + in_shape[(axis+1):])) - sel_indices = other_indices[0:axis] + (out_tvm_indices,) + other_indices[axis:] - out_tvm_val = in_npy_map[sel_indices] - if type == "argmax": - tvm.testing.assert_allclose(out_tvm_val, in_npy_map.max(axis=axis), 1E-3, 1E-3) - elif type == "argmin": - tvm.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1E-3, 1E-3) - else: - tvm.testing.assert_allclose(out_tvm.asnumpy(), out_npy, 1E-3, 1E-3) - for device in ["cuda", "opencl", "metal", "llvm", "rocm"]: - check_device(device) - - -def test_reduce_map(): - verify_reduce_map_ele(in_shape=(128, 24, 128, 24), - axis=(1, 2, 3), - keepdims=True, - type="sum") - verify_reduce_map_ele(in_shape=(128, 24 * 128 * 24), - axis=(1,), - keepdims=False, - type="max") - verify_reduce_map_ele(in_shape=(32, 128, 24), - axis=None, - keepdims=True, - type="sum") - verify_reduce_map_ele(in_shape=(128, 24, 128, 24), - axis=(0, 2), - keepdims=False, - type="min") - verify_reduce_map_ele(in_shape=(128, 4, 4, 128), - axis=(1, ), - keepdims=True, - type="prod") - verify_reduce_map_ele(in_shape=(4, 4), - axis=(0, 1), - keepdims=False, - type="prod") - verify_reduce_map_ele(in_shape=(32, 128), - axis=1, - keepdims=True, - type="argmax") - verify_reduce_map_ele(in_shape=(32, 24, 32, 24), - axis=2, - keepdims=False, - type="argmin") - verify_reduce_map_ele(in_shape=(31, 21, 15), - axis=None, - keepdims=True, - type="argmax") - verify_reduce_map_ele(in_shape=(31, 21, 15), - axis=None, - keepdims=False, - type="sum") - -if __name__ == "__main__": - test_reduce_map() diff --git a/topi/tests/python_cpp/test_topi_region.py b/topi/tests/python_cpp/test_topi_region.py deleted file mode 100644 index 28e984b70244..000000000000 --- a/topi/tests/python_cpp/test_topi_region.py +++ /dev/null @@ -1,52 +0,0 @@ -"""Test code for region""" -import logging -import numpy as np -import tvm -import topi -import topi.testing -from topi.util import get_const_tuple - -def verify_region(batch, in_size, in_channel, n, classes, coords, background, l_softmax): - '''Verify region operator by comparing outputs from tvm and numpy implementation''' - in_height = in_width = in_size - - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') - B = topi.cpp.yolo.region(A, n, classes, coords, background, l_softmax) - - a_shape = get_const_tuple(A.shape) - dtype = A.dtype - - def get_ref_data_region(): - '''Randomly initialize the data variables and get refernce output for the region operation''' - a_np = np.random.uniform(size=a_shape).astype(dtype) - b_np = topi.testing.region_python(a_np, n, classes, coords, background, l_softmax) - return a_np, b_np - - a_np, b_np = get_ref_data_region() - def check_device(device): - '''Check the device is 
available and if so, build and run the program''' - if not tvm.module.enabled(device): - print("Skip because %s is not enabled" % device) - return - print("Running on target: %s" % device) - target = topi.cpp.TEST_create_target(device) - if device == "llvm": - s = topi.cpp.generic.default_schedule(target, [B], False) - else: - s = topi.cpp.rocm.schedule_region(target, [B]) - ctx = tvm.context(device, 0) - a = tvm.nd.array(a_np, ctx) - b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) - func = tvm.build(s, [A, B], device, name="region") - func(a, b) - tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) - - for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm', 'vulkan']: - check_device(device) - -def test_region(): - verify_region(1, 19, 425, 5, 80, 4, 0, 1) - -if __name__ == "__main__": - logging.basicConfig(level=logging.DEBUG) - test_region() diff --git a/topi/tests/python_cpp/test_topi_relu.py b/topi/tests/python_cpp/test_topi_relu.py deleted file mode 100644 index 3b1b00ec8f67..000000000000 --- a/topi/tests/python_cpp/test_topi_relu.py +++ /dev/null @@ -1,90 +0,0 @@ -"""Test code for relu activation""" -import os -import numpy as np -import tvm -import topi -from topi.util import get_const_tuple - -def verify_relu(m, n, dtype): - A = tvm.placeholder((m, n), name='A', dtype=dtype) - B = topi.cpp.nn.relu(A) - assert B.dtype == dtype - - a_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(A.shape)).astype(A.dtype) - b_np = a_np * (a_np > 0) - - def check_device(device): - if not tvm.module.enabled(device): - print("Skip because %s is not enabled" % device) - return - print("Running on target: %s" % device) - target = topi.cpp.TEST_create_target(device) - if device == "llvm": - s = topi.cpp.generic.schedule_injective(target, [B]) - else: - s = topi.cpp.cuda.schedule_injective(target, [B]) - ctx = tvm.context(device, 0) - a = tvm.nd.array(a_np, ctx) - b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) - foo = tvm.build(s, [A, B], device, name="relu") - foo(a, b) - tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) - - for device in ['cuda', 'opencl', 'metal', 'rocm']: - check_device(device) - - -def verify_leaky_relu(m, alpha): - A = tvm.placeholder((m,), name='A') - B = topi.cpp.nn.leaky_relu(A, alpha) - device = "llvm" - target = topi.cpp.TEST_create_target(device) - s = topi.cpp.generic.schedule_injective(target, [B]) - - a_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(A.shape)).astype(A.dtype) - b_np = a_np * (a_np > 0) + a_np * (a_np < 0) * alpha - ctx = tvm.cpu(0) - a = tvm.nd.array(a_np, ctx) - b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) - foo = tvm.build(s, [A, B], device, name="leaky_relu") - foo(a, b) - tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) - -def verify_prelu(x, w, axis, weight_reshape): - X = tvm.placeholder((x), name='X') - W = tvm.placeholder((w), name='W') - x_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(X.shape)).astype(X.dtype) - w_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(W.shape)).astype(W.dtype) - def _prelu_numpy(x, W): - return (x < 0) * (x *W.reshape(weight_reshape)) + (x>=0) * x - - out_np = _prelu_numpy(x_np, w_np) - B = topi.cpp.nn.prelu(X, W, axis) - device = "llvm" - target = topi.cpp.TEST_create_target(device) - s = topi.cpp.generic.schedule_injective(target, [B]) - - ctx = tvm.cpu(0) - x_tvm = tvm.nd.array(x_np, ctx) - w_tvm = tvm.nd.array(w_np, ctx) - - b = 
tvm.nd.array(np.zeros(get_const_tuple(X.shape), dtype=B.dtype), ctx) - foo = tvm.build(s, [X, W, B], "llvm", name="prelu") - foo(x_tvm, w_tvm, b) - tvm.testing.assert_allclose(b.asnumpy(), out_np, rtol=1e-5) - -def test_relu(): - for dtype in ['float32', 'float64', 'int32', 'int16', 'int8', 'int64']: - verify_relu(10, 128, dtype) - -def test_leaky_relu(): - verify_leaky_relu(100, 0.5) - -def test_prelu(): - verify_prelu((1, 3, 2, 2), (3,), 1, (3, 1, 1)) - verify_prelu((1, 3, 2, 2), (2,), 2, (2, 1)) - -if __name__ == "__main__": - test_relu() - test_leaky_relu() - test_prelu() diff --git a/topi/tests/python_cpp/test_topi_reorg.py b/topi/tests/python_cpp/test_topi_reorg.py deleted file mode 100644 index f7767967c699..000000000000 --- a/topi/tests/python_cpp/test_topi_reorg.py +++ /dev/null @@ -1,52 +0,0 @@ -"""Test code for reorg""" -import logging -import numpy as np -import tvm -import topi -import topi.testing -from topi.util import get_const_tuple - -def verify_reorg(batch, in_size, in_channel, stride): - '''Verify reorg operator by comparing outputs from tvm and numpy implementation''' - in_height = in_width = in_size - - A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A') - B = topi.cpp.vision.reorg(A, stride) - - a_shape = get_const_tuple(A.shape) - dtype = A.dtype - - def get_ref_data_reorg(): - '''Randomly initialize the data variables and get refernce output for the reorg operation''' - a_np = np.random.uniform(size=a_shape).astype(dtype) - b_np = topi.testing.reorg_python(a_np, stride) - return a_np, b_np - - a_np, b_np = get_ref_data_reorg() - def check_device(device): - '''Check the device is available and if so, build and run the program''' - if not tvm.module.enabled(device): - print("Skip because %s is not enabled" % device) - return - print("Running on target: %s" % device) - target = topi.cpp.TEST_create_target(device) - if device == "llvm": - s = topi.cpp.generic.default_schedule(target, [B], False) - else: - s = topi.cpp.cuda.schedule_injective(target, [B]) - ctx = tvm.context(device, 0) - a = tvm.nd.array(a_np, ctx) - b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) - func = tvm.build(s, [A, B], device, name="reorg") - func(a, b) - tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) - - for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm', 'vulkan']: - check_device(device) - -def test_reorg(): - verify_reorg(1, 38, 64, 2) - -if __name__ == "__main__": - logging.basicConfig(level=logging.DEBUG) - test_reorg() diff --git a/topi/tests/python_cpp/test_topi_softmax.py b/topi/tests/python_cpp/test_topi_softmax.py deleted file mode 100644 index 09f838ef57ec..000000000000 --- a/topi/tests/python_cpp/test_topi_softmax.py +++ /dev/null @@ -1,82 +0,0 @@ -"""Test code for softmax""" -import os -import numpy as np -import tvm -import topi -import logging -import topi.testing -from topi.util import get_const_tuple - -def verify_softmax(m, n): - A = tvm.placeholder((m, n), name='A') - B = topi.cpp.nn.softmax(A, 1) - # confirm lower works - s = tvm.create_schedule([B.op]) - tvm.lower(s, [A, B], simple_mode=True) - - a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype) - b_np = topi.testing.softmax_python(a_np) - - def check_device(device): - if not tvm.module.enabled(device): - print("Skip because %s is not enabled" % device) - return - print("Running on target: %s" % device) - target = topi.cpp.TEST_create_target(device) - if device == "llvm": - s = topi.cpp.generic.default_schedule(target, [B], False) - else: - s 
= topi.cpp.cuda.schedule_softmax(target, [B]) - ctx = tvm.context(device, 0) - a = tvm.nd.array(a_np, ctx) - b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) - foo = tvm.build(s, [A, B], device, name="softmax") - foo(a, b) - tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) - - for device in ['cuda', 'opencl', 'metal', 'rocm']: - check_device(device) - -def test_softmax(): - verify_softmax(32, 10) - verify_softmax(3, 4) - - -def verify_log_softmax(m, n): - A = tvm.placeholder((m, n), name='A') - B = topi.cpp.nn.log_softmax(A) - # confirm lower works - s = tvm.create_schedule([B.op]) - tvm.lower(s, [A, B], simple_mode=True) - a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype) - b_np = topi.testing.log_softmax_python(a_np) - - def check_device(device): - if not tvm.module.enabled(device): - print("Skip because %s is not enabled" % device) - return - print("Running on target: %s" % device) - target = topi.cpp.TEST_create_target(device) - if device == "llvm": - s = topi.cpp.generic.default_schedule(target, [B], False) - else: - s = topi.cpp.cuda.schedule_softmax(target, [B]) - ctx = tvm.context(device, 0) - a = tvm.nd.array(a_np, ctx) - b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx) - foo = tvm.build(s, [A, B], device, name="log_softmax") - foo(a, b) - tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5) - - for device in ["cuda", "opencl", "metal", "rocm"]: - check_device(device) - - -def test_log_softmax(): - verify_log_softmax(32, 10) - verify_log_softmax(3, 4) - -if __name__ == "__main__": - logging.basicConfig(level=logging.DEBUG) - test_softmax() - test_log_softmax() diff --git a/topi/tests/python_cpp/test_topi_tensor.py b/topi/tests/python_cpp/test_topi_tensor.py deleted file mode 100644 index 762ee045e38a..000000000000 --- a/topi/tests/python_cpp/test_topi_tensor.py +++ /dev/null @@ -1,81 +0,0 @@ -"""Test code for tensor operator""" -import numpy as np -import tvm -import topi - -def verify_elemwise_sum(num_args, dtype): - shape = (3,5,4) - - tvm_placeholders = [] - for i in range(num_args): - tvm_placeholders.append( - tvm.placeholder(shape, name="data"+str(i), dtype=dtype)) - esum = topi.cpp.elemwise_sum(tvm_placeholders) - s = tvm.create_schedule([esum.op]) - - def get_ref_data(): - np_nd = [np.random.uniform(0, 10, size=shape).astype(dtype) - for i in range(num_args)] - return np_nd - np_nd = get_ref_data() - - def check_device(device): - if not tvm.module.enabled(device): - print("Skip because %s is not enabled" % device) - return - - ctx = tvm.context(device, 0) - out = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx) - f = tvm.build(s, tvm_placeholders + [esum], device, name="elemwise_sum") - tvm_nd = [tvm.nd.array(nd, ctx) for nd in np_nd] + [out] - f(*tvm_nd) - np_out = np.sum(np.array(np_nd), axis=0) - tvm.testing.assert_allclose(out.asnumpy(), np_out, rtol=1e-5) - - for device in ["llvm"]: - check_device(device) - - -def verify_full(shape, dtype, fill_value): - A = tvm.placeholder(shape, dtype=dtype, name="A") - B = topi.cpp.full_like(A, fill_value) - C = topi.cpp.full(shape, dtype, fill_value) - s1 = tvm.create_schedule([B.op]) - s2 = tvm.create_schedule([C.op]) - - def get_ref_data(): - return np.full(shape, fill_value, dtype) - np_nd = get_ref_data() - - def check_device(device): - if not tvm.module.enabled(device): - print("Skip because %s is not enabled" % device) - return - target = topi.cpp.TEST_create_target(device) - ctx = tvm.context(device, 0) - out = tvm.nd.array(np.zeros(shape, 
dtype=dtype), ctx)
-        f = tvm.build(s1, [A, B], device, name="full_like")
-        f(tvm.nd.array(np.zeros(shape, dtype), ctx), out)
-        tvm.testing.assert_allclose(out.asnumpy(), np_nd, rtol=1e-5)
-
-        f = tvm.build(s2, [C], device, name="full")
-        f(out)
-        tvm.testing.assert_allclose(out.asnumpy(), np_nd, rtol=1e-5)
-
-    for device in ["llvm"]:
-        check_device(device)
-
-
-def test_elemwise_sum():
-    verify_elemwise_sum(1, "float32")
-    verify_elemwise_sum(5, "float32")
-    verify_elemwise_sum(4, "int32")
-
-
-def test_full():
-    verify_full((3,4,5), "float32", 3.14)
-    verify_full((10,), "int32", 7)
-
-if __name__ == "__main__":
-    test_elemwise_sum()
-    test_full()
diff --git a/topi/tests/python_cpp/test_topi_transform.py b/topi/tests/python_cpp/test_topi_transform.py
deleted file mode 100644
index b411375b333e..000000000000
--- a/topi/tests/python_cpp/test_topi_transform.py
+++ /dev/null
@@ -1,384 +0,0 @@
-"""Test code for broadcasting operators."""
-import numpy as np
-import tvm
-import topi
-
-def verify_expand_dims(in_shape, out_shape, axis, num_newaxis):
-    A = tvm.placeholder(shape=in_shape, name="A")
-    B = topi.cpp.expand_dims(A, axis, num_newaxis)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
-        print("Running on target: %s" % device)
-        target = topi.cpp.TEST_create_target(device)
-        if device == "llvm":
-            s = topi.cpp.generic.schedule_injective(target, [B])
-        else:
-            s = topi.cpp.cuda.schedule_injective(target, [B])
-        foo = tvm.build(s, [A, B], device, name="expand_dims")
-        data_npy = np.random.uniform(size=in_shape).astype(A.dtype)
-        out_npy = data_npy.reshape(out_shape)
-        data_nd = tvm.nd.array(data_npy, ctx)
-        out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), ctx)
-        foo(data_nd, out_nd)
-        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
-
-    for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
-        check_device(device)
-
-
-def verify_tranpose(in_shape, axes):
-    A = tvm.placeholder(shape=in_shape, name="A")
-    B = topi.cpp.transpose(A, axes)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
-        print("Running on target: %s" % device)
-        target = topi.cpp.TEST_create_target(device)
-        if device == "llvm":
-            s = topi.cpp.generic.schedule_injective(target, [B])
-        else:
-            s = topi.cpp.cuda.schedule_injective(target, [B])
-        ctx = tvm.context(device, 0)
-        foo = tvm.build(s, [A, B], device, name="tranpose")
-        data_npy = np.arange(np.prod(in_shape)).reshape(in_shape).astype(A.dtype)
-        out_npy = data_npy.transpose(axes)
-        data_nd = tvm.nd.array(data_npy, ctx)
-        out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=B.dtype)
-        foo(data_nd, out_nd)
-        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
-
-    for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
-        check_device(device)
-
-
-def verify_reshape(src_shape, dst_shape):
-    A = tvm.placeholder(shape=src_shape, name="A")
-    B = topi.cpp.reshape(A, dst_shape)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
-        print("Running on target: %s" % device)
-        target = topi.cpp.TEST_create_target(device)
-        if device == "llvm":
-            s = topi.cpp.generic.schedule_injective(target, [B])
-        else:
-            s = topi.cpp.cuda.schedule_injective(target, [B])
-        foo = tvm.build(s, [A, B], device, name="reshape")
-        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
-        out_npy = np.reshape(data_npy, newshape=dst_shape)
-        data_nd = tvm.nd.array(data_npy, ctx)
-        out_nd = tvm.nd.empty(dst_shape, ctx=ctx, dtype=B.dtype)
-        foo(data_nd, out_nd)
-        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
-
-    for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
-        check_device(device)
-
-
-def verify_squeeze(src_shape, axis):
-    A = tvm.placeholder(shape=src_shape, name="A")
-    B = topi.cpp.squeeze(A, axis)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
-        print("Running on target: %s" % device)
-        target = topi.cpp.TEST_create_target(device)
-        if device == "llvm":
-            s = topi.cpp.generic.schedule_injective(target, [B])
-        else:
-            s = topi.cpp.cuda.schedule_injective(target, [B])
-        foo = tvm.build(s, [A, B], device, name="squeeze")
-        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
-        out_npy = np.squeeze(data_npy, axis=axis)
-        data_nd = tvm.nd.array(data_npy, ctx)
-        out_nd_shape = out_npy.shape
-        out_nd = tvm.nd.empty(out_nd_shape, ctx=ctx, dtype=B.dtype)
-        foo(data_nd, out_nd)
-        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
-
-    for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
-        check_device(device)
-
-def verify_concatenate(shapes, axis):
-    tensor_l = []
-    for i, shape in enumerate(shapes):
-        tensor_l.append(tvm.placeholder(shape, name="A" + str(i)))
-    out_tensor = topi.cpp.concatenate(tensor_l, axis)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
-        print("Running on target: %s" % device)
-        target = topi.cpp.TEST_create_target(device)
-        if device == "llvm":
-            s = topi.cpp.generic.schedule_injective(target, [out_tensor])
-        else:
-            s = topi.cpp.cuda.schedule_injective(target, [out_tensor])
-        foo = tvm.build(s, tensor_l + [out_tensor], device, name="concatenate")
-        data_npys = [np.random.normal(size=shape).astype(tensor_l[0].dtype) for shape in shapes]
-        out_npy = np.concatenate(data_npys, axis=axis)
-        data_nds = [tvm.nd.array(data_npy, ctx) for data_npy in data_npys]
-        out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=out_tensor.dtype)
-        foo(*(data_nds + [out_nd]))
-        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
-
-    for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
-        check_device(device)
-
-
-def verify_split(src_shape, indices_or_sections, axis):
-    A = tvm.placeholder(shape=src_shape, name="A")
-    tensor_l = topi.cpp.split(A, indices_or_sections, axis)
-    tensor_l = list(tensor_l)
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
-        print("Running on target: %s" % device)
-        target = topi.cpp.TEST_create_target(device)
-        if device == "llvm":
-            s = topi.cpp.generic.schedule_injective(target, tensor_l)
-        else:
-            s = topi.cpp.cuda.schedule_injective(target, tensor_l)
-        ctx = tvm.context(device, 0)
-        foo = tvm.build(s, [A] + tensor_l, device, name="split")
-        data_npy = np.random.normal(size=src_shape).astype(A.dtype)
-        out_npys = np.split(data_npy, indices_or_sections, axis=axis)
-        data_nd = tvm.nd.array(data_npy, ctx)
-        out_nds = [tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=tensor_l[0].dtype) for out_npy in out_npys]
-        foo(*([data_nd] + out_nds))
-        for out_nd, out_npy in zip(out_nds, out_npys):
-            tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
-
-    for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
-        check_device(device)
-
-def verify_take(src_shape, indices_src, axis=None):
-    src_dtype = "float32"
-    indices_dtype = "int32"
-    indices_src = np.array(indices_src, dtype=indices_dtype)
-    A = tvm.placeholder(shape=src_shape, dtype=src_dtype, name="A")
-    indices = tvm.placeholder(shape=indices_src.shape, dtype=indices_dtype, name="indices")
-    if axis is None:
-        out_tensor = topi.cpp.take(A, indices)
-    else:
-        out_tensor = topi.cpp.take(A, indices, axis)
-
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
-        print("Running on target: %s" % device)
-        with tvm.target.create(device):
-            s = topi.generic.schedule_injective(out_tensor)
-
-        foo = tvm.build(s, [A] + [indices] + [out_tensor] , device, name="take")
-        shape_size = 1
-        for i in range(len(src_shape)):
-            shape_size = shape_size * src_shape[i]
-        data_npy = np.arange(shape_size, dtype=src_dtype).reshape((src_shape))
-
-        if axis is None:
-            out_npys = np.take(data_npy, indices_src)
-        else:
-            out_npys = np.take(data_npy, indices_src, axis=axis)
-        data_nd = tvm.nd.array(data_npy, ctx)
-        indices_nd = tvm.nd.array(indices_src, ctx)
-        out_nd = tvm.nd.empty(out_npys.shape, ctx=ctx, dtype=src_dtype)
-        foo(data_nd, indices_nd, out_nd)
-        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npys)
-
-    for device in ["llvm", "opencl"]:
-        check_device(device)
-
-def verify_where(condition, x, y):
-    dtype = "float32"
-    if len(condition.shape) == 1:
-        np_out = np.array([xv if c else yv for (c,xv,yv) in zip(condition,x,y)])
-    else:
-        np_out = np.where(condition, x, y)
-    A = tvm.placeholder(shape=condition.shape, dtype=dtype, name="condition")
-    B = tvm.placeholder(shape=x.shape, dtype=dtype, name="x")
-    C = tvm.placeholder(shape=y.shape, dtype=dtype, name="y")
-    out_tensor = topi.cpp.where(A, B, C)
-
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
-        print("Running on target: %s" % device)
-        with tvm.target.create(device):
-            s = topi.generic.schedule_injective(out_tensor)
-
-        foo = tvm.build(s, [A, B, C, out_tensor], device, name="where")
-        tvm_out = tvm.nd.empty(x.shape, ctx=ctx, dtype=dtype)
-        foo(tvm.nd.array(condition, ctx), tvm.nd.array(x, ctx),
-            tvm.nd.array(y, ctx), tvm_out)
-        tvm.testing.assert_allclose(tvm_out.asnumpy(), np_out)
-
-    for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
-        check_device(device)
-
-def verify_concatenate_split(shapes, axis, indices_or_sections):
-    tensor_l_concatenate = []
-    for i, shape in enumerate(shapes):
-        tensor_l_concatenate.append(tvm.placeholder(shape, name="A" + str(i)))
-    out_tensor = topi.cpp.concatenate(tensor_l_concatenate, axis)
-    tensor_l = topi.cpp.split(out_tensor, indices_or_sections, axis)
-    tensor_l = list(tensor_l)
-    def check_device(device):
-        if not tvm.module.enabled(device):
-            print("Skip because %s is not enabled" % device)
-            return
-        print("Running on target: %s" % device)
-        target = topi.cpp.TEST_create_target(device)
-        if device == "llvm":
-            s = topi.cpp.generic.schedule_injective(target, tensor_l)
-        else:
-            s = topi.cpp.cuda.schedule_injective(target, tensor_l)
-        ctx = tvm.context(device, 0)
-        foo = tvm.build(s, tensor_l_concatenate + tensor_l, device, name="concatenate_split")
-        data_npys = [np.random.normal(size=shape).astype(tensor_l_concatenate[0].dtype) for shape in shapes]
-        out_npy_conc = np.concatenate(data_npys, axis=axis)
-        out_npys_split = np.split(out_npy_conc, indices_or_sections, axis=axis)
-        data_nds = [tvm.nd.array(data_npy, ctx) for data_npy in data_npys]
-        out_nds = [tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=tensor_l[0].dtype) for out_npy in out_npys_split]
-        foo(*(data_nds + out_nds))
-        for out_nd, out_npy in zip(out_nds, out_npys_split):
-            tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
-
-    for device in ["llvm", "cuda", "opencl", "metal", "rocm"]:
-        check_device(device)
-
-def verify_concatenate_broadcast(shapes, axis, rhs_shape):
-    B = tvm.placeholder(shape=rhs_shape, name="B")
-    tensor_l = []
-    for i, shape in enumerate(shapes):
-        tensor_l.append(tvm.placeholder(shape, name="A" + str(i)))
-    out_tensor = topi.cpp.concatenate(tensor_l, axis)
-    C = out_tensor + B
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not ctx.exist:
-            print("Skip because %s is not enabled" % device)
-            return
-        print("Running on target: %s" % device)
-        target = topi.cpp.TEST_create_target(device)
-        if device == "llvm":
-            s = topi.cpp.generic.schedule_injective(target, [C])
-        else:
-            s = topi.cpp.cuda.schedule_injective(target, [C])
-        ctx = tvm.context(device, 0)
-        foo = tvm.build(s, tensor_l + [B, C], device, name="broadcast_binary_add")
-        data_npys = [np.random.normal(size=shape).astype(tensor_l[0].dtype) for shape in shapes]
-        lhs_npy = np.concatenate(data_npys, axis=axis)
-        rhs_npy = np.random.uniform(size=rhs_shape).astype(B.dtype)
-        out_npy = lhs_npy + rhs_npy
-        data_nds = [tvm.nd.array(data_npy, ctx) for data_npy in data_npys]
-        rhs_nd = tvm.nd.array(rhs_npy, ctx)
-        out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(B.dtype), ctx)
-        for _ in range(1):
-            foo(*(data_nds + [rhs_nd] + [out_nd]))
-        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)
-
-    for device in ["llvm", "cuda", "opencl", "metal", "rocm"]:
-        check_device(device)
-
-
-def test_expand_dims():
-    verify_expand_dims((3, 10), (3, 10, 1, 1), 2, 2)
-    verify_expand_dims((3, 10), (1, 3, 10), -3, 1)
-
-
-def test_tranpose():
-    verify_tranpose((3, 10, 2), (1, 0, 2))
-    verify_tranpose((3, 10, 5), (2, 0, 1))
-    verify_tranpose((3, 10), None)
-    verify_tranpose((3, 10, 5), (2, -3, 1))
-
-
-def test_reshape():
-    verify_reshape((1, 2, 3, 4), (2, 3, 4))
-    verify_reshape((4, 2, 3, 4), (2, 4, 12))
-    verify_reshape((4, 2, 3, 4), (2, 48))
-    verify_reshape((16, ), (2, 2, 2, 2))
-
-
-def test_squeeze():
-    verify_squeeze((1, 2, 3, 4), 0)
-    verify_squeeze((1, 2, 1, 4), None)
-    verify_squeeze((1, 1, 1, 4), (1, 2))
-    verify_squeeze((1, 1, 1, 1), None)
-
-
-def test_concatenate():
-    verify_concatenate([(2,), (2,), (2,)], 0)
-    verify_concatenate([(2, 3, 4), (2, 2, 4), (2, 5, 4)], 1)
-    verify_concatenate([(1, 2, 4), (1, 2, 3), (1, 2, 7), (1, 2, 8), (1, 2, 1)], -1)
-    verify_concatenate([(5, 6, 7, 3),
-                        (16, 6, 7, 3),
-                        (12, 6, 7, 3),
-                        (8, 6, 7, 3),
-                        (2, 6, 7, 3)], 0)
-
-
-def test_split():
-    verify_split((2, 12, 3), 3, 1)
-    verify_split((2, 12, 3), 3, -1)
-    verify_split((2, 12, 3), [2, 4], 1)
-    verify_split((10, 12, 24), [5, 7, 9], -1)
-
-def test_take():
-    verify_take((4,), [1])
-    verify_take((4,), [[0,1,2,3]])
-    verify_take((3,3,3), [[11,25]])
-    verify_take((4,), [[0,1],[2,3]])
-    verify_take((4,), [1], 0)
-    verify_take((2,2), [[[1,0],[0,1]]], 0)
-    verify_take((2,2), [[[1,0],[0,1]]], 1)
-    verify_take((4,3,5,6), [[2,1,0,0]], -2)
-
-def test_where():
-    shape = (10, 3, 7, 13)
-    condition = np.random.uniform(low=-1, high=1, size=shape).astype("float32")
-    x = np.random.uniform(size=shape).astype("float32")
-    y = np.random.uniform(size=shape).astype("float32")
-    verify_where(condition, x, y)
-    condition = np.random.uniform(low=-1, high=1, size=(shape[0],)).astype("float32")
-    x = np.random.uniform(size=shape).astype("float32")
-    y = np.random.uniform(size=shape).astype("float32")
-    verify_where(condition, x, y)
-
-
-def test_regression_1():
-    verify_concatenate_split([(2, 3, 4), (2, 2, 4), (2, 5, 4)], 1, [3, 7])
-    verify_concatenate_split([(3, 4), (2, 4), (3, 4)], 0, [1, 2, 3, 4])
-
-def test_regression_2():
-    verify_concatenate_broadcast([(5, 1, 3), (5, 1, 3)], 1, [2, 1])
-    verify_concatenate_broadcast([(5, 1, 2), (5, 1, 3)], 2, [1, 5])
-
-if __name__ == "__main__":
-    test_concatenate()
-    test_tranpose()
-    test_expand_dims()
-    test_reshape()
-    test_squeeze()
-    test_split()
-    test_take()
-    test_where()
-    test_regression_1()
-    test_regression_2()
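
Note for reviewers: nearly every `verify_*` helper in the deleted file repeats the same target dispatch before building — create a C++ test target, use the generic injective schedule on `llvm`, and fall back to the CUDA injective schedule for the GPU-style backends (nvptx, cuda, opencl, metal, rocm). Distilled into one helper, the shared pattern looks like the sketch below; the function name `injective_schedule` is illustrative only and does not appear anywhere in the deleted file.

    # Sketch of the per-device scheduling dispatch shared by the verify_*
    # helpers above; "injective_schedule" is an illustrative name.
    import tvm
    import topi

    def injective_schedule(device, outs):
        target = topi.cpp.TEST_create_target(device)
        if device == "llvm":
            # CPU path: the generic C++ injective schedule
            return topi.cpp.generic.schedule_injective(target, outs)
        # All GPU-style targets reuse the CUDA injective schedule
        return topi.cpp.cuda.schedule_injective(target, outs)

    # Usage, e.g. inside verify_reshape: s = injective_schedule(device, [B])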