6 changes: 6 additions & 0 deletions python/tvm/expr.py
@@ -50,6 +50,12 @@ def __truediv__(self, other):
     def __rtruediv__(self, other):
         return self.__rdiv__(other)
 
+    def __floordiv__(self, other):
+        return self.__div__(other)
+
+    def __rfloordiv__(self, other):
+        return self.__rdiv__(other)
+
     def __mod__(self, other):
         return _make.Mod(self, other)

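As an aside, a minimal sketch of what the new operators enable (variable names are illustrative; `tvm.var` is TVM's symbolic-variable constructor):

```python
import tvm

a = tvm.var("a")
b = tvm.var("b")

# Before this patch, `a // b` raised TypeError under Python 3, where `//`
# dispatches to __floordiv__ rather than __div__.
c = a // b   # forwards to __div__, yielding a division Expr
d = 3 // a   # the reflected case goes through __rfloordiv__
```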
9 changes: 5 additions & 4 deletions python/tvm/make.py
@@ -52,10 +52,11 @@ def static_cast(dtype, expr):
     """
     target_type = TVMType(dtype)
     src_type = TVMType(expr.dtype)
-    if target_type.type_code == src_type.type_code\
-        and src_type.lanes == 1\
-        and target_type.lanes > 1:
-        return Broadcast(expr, target_type.lanes)
+    if target_type.type_code == src_type.type_code and src_type.bits == target_type.bits:
+        if src_type.lanes == target_type.lanes:
+            return expr
+        elif src_type.lanes == 1 and target_type.lanes > 1:
+            return Broadcast(expr, target_type.lanes)
     return Cast(dtype, expr)


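A hedged sketch of the three paths through the new `static_cast` (dtype strings use TVM's `<base>x<lanes>` convention; the example results are assumptions read off the code above, not tested output):

```python
import tvm
from tvm import make

x = tvm.var("x", dtype="float32")

make.static_cast("float32", x)    # same type code, bits, and lanes: x returned unchanged
make.static_cast("float32x4", x)  # same code/bits, lanes 1 -> 4: Broadcast(x, 4)
make.static_cast("int32", x)      # different type code: falls through to Cast
```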
1 change: 1 addition & 0 deletions topi/python/topi/__init__.py
@@ -15,3 +15,4 @@
 from . import nn
 from . import cuda
 from . import testing
+from . import util
139 changes: 45 additions & 94 deletions topi/python/topi/nn/convolution.py
@@ -1,12 +1,11 @@
-# pylint: disable=invalid-name, line-too-long, unused-variable, too-many-locals
+# pylint: disable=invalid-name, unused-variable, too-many-locals
 """Convolution operators"""
 from __future__ import absolute_import as _abs
 import tvm
-import numpy as np
-from ..util import get_const_tuple
+from ..util import simplify
+from .pad import pad, _spatial2d_pad_option
 
 
-@tvm.tag_scope(tag="conv2d_nchw")
 def conv2d_nchw(Input, Filter, stride, padding):
     """Convolution operator in NCHW layout.

@@ -31,45 +30,33 @@ def conv2d_nchw(Input, Filter, stride, padding):
"""
assert isinstance(stride, int) or len(stride) == 2
assert isinstance(padding, int) or padding in ['VALID', 'SAME']
batch, in_channel, in_height, in_width = get_const_tuple(Input.shape)
num_filter, channel, kernel_h, kernel_w = get_const_tuple(Filter.shape)
batch, in_channel, in_height, in_width = Input.shape
num_filter, channel, kernel_h, kernel_w = Filter.shape
if isinstance(stride, int):
stride_h = stride_w = stride
else:
stride_h, stride_w = stride
# compute the padding size
if isinstance(padding, int):
pad_h = pad_w = padding * 2
elif padding == 'VALID':
pad_h = 0
pad_w = 0
else: # 'SAME'
pad_h = kernel_h - 1
pad_w = kernel_w - 1
pad_top = int(np.ceil(float(pad_h) / 2))
pad_left = int(np.ceil(float(pad_w) / 2))
pad_top, pad_left, pad_down, pad_right = _spatial2d_pad_option(
padding, (kernel_h, kernel_w))
# compute the output shape
out_channel = num_filter
out_height = (in_height - kernel_h + pad_h) // stride_h + 1
out_width = (in_width - kernel_w + pad_w) // stride_w + 1
out_height = simplify((in_height - kernel_h + pad_top + pad_down) // stride_h + 1)
out_width = simplify((in_width - kernel_w + pad_left + pad_right) // stride_w + 1)
# compute graph
temp = tvm.compute(
(batch, in_channel, in_height + pad_h, in_width + pad_w),
lambda nn, cc, yy, xx: tvm.select(
tvm.all(yy >= pad_top, yy - pad_top < in_height,
xx >= pad_left, xx - pad_left < in_width),
Input[nn, cc, yy - pad_top, xx - pad_left], tvm.const(0.)),
name='temp')
pad_before = [0, 0, pad_top, pad_left]
pad_after = [0, 0, pad_down, pad_right]
temp = pad(Input, pad_before, pad_after, name="pad_temp")
rc = tvm.reduce_axis((0, in_channel), name='rc')
ry = tvm.reduce_axis((0, kernel_h), name='ry')
rx = tvm.reduce_axis((0, kernel_w), name='rx')

return tvm.compute(
(batch, out_channel, out_height, out_width),
lambda nn, ff, yy, xx: tvm.sum(
temp[nn, rc, yy * stride_h + ry, xx * stride_w + rx] * Filter[ff, rc, ry, rx],
axis=[rc, ry, rx]))
axis=[rc, ry, rx]), tag="conv2d_nchw")
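A usage sketch of the refactored operator (shapes and the import path are assumptions; with 'SAME', `_spatial2d_pad_option` pads by `kernel - 1` in total):

```python
import tvm
from topi.nn import conv2d_nchw

data = tvm.placeholder((1, 3, 224, 224), name="data")    # N, C, H, W
kernel = tvm.placeholder((64, 3, 7, 7), name="kernel")   # Co, Ci, Kh, Kw
conv = conv2d_nchw(data, kernel, stride=2, padding='SAME')
# pad_top = pad_down = 3, so out H = W = (224 - 7 + 6) // 2 + 1 = 112
# and conv.shape == (1, 64, 112, 112); conv2d_hwcn is the same computation
# with (H, W, C, N) data and (Kh, Kw, Ci, Co) filters.
```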


-@tvm.tag_scope(tag="conv2d_hwcn")
 def conv2d_hwcn(Input, Filter, stride, padding):
     """Convolution operator in HWCN layout.

@@ -93,36 +80,22 @@ def conv2d_hwcn(Input, Filter, stride, padding):
         4-D with shape [out_height, out_width, out_channel, batch]
     """
     assert isinstance(stride, int) or len(stride) == 2
     assert isinstance(padding, int) or padding in ['VALID', 'SAME']
-    in_height, in_width, in_channel, batch = get_const_tuple(Input.shape)
-    kernel_h, kernel_w, channel, num_filter = get_const_tuple(Filter.shape)
+    in_height, in_width, in_channel, batch = Input.shape
+    kernel_h, kernel_w, channel, num_filter = Filter.shape
     if isinstance(stride, int):
         stride_h = stride_w = stride
     else:
         stride_h, stride_w = stride
-    # compute the padding size
-    if isinstance(padding, int):
-        pad_h = pad_w = padding * 2
-    elif padding == 'VALID':
-        pad_h = 0
-        pad_w = 0
-    else:  # 'SAME'
-        pad_h = kernel_h - 1
-        pad_w = kernel_w - 1
-    pad_top = int(np.ceil(float(pad_h) / 2))
-    pad_left = int(np.ceil(float(pad_w) / 2))
 
+    pad_top, pad_left, pad_down, pad_right = _spatial2d_pad_option(
+        padding, (kernel_h, kernel_w))
     # compute the output shape
     out_channel = num_filter
-    out_height = (in_height - kernel_h + pad_h) // stride_h + 1
-    out_width = (in_width - kernel_w + pad_w) // stride_w + 1
-    # compute graph
-    PaddedInput = tvm.compute(
-        (in_height + pad_h, in_width + pad_w, in_channel, batch),
-        lambda yy, xx, cc, nn: tvm.select(
-            tvm.all(yy >= pad_top, yy - pad_top < in_height,
-                    xx >= pad_left, xx - pad_left < in_width),
-            Input[yy - pad_top, xx - pad_left, cc, nn], tvm.const(0.)),
-        name='PaddedInput')
+    out_height = simplify((in_height - kernel_h + pad_top + pad_down) // stride_h + 1)
+    out_width = simplify((in_width - kernel_w + pad_left + pad_right) // stride_w + 1)
+    pad_before = [pad_top, pad_left, 0, 0]
+    pad_after = [pad_down, pad_right, 0, 0]
+    PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput")
     rc = tvm.reduce_axis((0, in_channel), name='rc')
     ry = tvm.reduce_axis((0, kernel_h), name='ry')
     rx = tvm.reduce_axis((0, kernel_w), name='rx')
@@ -131,12 +104,11 @@ def conv2d_hwcn(Input, Filter, stride, padding):
         lambda yy, xx, ff, nn: tvm.sum(
             PaddedInput[yy * stride_h + ry, xx * stride_w + rx, rc, nn] * Filter[ry, rx, rc, ff],
             axis=[ry, rx, rc]),
-        name='Conv2dOutput')
+        name="Conv2dOutput", tag="conv2d_hwcn")
     return Output
 
 
-@tvm.tag_scope(tag="depthwise_conv2d")
-def depthwise_conv2d(Input, Filter, Stride, padding):
+def depthwise_conv2d(Input, Filter, stride, padding):
     """Depthwise convolution operator.
 
     Parameters
@@ -147,8 +119,8 @@ def depthwise_conv2d(Input, Filter, Stride, padding):
     Filter : tvm.Tensor
         4-D with shape [in_channel, channel_multiplier, filter_height, filter_width]
 
-    Stride : tvm.Tensor
-        1-D of size 2
+    stride : tuple of two ints
+        The spatial stride along height and width
 
     padding : str
         'VALID' or 'SAME'
@@ -158,49 +130,28 @@ def depthwise_conv2d(Input, Filter, Stride, padding):
     Output : tvm.Tensor
         4-D with shape [batch, out_channel, out_height, out_width]
     """
-    in_shape = get_const_tuple(Input.shape)
-    batch = in_shape[0]
-    in_channel = in_shape[1]
-    in_height = in_shape[2]
-    in_width = in_shape[3]
-    filter_shape = get_const_tuple(Filter.shape)
-    filter_channel = filter_shape[0]
-    channel_multiplier = filter_shape[1]
-    filter_height = filter_shape[2]
-    filter_width = filter_shape[3]
-    stride_h = Stride.asnumpy()[0]
-    stride_w = Stride.asnumpy()[1]
-    # calculate output shape
-    if padding == 'VALID':
-        out_channel = in_channel * channel_multiplier
-        out_height = (in_height - filter_height) // stride_h + 1
-        out_width = (in_width - filter_width) // stride_w + 1
-        pad_along_height = 0
-        pad_along_width = 0
-    if padding == 'SAME':
-        out_channel = in_channel * channel_multiplier
-        out_height = np.int(np.ceil(float(in_height) / float(stride_h)))
-        out_width = np.int(np.ceil(float(in_width) / float(stride_w)))
-        pad_along_height = np.int(np.max((out_height - 1) * stride_h + filter_height - in_height, 0))
-        pad_along_width = np.int(np.max((out_width - 1) * stride_w + filter_width - in_width, 0))
-        height_after_pad = in_height + pad_along_height
-        width_after_pad = in_width + pad_along_width
-        pad_top = np.int(np.ceil(float(pad_along_height) / 2))
-        pad_left = np.int(np.ceil(float(pad_along_width) / 2))
+    batch, in_channel, in_height, in_width = Input.shape
+    filter_channel, channel_multiplier, filter_height, filter_width = Filter.shape
+    stride_h, stride_w = stride
+
+    pad_top, pad_left, pad_down, pad_right = _spatial2d_pad_option(
+        padding, (filter_height, filter_width))
+    out_channel = simplify(in_channel * channel_multiplier)
+    out_height = simplify((in_height - filter_height + pad_top + pad_down) // stride_h + 1)
+    out_width = simplify((in_width - filter_width + pad_left + pad_right) // stride_w + 1)
+
     # padding stage
-    PaddedInput = tvm.compute(
-        (batch, in_channel, height_after_pad, width_after_pad),
-        lambda b, c, i, j: tvm.select(
-            tvm.all(i >= pad_top, i - pad_top < in_height, j >= pad_left, j - pad_left < in_width),
-            Input[b, c, i - pad_top, j - pad_left], tvm.const(0.0)),
-        name="PaddedInput")
+    pad_before = [0, 0, pad_top, pad_left]
+    pad_after = [0, 0, pad_down, pad_right]
+    PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput")
    # depthconv stage
     di = tvm.reduce_axis((0, filter_height), name='di')
     dj = tvm.reduce_axis((0, filter_width), name='dj')
     Output = tvm.compute(
         (batch, out_channel, out_height, out_width),
         lambda b, c, i, j: tvm.sum(
-            PaddedInput[b, c/channel_multiplier, i*stride_h + di, j*stride_w + dj] * Filter[c/channel_multiplier, c%channel_multiplier, di, dj],
+            (PaddedInput[b, c/channel_multiplier, i*stride_h + di, j*stride_w + dj] *
+             Filter[c/channel_multiplier, c%channel_multiplier, di, dj]),
             axis=[di, dj]),
-        name='DepthwiseConv2d')
+        name='DepthwiseConv2d', tag="depthwise_conv2d")
     return Output
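The signature change (a plain `stride` tuple instead of a `Stride` tensor queried via `asnumpy()`) also simplifies call sites. A hedged sketch, with assumed shapes and import path:

```python
import tvm
from topi.nn import depthwise_conv2d

data = tvm.placeholder((1, 32, 56, 56), name="data")     # N, C, H, W
kernel = tvm.placeholder((32, 2, 3, 3), name="kernel")   # C, multiplier, Kh, Kw
out = depthwise_conv2d(data, kernel, stride=(1, 1), padding='SAME')
# each of the 32 input channels gets its own 2 filters:
# out_channel = 32 * 2 = 64, so out.shape == (1, 64, 56, 56)
```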
37 changes: 18 additions & 19 deletions topi/python/topi/nn/dilate.py
@@ -6,35 +6,39 @@


 @tvm.tag_scope(tag="dilation")
-def dilate(Input, strides):
-    """Dilate Input with zeros.
+def dilate(data, strides, name="DilatedInput"):
+    """Dilate data with zeros.
 
     Parameters
     ----------
-    Input : tvm.Tensor
+    data : tvm.Tensor
         n-D, can be any layout.
 
     strides : list / tuple of n ints
         Dilation stride on each dimension, 1 means no dilation.
 
+    name : str, optional
+        The name prefix for the generated operators
+
     Returns
     -------
     Output : tvm.Tensor
-        n-D, the same layout as Input.
+        n-D, the same layout as data.
     """
-    n = len(Input.shape)
-    assert len(strides) == n, \
-        "Input dimension and strides size dismatch : %d vs %d" %(n, len(strides))
-    output_size = ()
-    for i in range(n):
-        output_size += (tvm.ir_pass.Simplify((Input.shape[i]-1)*strides[i]+1),)
-
-    def _dilate(data, *indices):
+    n = len(data.shape)
+    if len(strides) != n:
+        raise ValueError("data dimension and strides size mismatch : %d vs %d" % (
+            n, len(strides)))
+
+    out_shape = tuple(
+        tvm.ir_pass.Simplify((data.shape[i] - 1) * strides[i] + 1) for i in range(n))
+
+    def _dilate(*indices):
         not_zero = []
         index_tuple = []
         for i in range(n):
             if not util.equal_const_int(strides[i], 1):
-                index_tuple.append(indices[i]/strides[i])
+                index_tuple.append(indices[i] / strides[i])
                 not_zero.append((indices[i] % strides[i]).equal(0))
             else:
                 index_tuple.append(indices[i])
@@ -43,9 +47,4 @@ def _dilate(data, *indices):
             return tvm.select(not_zero, data(*index_tuple), tvm.const(0.0, data.dtype))
         return data(*index_tuple)
 
-    Output = tvm.compute(
-        output_size,
-        lambda *indices: _dilate(Input, *indices),
-        name='DilatedInput')
-
-    return Output
+    return tvm.compute(out_shape, _dilate, name=name)
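To close, a small sketch of the rewritten `dilate` in the 2-D case (the import path is an assumption):

```python
import tvm
from topi.nn import dilate

data = tvm.placeholder((2, 2), name="data")
out = dilate(data, [2, 2])
# out_shape = ((2 - 1) * 2 + 1, (2 - 1) * 2 + 1) = (3, 3); zeros fill the
# inserted positions: [[a, b], [c, d]] -> [[a, 0, b], [0, 0, 0], [c, 0, d]]
```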