6 changes: 6 additions & 0 deletions python/tvm/expr.py
@@ -50,6 +50,12 @@ def __truediv__(self, other):
     def __rtruediv__(self, other):
         return self.__rdiv__(other)
 
+    def __floordiv__(self, other):
+        return self.__div__(other)
+
+    def __rfloordiv__(self, other):
+        return self.__rdiv__(other)
+
     def __mod__(self, other):
         return _make.Mod(self, other)

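As an aside, a minimal sketch of what the new operators enable (variable names are illustrative; `tvm.var` is TVM's symbolic-variable constructor):

```python
import tvm

a = tvm.var("a")
b = tvm.var("b")

# Before this patch, `a // b` raised TypeError under Python 3, where `//`
# dispatches to __floordiv__ rather than __div__.
c = a // b   # forwards to __div__, yielding a division Expr
d = 3 // a   # the reflected case goes through __rfloordiv__
```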
9 changes: 5 additions & 4 deletions python/tvm/make.py
@@ -52,10 +52,11 @@ def static_cast(dtype, expr):
     """
     target_type = TVMType(dtype)
     src_type = TVMType(expr.dtype)
-    if target_type.type_code == src_type.type_code\
-        and src_type.lanes == 1\
-        and target_type.lanes > 1:
-        return Broadcast(expr, target_type.lanes)
+    if target_type.type_code == src_type.type_code and src_type.bits == target_type.bits:
+        if src_type.lanes == target_type.lanes:
+            return expr
+        elif src_type.lanes == 1 and target_type.lanes > 1:
+            return Broadcast(expr, target_type.lanes)
     return Cast(dtype, expr)


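A hedged sketch of the three paths through the new `static_cast` (dtype strings use TVM's `<base>x<lanes>` convention; the example results are assumptions read off the code above, not tested output):

```python
import tvm
from tvm import make

x = tvm.var("x", dtype="float32")

make.static_cast("float32", x)    # same type code, bits, and lanes: x returned unchanged
make.static_cast("float32x4", x)  # same code/bits, lanes 1 -> 4: Broadcast(x, 4)
make.static_cast("int32", x)      # different type code: falls through to Cast
```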
1 change: 1 addition & 0 deletions topi/python/topi/__init__.py
@@ -15,3 +15,4 @@
 from . import nn
 from . import cuda
 from . import testing
+from . import util
139 changes: 45 additions & 94 deletions topi/python/topi/nn/convolution.py
@@ -1,12 +1,11 @@
-# pylint: disable=invalid-name, line-too-long, unused-variable, too-many-locals
+# pylint: disable=invalid-name, unused-variable, too-many-locals
 """Convolution operators"""
 from __future__ import absolute_import as _abs
 import tvm
-import numpy as np
-from ..util import get_const_tuple
+from ..util import simplify
+from .pad import pad, _spatial2d_pad_option
 
 
-@tvm.tag_scope(tag="conv2d_nchw")
 def conv2d_nchw(Input, Filter, stride, padding):
     """Convolution operator in NCHW layout.

@@ -31,45 +30,33 @@ def conv2d_nchw(Input, Filter, stride, padding):
"""
assert isinstance(stride, int) or len(stride) == 2
assert isinstance(padding, int) or padding in ['VALID', 'SAME']
batch, in_channel, in_height, in_width = get_const_tuple(Input.shape)
num_filter, channel, kernel_h, kernel_w = get_const_tuple(Filter.shape)
batch, in_channel, in_height, in_width = Input.shape
num_filter, channel, kernel_h, kernel_w = Filter.shape
if isinstance(stride, int):
stride_h = stride_w = stride
else:
stride_h, stride_w = stride
# compute the padding size
if isinstance(padding, int):
pad_h = pad_w = padding * 2
elif padding == 'VALID':
pad_h = 0
pad_w = 0
else: # 'SAME'
pad_h = kernel_h - 1
pad_w = kernel_w - 1
pad_top = int(np.ceil(float(pad_h) / 2))
pad_left = int(np.ceil(float(pad_w) / 2))
pad_top, pad_left, pad_down, pad_right = _spatial2d_pad_option(
padding, (kernel_h, kernel_w))
# compute the output shape
out_channel = num_filter
out_height = (in_height - kernel_h + pad_h) // stride_h + 1
out_width = (in_width - kernel_w + pad_w) // stride_w + 1
out_height = simplify((in_height - kernel_h + pad_top + pad_down) // stride_h + 1)
out_width = simplify((in_width - kernel_w + pad_left + pad_right) // stride_w + 1)
# compute graph
temp = tvm.compute(
(batch, in_channel, in_height + pad_h, in_width + pad_w),
lambda nn, cc, yy, xx: tvm.select(
tvm.all(yy >= pad_top, yy - pad_top < in_height,
xx >= pad_left, xx - pad_left < in_width),
Input[nn, cc, yy - pad_top, xx - pad_left], tvm.const(0.)),
name='temp')
pad_before = [0, 0, pad_top, pad_left]
pad_after = [0, 0, pad_down, pad_right]
temp = pad(Input, pad_before, pad_after, name="pad_temp")
rc = tvm.reduce_axis((0, in_channel), name='rc')
ry = tvm.reduce_axis((0, kernel_h), name='ry')
rx = tvm.reduce_axis((0, kernel_w), name='rx')

return tvm.compute(
(batch, out_channel, out_height, out_width),
lambda nn, ff, yy, xx: tvm.sum(
temp[nn, rc, yy * stride_h + ry, xx * stride_w + rx] * Filter[ff, rc, ry, rx],
axis=[rc, ry, rx]))
axis=[rc, ry, rx]), tag="conv2d_nchw")
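A usage sketch of the refactored operator (shapes and the import path are assumptions; with 'SAME', `_spatial2d_pad_option` pads by `kernel - 1` in total):

```python
import tvm
from topi.nn import conv2d_nchw

data = tvm.placeholder((1, 3, 224, 224), name="data")    # N, C, H, W
kernel = tvm.placeholder((64, 3, 7, 7), name="kernel")   # Co, Ci, Kh, Kw
conv = conv2d_nchw(data, kernel, stride=2, padding='SAME')
# pad_top = pad_down = 3, so out H = W = (224 - 7 + 6) // 2 + 1 = 112
# and conv.shape == (1, 64, 112, 112); conv2d_hwcn is the same computation
# with (H, W, C, N) data and (Kh, Kw, Ci, Co) filters.
```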


-@tvm.tag_scope(tag="conv2d_hwcn")
 def conv2d_hwcn(Input, Filter, stride, padding):
     """Convolution operator in HWCN layout.

@@ -93,36 +80,22 @@ def conv2d_hwcn(Input, Filter, stride, padding):
         4-D with shape [out_height, out_width, out_channel, batch]
     """
     assert isinstance(stride, int) or len(stride) == 2
     assert isinstance(padding, int) or padding in ['VALID', 'SAME']
-    in_height, in_width, in_channel, batch = get_const_tuple(Input.shape)
-    kernel_h, kernel_w, channel, num_filter = get_const_tuple(Filter.shape)
+    in_height, in_width, in_channel, batch = Input.shape
+    kernel_h, kernel_w, channel, num_filter = Filter.shape
     if isinstance(stride, int):
         stride_h = stride_w = stride
     else:
         stride_h, stride_w = stride
-    # compute the padding size
-    if isinstance(padding, int):
-        pad_h = pad_w = padding * 2
-    elif padding == 'VALID':
-        pad_h = 0
-        pad_w = 0
-    else:  # 'SAME'
-        pad_h = kernel_h - 1
-        pad_w = kernel_w - 1
-    pad_top = int(np.ceil(float(pad_h) / 2))
-    pad_left = int(np.ceil(float(pad_w) / 2))
 
+    pad_top, pad_left, pad_down, pad_right = _spatial2d_pad_option(
+        padding, (kernel_h, kernel_w))
     # compute the output shape
     out_channel = num_filter
-    out_height = (in_height - kernel_h + pad_h) // stride_h + 1
-    out_width = (in_width - kernel_w + pad_w) // stride_w + 1
-    # compute graph
-    PaddedInput = tvm.compute(
-        (in_height + pad_h, in_width + pad_w, in_channel, batch),
-        lambda yy, xx, cc, nn: tvm.select(
-            tvm.all(yy >= pad_top, yy - pad_top < in_height,
-                    xx >= pad_left, xx - pad_left < in_width),
-            Input[yy - pad_top, xx - pad_left, cc, nn], tvm.const(0.)),
-        name='PaddedInput')
+    out_height = simplify((in_height - kernel_h + pad_top + pad_down) // stride_h + 1)
+    out_width = simplify((in_width - kernel_w + pad_left + pad_right) // stride_w + 1)
+    pad_before = [pad_top, pad_left, 0, 0]
+    pad_after = [pad_down, pad_right, 0, 0]
+    PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput")
     rc = tvm.reduce_axis((0, in_channel), name='rc')
     ry = tvm.reduce_axis((0, kernel_h), name='ry')
     rx = tvm.reduce_axis((0, kernel_w), name='rx')
@@ -131,12 +104,11 @@ def conv2d_hwcn(Input, Filter, stride, padding):
         lambda yy, xx, ff, nn: tvm.sum(
             PaddedInput[yy * stride_h + ry, xx * stride_w + rx, rc, nn] * Filter[ry, rx, rc, ff],
             axis=[ry, rx, rc]),
-        name='Conv2dOutput')
+        name="Conv2dOutput", tag="conv2d_hwcn")
     return Output
 
 
-@tvm.tag_scope(tag="depthwise_conv2d")
-def depthwise_conv2d(Input, Filter, Stride, padding):
+def depthwise_conv2d(Input, Filter, stride, padding):
     """Depthwise convolution operator.
 
     Parameters
@@ -147,8 +119,8 @@ def depthwise_conv2d(Input, Filter, Stride, padding):
     Filter : tvm.Tensor
         4-D with shape [in_channel, channel_multiplier, filter_height, filter_width]
 
-    Stride : tvm.Tensor
-        1-D of size 2
+    stride : tuple of two ints
+        The spatial stride along height and width
 
     padding : str
         'VALID' or 'SAME'
@@ -158,49 +130,28 @@ def depthwise_conv2d(Input, Filter, Stride, padding):
     Output : tvm.Tensor
         4-D with shape [batch, out_channel, out_height, out_width]
     """
-    in_shape = get_const_tuple(Input.shape)
-    batch = in_shape[0]
-    in_channel = in_shape[1]
-    in_height = in_shape[2]
-    in_width = in_shape[3]
-    filter_shape = get_const_tuple(Filter.shape)
-    filter_channel = filter_shape[0]
-    channel_multiplier = filter_shape[1]
-    filter_height = filter_shape[2]
-    filter_width = filter_shape[3]
-    stride_h = Stride.asnumpy()[0]
-    stride_w = Stride.asnumpy()[1]
-    # calculate output shape
-    if padding == 'VALID':
-        out_channel = in_channel * channel_multiplier
-        out_height = (in_height - filter_height) // stride_h + 1
-        out_width = (in_width - filter_width) // stride_w + 1
-        pad_along_height = 0
-        pad_along_width = 0
-    if padding == 'SAME':
-        out_channel = in_channel * channel_multiplier
-        out_height = np.int(np.ceil(float(in_height) / float(stride_h)))
-        out_width = np.int(np.ceil(float(in_width) / float(stride_w)))
-        pad_along_height = np.int(np.max((out_height - 1) * stride_h + filter_height - in_height, 0))
-        pad_along_width = np.int(np.max((out_width - 1) * stride_w + filter_width - in_width, 0))
-        height_after_pad = in_height + pad_along_height
-        width_after_pad = in_width + pad_along_width
-        pad_top = np.int(np.ceil(float(pad_along_height) / 2))
-        pad_left = np.int(np.ceil(float(pad_along_width) / 2))
+    batch, in_channel, in_height, in_width = Input.shape
+    filter_channel, channel_multiplier, filter_height, filter_width = Filter.shape
+    stride_h, stride_w = stride
+
+    pad_top, pad_left, pad_down, pad_right = _spatial2d_pad_option(
+        padding, (filter_height, filter_width))
+    out_channel = simplify(in_channel * channel_multiplier)
+    out_height = simplify((in_height - filter_height + pad_top + pad_down) // stride_h + 1)
+    out_width = simplify((in_width - filter_width + pad_left + pad_right) // stride_w + 1)
+
     # padding stage
-    PaddedInput = tvm.compute(
-        (batch, in_channel, height_after_pad, width_after_pad),
-        lambda b, c, i, j: tvm.select(
-            tvm.all(i >= pad_top, i - pad_top < in_height, j >= pad_left, j - pad_left < in_width),
-            Input[b, c, i - pad_top, j - pad_left], tvm.const(0.0)),
-        name="PaddedInput")
+    pad_before = [0, 0, pad_top, pad_left]
+    pad_after = [0, 0, pad_down, pad_right]
+    PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput")
    # depthconv stage
     di = tvm.reduce_axis((0, filter_height), name='di')
     dj = tvm.reduce_axis((0, filter_width), name='dj')
     Output = tvm.compute(
         (batch, out_channel, out_height, out_width),
         lambda b, c, i, j: tvm.sum(
-            PaddedInput[b, c/channel_multiplier, i*stride_h + di, j*stride_w + dj] * Filter[c/channel_multiplier, c%channel_multiplier, di, dj],
+            (PaddedInput[b, c/channel_multiplier, i*stride_h + di, j*stride_w + dj] *
+             Filter[c/channel_multiplier, c%channel_multiplier, di, dj]),
             axis=[di, dj]),
-        name='DepthwiseConv2d')
+        name='DepthwiseConv2d', tag="depthwise_conv2d")
     return Output
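The signature change (a plain `stride` tuple instead of a `Stride` tensor queried via `asnumpy()`) also simplifies call sites. A hedged sketch, with assumed shapes and import path:

```python
import tvm
from topi.nn import depthwise_conv2d

data = tvm.placeholder((1, 32, 56, 56), name="data")     # N, C, H, W
kernel = tvm.placeholder((32, 2, 3, 3), name="kernel")   # C, multiplier, Kh, Kw
out = depthwise_conv2d(data, kernel, stride=(1, 1), padding='SAME')
# each of the 32 input channels gets its own 2 filters:
# out_channel = 32 * 2 = 64, so out.shape == (1, 64, 56, 56)
```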
37 changes: 18 additions & 19 deletions topi/python/topi/nn/dilate.py
@@ -6,35 +6,39 @@


 @tvm.tag_scope(tag="dilation")
-def dilate(Input, strides):
-    """Dilate Input with zeros.
+def dilate(data, strides, name="DilatedInput"):
+    """Dilate data with zeros.
 
     Parameters
     ----------
-    Input : tvm.Tensor
+    data : tvm.Tensor
         n-D, can be any layout.
 
     strides : list / tuple of n ints
         Dilation stride on each dimension, 1 means no dilation.
 
+    name : str, optional
+        The name prefix for the generated operators
+
     Returns
     -------
     Output : tvm.Tensor
-        n-D, the same layout as Input.
+        n-D, the same layout as data.
     """
-    n = len(Input.shape)
-    assert len(strides) == n, \
-        "Input dimension and strides size dismatch : %d vs %d" %(n, len(strides))
-    output_size = ()
-    for i in range(n):
-        output_size += (tvm.ir_pass.Simplify((Input.shape[i]-1)*strides[i]+1),)
-
-    def _dilate(data, *indices):
+    n = len(data.shape)
+    if len(strides) != n:
+        raise ValueError("data dimension and strides size mismatch : %d vs %d" % (
+            n, len(strides)))
+
+    out_shape = tuple(
+        tvm.ir_pass.Simplify((data.shape[i] - 1) * strides[i] + 1) for i in range(n))
+
+    def _dilate(*indices):
         not_zero = []
         index_tuple = []
         for i in range(n):
             if not util.equal_const_int(strides[i], 1):
-                index_tuple.append(indices[i]/strides[i])
+                index_tuple.append(indices[i] / strides[i])
                 not_zero.append((indices[i] % strides[i]).equal(0))
             else:
                 index_tuple.append(indices[i])
@@ -43,9 +47,4 @@ def _dilate(data, *indices):
             return tvm.select(not_zero, data(*index_tuple), tvm.const(0.0, data.dtype))
         return data(*index_tuple)
 
-    Output = tvm.compute(
-        output_size,
-        lambda *indices: _dilate(Input, *indices),
-        name='DilatedInput')
-
-    return Output
+    return tvm.compute(out_shape, _dilate, name=name)
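To close, a small sketch of the rewritten `dilate` in the 2-D case (the import path is an assumption):

```python
import tvm
from topi.nn import dilate

data = tvm.placeholder((2, 2), name="data")
out = dilate(data, [2, 2])
# out_shape = ((2 - 1) * 2 + 1, (2 - 1) * 2 + 1) = (3, 3); zeros fill the
# inserted positions: [[a, b], [c, d]] -> [[a, 0, b], [0, 0, 0], [c, 0, d]]
```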