Merged
topi/python/topi/arm_cpu/conv2d.py (21 changes: 13 additions & 8 deletions)
@@ -167,15 +167,20 @@ def _decl_winograd(cfg, data, kernel, strides, padding, dilation, out_dtype, til
                              idxm(b*VP + bb, nW) * m + nu],
                              name='d')

-    # transform kernel
-    if pre_computed:
-        U = kernel
+    if autotvm.GLOBAL_SCOPE.in_tuning:
+        VC = cfg['tile_k'].size[-1]
+        kvshape = (KH + tile_size - 1, KW + tile_size - 1, idxd(CO, VC), CI, VC)
+        U = tvm.te.placeholder(kvshape, kernel.dtype, name="U")
     else:
-        r_kh = te.reduce_axis((0, KH), 'r_kh')
-        r_kw = te.reduce_axis((0, KW), 'r_kw')
-        U = te.compute((alpha, alpha, idxd(K, VK), C, VK), lambda eps, nu, k, c, kk:
-                       te.sum(kernel[k * VK + kk][c][r_kh][r_kw].astype(out_dtype) *
-                              G[eps][r_kh] * G[nu][r_kw], axis=[r_kh, r_kw]), name='U')
+        # transform kernel
+        if pre_computed:
+            U = kernel
+        else:
+            r_kh = te.reduce_axis((0, KH), 'r_kh')
+            r_kw = te.reduce_axis((0, KW), 'r_kw')
+            U = te.compute((alpha, alpha, idxd(K, VK), C, VK), lambda eps, nu, k, c, kk:
+                           te.sum(kernel[k * VK + kk][c][r_kh][r_kw].astype(out_dtype) *
+                                  G[eps][r_kh] * G[nu][r_kw], axis=[r_kh, r_kw]), name='U')

     # transform image
     r_eps = te.reduce_axis((0, alpha), 'r_eps')
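The pattern every hunk in this PR applies is the same: while AutoTVM is measuring candidates (autotvm.GLOBAL_SCOPE.in_tuning), the packed or transformed kernel is swapped for a bare placeholder in its final layout, so the packing compute never shows up in the tuned workload; at deployment the packing is folded into the compiled model ahead of time. Below is a minimal, self-contained sketch of that pattern; the shapes CO, CI, KH, KW, VC are illustrative and not taken from any single hunk.

import tvm
from tvm import te, autotvm

# Illustrative shapes only; the real code derives these from cfg and the kernel tensor.
CO, CI, KH, KW, VC = 64, 32, 3, 3, 8
kernel = te.placeholder((CO, CI, KH, KW), name="kernel")
kvshape = (CO // VC, CI, KH, KW, VC)

if autotvm.GLOBAL_SCOPE.in_tuning:
    # During tuning the packed kernel is a plain input, so tuning records
    # match what is compiled for deployment (packing happens ahead of time).
    kernel_vec = te.placeholder(kvshape, kernel.dtype, name="kernel_vec")
else:
    # Normal path: pack the kernel into the vectorized layout.
    kernel_vec = te.compute(
        kvshape,
        lambda co, ci, kh, kw, vc: kernel[co * VC + vc][ci][kh][kw],
        name="kernel_vec")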
topi/python/topi/arm_cpu/conv2d_spatial_pack.py (44 changes: 24 additions & 20 deletions)
@@ -109,12 +109,15 @@ def conv2d_spatial_pack_nchw(cfg, data, kernel, strides, padding, dilation,
                                data_pad[n][ci][h*VH*HSTR+vh][w*VW*WSTR+vw],
                                name='data_vec')

-    if pre_packed:
-        kernel_vec = kernel
+    if autotvm.GLOBAL_SCOPE.in_tuning:
+        kernel_vec = tvm.te.placeholder(kvshape, kernel.dtype, name="kernel")
     else:
-        kernel_vec = te.compute(kvshape, lambda co, ci, kh, kw, vc:
-                                kernel[co*VC+vc][ci][kh][kw],
-                                name='kernel_vec')
+        if pre_packed:
+            kernel_vec = kernel
+        else:
+            kernel_vec = te.compute(kvshape, lambda co, ci, kh, kw, vc:
+                                    kernel[co*VC+vc][ci][kh][kw],
+                                    name='kernel_vec')

     ci = te.reduce_axis((0, CI), name='ci')
     kh = te.reduce_axis((0, KH), name='kh')
@@ -187,12 +190,8 @@ def schedule_conv2d_spatial_pack_nchw(cfg, s, data_vec, kernel_vec,
     s[data_vec].parallel(h)

     if kernel_vec.op.name == 'kernel_vec':
-        co, _, _, _, _ = s[kernel_vec].op.axis
-        if autotvm.GLOBAL_SCOPE.in_tuning:
-            # kernel packing will be pre-computed during compilation, so we skip
-            # this part to make tuning records correct
-            s[kernel_vec].pragma(co, 'debug_skip_region')
-        else:
+        if not autotvm.GLOBAL_SCOPE.in_tuning:
+            co, _, _, _, _ = s[kernel_vec].op.axis
             s[kernel_vec].parallel(co)
     elif kernel_vec.op.name == 'kernel_vec_conv2d_transpose': # for conv2d transpose
         co, _, _, _, _ = s[kernel_vec].op.axis
@@ -267,9 +266,13 @@ def conv2d_spatial_pack_nhwc(cfg, data, kernel, strides, padding, dilation, out_
     data_vec = te.compute(dvshape, lambda n, oho, owo, ohi, owi, ic:
                           data_pad[n][oho*OHI*HSTR+ohi][owo*OWI*WSTR+owi][ic],
                           name='data_vec')
-    kernel_vec = te.compute(kvshape, lambda oco, kh, kw, ic, oci: \
-                            kernel[kh][kw][ic][oco*OCI+oci],
-                            name='kernel_vec')
+
+    if autotvm.GLOBAL_SCOPE.in_tuning:
+        kernel_vec = tvm.te.placeholder(kvshape, kernel.dtype, name="kernel")
+    else:
+        kernel_vec = te.compute(kvshape, lambda oco, kh, kw, ic, oci: \
+                                kernel[kh][kw][ic][oco*OCI+oci],
+                                name='kernel_vec')

     ic = te.reduce_axis((0, IC), name='ic')
     kh = te.reduce_axis((0, KH), name='kh')

Review comment (Contributor), left on the new in_tuning branch above: Change schedule for arm_cpu conv2d as well?
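A minimal sketch of the schedule-side counterpart the comment asks about, matching what the next hunk in this file does: when kernel_vec is only a placeholder during tuning, the packing schedule is skipped. The helper name schedule_kernel_pack is made up for illustration, and the cfg['compat'] condition from the real code is omitted.

from tvm import autotvm

def schedule_kernel_pack(s, kernel_vec):
    # kernel_vec is assumed to be the packed-kernel stage of schedule s.
    if not autotvm.GLOBAL_SCOPE.in_tuning:
        oco, kh, kw, ic, oci = s[kernel_vec].op.axis
        s[kernel_vec].vectorize(oci)
        s[kernel_vec].unroll(ic)
        s[kernel_vec].parallel(oco)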
@@ -339,12 +342,13 @@ def schedule_conv2d_spatial_pack_nhwc(cfg, s, op, output):
         s[kernel_vec].compute_at(s[conv], compat_axis)
         s[data_vec].compute_at(s[conv], compat_axis)

-    # schedule kernel pack
-    oco, kh, kw, ic, oci = kernel_vec.op.axis
-    s[kernel_vec].vectorize(oci)
-    s[kernel_vec].unroll(ic)
-    if cfg['compat'].val == 2:
-        s[kernel_vec].parallel(oco)
+    if not autotvm.GLOBAL_SCOPE.in_tuning:
+        # schedule kernel pack
+        oco, kh, kw, ic, oci = kernel_vec.op.axis
+        s[kernel_vec].vectorize(oci)
+        s[kernel_vec].unroll(ic)
+        if cfg['compat'].val == 2:
+            s[kernel_vec].parallel(oco)

     # schedule data pack
     if data_vec.op.name == 'data_vec_undilated':
topi/python/topi/bifrost/conv2d.py (24 changes: 10 additions & 14 deletions)
@@ -142,11 +142,7 @@ def _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec):
         s[data_vec].unroll(vw)

     if isinstance(kernel_vec.op, tvm.te.ComputeOp) and kernel_vec.name == 'kernel_vec':
-        if autotvm.GLOBAL_SCOPE.in_tuning:
-            # kernel packing will be pre-computed during compilation, so we skip
-            # this part to make tuning records correct
-            s[kernel_vec].pragma(s[kernel_vec].op.axis[0], 'debug_skip_region')
-        else:
+        if not autotvm.GLOBAL_SCOPE.in_tuning:
             max_threads = tvm.target.Target.current(allow_none=False).max_num_threads
             co, ci, kh, kw, vc = s[kernel_vec].op.axis
             fused = s[kernel_vec].fuse(co, ci, kh, kw, vc)
@@ -313,10 +309,15 @@ def upround(x, align):
                     data_pad[n][c][h][w],
                     name='d')

-    if pre_computed:
-        U = kernel
+    if autotvm.GLOBAL_SCOPE.in_tuning:
+        VC = cfg['tile_k'].size[-1]
+        kvshape = (KH + tile_size - 1, KW + tile_size - 1, tvm.tir.indexdiv(CO, VC), CI, VC)
+        U = tvm.te.placeholder(kvshape, kernel.dtype, name="U")
     else:
-        U = _decl_winograd_kernel_transform(kernel, tile_size, G)
+        if pre_computed:
+            U = kernel
+        else:
+            U = _decl_winograd_kernel_transform(kernel, tile_size, G)

     # V [alpha * alpha, C, P_round)
     # Perform the image transform
@@ -370,12 +371,7 @@ def _schedule_winograd(cfg, s, op):
         s[G].compute_inline()
         eps, _, _, _ = s[U].op.axis
         y, _, _, _ = s[padded_kernel].op.axis
-        if autotvm.GLOBAL_SCOPE.in_tuning:
-            # Kernel transformation will be pre-computed during compilation, so we skip
-            # this part to make tuning records correct
-            s[U].pragma(eps, 'debug_skip_region')
-            s[padded_kernel].pragma(y, 'debug_skip_region')
-        else:
+        if not autotvm.GLOBAL_SCOPE.in_tuning:
             # Pad kernel
             y, x, ky, kx = s[padded_kernel].op.axis
             s[padded_kernel].unroll(ky)

Review thread on the new if-not-in_tuning line above:

kevinthesun (Contributor), May 27, 2020: Also, the winograd kernel transformation is different from spatial_pack. We need a different path inside the winograd compute to generate the placeholder.

Author (Contributor): Yes. I put the placeholder in _decl_winograd in bifrost/conv2d.py and mali/conv2d.py.
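A minimal sketch of the winograd-specific placeholder path the thread above describes: during tuning, U is declared directly in the transformed layout with tile width alpha = KH + tile_size - 1. The concrete sizes are illustrative; the real code reads VC from cfg['tile_k'] and uses tvm.tir.indexdiv for the channel split.

from tvm import te, autotvm

CO, CI, KH, KW = 64, 32, 3, 3      # illustrative channel / kernel sizes
tile_size = 2
VC = 4                             # cfg['tile_k'].size[-1] in the real code
alpha = KH + tile_size - 1         # side length of the transformed-kernel tile

if autotvm.GLOBAL_SCOPE.in_tuning:
    # The transformed kernel is pre-computed at compile time, so during tuning
    # U is only a placeholder in the transformed layout.
    kvshape = (alpha, alpha, CO // VC, CI, VC)
    U = te.placeholder(kvshape, "float32", name="U")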
topi/python/topi/mali/conv2d.py (31 changes: 16 additions & 15 deletions)
@@ -138,14 +138,9 @@ def _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec):
         s[data_vec].unroll(vw)

     if isinstance(kernel_vec.op, tvm.te.ComputeOp) and kernel_vec.name == 'kernel_vec':
-        co, ci, kh, kw, vc = s[kernel_vec].op.axis
-        if autotvm.GLOBAL_SCOPE.in_tuning:
-            # Directly use modified data layout placeholder.
-            kvshape = (co // vc, ci, kh, kw, vc)
-            kernel_vec = tvm.te.placeholder(kvshape, kernel_vec.dtype, name="kernel")
-            s[kernel_vec] = kernel_vec
-        else:
+        if not autotvm.GLOBAL_SCOPE.in_tuning:
             max_threads = tvm.target.Target.current(allow_none=False).max_num_threads
+            co, ci, kh, kw, vc = s[kernel_vec].op.axis
             fused = s[kernel_vec].fuse(co, ci, kh, kw, vc)
             fused, vec = s[kernel_vec].split(fused, VC)
             bb, tt = s[kernel_vec].split(fused, max_threads)
@@ -280,15 +275,21 @@ def _decl_winograd(cfg, data, kernel, strides, padding, dilation, out_dtype, til
                     data_pad[(b*bnb+bb) // (nH*nW)][ci][(b*bnb+bb) // nW % nH * m + eps]
                     [(b*bnb+bb) % nW * m + nu], tvm.tir.const(0, data_pad.dtype)), name='d')

-    # transform kernel
-    if pre_computed:
-        U = kernel
+    if autotvm.GLOBAL_SCOPE.in_tuning:
+        VC = cfg['tile_k'].size[-1]
+        kvshape = (KH + tile_size - 1, KW + tile_size - 1, tvm.tir.indexdiv(CO, VC), CI, VC)
+        U = tvm.te.placeholder(kvshape, kernel.dtype, name="U")
     else:
-        r_kh = te.reduce_axis((0, KH), 'r_kh')
-        r_kw = te.reduce_axis((0, KW), 'r_kw')
-        U = te.compute((alpha, alpha, CO // bna, CI, bna), lambda eps, nu, co, ci, vco:
-                       te.sum(kernel[co * bna + vco][ci][r_kh][r_kw] * G[eps][r_kh] * G[nu][r_kw],
-                              axis=[r_kh, r_kw]), name='U')
+        # transform kernel
+        if pre_computed:
+            U = kernel
+        else:
+            r_kh = te.reduce_axis((0, KH), 'r_kh')
+            r_kw = te.reduce_axis((0, KW), 'r_kw')
+            U = te.compute((alpha, alpha, CO // bna, CI, bna), lambda eps, nu, co, ci, vco:
+                           te.sum(kernel[co * bna + vco][ci][r_kh][r_kw] *
+                                  G[eps][r_kh] * G[nu][r_kw],
+                                  axis=[r_kh, r_kw]), name='U')

     # transform image
     r_a = te.reduce_axis((0, alpha), 'r_a')