19 changes: 19 additions & 0 deletions python/tvm/relay/op/strategy/cuda.py
@@ -172,6 +172,25 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target):
wrap_topi_schedule(topi.cuda.schedule_conv2d_nhwc_tensorcore),
name="conv2d_nhwc_tensorcore.cuda",
plevel=20)
elif layout == "HWNC":
assert kernel_layout in ["HWOI", "HWOI16o16i", "HWOI8o32i", "HWOI32o16i"]
_, _, N, in_channels = get_const_tuple(data.shape)
pre_computed = len(kernel.shape) == 6
if pre_computed:
_, _, oc_chunk, _, oc_block_factor, _ = get_const_tuple(kernel.shape)
out_channels = oc_chunk * oc_block_factor
else:
_, _, out_channels, _ = get_const_tuple(kernel.shape)
if topi.cuda.is_shape_tensorcore_direct_qualified(
batch=N, in_channels=in_channels, num_filter=out_channels, in_dtype=data.dtype):
strategy.add_implementation(
wrap_compute_conv2d(topi.cuda.conv2d_hwnc_tensorcore),
wrap_topi_schedule(topi.cuda.schedule_conv2d_hwnc_tensorcore),
name="conv2d_hwnc_tensorcore_direct.cuda",
plevel=20)
else:
raise RuntimeError("Unsupported shape for conv2d HWNC.\
Need to satisfy tensor core schedule.")
elif layout == "NCHW4c" and data.dtype in ["int8", "uint8"]:
assert kernel_layout == "OIHW4o4i"
strategy.add_implementation(
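For context, a minimal sketch (not part of the diff) of the kind of Relay conv2d the new branch is meant to catch: int8 data in HWNC layout with an HWOI kernel, with batch and channel counts chosen so that is_shape_tensorcore_direct_qualified should accept them. The exact shapes, and the assumption of a tensor-core-capable GPU (compute capability 7.5+), are illustrative only.

import tvm
from tvm import relay

# Data is H, W, N, C and the kernel is H, W, O, I; int8 exercises the int8/uint8 path.
data = relay.var("data", shape=(56, 56, 8, 64), dtype="int8")
weight = relay.var("weight", shape=(3, 3, 64, 64), dtype="int8")
conv = relay.nn.conv2d(data, weight,
                       kernel_size=(3, 3), channels=64, padding=(1, 1),
                       data_layout="HWNC", kernel_layout="HWOI",
                       out_dtype="int32")
mod = tvm.IRModule.from_expr(relay.Function([data, weight], conv))
with tvm.transform.PassContext(opt_level=3):
    # On a qualifying GPU this should dispatch to the
    # conv2d_hwnc_tensorcore_direct.cuda implementation registered above.
    lib = relay.build(mod, target="cuda")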
1 change: 1 addition & 0 deletions python/tvm/topi/cuda/__init__.py
@@ -50,5 +50,6 @@
from .conv2d_nhwc_tensorcore import *
from .conv3d_ndhwc_tensorcore import *
from .dense_tensorcore import *
from .conv2d_hwnc_tensorcore import *
from .correlation import *
from .sparse import *
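The star import above is what lets the strategy file reach the new compute and schedule through the topi.cuda namespace; a quick sanity check (illustrative, not part of the diff):

from tvm import topi

# Both names are provided by the newly imported conv2d_hwnc_tensorcore module.
assert hasattr(topi.cuda, "conv2d_hwnc_tensorcore")
assert hasattr(topi.cuda, "schedule_conv2d_hwnc_tensorcore")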
30 changes: 30 additions & 0 deletions python/tvm/topi/cuda/conv2d_alter_op.py
@@ -171,6 +171,36 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
dispatch_ctx.update(target, new_workload, cfg)
return relay.nn.conv2d(*inputs, **new_attrs)

if topi_tmpl == "conv2d_HWNCnc_tensorcore.cuda":
assert data_layout == "HWNC" and kernel_layout == "HWOI"
assert float(tvm.gpu(0).compute_version) >= 7.5
H, W, N, CI = get_const_tuple(data.shape)
KH, KW, CO, _ = get_const_tuple(kernel.shape)

if kernel.dtype in ['int4', 'uint4'] and (CI % 32 != 0 or CO % 8 != 0) or \
kernel.dtype in ['int8', 'uint8'] and (CI % 16 != 0 or CO % 32 != 0):
return relay.nn.conv2d(*inputs, **new_attrs)

new_attrs["channels"] = CO
if kernel.dtype in ['int4', 'uint4']:
new_attrs['kernel_layout'] = 'HWOI8o32i'
ic_block_factor = 32
oc_block_factor = 8
else:
new_attrs['kernel_layout'] = 'HWOI32o16i'
ic_block_factor = 16
oc_block_factor = 32

new_kernel = te.placeholder((KH, KW, CO // oc_block_factor, CI // ic_block_factor,
oc_block_factor, ic_block_factor), dtype=kernel.dtype)

new_workload = autotvm.task.args_to_workload(
[data, new_kernel, strides, padding, dilation, out_dtype],
"conv2d_HWNCnc_tensorcore.cuda")

dispatch_ctx.update(target, new_workload, cfg)
return relay.nn.conv2d(*inputs, **new_attrs)

return None

@conv2d_legalize.register("cuda")
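To illustrate the blocked kernel layouts chosen above, here is a small numpy sketch (an illustration only; the actual repacking is performed by Relay's alter-op machinery) of reshaping an HWOI int8 kernel into the HWOI32o16i shape that new_kernel declares, using the int8/uint8 block factors from the diff:

import numpy as np

KH, KW, CO, CI = 3, 3, 64, 64
kernel_hwoi = np.random.randint(-128, 128, size=(KH, KW, CO, CI), dtype=np.int8)

oc_block_factor, ic_block_factor = 32, 16   # int8/uint8 path above
packed = (kernel_hwoi
          .reshape(KH, KW, CO // oc_block_factor, oc_block_factor,
                   CI // ic_block_factor, ic_block_factor)
          .transpose(0, 1, 2, 4, 3, 5))
# Matches the new_kernel placeholder shape:
# (KH, KW, CO // oc_block_factor, CI // ic_block_factor, oc_block_factor, ic_block_factor)
assert packed.shape == (3, 3, 2, 4, 32, 16)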