diff --git a/python/tvm/autotvm/task/task.py b/python/tvm/autotvm/task/task.py
index 42315320a569..a94a8d3262b2 100644
--- a/python/tvm/autotvm/task/task.py
+++ b/python/tvm/autotvm/task/task.py
@@ -56,7 +56,7 @@ def _encode(x):
         return ("TENSOR", get_const_tuple(x.shape), x.dtype)
     if isinstance(x, (tuple, list, container.Array)):
         return tuple([_encode(a) for a in x])
-    if isinstance(x, (str, int, float, np.int, np.float, expr.Var)):
+    if isinstance(x, (str, int, float, np.int, np.float, expr.Var, expr.Any)):
         return x
     if isinstance(x, (expr.StringImm, expr.IntImm, expr.FloatImm)):
         return x.value
diff --git a/python/tvm/relay/op/_tensor.py b/python/tvm/relay/op/_tensor.py
index 6b7f139dc7c9..907c512c4a7c 100644
--- a/python/tvm/relay/op/_tensor.py
+++ b/python/tvm/relay/op/_tensor.py
@@ -19,6 +19,7 @@
 from tvm.te.hybrid import script
 from tvm import topi
+from tvm.runtime import convert
 
 from .op import register_compute, register_shape_func
 from .op import register_broadcast_schedule, register_injective_schedule
@@ -156,11 +157,22 @@ def _full_shape_func(shape):
     return out
 
 
+@script
+def _convert_shape(shape):
+    out = output_tensor((len(shape),), "int64")
+    for i in const_range(len(shape)):
+        out[i] = int64(shape[i])
+    return out
+
+
 def full_shape_func(attrs, inputs, out_ndims):
     """
     Shape func for full.
     """
-    return [_full_shape_func(inputs[1])]
+    if len(inputs) > 1:
+        return [_full_shape_func(inputs[1])]
+
+    return [_convert_shape(convert(attrs.shape))]
 
 
 def no_data_full_shape_func(attrs, inputs, out_ndims):
@@ -216,9 +228,9 @@ def elemwise_shape_func(attrs, inputs, _):
 
 
 register_shape_func("cast", False, elemwise_shape_func)
-register_shape_func("zeros", False, full_shape_func)
+register_shape_func("zeros", False, no_data_full_shape_func)
 register_shape_func("zeros_like", False, elemwise_shape_func)
-register_shape_func("ones", False, full_shape_func)
+register_shape_func("ones", False, no_data_full_shape_func)
 register_shape_func("ones_like", False, elemwise_shape_func)
 register_shape_func("full", False, full_shape_func)
 register_shape_func("full_like", False, elemwise_shape_func)
@@ -257,3 +269,6 @@ def elemwise_shape_func(attrs, inputs, _):
 register_shape_func("floor", False, elemwise_shape_func)
 register_shape_func("log", False, elemwise_shape_func)
 register_shape_func("device_copy", False, elemwise_shape_func)
+register_shape_func("clip", False, elemwise_shape_func)
+register_shape_func("log2", False, elemwise_shape_func)
+register_shape_func("sigmoid", False, elemwise_shape_func)
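Note: with clip, log2, and sigmoid now registered against elemwise_shape_func, a program whose tensor shapes are only known at runtime can route these ops through the VM. The snippet below is a minimal sketch of that flow, not part of the patch; it assumes a CPU build with the llvm target and uses illustrative shapes.

import numpy as np
import tvm
from tvm import relay

# Both dimensions are unknown until runtime.
x = relay.var("x", shape=(relay.Any(), relay.Any()), dtype="float32")
y = relay.clip(relay.sigmoid(x), 0.1, 0.9)
mod = tvm.IRModule.from_expr(relay.Function([x], y))

# The VM uses the registered shape functions to size the output buffer.
ex = relay.create_executor("vm", mod=mod, ctx=tvm.cpu(), target="llvm")
res = ex.evaluate()(np.random.rand(3, 7).astype("float32"))
print(res.asnumpy().shape)  # (3, 7)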
+ """ + axis = get_const_int(attrs.axis) + if axis < 0: + axis = inputs[0].shape[0] + axis + return [_repeat_shape_func(inputs[0], attrs.repeats, convert(axis))] + + +@_reg.register_shape_func("broadcast_to_like", False) +def broadcast_to_like_shape_func(attrs, inputs, _): + return [topi.math.identity(inputs[1])] + + +@script +def _stack_shape_func(data_shape, axis, num_inputs): + out = output_tensor((data_shape.shape[0] + 1,), "int64") + + for i in const_range(data_shape.shape[0] + 1): + if i == axis: + out[i] = int64(num_inputs) + elif i < axis: + out[i] = data_shape[i] + else: + out[i] = data_shape[i - 1] + + return out + +@_reg.register_shape_func("stack", False) +def stack_shape_func(attrs, inputs, _): + axis = get_const_int(attrs.axis) + if axis < 0: + axis += inputs[0].shape[0] + 1 + return [_stack_shape_func(inputs[0], convert(axis), convert(len(inputs)))] diff --git a/python/tvm/relay/op/nn/_nn.py b/python/tvm/relay/op/nn/_nn.py index df29d8841c36..00cc94c07af3 100644 --- a/python/tvm/relay/op/nn/_nn.py +++ b/python/tvm/relay/op/nn/_nn.py @@ -773,6 +773,49 @@ def conv2d_NCHWc_shape_func(attrs, inputs, _): ] +@script +def _conv2d_transpose_nchw_shape_func(dshape, kshape, strides, + padding, dilation, output_padding): + out = output_tensor((dshape.shape[0],), "int64") + kheight = kshape[2] + kwidth = kshape[3] + dilated_kh = (kheight - 1) * dilation[0] + 1 + dilated_kw = (kwidth - 1) * dilation[1] + 1 + + out_height = strides[0] * (dshape[2] - 1) + dilated_kh - \ + 2 * padding[0] + output_padding[0] + out_width = strides[1] * (dshape[3] - 1) + dilated_kw - \ + 2 * padding[1] + output_padding[1] + + out[0] = dshape[0] + out[1] = kshape[1] + out[2] = out_height + out[3] = out_width + return out + + +@reg.register_shape_func("nn.conv2d_transpose", False) +def conv2d_transpose_nchw_shape_func(attrs, inputs, _): + """ + Shape function for conv2d_transpose op. + """ + strides = get_const_tuple(attrs.strides) + padding = get_const_tuple(attrs.padding) + dilation = get_const_tuple(attrs.dilation) + output_padding = get_const_tuple(attrs.output_padding) + + return [ + _conv2d_transpose_nchw_shape_func( + inputs[0], + inputs[1], + convert(strides), + convert(padding), + convert(dilation), + convert(output_padding) + ) + ] + + @script def _pool2d_shape_func(data_shape, pool_size, strides, padding, height_axis, width_axis): out = output_tensor((data_shape.shape[0],), "int64") diff --git a/python/tvm/relay/op/vision/_vision.py b/python/tvm/relay/op/vision/_vision.py index 85bd8a2cda34..28e21e9b7e9f 100644 --- a/python/tvm/relay/op/vision/_vision.py +++ b/python/tvm/relay/op/vision/_vision.py @@ -20,6 +20,8 @@ from tvm import topi from tvm.te.hybrid import script +from tvm.runtime import convert + from .. import op as reg from .. 
diff --git a/python/tvm/relay/op/vision/_vision.py b/python/tvm/relay/op/vision/_vision.py
index 85bd8a2cda34..28e21e9b7e9f 100644
--- a/python/tvm/relay/op/vision/_vision.py
+++ b/python/tvm/relay/op/vision/_vision.py
@@ -20,6 +20,8 @@
 from tvm import topi
 from tvm.te.hybrid import script
+from tvm.runtime import convert
+
 from .. import op as reg
 from .. import strategy
 from ..op import OpPattern
@@ -81,3 +83,18 @@ def nms_shape_func(attrs, inputs, _):
     if attrs.return_indices:
         return _nms_shape_func(inputs[0])
     return [topi.math.identity(inputs[0])]
+
+
+@script
+def _roi_align_shape_func(data_shape, rois_shape, pooled_size):
+    out = output_tensor((4,), "int64")
+    out[0] = rois_shape[0]
+    out[1] = data_shape[1]
+    out[2] = int64(pooled_size[0])
+    out[3] = int64(pooled_size[1])
+    return out
+
+
+@reg.register_shape_func("vision.roi_align", False)
+def roi_align_shape_func(attrs, inputs, _):
+    return [_roi_align_shape_func(inputs[0], inputs[1],
+                                  convert(attrs.pooled_size))]
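Note: with the shape function above registered, vision.roi_align can run through the VM when the number of boxes is only known at runtime; the output keeps the static channel and pooled dimensions and picks up the dynamic roi count from the rois tensor. A minimal sketch (tensor sizes are illustrative; assumes a CPU build with the llvm target):

import numpy as np
import tvm
from tvm import relay

data = relay.var("data", shape=(1, 64, 56, 56), dtype="float32")
rois = relay.var("rois", shape=(relay.Any(), 5), dtype="float32")
out = relay.vision.roi_align(
    data, rois, pooled_size=(7, 7), spatial_scale=0.0625, sample_ratio=2
)
mod = tvm.IRModule.from_expr(relay.Function([data, rois], out))

ex = relay.create_executor("vm", mod=mod, ctx=tvm.cpu(), target="llvm")
data_np = np.random.rand(1, 64, 56, 56).astype("float32")
# Two rois in [batch_index, w_start, h_start, w_end, h_end] format.
rois_np = np.array([[0, 0, 0, 32, 32], [0, 8, 8, 48, 48]], dtype="float32")
res = ex.evaluate()(data_np, rois_np)
print(res.asnumpy().shape)  # (2, 64, 7, 7): the roi count is resolved at runtime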
diff --git a/python/tvm/topi/scatter.py b/python/tvm/topi/scatter.py
index 347105796273..f1c307a43a44 100644
--- a/python/tvm/topi/scatter.py
+++ b/python/tvm/topi/scatter.py
@@ -32,8 +32,8 @@ def _scatter_1d(data, indices, updates):
 @hybrid.script
 def _scatter_2d(data, indices, updates, axis):
     out = output_tensor(data.shape, data.dtype)
-    for i in const_range(data.shape[0]):
-        for j in const_range(data.shape[1]):
+    for i in range(data.shape[0]):
+        for j in range(data.shape[1]):
             out[i, j] = data[i, j]
     if axis == 0:
         for i in range(indices.shape[0]):
@@ -54,14 +54,14 @@ def _scatter_2d(data, indices, updates, axis):
 @hybrid.script
 def _scatter_3d(data, indices, updates, axis):
     out = output_tensor(data.shape, data.dtype)
-    for i in const_range(data.shape[0]):
-        for j in const_range(data.shape[1]):
-            for k in const_range(data.shape[2]):
+    for i in range(data.shape[0]):
+        for j in range(data.shape[1]):
+            for k in range(data.shape[2]):
                 out[i, j, k] = data[i, j, k]
     if axis == 0:
         for i in range(indices.shape[0]):
             for j in range(indices.shape[1]):
-                for k in const_range(indices.shape[2]):
+                for k in range(indices.shape[2]):
                     out[
                         indices[i, j, k]
                         if indices[i, j, k] >= 0
@@ -72,7 +72,7 @@ def _scatter_3d(data, indices, updates, axis):
     elif axis == 1:
         for i in range(indices.shape[0]):
             for j in range(indices.shape[1]):
-                for k in const_range(indices.shape[2]):
+                for k in range(indices.shape[2]):
                     out[
                         i,
                         indices[i, j, k]
@@ -83,7 +83,7 @@ def _scatter_3d(data, indices, updates, axis):
     else:
         for i in range(indices.shape[0]):
             for j in range(indices.shape[1]):
-                for k in const_range(indices.shape[2]):
+                for k in range(indices.shape[2]):
                     out[
                         i,
                         j,
@@ -98,17 +98,17 @@ def _scatter_3d(data, indices, updates, axis):
 @hybrid.script
 def _scatter_4d(data, indices, updates, axis):
     out = output_tensor(data.shape, data.dtype)
-    for i in const_range(data.shape[0]):
-        for j in const_range(data.shape[1]):
-            for k in const_range(data.shape[2]):
-                for l in const_range(data.shape[3]):
+    for i in range(data.shape[0]):
+        for j in range(data.shape[1]):
+            for k in range(data.shape[2]):
+                for l in range(data.shape[3]):
                     out[i, j, k, l] = data[i, j, k, l]
     if axis == 0:
         for i in range(indices.shape[0]):
             for j in range(indices.shape[1]):
-                for k in const_range(indices.shape[2]):
-                    for l in const_range(indices.shape[3]):
+                for k in range(indices.shape[2]):
+                    for l in range(indices.shape[3]):
                         out[
                             indices[i, j, k, l]
                             if indices[i, j, k, l] >= 0
@@ -120,8 +120,8 @@ def _scatter_4d(data, indices, updates, axis):
     elif axis == 1:
         for i in range(indices.shape[0]):
             for j in range(indices.shape[1]):
-                for k in const_range(indices.shape[2]):
-                    for l in const_range(indices.shape[3]):
+                for k in range(indices.shape[2]):
+                    for l in range(indices.shape[3]):
                         out[
                             i,
                             indices[i, j, k, l]
@@ -133,8 +133,8 @@ def _scatter_4d(data, indices, updates, axis):
     elif axis == 2:
        for i in range(indices.shape[0]):
            for j in range(indices.shape[1]):
-                for k in const_range(indices.shape[2]):
-                    for l in const_range(indices.shape[3]):
+                for k in range(indices.shape[2]):
+                    for l in range(indices.shape[3]):
                         out[
                             i,
                             j,
@@ -146,8 +146,8 @@ def _scatter_4d(data, indices, updates, axis):
     else:
         for i in range(indices.shape[0]):
             for j in range(indices.shape[1]):
-                for k in const_range(indices.shape[2]):
-                    for l in const_range(indices.shape[3]):
+                for k in range(indices.shape[2]):
+                    for l in range(indices.shape[3]):
                         out[
                             i,
                             j,
diff --git a/python/tvm/topi/x86/conv2d.py b/python/tvm/topi/x86/conv2d.py
index 47fb48ec586f..2723a1cbe24f 100644
--- a/python/tvm/topi/x86/conv2d.py
+++ b/python/tvm/topi/x86/conv2d.py
@@ -140,6 +140,17 @@ def _pack_data(cfg, data, kernel):
     ic_chunk = ic // ic_bn
     oc_chunk = oc // oc_bn
 
+    # Handle dynamic shape to pass tuning dispatch.
+    if isinstance(n, tvm.tir.Any):
+        n = tvm.te.size_var("n")
+    if isinstance(ih, tvm.tir.Any):
+        ih = tvm.te.size_var("ih")
+    if isinstance(iw, tvm.tir.Any):
+        iw = tvm.te.size_var("iw")
+    if isinstance(ic, tvm.tir.Any):
+        raise RuntimeError("Dynamic input channel is not supported for conv2d.")
+
     data = te.compute(
         (n, ic_chunk, ih, iw, ic_bn),
         lambda bs, c, h, w, vc: data[bs, c * ic_bn + vc, h, w],
diff --git a/python/tvm/topi/x86/dense.py b/python/tvm/topi/x86/dense.py
index e318493898e7..b0cc71acb232 100644
--- a/python/tvm/topi/x86/dense.py
+++ b/python/tvm/topi/x86/dense.py
@@ -236,7 +236,8 @@ def dense_blas_common(cfg, data, weight, bias, out_dtype, lib):
     """Compute dense using a BLAS library"""
     M, K = get_const_tuple(data.shape)
     N, _ = get_const_tuple(weight.shape)
-    cfg.add_flop(M * K * N * 2)
+    if isinstance(M, int) and isinstance(K, int) and isinstance(N, int):
+        cfg.add_flop(M * K * N * 2)
     if data.dtype == "uint8" and weight.dtype == "int8" and out_dtype == "int32":
         if not hasattr(lib, "matmul_u8s8s32"):
             raise NotImplementedError(
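Note: the two x86 changes above follow the same defensive pattern for symbolic dimensions: when a dimension may be tvm.tir.Any, either swap in a fresh te.size_var so downstream te.compute calls stay symbolic, or skip logic (such as FLOP accounting) that only makes sense for concrete integers. A stripped-down illustration of that pattern (the helper name is hypothetical, not part of the patch):

import tvm
from tvm import te

def as_symbolic_dim(dim, name):
    # Keep concrete ints and existing vars; replace Any with a named size_var.
    if isinstance(dim, tvm.tir.Any):
        return te.size_var(name)
    return dim

# e.g. a placeholder whose batch dimension is dynamic
n = as_symbolic_dim(tvm.tir.Any(), "n")
data = te.placeholder((n, 64, 224, 224), name="data")
print(data.shape)  # [n, 64, 224, 224]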
diff --git a/python/tvm/topi/x86/roi_align.py b/python/tvm/topi/x86/roi_align.py
index fd65053293aa..baa23ad2a135 100644
--- a/python/tvm/topi/x86/roi_align.py
+++ b/python/tvm/topi/x86/roi_align.py
@@ -25,7 +25,7 @@
 
 @hybrid.script
-def roi_align_nchw_ir(data, rois, w_pc, pos_pc, pooled_size, spatial_scale, sample_ratio):
+def roi_align_nchw_ir(data, rois, num_rois, w_pc, pos_pc, pooled_size, spatial_scale, sample_ratio):
     """Hybrid routing fo ROI align operator in NCHW layout.
 
     Parameters
     ----------
@@ -37,6 +37,10 @@ def roi_align_nchw_ir(data, rois, w_pc, pos_pc, pooled_size, spatial_scale, samp
         2-D with shape [num_roi, 5]. The last dimension should be in format of
         [batch_index, w_start, h_start, w_end, h_end]
 
+    num_rois : tvm.tir.IntImm or tvm.tir.Var
+        Number of rois. We need to pass it in since hybrid script doesn't support
+        binding a variable to a symbolic dim.
+
     w_pc : tvm.te.Tensor or numpy NDArray
         3-D weight pre-calculation buffer
 
@@ -61,7 +65,6 @@ def roi_align_nchw_ir(data, rois, w_pc, pos_pc, pooled_size, spatial_scale, samp
     channels = data.shape[1]
     height = data.shape[2]
     width = data.shape[3]
-    num_rois = rois.shape[0]
     pooled_size_h = pooled_size[0]
     pooled_size_w = pooled_size[1]
     output = output_tensor((num_rois, channels, pooled_size_h, pooled_size_w), data.dtype)
@@ -235,6 +238,7 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
     _, _, height, width = get_const_tuple(data.shape)
     max_roi_bin_grid_h = math.ceil(height / pooled_size[0])
     max_roi_bin_grid_w = math.ceil(width / pooled_size[1])
+    num_rois = rois.shape[0]
     max_pc_shape = (
         rois.shape[0],
         max_roi_bin_grid_h * max_roi_bin_grid_w * pooled_size[0] * pooled_size[1],
@@ -247,5 +251,5 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
     spatial_scale = tvm.tir.const(spatial_scale, "float32")
     sample_ratio = tvm.tir.const(sample_ratio, "int32")
     return roi_align_nchw_ir(
-        data, rois, w_pc_buffer, pos_pc_buffer, pooled_size, spatial_scale, sample_ratio
+        data, rois, num_rois, w_pc_buffer, pos_pc_buffer, pooled_size, spatial_scale, sample_ratio
     )
diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc
index e3d095055284..bb1b8d788df1 100644
--- a/src/relay/op/tensor/transform.cc
+++ b/src/relay/op/tensor/transform.cc
@@ -310,7 +310,9 @@ bool StackRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
     CHECK_EQ(e_dtype, dtype) << "relay.stack requires all tensors have the same dtype";
     for (size_t j = 0; j < first->shape.size(); ++j) {
       if (j == static_cast<size_t>(axis)) continue;
-      if (reporter->AssertEQ(first->shape[j], e->shape[j])) continue;
+      if (first->shape[j].as<AnyNode>() || e->shape[j].as<AnyNode>() ||
+          reporter->AssertEQ(first->shape[j], e->shape[j]))
+        continue;
       throw Error(
           "relay.stack requires all tensors have the same shape "
           "on non-stacking axes");
@@ -1292,7 +1294,11 @@ bool RepeatRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
   for (int i = 0; i < pivot; ++i) {
     oshape.emplace_back(data->shape[i]);
   }
-  oshape.emplace_back(data->shape[pivot] * repeats);
+  if (data->shape[pivot].as<AnyNode>()) {
+    oshape.emplace_back(Any());
+  } else {
+    oshape.emplace_back(data->shape[pivot] * repeats);
+  }
   for (int i = pivot + 1; i < ndim; ++i) {
     oshape.emplace_back(data->shape[i]);
   }
@@ -3243,7 +3249,6 @@ RELAY_REGISTER_OP("adv_index")
     .add_type_rel("AdvIndex", AdvIndexRel)
     .set_attr<TOpIsStateful>("TOpIsStateful", false)
     .set_attr<TOpPattern>("TOpPattern", kInjective)
-    .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ElemwiseArbitraryLayout)
     .set_attr<FTVMCompute>("FTVMCompute", AdvIndexCompute);
 
 }  // namespace relay
diff --git a/src/relay/op/tensor/unary.cc b/src/relay/op/tensor/unary.cc
index 938142fccc7d..ba8833e39772 100644
--- a/src/relay/op/tensor/unary.cc
+++ b/src/relay/op/tensor/unary.cc
@@ -449,7 +449,6 @@ RELAY_REGISTER_OP("shape_of")
     // Use kOpaque for shape_of op for now since it won't be performance critic,
    // and it makes things easier for dynamic shape func
     .set_attr<TOpPattern>("TOpPattern", kOpaque)
-    .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ElemwiseArbitraryLayout)
     .set_support_level(10)
     .set_attr<FTVMCompute>("FTVMCompute", ShapeOfCompute);
 
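Note: the relaxed StackRel and RepeatRel relations above let type inference carry Any through instead of rejecting the program; the hybrid shape functions registered earlier then supply the concrete extents at runtime. A small sketch of what RepeatRel now infers (illustrative only):

import tvm
from tvm import relay

x = relay.var("x", shape=(relay.Any(), 2), dtype="float32")
y = relay.repeat(x, repeats=2, axis=0)
mod = tvm.IRModule.from_expr(relay.Function([x], y))
mod = relay.transform.InferType()(mod)
# The repeated axis stays dynamic; the inferred type is Tensor[(?, 2), float32].
print(mod["main"].body.checked_type)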
diff --git a/tests/python/relay/test_any.py b/tests/python/relay/test_any.py
index b2b0c19416d5..2dde8f98067b 100644
--- a/tests/python/relay/test_any.py
+++ b/tests/python/relay/test_any.py
@@ -36,9 +36,10 @@ def any_dims(ndim):
     return tuple(shape)
 
 
-def check_result(args, mod, expected, flatten=False, assert_shape=False, only_vm=False):
+def check_result(args, mod, expected, flatten=False, assert_shape=False, only_vm=False, targets=None):
     for kind in ["debug", "vm"]:
-        for tgt, ctx in tvm.testing.enabled_targets():
+        targets = targets or tvm.testing.enabled_targets()
+        for tgt, ctx in targets:
             if kind == "debug" and (only_vm or ctx.device_type != tvm.cpu().device_type):
                 continue
             ex = relay.create_executor(kind, mod=mod, ctx=ctx, target=tgt)
@@ -454,11 +455,10 @@ def verify_any_conv2d_NCHWc(
     check_result([data_np, kernel_np], mod, ref_out_shape, assert_shape=True)
 
 
-# TODO(@kevinthesun): Need to fix the compute in conv2d_NCHWc to support any
-@pytest.mark.skip
+# TODO(@kevinthesun): Support dynamic input height and width.
 def test_any_conv2d_NCHWc():
     verify_any_conv2d_NCHWc(
-        (relay.Any(), 8, relay.Any(), relay.Any(), 8),
+        (relay.Any(), 8, 224, 224, 8),
         (8, 8, 3, 3, 8, 8),
         (1, 1),
         (1, 1),
@@ -470,7 +470,7 @@ def test_any_conv2d_NCHWc():
         (1, 8, 224, 224, 8),
     )
     verify_any_conv2d_NCHWc(
-        (relay.Any(), 8, relay.Any(), relay.Any(), 8),
+        (relay.Any(), 8, 224, 224, 8),
         (8, 8, 3, 3, 8, 8),
         (1, 1),
         (1, 1),
@@ -478,8 +478,66 @@ def test_any_conv2d_NCHWc():
         "NCHW8c",
         "OIHW8i8o",
         "NCHW8c",
-        (1, 8, 224, 224, 8),
-        (1, 8, 222, 222, 8),
+        (2, 8, 224, 224, 8),
+        (2, 8, 222, 222, 8),
+    )
+
+
+def verify_any_conv2d_transpose_nchw(
+    data_shape,
+    kernel_shape,
+    strides,
+    padding,
+    dilation,
+    groups,
+    static_data_shape,
+    ref_out_shape,
+    output_padding,
+):
+    mod = tvm.IRModule()
+    dtype = "float32"
+    data = relay.var("data", shape=data_shape, dtype=dtype)
+    kernel = relay.var("kernel", shape=kernel_shape, dtype=dtype)
+    y = relay.nn.conv2d_transpose(
+        data,
+        kernel,
+        strides,
+        padding,
+        dilation,
+        groups,
+        kernel_size=kernel_shape[2:4],
+        output_padding=output_padding,
+    )
+    mod["main"] = relay.Function([data, kernel], y)
+    data_np = np.random.uniform(size=static_data_shape).astype(dtype)
+    kernel_np = np.random.uniform(size=kernel_shape).astype(dtype)
+    check_result([data_np, kernel_np], mod, ref_out_shape, assert_shape=True,
+                 targets=[('llvm', tvm.cpu())])
+
+
+# TODO(@kevinthesun): Support dynamic input height and width.
+def test_any_conv2d_transpose_nchw():
+    verify_any_conv2d_transpose_nchw(
+        (relay.Any(), 64, 224, 224),
+        (64, 192, 3, 3),
+        (1, 1),
+        (1, 1),
+        (1, 1),
+        1,
+        (2, 64, 224, 224),
+        (2, 192, 224, 224),
+        (0, 0),
+    )
+    verify_any_conv2d_transpose_nchw(
+        (relay.Any(), 32, 224, 224),
+        (32, 64, 3, 3),
+        (2, 2),
+        (1, 1),
+        (1, 1),
+        1,
+        (1, 32, 224, 224),
+        (1, 64, 448, 448),
+        (1, 1)
     )
@@ -1084,5 +1142,44 @@ def test_any_adv_index():
     check_result([np_data, np_index, np_index], mod, ref_res)
 
 
+def verify_any_repeat(data_shape, np_dshape, repeats, axis):
+    mod = tvm.IRModule()
+    dtype = "float32"
+    data = relay.var("data", shape=data_shape, dtype=dtype)
+    y = relay.repeat(data, repeats, axis)
+    mod["main"] = relay.Function([data], y)
+    np_data = np.random.uniform(size=np_dshape).astype(dtype)
+    ref_res = np.repeat(np_data, repeats, axis)
+    check_result([np_data], mod, ref_res)
+
+
+@tvm.testing.uses_gpu
+def test_any_repeat():
+    verify_any_repeat(any_dims(2), (1, 2), 2, 0)
+    verify_any_repeat(any_dims(1), (3,), 3, -1)
+    verify_any_repeat(any_dims(4), (2, 1, 1, 4), 4, 2)
+
+
+def verify_any_stack(data_shape, np_dshape, num_data, axis):
+    mod = tvm.IRModule()
+    dtype = "float32"
+    inputs = []
+    for i in range(num_data):
+        inputs.append(relay.var("data{}".format(i), shape=data_shape, dtype=dtype))
+    y = relay.stack(inputs, axis)
+    mod["main"] = relay.Function(inputs, y)
+    np_inputs = []
+    for _ in range(num_data):
+        np_inputs.append(np.random.uniform(size=np_dshape).astype(dtype))
+    ref_res = np.stack(np_inputs, axis)
+    check_result(np_inputs, mod, ref_res)
+
+
+@tvm.testing.uses_gpu
+def test_any_stack():
+    verify_any_stack(any_dims(2), (1, 2), 3, 0)
+    verify_any_stack(any_dims(1), (3,), 4, -1)
+    verify_any_stack(any_dims(4), (2, 1, 1, 4), 2, 2)
+
+
 if __name__ == "__main__":
     pytest.main([__file__])
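Note: the stack test exercises the same machinery end to end; outside the test harness, an equivalent run looks roughly like this (a sketch, assuming a CPU build with the llvm target and the VM executor):

import numpy as np
import tvm
from tvm import relay

xs = [relay.var("x{}".format(i), shape=(relay.Any(), 3), dtype="float32") for i in range(3)]
mod = tvm.IRModule.from_expr(relay.Function(xs, relay.stack(xs, axis=0)))

ex = relay.create_executor("vm", mod=mod, ctx=tvm.cpu(), target="llvm")
np_xs = [np.random.rand(5, 3).astype("float32") for _ in range(3)]
res = ex.evaluate()(*np_xs)
print(res.asnumpy().shape)  # (3, 5, 3): the stacked axis comes from the number of inputs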