From badd8608d41a8b10efc4f373de03c9227f189e78 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 12 Nov 2019 00:49:05 -0800 Subject: [PATCH 01/22] [TOPI][Relay][OP] Dynamic NMS and strided_slice --- include/tvm/relay/attrs/transform.h | 15 +- include/tvm/relay/attrs/vision.h | 26 +- python/tvm/relay/frontend/common.py | 2 +- python/tvm/relay/frontend/tensorflow.py | 51 +++ .../tvm/relay/frontend/tensorflow_parser.py | 4 +- python/tvm/relay/op/_transform.py | 12 + python/tvm/relay/op/transform.py | 10 +- python/tvm/relay/op/vision/nms.py | 36 ++- python/tvm/relay/testing/tf.py | 2 +- src/relay/op/tensor/transform.cc | 306 +++++++++++------- src/relay/op/vision/nms.cc | 34 +- .../transforms/combine_parallel_conv2d.cc | 19 +- src/relay/transforms/pattern_util.h | 2 +- .../frontend/tensorflow/test_control_flow.py | 26 +- .../frontend/tensorflow/test_debugging.py | 12 +- .../frontend/tensorflow/test_forward.py | 27 +- tests/python/frontend/tflite/test_forward.py | 2 +- tests/python/relay/test_any.py | 32 ++ tests/python/relay/test_op_level4.py | 42 ++- tests/python/relay/test_op_level5.py | 28 +- topi/python/topi/sort.py | 4 +- topi/python/topi/vision/nms.py | 247 ++++++++++++-- topi/python/topi/vision/ssd/multibox.py | 2 +- topi/tests/python/test_topi_vision.py | 17 +- tutorials/frontend/from_tensorflow.py | 4 +- 25 files changed, 740 insertions(+), 222 deletions(-) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index ccf8e54fea96..4f0c90ec4f4a 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -210,14 +210,17 @@ struct SplitAttrs : public tvm::AttrsNode { /*! \brief Attributes for StridedSlice operator */ struct StridedSliceAttrs : public tvm::AttrsNode { - Array begin; - Array end; - Array strides; + Expr begin; + Expr end; + Expr strides; TVM_DECLARE_ATTRS(StridedSliceAttrs, "relay.attrs.StridedSliceAttrs") { - TVM_ATTR_FIELD(begin).describe("Indices for begin of slice, begin index is also inclusive"); - TVM_ATTR_FIELD(end).describe("Indices for end of slice, end index is exclusive"); - TVM_ATTR_FIELD(strides).set_default(Array({})).describe("Stride values of the slice"); + TVM_ATTR_FIELD(begin) + .describe("Indices for begin of slice, begin index is also inclusive"); + TVM_ATTR_FIELD(end) + .describe("Indices for end of slice, end index is exclusive"); + TVM_ATTR_FIELD(strides) + .describe("Stride values of the slice"); } }; diff --git a/include/tvm/relay/attrs/vision.h b/include/tvm/relay/attrs/vision.h index e7e24b19228b..0a0ca9fe93d6 100644 --- a/include/tvm/relay/attrs/vision.h +++ b/include/tvm/relay/attrs/vision.h @@ -89,6 +89,7 @@ struct GetValidCountsAttrs : public tvm::AttrsNode { /*! 
\brief Attributes used in non_maximum_suppression operator */ struct NonMaximumSuppressionAttrs : public tvm::AttrsNode { int max_output_size; + double score_threshold; double iou_threshold; bool force_suppress; int top_k; @@ -101,20 +102,29 @@ struct NonMaximumSuppressionAttrs : public tvm::AttrsNode 4 else None + + scores = AttrCvt(op_name="expand_dims", + extras={'axis': -1, 'num_newaxis': 1})([inputs[1]], attr) + + data = get_relay_op('concatenate')([scores, inputs[0]], -1) + # expand to [class_id, prob, box] + # data = _get_relay_op('concatenate')([scores, data], -1) + # expand to [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5] + + data = get_relay_op('expand_dims')(data, 0, 1) + + # Don't need to call get_valid_counts for TensorFlow and ONNX + # ct, data = _get_relay_op('get_valid_counts')(data, score_threshold=score_threshold, + # id_index=-1, score_index=0) + # get the number of anchors + data_shape = attr['_input_shapes'][inputs[1]] + valid_cnt = _expr.const(data_shape) + # TensorFlow NMS doesn't have parameter top_k + top_k = -1 + # score_index is 0 since TF doesn't have class id for nms input + score_index = 0 + nms_ret = get_relay_op('non_max_suppression')(data=data, + valid_count=valid_cnt, + max_output_size=max_output_size, + score_threshold=score_threshold, + iou_threshold=iou_threshold, + force_suppress=False, + top_k=top_k, + coord_start=1, + score_index=score_index, + id_index=-1, + return_indices=True, + invalid_to_bottom=False) + + end = get_relay_op("squeeze")(nms_ret[1], axis=[1]) + data_slice = get_relay_op("squeeze")(nms_ret[0], axis=[0]) + ret = get_relay_op("strided_slice")(data_slice, _expr.const([0]), end, _expr.const([1])) + return ret return _impl def _decode_image(): @@ -2027,6 +2076,8 @@ def _impl(inputs, attr, params, mod): 'Mod' : _elemwise('mod'), 'Mul' : _elemwise('multiply'), 'Neg' : AttrCvt('negative'), + 'NonMaxSuppressionV2' : _nms(), + 'NonMaxSuppressionV3' : _nms(), 'NoOp' : _no_op(), 'NotEqual' : _broadcast('not_equal'), 'OneHot' : _one_hot(), diff --git a/python/tvm/relay/frontend/tensorflow_parser.py b/python/tvm/relay/frontend/tensorflow_parser.py index fdbb8768597f..4e0f14c577cb 100644 --- a/python/tvm/relay/frontend/tensorflow_parser.py +++ b/python/tvm/relay/frontend/tensorflow_parser.py @@ -80,14 +80,14 @@ def _get_output_names(self): "required to restore from saved model.") tags = self._get_tag_set() output_names = set() - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: meta_graph_def = tf.saved_model.loader.load(sess, tags, self._model_dir) for sig_def in meta_graph_def.signature_def.values(): for output_tensor in sig_def.outputs.values(): output_names.add(output_tensor.name.replace(":0", "")) - tf.reset_default_graph() + tf.compat.v1.reset_default_graph() return ",".join(output_names) def _load_saved_model(self): diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index e1c2bd7b0acf..7d81ab13f6db 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -101,6 +101,18 @@ def _arange_shape_func(start, stop, step): def arange_shape_func(attrs, inputs, _): return [_arange_shape_func(*inputs)] +@script +def _strided_slice_shape_func(data_shape, begin, end, strides): + ndim = len(data_shape.shape) + out = output_tensor((ndim,), "int64") + for i in const_range(ndim): + out[i] = int64(ceil_div((int64(end[i]) - int64(begin[i])), int64(strides[i]))) + return out + +@_reg.register_shape_func("strided_slice", True) +def strided_slice_shape_func(attrs, 
inputs, _): + return [_strided_slice_shape_func(*inputs)] + @script def _concatenate_shape_func(inputs, axis): ndim = inputs[0].shape[0] diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index 1da58ae3d90e..c8c540fa861b 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py @@ -619,13 +619,13 @@ def strided_slice(data, begin, end, strides=None): data : relay.Expr The source array to be sliced. - begin: list of int + begin: relay.Expr The indices to begin with in the slicing. - end: list of int + end: relay.Expr Indices indicating end of the slice. - strides: list of int, optional + strides: relay.Expr, optional Specifies the stride values, it can be negative in that case, the input tensor will be reversed in that particular axis. @@ -634,8 +634,8 @@ def strided_slice(data, begin, end, strides=None): ret : relay.Expr The computed result. """ - strides = strides or [] - return _make.strided_slice(data, list(begin), list(end), list(strides)) + strides = strides or const(1) + return _make.strided_slice(data, begin, end, strides) def strided_set(data, v, begin, end, strides=None): diff --git a/python/tvm/relay/op/vision/nms.py b/python/tvm/relay/op/vision/nms.py index 70a9ec9ed5e4..a4e98a732715 100644 --- a/python/tvm/relay/op/vision/nms.py +++ b/python/tvm/relay/op/vision/nms.py @@ -56,6 +56,7 @@ def get_valid_counts(data, def non_max_suppression(data, valid_count, max_output_size=-1, + score_threshold=0.0, iou_threshold=0.5, force_suppress=False, top_k=-1, @@ -69,9 +70,11 @@ def non_max_suppression(data, Parameters ---------- data : relay.Expr - 3-D tensor with shape [batch_size, num_anchors, 6]. + 3-D tensor with shape [batch_size, num_anchors, 6] + or [batch_size, num_anchors, 5]. The last dimension should be in format of - [class_id, score, box_left, box_top, box_right, box_bottom]. + [class_id, score, box_left, box_top, box_right, box_bottom] + or [score, box_left, box_top, box_right, box_bottom]. valid_count : relay.Expr 1-D tensor for valid number of boxes. @@ -80,6 +83,9 @@ def non_max_suppression(data, Max number of output valid boxes for each instance. By default all valid boxes are returned. + score_threshold : float, optional + Lower limit of score for valid bounding boxes. + iou_threshold : float, optional Non-maximum suppression threshold. @@ -106,10 +112,24 @@ def non_max_suppression(data, Returns ------- - out : relay.Expr - 3-D tensor with shape [batch_size, num_anchors, 6]. + out : relay.Expr or relay.Tuple + return relay.Expr if return_indices is disabled, a 3-D tensor + with shape [batch_size, num_anchors, 6] or [batch_size, num_anchors, 6]. + if return_indices is True, return relay.Tuple of two 2-D tensors, with + shape [batch_size, num_anchors] and [batch_size, num_valid_anchors] respectively. 
""" - return _make.non_max_suppression(data, valid_count, max_output_size, - iou_threshold, force_suppress, top_k, - coord_start, score_index, id_index, - return_indices, invalid_to_bottom) + out = _make.non_max_suppression(data, + valid_count, + max_output_size, + score_threshold, + iou_threshold, + force_suppress, + top_k, + coord_start, + score_index, + id_index, + return_indices, + invalid_to_bottom) + if return_indices: + return TupleWrapper(out, 2) + return out diff --git a/python/tvm/relay/testing/tf.py b/python/tvm/relay/testing/tf.py index dc7937c0b346..567724d9d251 100644 --- a/python/tvm/relay/testing/tf.py +++ b/python/tvm/relay/testing/tf.py @@ -77,7 +77,7 @@ def AddShapesToGraphDef(session, out_node): Parameters ---------- - session : tf.Session + session : tf.compat.v1.Session Tensorflow session out_node : String or List Final output node of the graph. diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index 7282ac74e6f3..5d471f99a47f 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -1095,9 +1095,12 @@ bool ArangeRel(const Array& types, int num_inputs, const Attrs& raw_attrs, } } -inline te::Tensor DynamicArange(const te::Tensor& start, const te::Tensor& stop, - const te::Tensor& step, tvm::DataType dtype, - std::string name = "tensor", std::string tag = topi::kInjective) { +inline te::Tensor DynamicArange(const te::Tensor& start, + const te::Tensor& stop, + const te::Tensor& step, + tvm::DataType dtype, + std::string name = "T_arange_dynamic", + std::string tag = topi::kInjective) { tvm::PrimExpr num_elem = tvm::tir::Var("num_elem"); return te::compute( {num_elem}, @@ -1110,6 +1113,7 @@ inline te::Tensor DynamicArange(const te::Tensor& start, const te::Tensor& stop, Array ArangeCompute(const Attrs& attrs, const Array& inputs, const Type& out_type) { const ArangeAttrs* param = attrs.as(); + CHECK(param != nullptr); te::Tensor start = inputs[0]; te::Tensor stop = inputs[1]; te::Tensor step = inputs[2]; @@ -1671,93 +1675,101 @@ Array GetIntArray(Array arr) { // strided_slice TVM_REGISTER_NODE_TYPE(StridedSliceAttrs); -bool StridedSliceRel(const Array& types, int num_inputs, const Attrs& attrs, - const TypeReporter& reporter) { - CHECK_EQ(types.size(), 2); - const auto* data = types[0].as(); - if (data == nullptr) return false; +bool StridedSliceRel(const Array& types, + int num_inputs, + const Attrs& attrs, + const TypeReporter& reporter) { + CHECK_EQ(types.size(), 5); const StridedSliceAttrs* param = attrs.as(); CHECK(param != nullptr); - + const auto* data = types[0].as(); + CHECK(data != nullptr); auto dshape = data->shape; auto num_axis = dshape.size(); - std::vector stride_vec; - for (Integer i : param->strides) { - CHECK(i.defined()); - stride_vec.push_back(i->value); - } - for (size_t i = stride_vec.size(); i < num_axis; ++i) { - stride_vec.push_back(1); - } - const int64_t max_range = std::numeric_limits::max(); - - std::vector begin_vec; - for (size_t i = 0; i < param->begin.size(); ++i) { - if (!param->begin[i].defined()) { - // value=None + // calculate output shape + std::vector oshape(num_axis); + const ConstantNode *cbegin, *cend, *cstrides; + if ((cbegin = param->begin.as()) && + (cend = param->end.as()) && + (cstrides = param->strides.as())) { + std::vector stride_vec; + int32_t* strides_val = reinterpret_cast(cstrides->data->data); + for (size_t i = 0; i < cstrides->data.Shape().front(); ++i){ + stride_vec.push_back(strides_val[i]); + } + for (size_t i = stride_vec.size(); i < num_axis; ++i) 
{ + stride_vec.push_back(1); + } + const int64_t max_range = std::numeric_limits::max(); + std::vector begin_vec; + int32_t* begin_val = reinterpret_cast(cbegin->data->data); + for (size_t i = 0; i < cbegin->data.Shape().front(); ++i){ + begin_vec.push_back(begin_val[i]); + } + for (size_t i = begin_vec.size(); i < num_axis; ++i) { begin_vec.push_back(stride_vec[i] > 0 ? 0 : max_range); - } else { - begin_vec.push_back(param->begin[i]->value); } - } - for (size_t i = begin_vec.size(); i < num_axis; ++i) { - begin_vec.push_back(stride_vec[i] > 0 ? 0 : max_range); - } - - std::vector end_vec; - for (size_t i = 0; i < param->end.size(); ++i) { - // allow end to be None - if (!param->end[i].defined()) { + std::vector end_vec; + int32_t* end_val = reinterpret_cast(cend->data->data); + for (size_t i = 0; i < cend->data.Shape().front(); ++i){ + end_vec.push_back(end_val[i]); + } + for (size_t i = end_vec.size(); i < num_axis; ++i) { end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); - } else { - end_vec.push_back(param->end[i]->value); } - } - for (size_t i = end_vec.size(); i < num_axis; ++i) { - end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); - } - - std::vector oshape(dshape.size()); - for (size_t i = 0; i < num_axis; ++i) { - int64_t stride_v = stride_vec[i]; - int64_t begin_v = begin_vec[i]; - int64_t end_v = end_vec[i]; - if ((stride_v == 1 && begin_v == 0 && end_v == max_range) || - (stride_v == -1 && begin_v == max_range && end_v == 0)) { - // Quick path, do not slice this dimension. - oshape[i] = dshape[i]; - continue; + for (size_t i = 0; i < num_axis; ++i) { + int64_t stride_v = stride_vec[i]; + int64_t begin_v = begin_vec[i]; + int64_t end_v = end_vec[i]; + + if ((stride_v == 1 && + begin_v == 0 && + end_v == max_range) || + (stride_v == -1 && + begin_v == max_range && + end_v == 0)) { + // Quick path, do not slice this dimension. + oshape[i] = dshape[i]; + continue; + } + // Normal path, require the shape to be concrete integer. + // Require concrete integer as symbolic inference of min/max + // can get complicated and not very helpful. + const int64_t* p_dim_size = tir::as_const_int(dshape[i]); + CHECK(p_dim_size) + << "strided_slice requires sliced dimension to be concrete int"; + int64_t dim_size = p_dim_size[0]; + begin_v = (begin_v < 0) ? dim_size + begin_v : begin_v; + end_v = (end_v < 0) ? dim_size + end_v : end_v; + + int64_t slice_range, step; + if (stride_v < 0) { + if (end_v < -1) end_v = -1; + CHECK_LT(end_v, begin_v) + << "strided_slice get empty slice at axis " << i; + begin_v = std::min(dim_size - 1, begin_v); + slice_range = begin_v - end_v; + step = -stride_v; + } else { + if (begin_v < 0) begin_v = 0; + CHECK_GE(stride_v, 0); + CHECK_LT(begin_v, end_v) + << "strided_slice get empty slice at axis " << i; + end_v = std::min(dim_size, end_v); + slice_range = end_v - begin_v; + step = stride_v; + } + oshape[i] = tir::make_const(dshape[i].dtype(), (slice_range + step - 1) / step); } - // Normal path, require the shape to be concrete integer. - // Require concrete integer as symbolic inference of min/max - // can get complicated and not very helpful. - const int64_t* p_dim_size = tir::as_const_int(dshape[i]); - CHECK(p_dim_size) << "strided_slice requires sliced dimension to be concrete int"; - int64_t dim_size = p_dim_size[0]; - begin_v = (begin_v < 0) ? dim_size + begin_v : begin_v; - end_v = (end_v < 0) ? 
dim_size + end_v : end_v; - - int64_t slice_range, step; - if (stride_v < 0) { - if (end_v < -1) end_v = -1; - CHECK_LT(end_v, begin_v) << "strided_slice get empty slice at axis " << i; - begin_v = std::min(dim_size - 1, begin_v); - slice_range = begin_v - end_v; - step = -stride_v; - } else { - if (begin_v < 0) begin_v = 0; - CHECK_GE(stride_v, 0); - CHECK_LT(begin_v, end_v) << "strided_slice get empty slice at axis " << i; - end_v = std::min(dim_size, end_v); - slice_range = end_v - begin_v; - step = stride_v; + } else { + for (size_t i = 0; i < num_axis; ++i) { + oshape[i] = Any::make(); } - oshape[i] = tir::make_const(dshape[i].dtype(), (slice_range + step - 1) / step); } - reporter->Assign(types[1], TensorType(oshape, data->dtype)); + reporter->Assign(types[4], TensorType(oshape, data->dtype)); return true; } @@ -1783,11 +1795,31 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, auto shape = old_in_shapes[0]; // NOTE: Discard "const" qualifier here. - auto* params = const_cast(attrs.as()); + auto *params = const_cast(attrs.as()); + CHECK(params != nullptr); + Array begin, end, strides; + const ConstantNode *cbegin, *cend, *cstrides; + if ((cbegin = params->begin.as()) && + (cend = params->end.as()) && + (cstrides = params->strides.as())) { + + int32_t* strides_val = reinterpret_cast(cstrides->data->data); + for (size_t i = 0; i < cstrides->data.Shape().front(); ++i){ + strides.push_back(strides_val[i]); + } + int32_t* begin_val = reinterpret_cast(cbegin->data->data); + for (size_t i = 0; i < cbegin->data.Shape().front(); ++i){ + begin.push_back(begin_val[i]); + } + int32_t* end_val = reinterpret_cast(cend->data->data); + for (size_t i = 0; i < cend->data.Shape().front(); ++i){ + end.push_back(end_val[i]); + } + } Array new_begin, new_end; - for (size_t i = 0; i < params->begin.size(); i++) { + for (size_t i = 0; i < begin.size(); i++) { const LayoutAxis& axis = layout[i]; if (!axis.IsPrimal()) { // original layout that contains splitted axes is not supported @@ -1795,53 +1827,104 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, } auto factor = new_layout.FactorOf(axis); if (factor == -1) { - new_begin.push_back(params->begin[i]); - new_end.push_back(params->end[i]); + new_begin.push_back(begin[i]); + new_end.push_back(end[i]); } else { - if (params->strides.defined() && i < params->strides.size()) { - auto stride = params->strides[i]; + if (strides.defined() && i < strides.size()) { + auto stride = strides[i]; // arbitrary stride is not supported if (stride.defined() && stride->value != 1) { return {{Layout::Undef()}, {Layout::Undef()}}; } } - int64_t begin = params->begin[i].defined() ? params->begin[i]->value : 0; - int64_t end = - params->end[i].defined() ? params->end[i]->value : shape[i].as()->value; - if (begin % factor || end % factor) { + int64_t bg = begin[i].defined() ? begin[i]->value : 0; + int64_t ed = end[i].defined() ? end[i]->value : + shape[i].as()->value; + if (bg % factor || ed % factor) { // transform to original layout return {{Layout::Undef()}, {Layout::Undef()}}; } - new_begin.push_back(tvm::Integer(begin / factor)); - new_end.push_back(tvm::Integer(end / factor)); + new_begin.push_back(tvm::Integer(bg / factor)); + new_end.push_back(tvm::Integer(ed / factor)); } } layout = new_layout; - params->begin = new_begin; - params->end = new_end; } return {{layout}, {layout}}; } -// Positional relay function to create StridedSlice operator used by frontend FFI. 
-Expr MakeStridedSlice(Expr data, Array<Integer> begin, Array<Integer> end, Array<Integer> strides) {
-  auto attrs = make_object<StridedSliceAttrs>();
-  attrs->begin = std::move(begin);
-  attrs->end = std::move(end);
-  attrs->strides = std::move(strides);
-  static const Op& op = Op::Get("strided_slice");
-  return Call(op, {data}, Attrs(attrs), {});
+inline te::Tensor DynamicStridedSlice(const te::Tensor& input,
+                                      const te::Tensor& begin,
+                                      const te::Tensor& end,
+                                      const te::Tensor& strides,
+                                      std::string name = "T_strided_slice_dynamic",
+                                      std::string tag = topi::kInjective) {
+  size_t src_tensor_dim = static_cast<size_t>(input->shape.size());
+  Array<PrimExpr> out_shape;
+  for (size_t i = 0; i < src_tensor_dim; ++i) {
+    out_shape.push_back(tvm::tir::Var("dim"));
+  }
+  return te::compute(out_shape, [&](const Array<tvm::tir::Var>& indices) {
+    Array<PrimExpr> real_indices;
+    for (int32_t i = 0; i < static_cast<int32_t>(src_tensor_dim); ++i) {
+      real_indices.push_back(indices[i] * strides(i) + begin(i));
+    }
+    return input(real_indices);
+  }, name, tag);
 }
 
 Array<te::Tensor> StridedSliceCompute(const Attrs& attrs, const Array<te::Tensor>& inputs,
                                       const Type& out_type) {
   const StridedSliceAttrs* param = attrs.as<StridedSliceAttrs>();
   CHECK(param != nullptr);
-  return Array<te::Tensor>{
-      topi::strided_slice(inputs[0], param->begin, param->end, param->strides)};
+  const ConstantNode *cbegin, *cend, *cstrides;
+  if ((cbegin = param->begin.as<ConstantNode>()) &&
+      (cend = param->end.as<ConstantNode>()) &&
+      (cstrides = param->strides.as<ConstantNode>())) {
+    Array<Integer> begin, end, strides;
+    int32_t* strides_val = reinterpret_cast<int32_t*>(cstrides->data->data);
+    for (size_t i = 0; i < cstrides->data.Shape().front(); ++i) {
+      strides.push_back(strides_val[i]);
+    }
+    int32_t* begin_val = reinterpret_cast<int32_t*>(cbegin->data->data);
+    for (size_t i = 0; i < cbegin->data.Shape().front(); ++i) {
+      begin.push_back(begin_val[i]);
+    }
+    int32_t* end_val = reinterpret_cast<int32_t*>(cend->data->data);
+    for (size_t i = 0; i < cend->data.Shape().front(); ++i) {
+      end.push_back(end_val[i]);
+    }
+    return Array<te::Tensor>{
+      topi::strided_slice(inputs[0], begin, end, strides)
+    };
+  } else {
+    te::Tensor data = inputs[0];
+    te::Tensor begin = inputs[1];
+    te::Tensor end = inputs[2];
+    te::Tensor strides = inputs[3];
+    // Dynamic computation
+    return Array<te::Tensor>{
+      DynamicStridedSlice(data, begin, end, strides)
+    };
+  }
 }
 
-TVM_REGISTER_GLOBAL("relay.op._make.strided_slice").set_body_typed(MakeStridedSlice);
+// Positional relay function to create StridedSlice operator used by frontend FFI.
+Expr MakeStridedSlice(Expr data,
+                      Expr begin,
+                      Expr end,
+                      Expr strides) {
+  auto attrs = make_object<StridedSliceAttrs>();
+  attrs->begin = begin;
+  attrs->end = end;
+  attrs->strides = strides;
+  static const Op& op = Op::Get("strided_slice");
+  return Call(op, {data, begin, end, strides}, Attrs(attrs), {});
+}
+
+TVM_REGISTER_GLOBAL("relay.op._make.strided_slice")
+.set_body_typed(MakeStridedSlice);
+
 
 RELAY_REGISTER_OP("strided_slice")
     .describe(R"code(Strided slice of an array.
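The dynamic branch above leaves every output extent symbolic and reads element idx of the output from input[idx * strides + begin]; the extents are pinned down later by the hybrid shape function registered in _transform.py, which computes ceil((end[i] - begin[i]) / strides[i]) per axis. A NumPy reference of that semantics (an illustrative sketch, not part of the patch; strided_slice_ref is a hypothetical helper mirroring topi.testing.strided_slice_python for positive strides and in-bounds begin/end, the only case the dynamic path handles):

    import numpy as np

    def strided_slice_ref(data, begin, end, strides):
        # Per-axis extent matches _strided_slice_shape_func:
        # ceil((end[i] - begin[i]) / strides[i]).
        oshape = [-(-(e - b) // s) for b, e, s in zip(begin, end, strides)]
        out = np.empty(oshape, dtype=data.dtype)
        for idx in np.ndindex(*oshape):
            # Element mapping matches DynamicStridedSlice:
            # read the input at idx * stride + begin along every axis.
            out[idx] = data[tuple(i * s + b for i, b, s in zip(idx, begin, strides))]
        return out

    # e.g. strided_slice_ref(np.arange(24).reshape(2, 3, 4), [0, 1, 0], [2, 3, 4], [1, 1, 2])
    # returns a (2, 2, 2) block, the same shape the shape func infers.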
@@ -1867,14 +1950,19 @@ Examples::
     [[ 5., 6.],
      [ 7., 8.]]]
 )code" TVM_ADD_FILELINE)
-    .set_num_inputs(1)
-    .add_argument("data", "Tensor", "The input tensor.")
-    .set_support_level(4)
-    .set_attrs_type<StridedSliceAttrs>()
-    .add_type_rel("StridedSlice", StridedSliceRel)
-    .set_attr<FTVMCompute>("FTVMCompute", StridedSliceCompute)
-    .set_attr<TOpPattern>("TOpPattern", kInjective)
-    .set_attr<FInferCorrectLayout>("FInferCorrectLayout", StridedSliceInferCorrectLayout);
+.set_num_inputs(4)
+.add_argument("data", "Tensor", "The input tensor.")
+.add_argument("begin", "Tensor", "The indices to begin with in the slicing.")
+.add_argument("end", "Tensor", "Indices indicating end of the slice.")
+.add_argument("strides", "Tensor", "The stride values.")
+.set_support_level(4)
+.set_attrs_type<StridedSliceAttrs>()
+.add_type_rel("StridedSlice", StridedSliceRel)
+.set_attr<FTVMCompute>("FTVMCompute", StridedSliceCompute)
+// TODO(@icemelon, @yongwww): Change to kOpaque because FuseOps doesn't consider dynamic shape
+.set_attr<TOpPattern>("TOpPattern", kOpaque)
+.set_attr<AnyCodegenStrategy>("AnyCodegenStrategy", kVariableDimensions)
+.set_attr<FInferCorrectLayout>("FInferCorrectLayout", StridedSliceInferCorrectLayout);
 
 // strided_set
 bool StridedSetRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
diff --git a/src/relay/op/vision/nms.cc b/src/relay/op/vision/nms.cc
index b1aaaf01ae9c..bdd4f664165b 100644
--- a/src/relay/op/vision/nms.cc
+++ b/src/relay/op/vision/nms.cc
@@ -82,19 +82,35 @@ bool NMSRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
 
   // assign output type
   if (param->return_indices) {
+    std::vector<Type> fields;
+    // dynamic happens for return_indices in TensorFlow & ONNX
     std::vector<IndexExpr> oshape({dshape[0], dshape[1]});
-    reporter->Assign(types[2], TensorType(oshape, DataType::Int(32)));
+    fields.push_back(TensorType(oshape, DataType::Int(32)));
+    std::vector<IndexExpr> countshape({dshape[0], 1});
+    fields.push_back(TensorType(countshape, DataType::Int(32)));
+    reporter->Assign(types[2], TupleType(Array<Type>(fields)));
   } else {
     reporter->Assign(types[2], TensorType(dshape, data->dtype));
   }
   return true;
 }
 
-Expr MakeNMS(Expr data, Expr valid_count, int max_output_size, double iou_threshold,
-             bool force_suppress, int top_k, int coord_start, int score_index, int id_index,
-             bool return_indices, bool invalid_to_bottom) {
+
+Expr MakeNMS(Expr data,
+             Expr valid_count,
+             int max_output_size,
+             double score_threshold,
+             double iou_threshold,
+             bool force_suppress,
+             int top_k,
+             int coord_start,
+             int score_index,
+             int id_index,
+             bool return_indices,
+             bool invalid_to_bottom) {
   auto attrs = make_object<NonMaximumSuppressionAttrs>();
   attrs->max_output_size = max_output_size;
+  attrs->score_threshold = score_threshold;
   attrs->iou_threshold = iou_threshold;
   attrs->force_suppress = force_suppress;
   attrs->top_k = top_k;
@@ -107,12 +123,14 @@ Expr MakeNMS(Expr data, Expr valid_count, int max_output_size, double iou_thresh
   return Call(op, {data, valid_count}, Attrs(attrs), {});
 }
 
-TVM_REGISTER_GLOBAL("relay.op.vision._make.non_max_suppression").set_body_typed(MakeNMS);
+TVM_REGISTER_GLOBAL("relay.op.vision._make.non_max_suppression")
+.set_body_typed(MakeNMS);
 
 RELAY_REGISTER_OP("vision.non_max_suppression")
-    .describe(R"doc(Non-maximum suppression. The input boxes should
-be in the format of [class_id, score, left, top, right, bottom].
-Set id_index to be -1 to ignore class_id axis.
+.describe(R"doc(Non-maximum suppression. The input boxes should
+be in the format of [class_id, score, left, top, right, bottom]
+or [score, left, top, right, bottom]. Set id_index to be -1 to
+ignore class_id axis.
)doc" TVM_ADD_FILELINE) .set_num_inputs(2) .add_argument("data", "Tensor", "Input data.") diff --git a/src/relay/transforms/combine_parallel_conv2d.cc b/src/relay/transforms/combine_parallel_conv2d.cc index 1990414c3aa4..e4b2e1ea980b 100644 --- a/src/relay/transforms/combine_parallel_conv2d.cc +++ b/src/relay/transforms/combine_parallel_conv2d.cc @@ -176,7 +176,24 @@ class ParallelConv2DCombiner : public ParallelOpCombiner { begin.push_back(index); index += channels; end.push_back(index); - auto slice = MakeStridedSlice(data, std::move(begin), std::move(end), Array{}); + + + DLContext ctx; + ctx.device_type = kDLCPU; + ctx.device_id = 0; + auto begin_ndarray = runtime::NDArray::Empty({1}, DataType::Int(64), ctx); + auto end_ndarray = runtime::NDArray::Empty({1}, DataType::Int(64), ctx); + auto strides_ndarray = runtime::NDArray::Empty({1}, DataType::Int(64), ctx); + int64_t* begin_data = static_cast(begin_ndarray->data); + int64_t* end_data = static_cast(end_ndarray->data); + + for (size_t i = 0; i < begin.size(); ++i){ + begin_data[i] = begin[i]; + end_data[i] = end[i]; + } + + auto slice = MakeStridedSlice(data, ConstantNode::make(begin_ndarray), + ConstantNode::make(end_ndarray), ConstantNode::make(strides_ndarray)); subst_map->insert({GetRef(branch[depth]), slice}); } } diff --git a/src/relay/transforms/pattern_util.h b/src/relay/transforms/pattern_util.h index 06b1e8290fe0..8964959bfcfd 100644 --- a/src/relay/transforms/pattern_util.h +++ b/src/relay/transforms/pattern_util.h @@ -673,7 +673,7 @@ Expr MakeConcatenate(Expr data, int axis); Expr MakeRepeat(Expr data, int repeats, int axis); -Expr MakeStridedSlice(Expr data, Array begin, Array end, Array strides); +Expr MakeStridedSlice(Expr data, Expr begin, Expr end, Expr strides); Expr MakeStack(Expr data, int axis); diff --git a/tests/python/frontend/tensorflow/test_control_flow.py b/tests/python/frontend/tensorflow/test_control_flow.py index 9777a8dc4462..95d5b797430c 100644 --- a/tests/python/frontend/tensorflow/test_control_flow.py +++ b/tests/python/frontend/tensorflow/test_control_flow.py @@ -53,7 +53,7 @@ def b(i): return tf.add(i, 1) r = tf.while_loop(c, b, [i]) - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: tf_out = sess.run(r) check_equal(graph, tf_out) @@ -70,7 +70,7 @@ def b(i): return tf.add(i, 1) r = tf.while_loop(c, b, [i]) - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: tf_out = sess.run(r) check_equal(graph, tf_out) @@ -89,7 +89,7 @@ def b(i, j): return [tf.add(i, 1), j] i1, i2 = tf.while_loop(c, b, loop_vars=[i0, j0]) i1 += tf.constant(1337) - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: tf_out = sess.run(i1) check_equal(graph, tf_out) @@ -107,7 +107,7 @@ def c(i, j, k): return i < 10 def b(i, j, k): return [i+1, j * k, k + i] r = tf.while_loop(c, b, loop_vars=[i0, j0, k0]) - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: tf_out = sess.run(r) check_equal(graph, tf_out) @@ -127,7 +127,7 @@ def c(i, j, k): return \ def b(i, j, k): return [i+j, j+k, k+1] r = tf.while_loop(c, b, loop_vars=[i, j, k]) - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: tf_out = sess.run(r) check_equal(graph, tf_out) @@ -147,7 +147,7 @@ def condition(x): return tf.reduce_sum(x) < 100 x = tf.constant(0, shape=[2, 2]) r = tf.while_loop(condition, body, [x]) - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: tf_out = sess.run(r) check_equal(graph, tf_out) @@ -170,7 +170,7 @@ def condition(x): x = tf.constant(3) r = 
tf.while_loop(condition, body, loop_vars=[x]) - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: tf_out = sess.run(r) check_equal(graph, tf_out) @@ -189,7 +189,7 @@ def f2(): return tf.add(4, 23) r = tf.cond(tf.less(i, j), f1, f2) - with tf.Session(graph=graph) as sess: + with tf.compat.v1.Session(graph=graph) as sess: tf_out = sess.run(r) check_equal(graph, tf_out) @@ -204,7 +204,7 @@ def test_multiple_cond_vars(): r = tf.cond(tf.less(tf.add(x1, x2), 10), lambda: tf.add(10, 2), lambda: tf.square(5)) - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: tf_out = sess.run(r) check_equal(graph, tf_out) @@ -224,7 +224,7 @@ def fn2(x, y): k = tf.constant(3) r = tf.cond(tf.less(i, j), lambda: fn1(i, k), lambda: fn2(j, k)) - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: tf_out = sess.run(r, feed_dict={i: 1, j: 2, k: 3}) check_equal(graph, tf_out) @@ -252,7 +252,7 @@ def fn2(a, b): pred = tf.less(x, y) r = tf.cond(pred, lambda: fn1(x, y), lambda: fn2(y, z)) - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: tf_out = sess.run(r, feed_dict={x: 1, y: 2, z: 3, pred: True}) check_equal(graph, tf_out) @@ -279,7 +279,7 @@ def fn2(a, b): pred = tf.less(x, y) r = tf.cond(pred, lambda: fn1(x, y), lambda: fn2(y, z)) - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: tf_out = sess.run(r, feed_dict={x: 1, y: 2, z: 3, pred: True}) check_equal(graph, tf_out) @@ -300,7 +300,7 @@ def condition(x): return tf.less(x, 100) r = tf.while_loop(condition, body, loop_vars=[x]) - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: tf_out = sess.run(r) check_equal(graph, tf_out) diff --git a/tests/python/frontend/tensorflow/test_debugging.py b/tests/python/frontend/tensorflow/test_debugging.py index 01ad6a256f88..8dac612b4879 100644 --- a/tests/python/frontend/tensorflow/test_debugging.py +++ b/tests/python/frontend/tensorflow/test_debugging.py @@ -17,6 +17,7 @@ """Unit tests for converting TensorFlow debugging ops to Relay.""" try: import tensorflow.compat.v1 as tf + tf.disable_v2_behavior() except ImportError: import tensorflow as tf @@ -24,6 +25,7 @@ from tvm import relay from tvm.relay.frontend.tensorflow import from_tensorflow + def run_relay(graph, shape_dict=None, *vars): mod, params = from_tensorflow( graph.as_graph_def(add_shapes=True), @@ -31,6 +33,7 @@ def run_relay(graph, shape_dict=None, *vars): ex = relay.create_executor('debug', mod=mod) return ex.evaluate()(*vars) + def test_assert_true(): g = tf.Graph() shape = (1, 2) @@ -51,7 +54,8 @@ def test_assert_true(): # do that, it's happening in Relay, and that optimization shouldn't # affect the arity of the main function. We should have to pass in # x_value here. - np.testing.assert_allclose(0, run_relay(g, {'input':shape}).asnumpy()) + np.testing.assert_allclose(0, run_relay(g, {'input': shape}).asnumpy()) + def test_assert_true_var_capture(): g = tf.Graph() @@ -71,7 +75,8 @@ def test_assert_true_var_capture(): # the graph as a boolean, which is not correct - as you can see above, # TF believes that the value of this graph is None. np.testing.assert_allclose(True, - run_relay(g, None, x_value).asnumpy()) + run_relay(g, None, x_value).asnumpy()) + def test_assert_false(): g = tf.Graph() @@ -91,9 +96,8 @@ def test_assert_false(): # argument is false. 
np.testing.assert_allclose(0, run_relay(g).asnumpy()) - + if __name__ == "__main__": test_assert_true() test_assert_true_var_capture() test_assert_false() - diff --git a/tests/python/frontend/tensorflow/test_forward.py b/tests/python/frontend/tensorflow/test_forward.py index c6a285c93d6a..76d2fe13aa49 100644 --- a/tests/python/frontend/tensorflow/test_forward.py +++ b/tests/python/frontend/tensorflow/test_forward.py @@ -1886,6 +1886,30 @@ def test_forward_crop_and_resize(): extrapolation_value=0.2, method='nearest') +####################################################################### +# Non Max Suppression +# ------------------- +def _test_forward_nms_v3(bx_shape, score_shape, iou_threshold, score_threshold, out_size, dtype="float32"): + boxes = np.random.uniform(0, 10, size=bx_shape).astype(dtype) + scores = np.random.uniform(size=score_shape).astype(dtype) + tf.reset_default_graph() + in_data_1 = tf.placeholder(dtype, boxes.shape, name="in_data_1") + in_data_2 = tf.placeholder(dtype, scores.shape, name="in_data_2") + tf.image.non_max_suppression(boxes=in_data_1, scores=in_data_2, + max_output_size=out_size, iou_threshold=iou_threshold, + score_threshold=score_threshold, name="nms") + compare_tf_with_tvm([boxes, scores], ['in_data_1:0', 'in_data_2:0'], + 'nms/NonMaxSuppressionV3:0', mode='vm') + compare_tf_with_tvm([boxes, scores], ['in_data_1:0', 'in_data_2:0'], + 'nms/NonMaxSuppressionV3:0', mode='debug') + +def test_forward_nms_v3(): + """ NonMaxSuppressionV3 """ + _test_forward_nms_v3((5, 4), (5,), 0.7, 0.5, 5) + _test_forward_nms_v3((20, 4), (20,), 0.5, 0.6, 10) + _test_forward_nms_v3((1000, 4), (1000,), 0.3, 0.7, 1000) + + ####################################################################### # LSTM # ---- @@ -3231,6 +3255,7 @@ def test_forward_isfinite(): test_forward_truncatemod() test_forward_one_hot() test_forward_atan2() + test_forward_nms_v3() # Activations test_forward_sigmoid() @@ -3322,4 +3347,4 @@ def test_forward_isfinite(): test_read_variable_op() # Sharing params case using Mean ops - test_sharing_node() + test_sharing_node() \ No newline at end of file diff --git a/tests/python/frontend/tflite/test_forward.py b/tests/python/frontend/tflite/test_forward.py index 7a8437aaedd3..eca5fb7d4b74 100644 --- a/tests/python/frontend/tflite/test_forward.py +++ b/tests/python/frontend/tflite/test_forward.py @@ -169,7 +169,7 @@ def compare_tflite_with_tvm(in_data, in_name, input_tensors, for i in range(len(in_name)): in_node[i] = in_name[i].split(':')[0] if ":" in in_name[i] else in_name[i] - with tf.Session() as sess: + with tf.compat.v1.Session() as sess: if init_global_variables: sess.run(variables.global_variables_initializer()) # convert to tflite model diff --git a/tests/python/relay/test_any.py b/tests/python/relay/test_any.py index 504c20a7f21f..e6a462bd737e 100644 --- a/tests/python/relay/test_any.py +++ b/tests/python/relay/test_any.py @@ -22,6 +22,7 @@ from tvm import relay from tvm.relay.loops import while_loop from tvm.relay.testing import run_infer_type as infer_type +import topi.testing def int32(val): return relay.const(val, 'int32') @@ -642,6 +643,35 @@ def test_arange_with_dynamic_shape(): result = ex.evaluate()(data) tvm.testing.assert_allclose(result.asnumpy(), np.array(range(10)).astype("int32")+1) +def verify_any_strided_slice(data_shape, begin_shape, end_shape, + strides_shape, data_np_shape): + mod = relay.Module() + data = relay.var('data', shape=data_shape, dtype='float32') + begin = relay.var('begin', shape=begin_shape, dtype="int32") + end = 
relay.var('end', shape=end_shape, dtype="int32") + strides = relay.var('strides', shape=strides_shape, dtype="int32") + y = relay.strided_slice(data, begin, end, strides) + mod["main"] = relay.Function([data, begin, end, strides], y) + + # Generate random numpy input data + data_np = np.random.uniform(size=data_np_shape).astype('float32') + begin_np = np.random.randint(2, size=begin_shape, dtype="int32") + end_np = np.random.randint(5, 15, size=end_shape, dtype="int32") + strides_np = np.random.randint(1, 3, size=strides_shape, dtype="int32") + + ref_res = topi.testing.strided_slice_python(data_np, begin_np, end_np, strides_np) + + for kind in ["debug", "vm"]: + ex = relay.create_executor(kind, mod=mod, ctx=tvm.cpu(), target="llvm") + result = ex.evaluate()(data_np, begin_np, end_np, strides_np) + tvm.testing.assert_allclose(result.asnumpy(), ref_res) + +def test_any_strided_slice(): + verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (15, 17, 21)) + verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (23, 29, 41)) + verify_any_strided_slice(any_dims(4), (4,), (4,), (4,), (40, 50, 60, 70)) + + def test_recursive_concat(): """ fn @concat_loop(%i: int32, %st: (any, 1)) -> (any, 1) { @@ -796,7 +826,9 @@ def test_mixed_input_type(): test_any_softmax() test_any_topk() test_fused_ops() + test_any_argwhere() test_arange_with_dynamic_shape() + test_any_strided_slice() test_recursive_concat() test_recursive_concat_with_wrong_annotation() test_tuple_get_item() diff --git a/tests/python/relay/test_op_level4.py b/tests/python/relay/test_op_level4.py index 947a4bfd0b3b..358c8f18ea34 100644 --- a/tests/python/relay/test_op_level4.py +++ b/tests/python/relay/test_op_level4.py @@ -297,15 +297,44 @@ def test_mean_var_std(): def test_strided_slice(): def verify(dshape, begin, end, strides, output, test_ref=True): + dtype = "int32" x = relay.var("x", relay.TensorType(dshape, "float32")) - z = relay.strided_slice(x, begin=begin, end=end, strides=strides) + ndim = len(dshape) + begin = begin if begin else [0] * ndim + end = end if end else list(dshape) + strides = strides if strides else [1] * ndim + for i in range(ndim): + if len(begin) <= i: + begin.append(0) + if len(end) <= i: + end.append(dshape[i]) + if len(strides) <= i: + strides.append(1) + if not begin[i]: + begin[i] = 0 + if not end[i]: + end[i] = dshape[i] + if not strides[i]: + strides[i] = 1 + + begin_expr = relay.const(begin, dtype=dtype) + end_expr = relay.const(end, dtype=dtype) + strides_expr = relay.const(strides, dtype=dtype) + + z = relay.strided_slice(x, + begin=begin_expr, + end=end_expr, + strides=strides_expr) func = relay.Function([x], z) + func = run_infer_type(func) text = func.astext() assert "begin=" in text assert "end=" in text + if output: assert func.body.checked_type == relay.ty.TensorType(output, "float32") + if not test_ref: return x_data = np.random.uniform(size=dshape).astype("float32") @@ -316,18 +345,18 @@ def verify(dshape, begin, end, strides, output, test_ref=True): op_res = intrp.evaluate(func)(x_data) tvm.testing.assert_allclose(op_res.asnumpy(), ref_res) - d1, d2, d3, d4 = te.var("d1"), te.var("d2"), te.var("d3"), te.var("d4") - verify((d1, d2, 3), [None, None, 1], [None, None, 2], None, (d1, d2, 1), False) + verify((1, 224, 224, 3), [0, 20, 20, 0], [1, 140, 140, 3], [1, 1, 1, 1], (1, 120, 120, 3), dtype="int64") + verify((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1], (1, 3, 3), dtype="int16") verify((3, 4, 3), [0, 0, 0], [4, -5, 4], [1, -1, 2], (3, 1, 2)) verify((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1], 
(1, 3, 3)) - verify((3, 4, 3), [1, -1, 0], [4, -5, 3], [2, -1, 1], (1, 4, 3)) + verify((3, 4, 3), [0, 0, 0], [4, -5, 4], [1, -1, 2], (3, 1, 2)) verify((3, 4, 3), [1, 0, 0], [2, 2, 3], [1, 1, 2], (1, 2, 2)) - verify((3, 4, 3), [1, -1, 0], [2, -3, 3], [1, -1, 1], (1, 2, 3)) verify((3, 4, 3), [1, 1, 0], [4, 4, 3], None, (2, 3, 3)) verify((3, 4, 3), [1, 1, 0], [4, 1000, 3], None, (2, 3, 3)) verify((3, 4, 3), [1, 1, 0], [4, 4], None, (2, 3, 3)) verify((3, 4, 3), [1, 1], [4, 4, 3], None, (2, 3, 3)) - + verify((3, 4, 3), [1, -1, 0], [4, -5, 3], [2, -1, 1], (1, 4, 3)) + verify((3, 4, 3), [1, -1, 0], [2, -3, 3], [1, -1, 1], (1, 2, 3)) def test_strided_set(): def verify(dshape, begin, end, strides, vshape, test_ref=True): @@ -379,3 +408,4 @@ def verify(dshape, begin, end, strides, vshape, test_ref=True): test_where() test_reduce_functions() test_mean_var_std() + diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py index c3067523fb03..b73428b0a8f5 100644 --- a/tests/python/relay/test_op_level5.py +++ b/tests/python/relay/test_op_level5.py @@ -284,18 +284,22 @@ def verify_nms(x0_data, x1_data, dshape, ref_res, ref_indices_res, check_type_only=False): x0 = relay.var("x0", relay.ty.TensorType(dshape, "float32")) x1 = relay.var("x1", relay.ty.TensorType((dshape[0],), "int32")) - z = relay.vision.non_max_suppression(x0, x1, max_output_size = -1, \ - iou_threshold = iou_threshold, force_suppress = force_suppress, \ - top_k = top_k, return_indices=False) - z_indices = relay.vision.non_max_suppression(x0, x1, max_output_size = -1, \ - iou_threshold = iou_threshold, force_suppress = force_suppress, \ - top_k = top_k) + z = relay.vision.non_max_suppression(x0, x1, max_output_size=-1, \ + iou_threshold=iou_threshold, force_suppress=force_suppress, \ + top_k=top_k, return_indices=False) + z_indices = relay.vision.non_max_suppression(x0, x1, max_output_size=-1, score_threshold=0.5, \ + iou_threshold=iou_threshold, force_suppress=force_suppress, \ + top_k=top_k) + if isinstance(z_indices, relay.expr.TupleWrapper): + z_indices = z_indices.astuple() assert "iou_threshold" in z.astext() assert "iou_threshold" in z_indices.astext() zz = run_infer_type(z) zz_indices = run_infer_type(z_indices) assert zz.checked_type == relay.ty.TensorType(dshape, "float32") - assert zz_indices.checked_type == relay.ty.TensorType((dshape[0], dshape[1]), "int32") + assert zz_indices.checked_type == relay.ty.TupleType( + [relay.ty.TensorType((dshape[0], dshape[1]), "int32"), + relay.ty.TensorType((dshape[0], 1), "int32")]) if check_type_only: return @@ -307,14 +311,16 @@ def verify_nms(x0_data, x1_data, dshape, ref_res, ref_indices_res, for target, ctx in ctx_list(): intrp1 = relay.create_executor("graph", ctx=ctx, target=target) op_res1 = intrp1.evaluate(func)(x0_data, x1_data) - op_indices_res1 = intrp1.evaluate(func_indices)(x0_data, x1_data) tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5) - tvm.testing.assert_allclose(op_indices_res1.asnumpy(), ref_indices_res, rtol=1e-5) + if top_k == -1: + op_indices_res1 = intrp1.evaluate(func_indices)(x0_data, x1_data) + tvm.testing.assert_allclose(op_indices_res1[0].asnumpy(), ref_indices_res, rtol=1e-5) intrp2 = relay.create_executor("debug", ctx=ctx, target=target) op_res2 = intrp2.evaluate(func)(x0_data, x1_data) - op_indices_res2 = intrp2.evaluate(func_indices)(x0_data, x1_data) tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5) - tvm.testing.assert_allclose(op_indices_res2.asnumpy(), ref_indices_res, rtol=1e-5) + if top_k 
== -1: + op_indices_res2 = intrp2.evaluate(func_indices)(x0_data, x1_data) + tvm.testing.assert_allclose(op_indices_res2[0].asnumpy(), ref_indices_res, rtol=1e-5) np_data = np.array([[[0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80], [0, 0.4, 4, 21, 19, 40], [2, 0.9, 35, 61, 52, 79], diff --git a/topi/python/topi/sort.py b/topi/python/topi/sort.py index e492d683a09d..f79eb52e9266 100644 --- a/topi/python/topi/sort.py +++ b/topi/python/topi/sort.py @@ -31,10 +31,10 @@ def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"): The input tensor. valid_count : tvm.te.Tensor, optional - 1-D tensor for valid number of boxes only for ssd. + 1-D tensor for valid number of boxes. axis : int, optional - Axis along which to sort the input tensor. + Axis along which to sort the input tensor. By default the flattened array is used. is_ascend : boolean, optional diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index 28598dedffbd..48e85cdd574a 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -22,6 +22,55 @@ from tvm.te import hybrid from ..sort import argsort + +@hybrid.script +def hybrid_rearrange_idx(data): + """Hybrid routine to rearrange nms output to + move all valid entries to top. + + Parameters + ---------- + data : tvm.te.Tensor or numpy NDArray + NMS output. 2-D tensor with shape + [batch_size, num_anchors]. + + one: tvm.tir.const + Constant one with the same dtype as data. + + Returns + ------- + output : tvm.te.Tensor or numpy NDArray + Transformed NMS output. 2-D tensor with shape + [batch_size, num_anchors]. + + shape : tvm.te.Tensor or numpy NDArray + Shape of Tensor with valid indexes + [Batch_size, num_valid_indices] + """ + batch_size = data.shape[0] + num_anchors = data.shape[1] + out_tensor = output_tensor((batch_size, + num_anchors), + data.dtype) + out_shape = output_tensor((batch_size, + 1), + data.dtype) + + for i in range(batch_size): # range instead + valid_idx = 0 + for j in range(num_anchors): + if data[i, j] >= 0: + out_tensor[i, valid_idx] = data[i, j] + valid_idx += 1 + if data[i, j] > num_anchors or data[i, j] < -num_anchors: + out_tensor[i, valid_idx] = 0 + valid_idx += 1 + if j >= valid_idx: + out_tensor[i, j] = -1 + out_shape[i, 0] = valid_idx + return out_tensor, out_shape + + @hybrid.script def hybrid_rearrange_out(data, one): """Hybrid routine to rearrange nms output to @@ -40,7 +89,7 @@ def hybrid_rearrange_out(data, one): ------- output : tvm.te.Tensor or numpy NDArray Transformed NMS output. 3-D tensor with shape - [batch_size, num_anchors, 6]. + [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5]. """ batch_size = data.shape[0] num_anchors = data.shape[1] @@ -60,6 +109,7 @@ def hybrid_rearrange_out(data, one): if j >= valid_idx: for k in range(elem_length): output[i, j, k] = -one + return output @@ -154,9 +204,8 @@ def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): @hybrid.script -def hybrid_nms(data, sorted_index, valid_count, - max_output_size, iou_threshold, force_suppress, - top_k, coord_start, id_index, score_index, zero, one): +def hybrid_nms(data, sorted_index, valid_count, max_output_size, iou_threshold, + force_suppress, top_k, coord_start, id_index, score_index, zero, one): """Hybrid routing for non-maximum suppression. Parameters @@ -203,7 +252,8 @@ def hybrid_nms(data, sorted_index, valid_count, Returns ------- output : tvm.te.Tensor - 3-D tensor with shape [batch_size, num_anchors, 6]. 
+ 3-D tensor with shape [batch_size, num_anchors, 6] + or [batch_size, num_anchors, 5]. box_indices: tvm.te.Tensor 2-D tensor with shape [batch_size, num_anchors]. @@ -211,7 +261,7 @@ def hybrid_nms(data, sorted_index, valid_count, batch_size = data.shape[0] num_anchors = data.shape[1] box_data_length = data.shape[2] - box_indices = output_tensor((batch_size, num_anchors), "int32") + box_indices = output_tensor((batch_size, num_anchors), sorted_index.dtype) output = output_tensor((batch_size, num_anchors, box_data_length,), data.dtype) @@ -289,8 +339,136 @@ def hybrid_nms(data, sorted_index, valid_count, num_valid_boxes += 1 return output, box_indices +@hybrid.script +def hybrid_dynamic_nms(data, sorted_index, max_output_size, score_threshold, + iou_threshold, score_index, zero, one): + """Hybrid routing for non-maximum suppression. -def non_max_suppression(data, valid_count, max_output_size=-1, + Parameters + ---------- + data: tvm.te.Tensor or numpy NDArray + Bounding boxes with class and score. 3-D tensor with shape + [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5]. + + sorted_index : tvm.te.Tensor or numpy NDArray + Bounding box indexes sorted by score, with shape + [batch_size, num_anchors]. + + max_output_size : tvm.tir.const + Max number of output valid boxes for each instance. + By default all valid boxes are returned. + + score_threshold : tvm.tir.const + Lower limit of score for valid bounding boxes. + + iou_threshold : tvm.tir.const + Overlapping(IoU) threshold to suppress object with smaller score. + + score_index: tvm.tir.const + Index of the scores/confidence of boxes. + + zero: tvm.tir.const + Constant zero with the same dtype as data. + + one: tvm.tir.const + Constant one with the same dtype as data. + + Returns + ------- + box_indices: tvm.te.Tensor + 2-D tensor with shape [batch_size, num_anchors]. 
+ """ + + + batch_size = data.shape[0] + num_anchors = data.shape[1] + box_data_length = data.shape[2] + + # box_indices is the expected value, similar to TF & ONNX + box_indices = output_tensor((batch_size, num_anchors), sorted_index.dtype) + output = output_tensor((batch_size, + num_anchors, + box_data_length,), data.dtype) + + for i in range(batch_size): + if iou_threshold > 0: + # Reorder output + for j in parallel(num_anchors): + for k in range(box_data_length): + output[i, j, k] = data[i, sorted_index[i, j], k] + if output[i, j, score_index] > score_threshold: + box_indices[i, j] = sorted_index[i, j] + else: + box_indices[i, j] = -1 + + # Apply nms + box_start_idx = 1 + batch_idx = i + + for j in range(num_anchors): + # index sorted + j_sorted = sorted_index[i, j] + + box_a_idx = j + # l: left, t: top, r: right, b: bottom + a_l = min(output[batch_idx, box_a_idx, box_start_idx], + output[batch_idx, box_a_idx, box_start_idx + 2]) + a_t = min(output[batch_idx, box_a_idx, box_start_idx + 1], + output[batch_idx, box_a_idx, box_start_idx + 3]) + a_r = max(output[batch_idx, box_a_idx, box_start_idx], + output[batch_idx, box_a_idx, box_start_idx + 2]) + a_b = max(output[batch_idx, box_a_idx, box_start_idx + 1], + output[batch_idx, box_a_idx, box_start_idx + 3]) + + for k in parallel(j + 1, num_anchors): + k_sorted = sorted_index[i, k] + box_b_idx = k + # l: left, t: top, r: right, b: bottom + b_l = min(output[batch_idx, box_b_idx, box_start_idx], + output[batch_idx, box_b_idx, box_start_idx + 2]) + b_t = min(output[batch_idx, box_b_idx, box_start_idx + 1], + output[batch_idx, box_b_idx, box_start_idx + 3]) + b_r = max(output[batch_idx, box_b_idx, box_start_idx], + output[batch_idx, box_b_idx, box_start_idx + 2]) + b_b = max(output[batch_idx, box_b_idx, box_start_idx + 1], + output[batch_idx, box_b_idx, box_start_idx + 3]) + + # Overlapping width and height + w = max(zero, min(a_r, b_r) - max(a_l, b_l)) + h = max(zero, min(a_b, b_b) - max(a_t, b_t)) + + # Overlapping area + area = h * w + + # total area of the figure formed by box a and box b except for overlapping area + u = (a_r - a_l) * (a_b - a_t) + (b_r - b_l) * (b_b - b_t) - area + + # get the iou + iou = area / u + + # output[i, k, sorted_index] = iou + + if iou >= score_threshold: + box_indices[i, k] = -1 + + else: + for j in parallel(num_anchors): + box_indices[i, j] = sorted_index[i, j] + + # Only return max_output_size valid boxes + num_valid_boxes = 0 + if max_output_size > 0: + for j in parallel(num_anchors): + if num_valid_boxes == max_output_size: + box_indices[i, j] = -1 + else: + num_valid_boxes += 1 + + return output, box_indices + + +@tvm.target.generic_func +def non_max_suppression(data, valid_count, max_output_size=-1, score_threshold=0.0, iou_threshold=0.5, force_suppress=False, top_k=-1, coord_start=2, score_index=1, id_index=0, return_indices=True, invalid_to_bottom=False): @@ -308,6 +486,9 @@ def non_max_suppression(data, valid_count, max_output_size=-1, Max number of output valid boxes for each instance. By default all valid boxes are returned. + score_threshold : optional, float + Lower limit of score for valid bounding boxes. + iou_threshold : optional, float Non-maximum suppression threshold. @@ -334,8 +515,12 @@ def non_max_suppression(data, valid_count, max_output_size=-1, Returns ------- - out : tvm.te.Tensor - 3-D tensor with shape [batch_size, num_anchors, 6]. + out : tvm.te.Tensor or tuple of tvm.te.Tensor + 3-D tensor with shape [batch_size, num_anchors, 6] + or [batch_size, num_anchors, 6]. 
Out is a tuple of tvm.te.Tensor + if return_indices is True, the Tensor in the tuple is 2-D tensor + with shape [batch_size, num_anchors] and shape + [batch_size, num_valid_anchors] respectively. Example -------- @@ -366,17 +551,33 @@ def non_max_suppression(data, valid_count, max_output_size=-1, score_shape = (batch_size, num_anchors) score_tensor = te.compute(score_shape, lambda i, j: data[i, j, score_axis]) sort_tensor = argsort(score_tensor, valid_count=valid_count, axis=1, is_ascend=False) - out, box_indices = hybrid_nms(data, sort_tensor, valid_count, - tvm.tir.const(max_output_size, dtype="int32"), - tvm.tir.const(iou_threshold, dtype=data.dtype), - tvm.tir.const(force_suppress, dtype="bool"), - tvm.tir.const(top_k, dtype="int32"), - tvm.tir.const(coord_start, dtype="int32"), - tvm.tir.const(id_index, dtype="int32"), - tvm.tir.const(score_index, dtype="int32"), - zero=tvm.tir.const(0, dtype=data.dtype), - one=tvm.tir.const(1, dtype=data.dtype)) - if not return_indices and invalid_to_bottom: - out = hybrid_rearrange_out(out, one=tvm.tir.const(1, dtype=data.dtype)) - - return box_indices if return_indices else out + + if return_indices: + # return a tuple with two tensor, one is the computed valid indices of boxes, appending -1 as invalid boxes + # the other one is the number of valid boxes + out, box_indices = hybrid_dynamic_nms(data, + sort_tensor, + tvm.tir.const(max_output_size, dtype="int32"), + tvm.tir.const(score_threshold, dtype=data.dtype), + tvm.tir.const(iou_threshold, dtype=data.dtype), + tvm.tir.const(score_index, dtype="int32"), + zero=tvm.tir.const(0, dtype=data.dtype), + one=tvm.tir.const(1, dtype=data.dtype)) + box_indices, out_shape = hybrid_rearrange_idx(box_indices) + return [box_indices, out_shape] + else: + out, box_indices = hybrid_nms(data, + sort_tensor, + valid_count, + tvm.tir.const(max_output_size, dtype="int32"), + tvm.tir.const(iou_threshold, dtype=data.dtype), + tvm.tir.const(force_suppress, dtype="bool"), + tvm.tir.const(top_k, dtype="int32"), + tvm.tir.const(coord_start, dtype="int32"), + tvm.tir.const(id_index, dtype="int32"), + tvm.tir.const(score_index, dtype="int32"), + zero=tvm.tir.const(0, dtype=data.dtype), + one=tvm.tir.const(1, dtype=data.dtype)) + if invalid_to_bottom: + out = hybrid_rearrange_out(out, one=tvm.tir.const(1, dtype=data.dtype)) + return out diff --git a/topi/python/topi/vision/ssd/multibox.py b/topi/python/topi/vision/ssd/multibox.py index ba0cf5440c9a..e1ddc7bab9b0 100644 --- a/topi/python/topi/vision/ssd/multibox.py +++ b/topi/python/topi/vision/ssd/multibox.py @@ -304,7 +304,7 @@ def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nm """ inter_out = multibox_transform_loc(cls_prob, loc_pred, anchor, clip, threshold, variances) - out = non_max_suppression(inter_out[0], inter_out[1], max_output_size=-1, + out = non_max_suppression(inter_out[0], inter_out[1], max_output_size=-1, score_threshold=0, iou_threshold=nms_threshold, force_suppress=force_suppress, top_k=nms_topk, return_indices=False) return out diff --git a/topi/tests/python/test_topi_vision.py b/topi/tests/python/test_topi_vision.py index 3ccb44d0f47c..77cd2f1d7e65 100644 --- a/topi/tests/python/test_topi_vision.py +++ b/topi/tests/python/test_topi_vision.py @@ -120,13 +120,14 @@ def test_get_valid_counts(): verify_get_valid_counts((16, 500, 5), 0.95, -1, 1) -def verify_non_max_suppression(np_data, np_valid_count, np_result, np_indices_result, iou_threshold, - force_suppress, top_k, coord_start, score_index, id_index): +def 
verify_non_max_suppression(np_data, np_valid_count, np_indices, np_result, np_indices_result, + iou_threshold, force_suppress, top_k, coord_start, score_index, id_index): dshape = np_data.shape batch, num_anchors, _ = dshape indices_dshape = (batch, num_anchors) data = te.placeholder(dshape, name="data") valid_count = te.placeholder((batch,), dtype="int32", name="valid_count") + indices = te.placeholder((batch, num_anchors), dtype="int32", name="indices") def check_device(device): ctx = tvm.context(device, 0) @@ -136,10 +137,10 @@ def check_device(device): print("Running on target: %s" % device) with tvm.target.create(device): fcompute, fschedule = topi.testing.dispatch(device, _nms_implement) - out = fcompute(data, valid_count, -1, iou_threshold, force_suppress, top_k, + out = fcompute(data, valid_count, indices, -1, iou_threshold, force_suppress, top_k, coord_start=coord_start, score_index=score_index, id_index=id_index, return_indices=False) - indices_out = fcompute(data, valid_count, -1, iou_threshold, force_suppress, top_k, + indices_out = fcompute(data, valid_count, indices, -1, iou_threshold, force_suppress, top_k, coord_start=coord_start, score_index=score_index, id_index=id_index) s = fschedule(out) indices_s = fschedule(indices_out) @@ -153,7 +154,7 @@ def check_device(device): tvm.testing.assert_allclose(tvm_out.asnumpy(), np_result, rtol=1e-4) tvm_indices_out = tvm.nd.array(np.zeros(indices_dshape, dtype="int32"), ctx) - f = tvm.build(indices_s, [data, valid_count, indices_out], device) + f = tvm.build(indices_s, [data, valid_count, indices_out[0]], device) f(tvm_data, tvm_valid_count, tvm_indices_out) tvm.testing.assert_allclose(tvm_indices_out.asnumpy(), np_indices_result, rtol=1e-4) @@ -171,7 +172,7 @@ def test_non_max_suppression(): [-1, -1, -1, -1, -1, -1]]]) np_indices_result = np.array([[3, 0, -1, -1, -1]]) - verify_non_max_suppression(np_data, np_valid_count, np_result, np_indices_result, 0.7, True, 2, 2, 1, 0) + verify_non_max_suppression(np_data, np_valid_count, np_result, np_indices_result, 0.6, 0.7, True, 2, 2, 1, 0) np_data = np.array([[[0.8, 1, 20, 25, 45], [0.7, 30, 60, 50, 80], [0.4, 4, 21, 19, 40], [0.9, 35, 61, 52, 79], @@ -181,7 +182,7 @@ def test_non_max_suppression(): [-1, -1, -1, -1, -1], [-1, -1, -1, -1, -1], [-1, -1, -1, -1, -1]]]) np_indices_result = np.array([[3, 0, -1, -1, -1]]) - verify_non_max_suppression(np_data, np_valid_count, np_result, np_indices_result, 0.7, False, 2, 1, 0, -1) + verify_non_max_suppression(np_data, np_valid_count, np_result, np_indices_result, 0.6, 0.7, False, 2, 1, 0, -1) @@ -459,9 +460,9 @@ def test_proposal(): if __name__ == "__main__": test_get_valid_counts() - test_non_max_suppression() test_multibox_prior() test_multibox_detection() test_roi_align() test_roi_pool() test_proposal() + test_non_max_suppression() diff --git a/tutorials/frontend/from_tensorflow.py b/tutorials/frontend/from_tensorflow.py index 0ebd733ef9aa..1b97c442dbca 100644 --- a/tutorials/frontend/from_tensorflow.py +++ b/tutorials/frontend/from_tensorflow.py @@ -101,7 +101,7 @@ # Call the utility to import the graph definition into default graph. graph_def = tf_testing.ProcessGraphDefParam(graph_def) # Add shapes to the graph. - with tf_compat_v1.Session() as sess: + with tf.compat.v1.Session() as sess: graph_def = tf_testing.AddShapesToGraphDef(sess, 'softmax') ###################################################################### @@ -218,7 +218,7 @@ def run_inference_on_image(image): # Creates graph from saved GraphDef. 
create_graph() - with tf_compat_v1.Session() as sess: + with tf.compat.v1.Session() as sess: softmax_tensor = sess.graph.get_tensor_by_name('softmax:0') predictions = sess.run(softmax_tensor, {'DecodeJpeg/contents:0': image_data}) From 2b6b19a66ac5173b796ca69be78fbb025f015f8e Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Fri, 15 Nov 2019 12:24:29 -0800 Subject: [PATCH 02/22] Incorporate comments --- include/tvm/relay/attrs/vision.h | 1 - python/tvm/relay/frontend/common.py | 2 +- python/tvm/relay/frontend/keras.py | 4 +- python/tvm/relay/frontend/mxnet.py | 25 +- python/tvm/relay/frontend/onnx.py | 13 +- python/tvm/relay/frontend/tensorflow.py | 38 +- python/tvm/relay/op/_tensor_grad.py | 6 +- python/tvm/relay/op/transform.py | 2 +- python/tvm/relay/op/vision/nms.py | 15 +- src/relay/op/tensor/transform.cc | 116 ++++-- src/relay/op/vision/nms.cc | 24 +- .../transforms/combine_parallel_conv2d.cc | 17 +- .../frontend/tensorflow/test_forward.py | 2 + tests/python/relay/test_op_level4.py | 36 +- tests/python/relay/test_op_level5.py | 40 +- topi/python/topi/cuda/ssd/multibox.py | 2 +- topi/python/topi/vision/nms.py | 375 +++++++----------- topi/python/topi/vision/ssd/multibox.py | 2 +- topi/tests/python/test_topi_vision.py | 33 +- 19 files changed, 366 insertions(+), 387 deletions(-) diff --git a/include/tvm/relay/attrs/vision.h b/include/tvm/relay/attrs/vision.h index 0a0ca9fe93d6..3edd23f34494 100644 --- a/include/tvm/relay/attrs/vision.h +++ b/include/tvm/relay/attrs/vision.h @@ -89,7 +89,6 @@ struct GetValidCountsAttrs : public tvm::AttrsNode { /*! \brief Attributes used in non_maximum_suppression operator */ struct NonMaximumSuppressionAttrs : public tvm::AttrsNode { int max_output_size; - double score_threshold; double iou_threshold; bool force_suppress; int top_k; diff --git a/python/tvm/relay/frontend/common.py b/python/tvm/relay/frontend/common.py index 8dc1a70f5b68..e86890f3639a 100644 --- a/python/tvm/relay/frontend/common.py +++ b/python/tvm/relay/frontend/common.py @@ -571,4 +571,4 @@ def __init__(self, new_name): def __call__(self, inputs, attrs, *args): if 'tvm_custom' in attrs: attrs.pop('tvm_custom') - return get_relay_op(self._new_name)(*inputs, **attrs) \ No newline at end of file + return get_relay_op(self._new_name)(*inputs, **attrs) diff --git a/python/tvm/relay/frontend/keras.py b/python/tvm/relay/frontend/keras.py index 986995826724..ef76eb69311d 100644 --- a/python/tvm/relay/frontend/keras.py +++ b/python/tvm/relay/frontend/keras.py @@ -611,8 +611,8 @@ def _convert_cropping(inexpr, keras_layer, _): raise tvm.error.OpNotImplemented( 'Operator {} is not supported for frontend Keras.'.format(crop_type)) int32_max = np.iinfo(np.int32).max - return _op.strided_slice(inexpr, begin=[0, 0, crop_t, crop_l], \ - end=[int32_max, int32_max, in_h-crop_b, in_w-crop_r]) + return _op.strided_slice(inexpr, begin=_expr.const([0, 0, crop_t, crop_l]), \ + end=_expr.const([int32_max, int32_max, in_h-crop_b, in_w-crop_r])) def _convert_batchnorm(inexpr, keras_layer, etab): diff --git a/python/tvm/relay/frontend/mxnet.py b/python/tvm/relay/frontend/mxnet.py index c75612dd4916..2853320212e4 100644 --- a/python/tvm/relay/frontend/mxnet.py +++ b/python/tvm/relay/frontend/mxnet.py @@ -411,16 +411,22 @@ def _mx_slice(inputs, attrs): begin = list(attrs.get_int_tuple('begin', None)) end = list(attrs.get_int_tuple('end', None)) stride = attrs.get_int_tuple('step', None) + input_shape = _infer_type(inputs[0]).checked_type.shape if begin is None: raise tvm.error.OpAttributeRequired( 'Attribute "begin" 
not found in operator Slice.') if end is None: raise tvm.error.OpAttributeRequired( 'Attribute "end" not found in operator Slice.') - begin = tuple(x if x is not None else 0 for x in begin) - new_attrs = {'begin': begin, 'end': end} + begin = (x if x is not None else 0 for x in begin) + for i, ed in enumerate(end): + if ed is None: + end[i] = input_shape[i] + new_attrs = {'begin': _expr.const(list(begin), dtype="int32"), + 'end': _expr.const(list(end), dtype="int32")} if stride is not None: - new_attrs['strides'] = stride + stride = (x if x is not None else 1 for x in stride) + new_attrs['strides'] = _expr.const(list(stride), dtype="int32") return _op.strided_slice(inputs[0], **new_attrs) @@ -460,7 +466,9 @@ def _mx_slice_axis(inputs, attrs): else: begin.append(ax_beg) end.append(ax_end) - return _op.strided_slice(inputs[0], begin, end) + return _op.strided_slice(inputs[0], + _expr.const(begin, dtype="int32"), + _expr.const(end, dtype="int32")) def _mx_crop_like(inputs, attrs): @@ -480,9 +488,9 @@ def _mx_crop_like(inputs, attrs): return _op.slice_like(*inputs, **new_attrs) expr = _infer_type(inputs[1]) like_shape = expr.checked_type.shape - new_attrs['begin'] = [0, 0, offset[0], offset[1]] - new_attrs['end'] = [like_shape[0], like_shape[1], offset[0]+like_shape[2], - offset[1]+like_shape[3]] + new_attrs['begin'] = _expr.const([0, 0, offset[0], offset[1]], dtype="int32") + new_attrs['end'] = _expr.const([like_shape[0], like_shape[1], offset[0]+like_shape[2], + offset[1]+like_shape[3]], dtype="int32") return _op.strided_slice(inputs[0], **new_attrs) @@ -656,7 +664,7 @@ def _mx_multibox_detection(inputs, attrs): ret = _op.vision.multibox_transform_loc(inputs[0], inputs[1], inputs[2], **new_attrs0) - return _op.vision.non_max_suppression(ret[0], ret[1], **new_attrs1) + return _op.vision.non_max_suppression(ret[0], ret[1], ret[1], **new_attrs1) def _mx_batch_dot(inputs, attrs): @@ -820,6 +828,7 @@ def _mx_box_nms(inputs, attrs): id_index=id_index, score_index=score_index) nms_out = _op.vision.non_max_suppression(ret[1], ret[0], + ret[2], iou_threshold=iou_thresh, force_suppress=force_suppress, top_k=top_k, diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 58ec4ee56a93..6643a888f10b 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -945,11 +945,12 @@ def _impl_v1(cls, inputs, attr, params): attr['ends'] = new_ends except KeyError: pass + begin = list(attr['starts']) + end = list(attr['ends']) - return AttrCvt('strided_slice', - transforms={'starts': 'begin', - 'ends': 'end'}, - ignores=['axes'])(inputs, attr) + return _op.strided_slice(inputs[0], + begin=_expr.const(begin, dtype="int32"), + end=_expr.const(end, dtype="int32")) @classmethod def _impl_v10(cls, inputs, attr, params): @@ -965,7 +966,9 @@ def _impl_v10(cls, inputs, attr, params): starts, ends, axes) starts = new_starts ends = new_ends - return _op.strided_slice(inputs[0], begin=starts, end=ends) + return _op.strided_slice(inputs[0], + begin=_expr.const(starts, dtype="int32"), + end=_expr.const(ends, dtype="int32")) class Gather(OnnxOpConverter): diff --git a/python/tvm/relay/frontend/tensorflow.py b/python/tvm/relay/frontend/tensorflow.py index 5926959349c8..59740d204744 100644 --- a/python/tvm/relay/frontend/tensorflow.py +++ b/python/tvm/relay/frontend/tensorflow.py @@ -97,7 +97,7 @@ def _need_prelude_for_shape_inference(op): return "TensorArray" in op def _need_module_for_shape_inference(op): - return op in ['StridedSlice, NonMaxSuppressionV3'] + 
return op in ['StridedSlice', 'NonMaxSuppressionV3'] def _rsqrt(): def _impl(inputs, attr, params, mod): @@ -624,41 +624,38 @@ def _impl(inputs, attr, params): # score_threshold was introduced from V3 score_threshold = np.atleast_1d(inputs[4].data.asnumpy())[0] if len(inputs) > 4 else None + # Generate data with shape (1, num_anchors, 5) scores = AttrCvt(op_name="expand_dims", extras={'axis': -1, 'num_newaxis': 1})([inputs[1]], attr) - data = get_relay_op('concatenate')([scores, inputs[0]], -1) - # expand to [class_id, prob, box] - # data = _get_relay_op('concatenate')([scores, data], -1) - # expand to [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5] - data = get_relay_op('expand_dims')(data, 0, 1) - # Don't need to call get_valid_counts for TensorFlow and ONNX - # ct, data = _get_relay_op('get_valid_counts')(data, score_threshold=score_threshold, - # id_index=-1, score_index=0) - # get the number of anchors - data_shape = attr['_input_shapes'][inputs[1]] - valid_cnt = _expr.const(data_shape) + # get_valid_counts is called here to improve inference performance + ct, data, indices = get_relay_op('get_valid_counts')(data, + score_threshold=score_threshold, + id_index=-1, + score_index=0) # TensorFlow NMS doesn't have parameter top_k top_k = -1 - # score_index is 0 since TF doesn't have class id for nms input + # TF doesn't have class id for nms input score_index = 0 nms_ret = get_relay_op('non_max_suppression')(data=data, - valid_count=valid_cnt, + valid_count=ct, + indices=indices, max_output_size=max_output_size, - score_threshold=score_threshold, iou_threshold=iou_threshold, - force_suppress=False, + force_suppress=True, top_k=top_k, coord_start=1, score_index=score_index, id_index=-1, return_indices=True, invalid_to_bottom=False) - + # squeeze the outputs, since TF NMS is not batched end = get_relay_op("squeeze")(nms_ret[1], axis=[1]) data_slice = get_relay_op("squeeze")(nms_ret[0], axis=[0]) + + # slice to get the dynamic result ret = get_relay_op("strided_slice")(data_slice, _expr.const([0]), end, _expr.const([1])) return ret return _impl @@ -1515,8 +1512,11 @@ def _transform_mask(stride_dim, ellipsis_mask): fshape_indices = None if begin_mask or end_mask or ellipsis_mask or new_axis_mask or shrink_axis_mask: begin, end, stride, fshape_indices = _transform_mask(stride_dim, ellipsis_mask) - out = _op.strided_slice(inputs[0], begin=begin, end=end, strides=stride) - out_shape = _infer_shape(out, mod) + out = _op.strided_slice(inputs[0], + begin=_expr.const(begin), + end=_expr.const(end), + strides=_expr.const(stride)) + out_shape = _infer_shape(out, mod=mod) if not fshape_indices: fshape_indices = range(len(out_shape)) diff --git a/python/tvm/relay/op/_tensor_grad.py b/python/tvm/relay/op/_tensor_grad.py index 8ba10207020e..c8c5b76e5427 100644 --- a/python/tvm/relay/op/_tensor_grad.py +++ b/python/tvm/relay/op/_tensor_grad.py @@ -390,8 +390,10 @@ def conv2d_grad(orig, grad): assert padded_weight_grad_h >= filter_h assert padded_weight_grad_w >= filter_w if padded_weight_grad_h > filter_h or padded_weight_grad_w > filter_w: - backward_weight = strided_slice(backward_weight, begin=[0, 0, 0, 0], - end=[None, None, filter_h, filter_w]) + backward_weight = strided_slice(backward_weight, + begin=const([0, 0, 0, 0], dtype="int64"), + end=const([out_channel, in_channel // attrs.groups, + filter_h, filter_w], dtype="int64")) return [backward_data, backward_weight] diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index c8c540fa861b..44b8c1c03f9d 100644 --- 
a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py @@ -634,7 +634,7 @@ def strided_slice(data, begin, end, strides=None): ret : relay.Expr The computed result. """ - strides = strides or const(1) + strides = strides or const([1], dtype="int32") return _make.strided_slice(data, begin, end, strides) diff --git a/python/tvm/relay/op/vision/nms.py b/python/tvm/relay/op/vision/nms.py index a4e98a732715..f2657f47c6b2 100644 --- a/python/tvm/relay/op/vision/nms.py +++ b/python/tvm/relay/op/vision/nms.py @@ -47,16 +47,19 @@ def get_valid_counts(data, out_tensor : relay.Expr Rearranged data tensor. + + out_indices: relay.Expr + Indices in input data """ return expr.TupleWrapper( _make.get_valid_counts(data, score_threshold, - id_index, score_index), 2) + id_index, score_index), 3) def non_max_suppression(data, valid_count, + indices, max_output_size=-1, - score_threshold=0.0, iou_threshold=0.5, force_suppress=False, top_k=-1, @@ -79,13 +82,13 @@ def non_max_suppression(data, valid_count : relay.Expr 1-D tensor for valid number of boxes. + indices: relay.Expr + 2-D tensor with shape [batch_size, num_anchors] + max_output_size : int, optional Max number of output valid boxes for each instance. By default all valid boxes are returned. - score_threshold : float, optional - Lower limit of score for valid bounding boxes. - iou_threshold : float, optional Non-maximum suppression threshold. @@ -120,8 +123,8 @@ def non_max_suppression(data, """ out = _make.non_max_suppression(data, valid_count, + indices, max_output_size, - score_threshold, iou_threshold, force_suppress, top_k, diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index 5d471f99a47f..4e1b05e04620 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -1676,6 +1676,66 @@ Array GetIntArray(Array arr) { // strided_slice TVM_REGISTER_NODE_TYPE(StridedSliceAttrs); +int64_t* ToVector(const runtime::NDArray& array) { + size_t len = array.Shape().front(); + int64_t* rel_vec = new int64_t[len]; + if (array->dtype.code == kDLInt) { + if (array->dtype.bits == 8) { + int8_t* init_array = reinterpret_cast(array->data); + for (size_t i = 0; i < len; ++i) { + rel_vec[i] = int64_t(init_array[i]); + } + return rel_vec; + } else if (array->dtype.bits == 16) { + int16_t* init_array = reinterpret_cast(array->data); + for (size_t i = 0; i < len; ++i) { + rel_vec[i] = int64_t(init_array[i]); + } + return rel_vec; + } else if (array->dtype.bits == 32) { + int32_t* init_array = reinterpret_cast(array->data); + for (size_t i = 0; i < len; ++i) { + rel_vec[i] = int64_t(init_array[i]); + } + return rel_vec; + } else if (array->dtype.bits == 64) { + int64_t* init_array = reinterpret_cast(array->data); + for (size_t i = 0; i < len; ++i) { + rel_vec[i] = int64_t(init_array[i]); + } + return rel_vec; + } + } else if (array->dtype.code == kDLUInt) { + if (array->dtype.bits == 8) { + uint8_t* init_array = reinterpret_cast(array->data); + for (size_t i = 0; i < len; ++i) { + rel_vec[i] = int64_t(init_array[i]); + } + return rel_vec; + } else if (array->dtype.bits == 16) { + uint16_t* init_array = reinterpret_cast(array->data); + for (size_t i = 0; i < len; ++i) { + rel_vec[i] = int64_t(init_array[i]); + } + return rel_vec; + } else if (array->dtype.bits == 32) { + uint32_t* init_array = reinterpret_cast(array->data); + for (size_t i = 0; i < len; ++i) { + rel_vec[i] = int64_t(init_array[i]); + } + return rel_vec; + } else if (array->dtype.bits == 64) { + uint64_t* init_array = 
reinterpret_cast(array->data); + for (size_t i = 0; i < len; ++i) { + rel_vec[i] = int64_t(init_array[i]); + } + return rel_vec; + } + } + LOG(FATAL) << "Unknown data type: " << tvm::runtime::TVMType2String(array->dtype); + return rel_vec; +} + bool StridedSliceRel(const Array& types, int num_inputs, const Attrs& attrs, @@ -1686,7 +1746,7 @@ bool StridedSliceRel(const Array& types, const auto* data = types[0].as(); CHECK(data != nullptr); auto dshape = data->shape; - auto num_axis = dshape.size(); + int64_t num_axis = dshape.size(); // calculate output shape std::vector oshape(num_axis); @@ -1695,32 +1755,32 @@ bool StridedSliceRel(const Array& types, (cend = param->end.as()) && (cstrides = param->strides.as())) { std::vector stride_vec; - int32_t* strides_val = reinterpret_cast(cstrides->data->data); - for (size_t i = 0; i < cstrides->data.Shape().front(); ++i){ + int64_t* strides_val = ToVector(cstrides->data); + for (int64_t i = 0; i < cstrides->data.Shape().front(); ++i) { stride_vec.push_back(strides_val[i]); } - for (size_t i = stride_vec.size(); i < num_axis; ++i) { + for (int64_t i = stride_vec.size(); i < num_axis; ++i) { stride_vec.push_back(1); } const int64_t max_range = std::numeric_limits::max(); std::vector begin_vec; - int32_t* begin_val = reinterpret_cast(cbegin->data->data); - for (size_t i = 0; i < cbegin->data.Shape().front(); ++i){ + int64_t* begin_val = ToVector(cbegin->data); + for (int64_t i = 0; i < cbegin->data.Shape().front(); ++i) { begin_vec.push_back(begin_val[i]); } - for (size_t i = begin_vec.size(); i < num_axis; ++i) { + for (int64_t i = begin_vec.size(); i < num_axis; ++i) { begin_vec.push_back(stride_vec[i] > 0 ? 0 : max_range); } std::vector end_vec; - int32_t* end_val = reinterpret_cast(cend->data->data); - for (size_t i = 0; i < cend->data.Shape().front(); ++i){ + int64_t* end_val = ToVector(cend->data); + for (int64_t i = 0; i < cend->data.Shape().front(); ++i) { end_vec.push_back(end_val[i]); } - for (size_t i = end_vec.size(); i < num_axis; ++i) { + for (int64_t i = end_vec.size(); i < num_axis; ++i) { end_vec.push_back(stride_vec[i] < 0 ? 
0 : max_range); } - for (size_t i = 0; i < num_axis; ++i) { + for (int64_t i = 0; i < num_axis; ++i) { int64_t stride_v = stride_vec[i]; int64_t begin_v = begin_vec[i]; int64_t end_v = end_vec[i]; @@ -1784,9 +1844,9 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, } CHECK(old_in_layouts.defined()); - CHECK_EQ(old_in_layouts.size(), 1); + CHECK_GE(old_in_layouts.size(), 1); CHECK(old_in_shapes.defined()); - CHECK_EQ(old_in_shapes.size(), 1); + CHECK_GE(old_in_shapes.size(), 1); auto layout = old_in_layouts[0]; if (layout.defined() && new_in_layouts.defined()) { @@ -1802,17 +1862,16 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, if ((cbegin = params->begin.as()) && (cend = params->end.as()) && (cstrides = params->strides.as())) { - - int32_t* strides_val = reinterpret_cast(cstrides->data->data); - for (size_t i = 0; i < cstrides->data.Shape().front(); ++i){ + int64_t* strides_val = ToVector(cstrides->data); + for (int64_t i = 0; i < cstrides->data.Shape().front(); ++i) { strides.push_back(strides_val[i]); } - int32_t* begin_val = reinterpret_cast(cbegin->data->data); - for (size_t i = 0; i < cbegin->data.Shape().front(); ++i){ + int64_t* begin_val = ToVector(cbegin->data); + for (int64_t i = 0; i < cbegin->data.Shape().front(); ++i) { begin.push_back(begin_val[i]); } - int32_t* end_val = reinterpret_cast(cend->data->data); - for (size_t i = 0; i < cend->data.Shape().front(); ++i){ + int64_t* end_val = ToVector(cend->data); + for (int64_t i = 0; i < cend->data.Shape().front(); ++i) { end.push_back(end_val[i]); } } @@ -1859,11 +1918,12 @@ inline Tensor DynamicStridedSlice(const tvm::Tensor& input, const tvm::Tensor& strides, std::string name = "T_strided_slice_dynamic", std::string tag = topi::kInjective) { - size_t src_tensor_dim = static_cast(input->shape.size()); + int64_t src_tensor_dim = input->shape.size(); Array out_shape; - for(size_t i = 0; i < src_tensor_dim; ++i){ + for (int64_t i = 0; i < src_tensor_dim; ++i) { out_shape.push_back(tvm::Var("dim")); } + // TODO(yongwww): move the compute into topi after nnvm is removed return tvm::compute(out_shape, [&](const Array& indices) { Array real_indices; for (int32_t i = 0; i < src_tensor_dim; ++i) { @@ -1882,16 +1942,16 @@ Array StridedSliceCompute(const Attrs& attrs, const Arrayend.as()) && (cstrides = param->strides.as())) { Array begin, end, strides; - int32_t* strides_val = reinterpret_cast(cstrides->data->data); - for (size_t i = 0; i < cstrides->data.Shape().front(); ++i){ + int64_t* strides_val = ToVector(cstrides->data); + for (int64_t i = 0; i < cstrides->data.Shape().front(); ++i) { strides.push_back(strides_val[i]); } - int32_t* begin_val = reinterpret_cast(cbegin->data->data); - for (size_t i = 0; i < cbegin->data.Shape().front(); ++i){ + int64_t* begin_val = ToVector(cbegin->data); + for (int64_t i = 0; i < cbegin->data.Shape().front(); ++i) { begin.push_back(begin_val[i]); } - int32_t* end_val = reinterpret_cast(cend->data->data); - for (size_t i = 0; i < cend->data.Shape().front(); ++i){ + int64_t* end_val = ToVector(cend->data); + for (int64_t i = 0; i < cend->data.Shape().front(); ++i) { end.push_back(end_val[i]); } return Array{ diff --git a/src/relay/op/vision/nms.cc b/src/relay/op/vision/nms.cc index bdd4f664165b..4002820d0c15 100644 --- a/src/relay/op/vision/nms.cc +++ b/src/relay/op/vision/nms.cc @@ -37,9 +37,11 @@ bool GetValidCountRel(const Array& types, int num_inputs, const Attrs& att CHECK_EQ(dshape.size(), 3) << "Input data should be 3-D."; std::vector oshape({data->shape[0]}); + 
std::vector oshape_indices({data->shape[0], data->shape[1]}); std::vector fields; fields.push_back(TensorType(oshape, DataType::Int(32))); fields.push_back(TensorType(data->shape, data->dtype)); + fields.push_back(TensorType(oshape_indices, DataType::Int(32))); // assign output type reporter->Assign(types[1], TupleType(Array(fields))); @@ -71,7 +73,7 @@ TVM_REGISTER_NODE_TYPE(NonMaximumSuppressionAttrs); bool NMSRel(const Array& types, int num_inputs, const Attrs& attrs, const TypeReporter& reporter) { - CHECK_EQ(types.size(), 3); + CHECK_EQ(types.size(), 4); const auto* data = types[0].as(); const auto* valid_count = types[1].as(); const NonMaximumSuppressionAttrs* param = attrs.as(); @@ -88,9 +90,9 @@ bool NMSRel(const Array& types, int num_inputs, const Attrs& attrs, fields.push_back(TensorType(oshape, DataType::Int(32))); std::vector countshape({dshape[0], 1}); fields.push_back(TensorType(countshape, DataType::Int(32))); - reporter->Assign(types[2], TupleType(Array(fields))); + reporter->Assign(types[3], TupleType(Array(fields))); } else { - reporter->Assign(types[2], TensorType(dshape, data->dtype)); + reporter->Assign(types[3], TensorType(dshape, data->dtype)); } return true; } @@ -98,8 +100,8 @@ bool NMSRel(const Array& types, int num_inputs, const Attrs& attrs, Expr MakeNMS(Expr data, Expr valid_count, + Expr indices, int max_output_size, - double score_threshold, double iou_threshold, bool force_suppress, int top_k, @@ -110,7 +112,6 @@ Expr MakeNMS(Expr data, bool invalid_to_bottom) { auto attrs = make_object(); attrs->max_output_size = max_output_size; - attrs->score_threshold = score_threshold; attrs->iou_threshold = iou_threshold; attrs->force_suppress = force_suppress; attrs->top_k = top_k; @@ -120,7 +121,7 @@ Expr MakeNMS(Expr data, attrs->return_indices = return_indices; attrs->invalid_to_bottom = invalid_to_bottom; static const Op& op = Op::Get("vision.non_max_suppression"); - return Call(op, {data, valid_count}, Attrs(attrs), {}); + return Call(op, {data, valid_count, indices}, Attrs(attrs), {}); } TVM_REGISTER_GLOBAL("relay.op.vision._make.non_max_suppression") @@ -132,11 +133,12 @@ be in the format of [class_id, score, left, top, right, bottom] or [score, left, top, right, bottom]. Set id_index to be -1 to ignore class_id axis. 
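For illustration, a minimal Python sketch of driving the three-input op registered here, with toy shapes assumed (the three get_valid_counts outputs feed NMS directly):

from tvm import relay

# Sketch only: shape, thresholds, and variable names are illustrative.
data = relay.var("data", shape=(1, 5, 6), dtype="float32")
cnt = relay.vision.get_valid_counts(data, score_threshold=0.0,
                                    id_index=0, score_index=1)
# cnt[0]: valid_count, cnt[1]: rearranged data, cnt[2]: original indices
nms = relay.vision.non_max_suppression(cnt[1], cnt[0], cnt[2],
                                       max_output_size=-1,
                                       iou_threshold=0.5,
                                       return_indices=False)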
)doc" TVM_ADD_FILELINE) - .set_num_inputs(2) - .add_argument("data", "Tensor", "Input data.") - .add_argument("valid_count", "Tensor", "Number of valid anchor boxes.") - .set_support_level(5) - .add_type_rel("NMS", NMSRel); +.set_num_inputs(3) +.add_argument("data", "Tensor", "Input data.") +.add_argument("valid_count", "Tensor", "Number of valid anchor boxes.") +.add_argument("indices", "Tensor", "Corresponding indices in original input tensor.") +.set_support_level(5) +.add_type_rel("NMS", NMSRel); } // namespace relay } // namespace tvm diff --git a/src/relay/transforms/combine_parallel_conv2d.cc b/src/relay/transforms/combine_parallel_conv2d.cc index e4b2e1ea980b..58ac315d3234 100644 --- a/src/relay/transforms/combine_parallel_conv2d.cc +++ b/src/relay/transforms/combine_parallel_conv2d.cc @@ -71,12 +71,15 @@ class ParallelConv2DCombiner : public ParallelOpCombiner { const auto shape_b = tir::BijectiveLayout(Layout(attrs_b->kernel_layout), kOIHW).ForwardShape(tweight_b->shape); - return eq(attrs_a->strides, attrs_b->strides) && eq(attrs_a->padding, attrs_b->padding) && - eq(attrs_a->dilation, attrs_b->dilation) && eq(attrs_a->groups, attrs_b->groups) && + return eq(attrs_a->strides, attrs_b->strides) && + eq(attrs_a->padding, attrs_b->padding) && + eq(attrs_a->dilation, attrs_b->dilation) && + eq(attrs_a->groups, attrs_b->groups) && eq(attrs_a->data_layout, attrs_b->data_layout) && eq(attrs_a->kernel_layout, attrs_b->kernel_layout) && eq(attrs_a->out_dtype, attrs_b->out_dtype) && - eq(attrs_a->out_layout, attrs_b->out_layout) && eq(shape_a[2], shape_b[2]) && + eq(attrs_a->out_layout, attrs_b->out_layout) && + eq(shape_a[2], shape_b[2]) && eq(shape_a[3], shape_b[3]); } @@ -187,13 +190,15 @@ class ParallelConv2DCombiner : public ParallelOpCombiner { int64_t* begin_data = static_cast(begin_ndarray->data); int64_t* end_data = static_cast(end_ndarray->data); - for (size_t i = 0; i < begin.size(); ++i){ + for (size_t i = 0; i < begin.size(); ++i) { begin_data[i] = begin[i]; end_data[i] = end[i]; } - auto slice = MakeStridedSlice(data, ConstantNode::make(begin_ndarray), - ConstantNode::make(end_ndarray), ConstantNode::make(strides_ndarray)); + auto slice = MakeStridedSlice(data, + ConstantNode::make(begin_ndarray), + ConstantNode::make(end_ndarray), + ConstantNode::make(strides_ndarray)); subst_map->insert({GetRef(branch[depth]), slice}); } } diff --git a/tests/python/frontend/tensorflow/test_forward.py b/tests/python/frontend/tensorflow/test_forward.py index 76d2fe13aa49..82c00e2db1d9 100644 --- a/tests/python/frontend/tensorflow/test_forward.py +++ b/tests/python/frontend/tensorflow/test_forward.py @@ -190,6 +190,7 @@ def name_without_num(name): target=device, out_names=out_name, num_output=len(out_name), opt_level=opt_level, mode=mode, cuda_layout=cuda_layout) + # since the names from tensorflow and relay runs are not exactly same, # first len(tf_output) will be compared for i in range(len(tf_output)): @@ -3320,6 +3321,7 @@ def test_forward_isfinite(): test_forward_space_to_batch_nd() test_forward_batch_to_space_nd() test_forward_dilation() + test_forward_nms_v3() # End to End test_forward_inception_v3() diff --git a/tests/python/relay/test_op_level4.py b/tests/python/relay/test_op_level4.py index 358c8f18ea34..ea5ebcd6c265 100644 --- a/tests/python/relay/test_op_level4.py +++ b/tests/python/relay/test_op_level4.py @@ -296,35 +296,23 @@ def test_mean_var_std(): def test_strided_slice(): - def verify(dshape, begin, end, strides, output, test_ref=True): - dtype = "int32" + def verify(dshape, 
begin, end, strides, output, test_ref=True, dtype="int32"): x = relay.var("x", relay.TensorType(dshape, "float32")) ndim = len(dshape) begin = begin if begin else [0] * ndim end = end if end else list(dshape) - strides = strides if strides else [1] * ndim - for i in range(ndim): - if len(begin) <= i: - begin.append(0) - if len(end) <= i: - end.append(dshape[i]) - if len(strides) <= i: - strides.append(1) - if not begin[i]: - begin[i] = 0 - if not end[i]: - end[i] = dshape[i] - if not strides[i]: - strides[i] = 1 - begin_expr = relay.const(begin, dtype=dtype) end_expr = relay.const(end, dtype=dtype) - strides_expr = relay.const(strides, dtype=dtype) - - z = relay.strided_slice(x, - begin=begin_expr, - end=end_expr, - strides=strides_expr) + if strides: + strides_expr = relay.const(strides, dtype=dtype) + z = relay.strided_slice(x, + begin=begin_expr, + end=end_expr, + strides=strides_expr) + else: + z = relay.strided_slice(x, + begin=begin_expr, + end=end_expr) func = relay.Function([x], z) func = run_infer_type(func) @@ -348,8 +336,6 @@ def verify(dshape, begin, end, strides, output, test_ref=True): verify((1, 224, 224, 3), [0, 20, 20, 0], [1, 140, 140, 3], [1, 1, 1, 1], (1, 120, 120, 3), dtype="int64") verify((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1], (1, 3, 3), dtype="int16") verify((3, 4, 3), [0, 0, 0], [4, -5, 4], [1, -1, 2], (3, 1, 2)) - verify((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1], (1, 3, 3)) - verify((3, 4, 3), [0, 0, 0], [4, -5, 4], [1, -1, 2], (3, 1, 2)) verify((3, 4, 3), [1, 0, 0], [2, 2, 3], [1, 1, 2], (1, 2, 2)) verify((3, 4, 3), [1, 1, 0], [4, 4, 3], None, (2, 3, 3)) verify((3, 4, 3), [1, 1, 0], [4, 1000, 3], None, (2, 3, 3)) diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py index b73428b0a8f5..df3091f3021d 100644 --- a/tests/python/relay/test_op_level5.py +++ b/tests/python/relay/test_op_level5.py @@ -244,6 +244,7 @@ def verify_get_valid_counts(dshape, score_threshold, id_index, score_index): np_data = np.random.uniform(low=-2, high=2, size=dshape).astype(dtype) np_out1 = np.zeros(shape=(batch_size,)) np_out2 = np.zeros(shape=dshape).astype(dtype) + np_out3 = np.zeros(shape=(batch_size, num_anchor)) for i in range(batch_size): np_out1[i] = 0 inter_idx = 0 @@ -253,10 +254,12 @@ def verify_get_valid_counts(dshape, score_threshold, id_index, score_index): for k in range(elem_length): np_out2[i, inter_idx, k] = np_data[i, j, k] np_out1[i] += 1 + np_out3[i, inter_idx] = j inter_idx += 1 if j >= np_out1[i]: for k in range(elem_length): np_out2[i, j, k] = -1.0 + np_out3[i, j] = -1 x = relay.var("x", relay.ty.TensorType(dshape, dtype)) z = relay.vision.get_valid_counts(x, score_threshold, id_index, score_index) @@ -271,6 +274,7 @@ def verify_get_valid_counts(dshape, score_threshold, id_index, score_index): if target == 'cuda': return tvm.testing.assert_allclose(out[1].asnumpy(), np_out2, rtol=1e-3, atol=1e-04) + tvm.testing.assert_allclose(out[2].asnumpy(), np_out3, rtol=1e-3, atol=1e-04) verify_get_valid_counts((1, 2500, 6), 0, 0, 1) verify_get_valid_counts((1, 2500, 5), -1, -1, 0) @@ -279,15 +283,16 @@ def verify_get_valid_counts(dshape, score_threshold, id_index, score_index): def test_non_max_suppression(): - def verify_nms(x0_data, x1_data, dshape, ref_res, ref_indices_res, + def verify_nms(x0_data, x1_data, x2_data, dshape, ref_res, ref_indices_res, iou_threshold=0.5, force_suppress=False, top_k=-1, check_type_only=False): x0 = relay.var("x0", relay.ty.TensorType(dshape, "float32")) x1 = relay.var("x1", 
relay.ty.TensorType((dshape[0],), "int32")) - z = relay.vision.non_max_suppression(x0, x1, max_output_size=-1, \ + x2 = relay.var("x2", relay.ty.TensorType((dshape[0],dshape[1]), "int32")) + z = relay.vision.non_max_suppression(x0, x1, x2, max_output_size=-1, \ iou_threshold=iou_threshold, force_suppress=force_suppress, \ top_k=top_k, return_indices=False) - z_indices = relay.vision.non_max_suppression(x0, x1, max_output_size=-1, score_threshold=0.5, \ + z_indices = relay.vision.non_max_suppression(x0, x1, x2, max_output_size=-1, \ iou_threshold=iou_threshold, force_suppress=force_suppress, \ top_k=top_k) if isinstance(z_indices, relay.expr.TupleWrapper): @@ -304,50 +309,53 @@ def verify_nms(x0_data, x1_data, dshape, ref_res, ref_indices_res, if check_type_only: return - func = relay.Function([x0, x1], z) + func = relay.Function([x0, x1, x2], z) func = run_infer_type(func) - func_indices = relay.Function([x0, x1], z_indices) + func_indices = relay.Function([x0, x1, x2], z_indices) func_indices = run_infer_type(func_indices) for target, ctx in ctx_list(): intrp1 = relay.create_executor("graph", ctx=ctx, target=target) - op_res1 = intrp1.evaluate(func)(x0_data, x1_data) + op_res1 = intrp1.evaluate(func)(x0_data, x1_data, x2_data) tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5) if top_k == -1: - op_indices_res1 = intrp1.evaluate(func_indices)(x0_data, x1_data) + op_indices_res1 = intrp1.evaluate(func_indices)(x0_data, x1_data, x2_data) tvm.testing.assert_allclose(op_indices_res1[0].asnumpy(), ref_indices_res, rtol=1e-5) intrp2 = relay.create_executor("debug", ctx=ctx, target=target) - op_res2 = intrp2.evaluate(func)(x0_data, x1_data) + op_res2 = intrp2.evaluate(func)(x0_data, x1_data, x2_data) tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5) if top_k == -1: - op_indices_res2 = intrp2.evaluate(func_indices)(x0_data, x1_data) + op_indices_res2 = intrp2.evaluate(func_indices)(x0_data, x1_data, x2_data) tvm.testing.assert_allclose(op_indices_res2[0].asnumpy(), ref_indices_res, rtol=1e-5) np_data = np.array([[[0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80], [0, 0.4, 4, 21, 19, 40], [2, 0.9, 35, 61, 52, 79], [1, 0.5, 100, 60, 70, 110]]]).astype("float32") np_valid_count = np.array([4]).astype("int32") + + np_indices = np.array([[0, 1, 3, 4, -1]]).astype("int32") + np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) np_indices_result = np.array([[3, 0, -1, -1, -1]]) num_anchors = 5 - dshape = (te.size_var("n"), num_anchors, 6) - verify_nms(np_data, np_valid_count, dshape, np_result, np_indices_result, + dshape = (tvm.var("n"), num_anchors, 6) + verify_nms(np_data, np_valid_count, np_indices, dshape, np_result, np_indices_result, force_suppress=True, top_k=2, check_type_only=True) dshape = (1, num_anchors, 6) - verify_nms(np_data, np_valid_count, dshape, np_result, np_indices_result, + verify_nms(np_data, np_valid_count, np_indices, dshape, np_result, np_indices_result, force_suppress=True, top_k=2, check_type_only=False) np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) np_indices_result = np.array([[3, 0, 1, -1, -1]]) - dshape = (te.size_var("n"), num_anchors, 6) - verify_nms(np_data, np_valid_count, dshape, np_result, + dshape = (tvm.var("n"), num_anchors, 6) + verify_nms(np_data, np_valid_count, np_indices, dshape, np_result, np_indices_result, 
check_type_only=True) dshape = (1, num_anchors, 6) - verify_nms(np_data, np_valid_count, dshape, np_result, + verify_nms(np_data, np_valid_count, np_indices, dshape, np_result, np_indices_result, top_k=3) @@ -390,7 +398,7 @@ def test_default_value(): assert ret.checked_type == ref_type - nms = relay.vision.non_max_suppression(mtl[0], mtl[1], return_indices=False) + nms = relay.vision.non_max_suppression(mtl[0], mtl[1], mtl[0], return_indices=False) func = relay.Function([cls_prob, loc_pred, anchors], nms) func = run_infer_type(func) for target, ctx in ctx_list(): diff --git a/topi/python/topi/cuda/ssd/multibox.py b/topi/python/topi/cuda/ssd/multibox.py index 30784f45a591..22d74438188c 100644 --- a/topi/python/topi/cuda/ssd/multibox.py +++ b/topi/python/topi/cuda/ssd/multibox.py @@ -459,7 +459,7 @@ def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nm """ inter_out = multibox_transform_loc(cls_prob, loc_pred, anchor, clip, threshold, variances) - out = non_max_suppression(inter_out[0], inter_out[1], max_output_size=-1, + out = non_max_suppression(inter_out[0], inter_out[1], inter_out[1], max_output_size=-1, iou_threshold=nms_threshold, force_suppress=force_suppress, top_k=nms_topk, return_indices=False) return out diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index 48e85cdd574a..73e702447de5 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -23,54 +23,6 @@ from ..sort import argsort -@hybrid.script -def hybrid_rearrange_idx(data): - """Hybrid routine to rearrange nms output to - move all valid entries to top. - - Parameters - ---------- - data : tvm.te.Tensor or numpy NDArray - NMS output. 2-D tensor with shape - [batch_size, num_anchors]. - - one: tvm.tir.const - Constant one with the same dtype as data. - - Returns - ------- - output : tvm.te.Tensor or numpy NDArray - Transformed NMS output. 2-D tensor with shape - [batch_size, num_anchors]. - - shape : tvm.te.Tensor or numpy NDArray - Shape of Tensor with valid indexes - [Batch_size, num_valid_indices] - """ - batch_size = data.shape[0] - num_anchors = data.shape[1] - out_tensor = output_tensor((batch_size, - num_anchors), - data.dtype) - out_shape = output_tensor((batch_size, - 1), - data.dtype) - - for i in range(batch_size): # range instead - valid_idx = 0 - for j in range(num_anchors): - if data[i, j] >= 0: - out_tensor[i, valid_idx] = data[i, j] - valid_idx += 1 - if data[i, j] > num_anchors or data[i, j] < -num_anchors: - out_tensor[i, valid_idx] = 0 - valid_idx += 1 - if j >= valid_idx: - out_tensor[i, j] = -1 - out_shape[i, 0] = valid_idx - return out_tensor, out_shape - - @hybrid.script def hybrid_rearrange_out(data, one): """Hybrid routine to rearrange nms output to @@ -80,7 +32,9 @@ def hybrid_rearrange_out(data, one): ---------- data : tvm.te.Tensor or numpy NDArray NMS output. 3-D tensor with shape - [batch_size, num_anchors, 6]. + [batch_size, num_anchors, 6] or + [batch_size, num_anchors, 5], or 2-D + tensor with shape [batch_size, num_anchors]. one: tvm.tir.const Constant one with the same dtype as data. @@ -89,28 +43,48 @@ def hybrid_rearrange_out(data, one): ------- output : tvm.te.Tensor or numpy NDArray Transformed NMS output. 3-D tensor with shape - [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5]. + [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5], + or 2-D tensor with shape [batch_size, num_anchors]. 
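The hybrid script now also accepts 2-D index tensors, packing non-negative entries to the front and reporting how many were kept (see the code later in this hunk); a NumPy sketch of that 2-D behavior, with toy values assumed and the out-of-range zeroing omitted:

import numpy as np

def rearrange_2d_ref(data):
    # Move entries >= 0 to the front, pad the tail with -1,
    # and count the kept entries per batch row.
    out = np.full_like(data, -1)
    count = np.zeros((data.shape[0], 1), dtype="int32")
    for i in range(data.shape[0]):
        kept = data[i][data[i] >= 0]
        out[i, :len(kept)] = kept
        count[i, 0] = len(kept)
    return out, count

print(rearrange_2d_ref(np.array([[3, -1, 0, -1, 4]])))
# -> [[ 3  0  4 -1 -1]], [[3]]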
+ + valid_box_count : tvm.Tensor or numpy NDArray + Tensor with shape [batch_size, 1], indicates + the valid number of boxes. """ + ndim = len(data.shape) batch_size = data.shape[0] num_anchors = data.shape[1] - elem_length = data.shape[2] - output = output_tensor((batch_size, - num_anchors, - elem_length), - data.dtype) + valid_box_count = output_tensor((batch_size, 1), "int32") + output = output_tensor((batch_size, num_anchors), data.dtype) + if ndim > 2: + output = output_tensor((batch_size, + num_anchors, + data.shape[2]), + data.dtype) for i in parallel(batch_size): valid_idx = 0 for j in range(num_anchors): - if data[i, j, 0] >= 0: - for k in range(elem_length): - output[i, valid_idx, k] = data[i, j, k] - valid_idx += 1 - if j >= valid_idx: - for k in range(elem_length): - output[i, j, k] = -one - - return output + if ndim > 2: + elem_length = data.shape[2] + if data[i, j, 0] >= 0: + for k in range(elem_length): + output[i, valid_idx, k] = data[i, j, k] + valid_idx += 1 + if j >= valid_idx: + for k in range(elem_length): + output[i, j, k] = -one + else: + if data[i, j] >= 0: + output[i, valid_idx] = data[i, j] + valid_idx += 1 + if data[i, j] > num_anchors or data[i, j] < -num_anchors: + output[i, valid_idx] = 0 + valid_idx += 1 + if j >= valid_idx: + output[i, j] = -one + valid_box_count[i, 0] = valid_idx + + return output, valid_box_count @hybrid.script @@ -139,11 +113,14 @@ def hybrid_get_valid_counts(data, score_threshold, id_index, score_index, one): Returns ------- + valid_count : tvm.te.Tensor or numpy NDArray + 1-D tensor for valid number of boxes. + out_tensor : tvm.te.Tensor or numpy NDArray Rearranged data tensor. - valid_count : tvm.te.Tensor or numpy NDArray - 1-D tensor for valid number of boxes. + out_indices: tvm.te.Tensor or numpy NDArray + Indices of the valid boxes in the input data. """ batch_size = data.shape[0] num_anchors = data.shape[1] @@ -153,6 +130,7 @@ def hybrid_get_valid_counts(data, score_threshold, id_index, score_index, one): num_anchors, box_data_length), data.dtype) + out_indices = output_tensor((batch_size, num_anchors), "int32") for i in parallel(batch_size): valid_count[i] = 0 for j in range(num_anchors): @@ -161,11 +139,13 @@ def hybrid_get_valid_counts(data, score_threshold, id_index, score_index, one): (id_index < 0 or data[i, j, id_index] >= 0): for k in range(box_data_length): out_tensor[i, valid_count[i], k] = data[i, j, k] + out_indices[i, valid_count[i]] = j valid_count[i] += 1 if j >= valid_count[i]: for k in range(box_data_length): out_tensor[i, j, k] = -one - return valid_count, out_tensor + out_indices[i, j] = -1 + return valid_count, out_tensor, out_indices def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): @@ -189,11 +169,14 @@ Returns ------- + valid_count : tvm.te.Tensor + 1-D tensor for valid number of boxes. + out_tensor : tvm.te.Tensor Rearranged data tensor. - valid_count : tvm.te.Tensor - 1-D tensor for valid number of boxes. + out_indices: tvm.te.Tensor or numpy NDArray + Indices of the valid boxes in the input data. 
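A NumPy reference for the hybrid get_valid_counts above (id_index < 0 case; the helper name and values are illustrative), mirroring the packed output and the new out_indices tensor:

import numpy as np

def get_valid_counts_ref(data, score_threshold, score_index=1):
    # Pack boxes whose score exceeds the threshold to the front,
    # fill the rest with -1, and record each kept box's original index.
    batch, num_anchors, _ = data.shape
    valid_count = np.zeros(batch, dtype="int32")
    out = np.full_like(data, -1.0)
    out_indices = np.full((batch, num_anchors), -1, dtype="int32")
    for i in range(batch):
        for j in range(num_anchors):
            if data[i, j, score_index] > score_threshold:
                out[i, valid_count[i]] = data[i, j]
                out_indices[i, valid_count[i]] = j
                valid_count[i] += 1
    return valid_count, out, out_indices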
""" score_threshold_const = tvm.tir.const(score_threshold, data.dtype) id_index_const = tvm.tir.const(id_index, "int32") @@ -204,8 +187,9 @@ def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): @hybrid.script -def hybrid_nms(data, sorted_index, valid_count, max_output_size, iou_threshold, - force_suppress, top_k, coord_start, id_index, score_index, zero, one): +def hybrid_nms(data, sorted_index, valid_count, indices, max_output_size, + iou_threshold, force_suppress, top_k, coord_start, score_index, + id_index, return_indices, zero, one): """Hybrid routing for non-maximum suppression. Parameters @@ -221,6 +205,9 @@ def hybrid_nms(data, sorted_index, valid_count, max_output_size, iou_threshold, valid_count : tvm.te.Tensor or numpy NDArray 1-D tensor for valid number of boxes. + indices : tvm.Tensor or numpy.NDArray + indices in original tensor, with shape [batch_size, num_anchors] + max_output_size : tvm.tir.const Max number of output valid boxes for each instance. By default all valid boxes are returned. @@ -237,11 +224,14 @@ def hybrid_nms(data, sorted_index, valid_count, max_output_size, iou_threshold, coord_start : tvm.tir.const Start index of the consecutive 4 coordinates. + score_index: tvm.tir.const + Index of the scores/confidence of boxes. + id_index : tvm.tir.const index of the class categories, -1 to disable. - score_index: tvm.tir.const - Index of the scores/confidence of boxes. + return_indices : tvm.tir.const + Whether to return box indices in input data. zero: tvm.tir.const Constant zero with the same dtype as data. @@ -258,9 +248,12 @@ def hybrid_nms(data, sorted_index, valid_count, max_output_size, iou_threshold, box_indices: tvm.te.Tensor 2-D tensor with shape [batch_size, num_anchors]. """ + batch_size = data.shape[0] num_anchors = data.shape[1] box_data_length = data.shape[2] + + # box_indices is the expected value, similar to TF & ONNX box_indices = output_tensor((batch_size, num_anchors), sorted_index.dtype) output = output_tensor((batch_size, num_anchors, @@ -282,9 +275,11 @@ def hybrid_nms(data, sorted_index, valid_count, max_output_size, iou_threshold, for k in range(box_data_length): output[i, j + nkeep, k] = -one box_indices[i, j + nkeep] = -1 + # Apply nms box_start_idx = coord_start batch_idx = i + for j in range(valid_count[i]): if output[i, j, score_index] > 0 and (id_index < 0 or output[i, j, id_index] >= 0): box_a_idx = j @@ -296,36 +291,62 @@ def hybrid_nms(data, sorted_index, valid_count, max_output_size, iou_threshold, check_iou = 1 elif id_index < 0 or output[i, j, id_index] == output[i, k, id_index]: check_iou = 1 + if check_iou > 0: - a_l = output[batch_idx, box_a_idx, box_start_idx] - a_t = output[batch_idx, box_a_idx, box_start_idx + 1] - a_r = output[batch_idx, box_a_idx, box_start_idx + 2] - a_b = output[batch_idx, box_a_idx, box_start_idx + 3] + # a_l: left, a_t: top, a_r: right, a_b: bottom + a_l = min(output[batch_idx, box_a_idx, box_start_idx], + output[batch_idx, box_a_idx, box_start_idx + 2]) + a_t = min(output[batch_idx, box_a_idx, box_start_idx + 1], + output[batch_idx, box_a_idx, box_start_idx + 3]) + a_r = max(output[batch_idx, box_a_idx, box_start_idx], + output[batch_idx, box_a_idx, box_start_idx + 2]) + a_b = max(output[batch_idx, box_a_idx, box_start_idx + 1], + output[batch_idx, box_a_idx, box_start_idx + 3]) + box_b_idx = k - b_t = output[batch_idx, box_b_idx, box_start_idx + 1] - b_b = output[batch_idx, box_b_idx, box_start_idx + 3] - b_l = output[batch_idx, box_b_idx, box_start_idx] - b_r = output[batch_idx, 
box_b_idx, box_start_idx + 2] + + # b_l: left, b_t: top, b_r: right, b_b: bottom + b_l = min(output[batch_idx, box_b_idx, box_start_idx], + output[batch_idx, box_b_idx, box_start_idx + 2]) + b_t = min(output[batch_idx, box_b_idx, box_start_idx + 1], + output[batch_idx, box_b_idx, box_start_idx + 3]) + b_r = max(output[batch_idx, box_b_idx, box_start_idx], + output[batch_idx, box_b_idx, box_start_idx + 2]) + b_b = max(output[batch_idx, box_b_idx, box_start_idx + 1], + output[batch_idx, box_b_idx, box_start_idx + 3]) + + # Overlapping width and height w = max(zero, min(a_r, b_r) - max(a_l, b_l)) h = max(zero, min(a_b, b_b) - max(a_t, b_t)) + + # Overlapping area area = h * w + + # total area of the figure formed by box a and box b + # except for overlapping area u = (a_r - a_l) * (a_b - a_t) + (b_r - b_l) * (b_b - b_t) - area + + # get the iou iou = zero if u <= zero else area / u + if iou >= iou_threshold: output[i, k, score_index] = -one if id_index >= 0: output[i, k, id_index] = -one box_indices[i, k] = -1 + else: for j in parallel(valid_count[i]): for k in range(box_data_length): output[i, j, k] = data[i, j, k] box_indices[i, j] = j + # Set invalid entry to be -1 for j in parallel(num_anchors - valid_count[i]): for k in range(box_data_length): output[i, j + valid_count[i], k] = -one box_indices[i, j + valid_count[i]] = -1 + # Only return max_output_size valid boxes num_valid_boxes = 0 if max_output_size > 0: @@ -337,138 +358,17 @@ def hybrid_nms(data, sorted_index, valid_count, max_output_size, iou_threshold, box_indices[i, j] = -1 else: num_valid_boxes += 1 - return output, box_indices - -@hybrid.script -def hybrid_dynamic_nms(data, sorted_index, max_output_size, score_threshold, - iou_threshold, score_index, zero, one): - """Hybrid routing for non-maximum suppression. - - Parameters - ---------- - data: tvm.te.Tensor or numpy NDArray - Bounding boxes with class and score. 3-D tensor with shape - [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5]. - - sorted_index : tvm.te.Tensor or numpy NDArray - Bounding box indexes sorted by score, with shape - [batch_size, num_anchors]. - - max_output_size : tvm.tir.const - Max number of output valid boxes for each instance. - By default all valid boxes are returned. - - score_threshold : tvm.tir.const - Lower limit of score for valid bounding boxes. - - iou_threshold : tvm.tir.const - Overlapping(IoU) threshold to suppress object with smaller score. - - score_index: tvm.tir.const - Index of the scores/confidence of boxes. - - zero: tvm.tir.const - Constant zero with the same dtype as data. - - one: tvm.tir.const - Constant one with the same dtype as data. - - Returns - ------- - box_indices: tvm.te.Tensor - 2-D tensor with shape [batch_size, num_anchors]. 
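The min/max guards introduced above make the overlap computation tolerant of boxes whose corners are given in flipped order; the same IoU arithmetic as a standalone sketch:

def iou_ref(box_a, box_b):
    # Boxes are (l, t, r, b) tuples; min/max normalizes flipped corners.
    a_l, a_r = min(box_a[0], box_a[2]), max(box_a[0], box_a[2])
    a_t, a_b = min(box_a[1], box_a[3]), max(box_a[1], box_a[3])
    b_l, b_r = min(box_b[0], box_b[2]), max(box_b[0], box_b[2])
    b_t, b_b = min(box_b[1], box_b[3]), max(box_b[1], box_b[3])
    w = max(0.0, min(a_r, b_r) - max(a_l, b_l))   # overlap width
    h = max(0.0, min(a_b, b_b) - max(a_t, b_t))   # overlap height
    area = w * h
    union = (a_r - a_l) * (a_b - a_t) + (b_r - b_l) * (b_b - b_t) - area
    return 0.0 if union <= 0 else area / union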
- """ - - batch_size = data.shape[0] - num_anchors = data.shape[1] - box_data_length = data.shape[2] - - # box_indices is the expected value, similar to TF & ONNX - box_indices = output_tensor((batch_size, num_anchors), sorted_index.dtype) - output = output_tensor((batch_size, - num_anchors, - box_data_length,), data.dtype) - - for i in range(batch_size): - if iou_threshold > 0: - # Reorder output - for j in parallel(num_anchors): - for k in range(box_data_length): - output[i, j, k] = data[i, sorted_index[i, j], k] - if output[i, j, score_index] > score_threshold: - box_indices[i, j] = sorted_index[i, j] - else: - box_indices[i, j] = -1 - - # Apply nms - box_start_idx = 1 - batch_idx = i - - for j in range(num_anchors): - # index sorted - j_sorted = sorted_index[i, j] - - box_a_idx = j - # l: left, t: top, r: right, b: bottom - a_l = min(output[batch_idx, box_a_idx, box_start_idx], - output[batch_idx, box_a_idx, box_start_idx + 2]) - a_t = min(output[batch_idx, box_a_idx, box_start_idx + 1], - output[batch_idx, box_a_idx, box_start_idx + 3]) - a_r = max(output[batch_idx, box_a_idx, box_start_idx], - output[batch_idx, box_a_idx, box_start_idx + 2]) - a_b = max(output[batch_idx, box_a_idx, box_start_idx + 1], - output[batch_idx, box_a_idx, box_start_idx + 3]) - - for k in parallel(j + 1, num_anchors): - k_sorted = sorted_index[i, k] - box_b_idx = k - # l: left, t: top, r: right, b: bottom - b_l = min(output[batch_idx, box_b_idx, box_start_idx], - output[batch_idx, box_b_idx, box_start_idx + 2]) - b_t = min(output[batch_idx, box_b_idx, box_start_idx + 1], - output[batch_idx, box_b_idx, box_start_idx + 3]) - b_r = max(output[batch_idx, box_b_idx, box_start_idx], - output[batch_idx, box_b_idx, box_start_idx + 2]) - b_b = max(output[batch_idx, box_b_idx, box_start_idx + 1], - output[batch_idx, box_b_idx, box_start_idx + 3]) - - # Overlapping width and height - w = max(zero, min(a_r, b_r) - max(a_l, b_l)) - h = max(zero, min(a_b, b_b) - max(a_t, b_t)) - - # Overlapping area - area = h * w - - # total area of the figure formed by box a and box b except for overlapping area - u = (a_r - a_l) * (a_b - a_t) + (b_r - b_l) * (b_b - b_t) - area - - # get the iou - iou = area / u - - # output[i, k, sorted_index] = iou - - if iou >= score_threshold: - box_indices[i, k] = -1 - - else: - for j in parallel(num_anchors): - box_indices[i, j] = sorted_index[i, j] - - # Only return max_output_size valid boxes - num_valid_boxes = 0 - if max_output_size > 0: - for j in parallel(num_anchors): - if num_valid_boxes == max_output_size: - box_indices[i, j] = -1 - else: - num_valid_boxes += 1 + if return_indices: + for j in range(valid_count[i]): + idx = box_indices[i, j] + if box_indices[i, j] >= 0: + box_indices[i, j] = indices[i, idx] return output, box_indices - @tvm.target.generic_func -def non_max_suppression(data, valid_count, max_output_size=-1, score_threshold=0.0, +def non_max_suppression(data, valid_count, indices, max_output_size=-1, iou_threshold=0.5, force_suppress=False, top_k=-1, coord_start=2, score_index=1, id_index=0, return_indices=True, invalid_to_bottom=False): @@ -482,13 +382,13 @@ def non_max_suppression(data, valid_count, max_output_size=-1, score_threshold=0 valid_count : tvm.te.Tensor 1-D tensor for valid number of boxes. + indices : tvm.Tensor + 2-D tensor with shape [batch_size, num_anchors]. + max_output_size : optional, int Max number of output valid boxes for each instance. By default all valid boxes are returned. 
- score_threshold : optional, float - Lower limit of score for valid bounding boxes. - iou_threshold : optional, float Non-maximum suppression threshold. @@ -517,7 +417,7 @@ def non_max_suppression(data, valid_count, max_output_size=-1, score_threshold=0 ------- out : tvm.te.Tensor or tuple of tvm.te.Tensor 3-D tensor with shape [batch_size, num_anchors, 6] - or [batch_size, num_anchors, 6]. Out is a tuple of tvm.te.Tensor + or [batch_size, num_anchors, 5]. Out is a tuple of tvm.te.Tensor if return_indices is True, the Tensor in the tuple is 2-D tensor with shape [batch_size, num_anchors] and shape [batch_size, num_valid_anchors] respectively. @@ -533,7 +433,7 @@ def non_max_suppression(data, valid_count, max_output_size=-1, score_threshold=0 iou_threshold = 0.7 force_suppress = True top_k = -1 - out = non_max_suppression(data, valid_count, iou_threshold=iou_threshold, + out = non_max_suppression(data, valid_count, indices, iou_threshold=iou_threshold, force_suppress=force_suppress, top_k=top_k) np_data = np.random.uniform(dshape) np_valid_count = np.array([4]) @@ -551,33 +451,24 @@ def non_max_suppression(data, valid_count, max_output_size=-1, score_threshold=0 score_shape = (batch_size, num_anchors) score_tensor = te.compute(score_shape, lambda i, j: data[i, j, score_axis]) sort_tensor = argsort(score_tensor, valid_count=valid_count, axis=1, is_ascend=False) - + out, box_indices = hybrid_nms(data, + sort_tensor, + valid_count, + indices, + tvm.const(max_output_size, dtype="int32"), + tvm.const(iou_threshold, dtype=data.dtype), + tvm.const(force_suppress, dtype="bool"), + tvm.const(top_k, dtype="int32"), + tvm.const(coord_start, dtype="int32"), + tvm.const(score_index, dtype="int32"), + tvm.const(id_index, dtype="int32"), + tvm.const(return_indices, dtype="bool"), + zero=tvm.const(0, dtype=data.dtype), + one=tvm.const(1, dtype=data.dtype)) if return_indices: - # return a tuple with two tensor, one is the computed valid indices of boxes, appending -1 as invalid boxes - # the other one is the number of valid boxes - out, box_indices = hybrid_dynamic_nms(data, - sort_tensor, - tvm.tir.const(max_output_size, dtype="int32"), - tvm.tir.const(score_threshold, dtype=data.dtype), - tvm.tir.const(iou_threshold, dtype=data.dtype), - tvm.tir.const(score_index, dtype="int32"), - zero=tvm.tir.const(0, dtype=data.dtype), - one=tvm.tir.const(1, dtype=data.dtype)) - box_indices, out_shape = hybrid_rearrange_idx(box_indices) - return [box_indices, out_shape] - else: - out, box_indices = hybrid_nms(data, - sort_tensor, - valid_count, - tvm.tir.const(max_output_size, dtype="int32"), - tvm.tir.const(iou_threshold, dtype=data.dtype), - tvm.tir.const(force_suppress, dtype="bool"), - tvm.tir.const(top_k, dtype="int32"), - tvm.tir.const(coord_start, dtype="int32"), - tvm.tir.const(id_index, dtype="int32"), - tvm.tir.const(score_index, dtype="int32"), - zero=tvm.tir.const(0, dtype=data.dtype), - one=tvm.tir.const(1, dtype=data.dtype)) - if invalid_to_bottom: - out = hybrid_rearrange_out(out, one=tvm.tir.const(1, dtype=data.dtype)) - return out + box_indices, out_shape = hybrid_rearrange_out(box_indices, one=tvm.const(1, dtype="int32")) + return tuple([box_indices, out_shape]) + + if invalid_to_bottom: + out, out_shape = hybrid_rearrange_out(out, one=tvm.const(1, dtype=data.dtype)) + return out diff --git a/topi/python/topi/vision/ssd/multibox.py b/topi/python/topi/vision/ssd/multibox.py index e1ddc7bab9b0..e5b92156bdc3 100644 --- a/topi/python/topi/vision/ssd/multibox.py +++ 
b/topi/python/topi/vision/ssd/multibox.py @@ -304,7 +304,7 @@ def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nm """ inter_out = multibox_transform_loc(cls_prob, loc_pred, anchor, clip, threshold, variances) - out = non_max_suppression(inter_out[0], inter_out[1], max_output_size=-1, score_threshold=0, + out = non_max_suppression(inter_out[0], inter_out[1], inter_out[1], max_output_size=-1, iou_threshold=nms_threshold, force_suppress=force_suppress, top_k=nms_topk, return_indices=False) return out diff --git a/topi/tests/python/test_topi_vision.py b/topi/tests/python/test_topi_vision.py index 77cd2f1d7e65..bd71742c83ab 100644 --- a/topi/tests/python/test_topi_vision.py +++ b/topi/tests/python/test_topi_vision.py @@ -69,6 +69,7 @@ def verify_get_valid_counts(dshape, score_threshold, id_index, score_index): np_data = np.random.uniform(low=-2, high=2, size=dshape).astype(dtype) np_out1 = np.zeros(shape=(batch_size,)) np_out2 = np.zeros(shape=dshape).astype(dtype) + np_out3 = np.zeros(shape=(batch_size, num_anchor)) for i in range(batch_size): np_out1[i] = 0 inter_idx = 0 @@ -78,10 +79,12 @@ def verify_get_valid_counts(dshape, score_threshold, id_index, score_index): for k in range(elem_length): np_out2[i, inter_idx, k] = np_data[i, j, k] np_out1[i] += 1 + np_out3[i, inter_idx] = j inter_idx += 1 if j >= np_out1[i]: for k in range(elem_length): np_out2[i, j, k] = -1.0 + np_out3[i, j] = -1 def check_device(device): ctx = tvm.context(device, 0) @@ -98,10 +101,12 @@ def check_device(device): tvm_input_data = tvm.nd.array(np_data, ctx) tvm_out1 = tvm.nd.array(np.zeros(np_out1.shape, dtype="int32"), ctx) tvm_out2 = tvm.nd.array(np.zeros(np_out2.shape, dtype=dtype), ctx) - f = tvm.build(s, [data, outs[0], outs[1]], device) - f(tvm_input_data, tvm_out1, tvm_out2) + tvm_out3 = tvm.nd.array(np.zeros(np_out3.shape, dtype="int32"), ctx) + f = tvm.build(s, [data, outs[0], outs[1], outs[2]], device) + f(tvm_input_data, tvm_out1, tvm_out2, tvm_out3) tvm.testing.assert_allclose(tvm_out1.asnumpy(), np_out1, rtol=1e-3) tvm.testing.assert_allclose(tvm_out2.asnumpy(), np_out2, rtol=1e-3) + tvm.testing.assert_allclose(tvm_out3.asnumpy(), np_out3, rtol=1e-3) """ Skip this test as it is intermittent see https://github.com/apache/incubator-tvm/pull/4901#issuecomment-595040094 @@ -114,6 +119,7 @@ def check_device(device): def test_get_valid_counts(): + verify_get_valid_counts((1, 1000, 5), 0.5, -1, 0) verify_get_valid_counts((1, 2500, 6), 0, 0, 1) verify_get_valid_counts((1, 2500, 5), -1, -1, 0) verify_get_valid_counts((3, 1000, 6), 0.55, 1, 0) @@ -147,16 +153,18 @@ def check_device(device): tvm_data = tvm.nd.array(np_data, ctx) tvm_valid_count = tvm.nd.array(np_valid_count, ctx) + tvm_indices = tvm.nd.array(np_indices, ctx) tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data.dtype), ctx) - f = tvm.build(s, [data, valid_count, out], device) - f(tvm_data, tvm_valid_count, tvm_out) + f = tvm.build(s, [data, valid_count, indices, out], device) + f(tvm_data, tvm_valid_count, tvm_indices, tvm_out) tvm.testing.assert_allclose(tvm_out.asnumpy(), np_result, rtol=1e-4) tvm_indices_out = tvm.nd.array(np.zeros(indices_dshape, dtype="int32"), ctx) - f = tvm.build(indices_s, [data, valid_count, indices_out[0]], device) - f(tvm_data, tvm_valid_count, tvm_indices_out) - tvm.testing.assert_allclose(tvm_indices_out.asnumpy(), np_indices_result, rtol=1e-4) + f = tvm.build(indices_s, [data, valid_count, indices, indices_out[0]], device) + f(tvm_data, tvm_valid_count, tvm_indices, tvm_indices_out) + # TODO 
(yongwww): add dynamic nms for gpu + # tvm.testing.assert_allclose(tvm_indices_out.asnumpy(), np_indices_result, rtol=1e-4) for device in ['llvm', 'cuda', 'opencl']: check_device(device) @@ -167,23 +175,24 @@ def test_non_max_suppression(): [0, 0.4, 4, 21, 19, 40], [2, 0.9, 35, 61, 52, 79], [1, 0.5, 100, 60, 70, 110]]]).astype("float32") np_valid_count = np.array([4]).astype("int32") + np_indices = np.array([[0, 1, 3, 4, -1]]).astype("int32") np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) - np_indices_result = np.array([[3, 0, -1, -1, -1]]) + np_indices_result = np.array([[4, 0, -1, -1, -1]]) - verify_non_max_suppression(np_data, np_valid_count, np_result, np_indices_result, 0.6, 0.7, True, 2, 2, 1, 0) + verify_non_max_suppression(np_data, np_valid_count, np_indices, np_result, np_indices_result, 0.6, True, 2, 2, 1, 0) np_data = np.array([[[0.8, 1, 20, 25, 45], [0.7, 30, 60, 50, 80], [0.4, 4, 21, 19, 40], [0.9, 35, 61, 52, 79], [0.5, 100, 60, 70, 110]]]).astype("float32") np_valid_count = np.array([4]).astype("int32") + np_indices = np.array([[0, 1, 3, 4, -1]]).astype("int32") np_result = np.array([[[0.9, 35, 61, 52, 79], [0.8, 1, 20, 25, 45], [-1, -1, -1, -1, -1], [-1, -1, -1, -1, -1], [-1, -1, -1, -1, -1]]]) - np_indices_result = np.array([[3, 0, -1, -1, -1]]) - verify_non_max_suppression(np_data, np_valid_count, np_result, np_indices_result, 0.6, 0.7, False, 2, 1, 0, -1) - + np_indices_result = np.array([[4, 0, -1, -1, -1]]) + verify_non_max_suppression(np_data, np_valid_count, np_indices, np_result, np_indices_result, 0.6, False, 2, 1, 0, -1) def verify_multibox_prior(dshape, sizes=(1,), ratios=(1,), steps=(-1, -1), offsets=(0.5, 0.5), clip=False): From c3be4817d7dc8c5f020026081799ebaad64bda55 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Sun, 17 Nov 2019 18:39:48 +0000 Subject: [PATCH 03/22] fix nnvm compatibility issues --- topi/python/topi/cuda/nms.py | 7 +++-- topi/python/topi/image/dilation2d.py | 12 ++++---- topi/python/topi/math.py | 12 ++++---- topi/python/topi/vision/nms.py | 6 ++-- topi/python/topi/x86/conv2d_alter_op.py | 4 ++- topi/python/topi/x86/conv3d.py | 6 ++-- topi/tests/python/test_topi_vision.py | 39 +++++++++++++++---------- 7 files changed, 50 insertions(+), 36 deletions(-) diff --git a/topi/python/topi/cuda/nms.py b/topi/python/topi/cuda/nms.py index d8be3bd1b886..255cf6fc30f7 100644 --- a/topi/python/topi/cuda/nms.py +++ b/topi/python/topi/cuda/nms.py @@ -335,7 +335,7 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx): return ib.get() -def non_max_suppression(data, valid_count, max_output_size=-1, +def non_max_suppression(data, valid_count, indices, max_output_size=-1, iou_threshold=0.5, force_suppress=False, top_k=-1, coord_start=2, score_index=1, id_index=0, return_indices=True, invalid_to_bottom=False): @@ -351,6 +351,9 @@ def non_max_suppression(data, valid_count, max_output_size=-1, valid_count : tvm.te.Tensor 1-D tensor for valid number of boxes. + indices : tvm.te.Tensor + 2-D tensor with shape [batch_size, num_anchors]. + max_output_size : optional, int Max number of output valid boxes for each instance. By default all valid boxes are returned. 
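As a usage sketch of the updated three-input signature (generic CPU path; placeholder shapes are assumptions, mirroring the tests in this series):

import tvm
from tvm import te
from topi.vision.nms import non_max_suppression

data = te.placeholder((1, 5, 6), name="data")
valid_count = te.placeholder((1,), dtype="int32", name="valid_count")
indices = te.placeholder((1, 5), dtype="int32", name="indices")
# Builds the compute only; scheduling is dispatched per target elsewhere.
out = non_max_suppression(data, valid_count, indices, max_output_size=-1,
                          iou_threshold=0.5, force_suppress=True, top_k=2,
                          return_indices=False)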
@@ -445,4 +448,4 @@ def non_max_suppression(data, valid_count, max_output_size=-1, if return_indices: return box_indices - return out + return out \ No newline at end of file diff --git a/topi/python/topi/image/dilation2d.py b/topi/python/topi/image/dilation2d.py index a71866e60a98..074ca6c02d08 100644 --- a/topi/python/topi/image/dilation2d.py +++ b/topi/python/topi/image/dilation2d.py @@ -29,10 +29,10 @@ def dilation2d_nchw(input, filter, stride, padding, dilations, out_dtype=None): Parameters ---------- - input : tvm.Tensor + input : tvm.te.Tensor 4-D with shape [batch, in_channel, in_height, in_width] - filter : tvm.Tensor + filter : tvm.te.Tensor 3-D with shape [ in_channel, filter_height, filter_width] stride : int or a list/tuple of two ints @@ -49,7 +49,7 @@ def dilation2d_nchw(input, filter, stride, padding, dilations, out_dtype=None): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, in_channel, out_height, out_width] """ if out_dtype is None: @@ -100,10 +100,10 @@ def dilation2d_nhwc(input, filter, stride, padding, dilations, out_dtype=None): Parameters ---------- - input : tvm.Tensor + input : tvm.te.Tensor 4-D with shape [batch, in_height, in_width, in_channel] - filter : tvm.Tensor + filter : tvm.te.Tensor 3-D with shape [filter_height, filter_width, in_channel] stride : int or a list/tuple of two ints @@ -120,7 +120,7 @@ def dilation2d_nhwc(input, filter, stride, padding, dilations, out_dtype=None): Returns ------- - Output : tvm.Tensor + Output : tvm.te.Tensor 4-D with shape [batch, out_height, out_width, in_channel] """ if out_dtype is None: diff --git a/topi/python/topi/math.py b/topi/python/topi/math.py index d715308573a4..b4228a4a9178 100644 --- a/topi/python/topi/math.py +++ b/topi/python/topi/math.py @@ -401,12 +401,12 @@ def isfinite(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ return te.compute(x.shape, lambda *i: te.isfinite(x(*i))) @@ -418,12 +418,12 @@ def isinf(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ return te.compute(x.shape, lambda *i: te.isinf(x(*i))) @@ -677,12 +677,12 @@ def fast_tanh(x): Parameters ---------- - x : tvm.Tensor + x : tvm.te.Tensor Input argument. Returns ------- - y : tvm.Tensor + y : tvm.te.Tensor The result. """ return cpp.fast_tanh(x, x.dtype, tag.ELEMWISE) diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index 73e702447de5..fdf2430b3a02 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -46,7 +46,7 @@ def hybrid_rearrange_out(data, one): [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5], or 2-D tensor with shape [batch_size, num_anchors]. - valid_box_count : tvm.Tensor or numpy NDArray + valid_box_count : tvm.te.Tensor or numpy NDArray Tensor with shape [batch_size, 1], indicates the valid number of boxes. """ @@ -205,7 +205,7 @@ def hybrid_nms(data, sorted_index, valid_count, indices, max_output_size, valid_count : tvm.te.Tensor or numpy NDArray 1-D tensor for valid number of boxes. - indices : tvm.Tensor or numpy.NDArray + indices : tvm.te.Tensor or numpy.NDArray indices in original tensor, with shape [batch_size, num_anchors] max_output_size : tvm.tir.const @@ -382,7 +382,7 @@ def non_max_suppression(data, valid_count, indices, max_output_size=-1, valid_count : tvm.te.Tensor 1-D tensor for valid number of boxes. 
- indices : tvm.Tensor + indices : tvm.te.Tensor 2-D tensor with shape [batch_size, num_anchors]. max_output_size : optional, int diff --git a/topi/python/topi/x86/conv2d_alter_op.py b/topi/python/topi/x86/conv2d_alter_op.py index d1c607f6a3e5..e9fc4223a9ea 100644 --- a/topi/python/topi/x86/conv2d_alter_op.py +++ b/topi/python/topi/x86/conv2d_alter_op.py @@ -312,7 +312,9 @@ def _conv2d_legalize(attrs, inputs, arg_types): new_attrs['channels'] = new_out_channel out = tvm.relay.nn.conv2d(data, kernel, **new_attrs) original_out_shape = [x.value for x in output_tensor.shape] - out = relay.strided_slice(out, begin=(0, 0, 0, 0), end=original_out_shape) + out = relay.strided_slice(out, + begin=relay.const([0, 0, 0, 0], "int32"), + end=relay.const(original_out_shape, "int32")) else: out = relay.nn.conv2d(data, kernel, **new_attrs) diff --git a/topi/python/topi/x86/conv3d.py b/topi/python/topi/x86/conv3d.py index 27f48f8dc69a..f0dee31a9992 100644 --- a/topi/python/topi/x86/conv3d.py +++ b/topi/python/topi/x86/conv3d.py @@ -78,11 +78,11 @@ def conv3d_ncdhw(cfg, data, kernel, strides, padding, dilation, out_dtype): Parameters ---------- - input : tvm.Tensor + input : tvm.te.Tensor 5-D input data with shapes: [batch, in_channel, in_depth, in_height, in_width] for NCDHW layout - filter : tvm.Tensor + filter : tvm.te.Tensor 5-D filter with shape [out_channels, in_channels, kernel_depth, kernel_height, kernel_width] strides : int or a list/tuple of three ints @@ -96,7 +96,7 @@ def conv3d_ncdhw(cfg, data, kernel, strides, padding, dilation, out_dtype): Returns ------- - output : tvm.Tensor + output : tvm.te.Tensor 5-D with shape [batch, out_channel, out_depth, out_height, out_width] for NCDHW layout """ layout = "NCDHW" diff --git a/topi/tests/python/test_topi_vision.py b/topi/tests/python/test_topi_vision.py index bd71742c83ab..072fa8eb7e32 100644 --- a/topi/tests/python/test_topi_vision.py +++ b/topi/tests/python/test_topi_vision.py @@ -102,11 +102,17 @@ def check_device(device): tvm_out1 = tvm.nd.array(np.zeros(np_out1.shape, dtype="int32"), ctx) tvm_out2 = tvm.nd.array(np.zeros(np_out2.shape, dtype=dtype), ctx) tvm_out3 = tvm.nd.array(np.zeros(np_out3.shape, dtype="int32"), ctx) - f = tvm.build(s, [data, outs[0], outs[1], outs[2]], device) - f(tvm_input_data, tvm_out1, tvm_out2, tvm_out3) - tvm.testing.assert_allclose(tvm_out1.asnumpy(), np_out1, rtol=1e-3) - tvm.testing.assert_allclose(tvm_out2.asnumpy(), np_out2, rtol=1e-3) - tvm.testing.assert_allclose(tvm_out3.asnumpy(), np_out3, rtol=1e-3) + if device == "llvm": + f = tvm.build(s, [data, outs[0], outs[1], outs[2]], device) + f(tvm_input_data, tvm_out1, tvm_out2, tvm_out3) + tvm.testing.assert_allclose(tvm_out1.asnumpy(), np_out1, rtol=1e-3) + tvm.testing.assert_allclose(tvm_out2.asnumpy(), np_out2, rtol=1e-3) + tvm.testing.assert_allclose(tvm_out3.asnumpy(), np_out3, rtol=1e-3) + else: + f = tvm.build(s, [data, outs[0], outs[1]], device) + f(tvm_input_data, tvm_out1, tvm_out2) + tvm.testing.assert_allclose(tvm_out1.asnumpy(), np_out1, rtol=1e-3) + tvm.testing.assert_allclose(tvm_out2.asnumpy(), np_out2, rtol=1e-3) """ Skip this test as it is intermittent see https://github.com/apache/incubator-tvm/pull/4901#issuecomment-595040094 @@ -161,10 +167,13 @@ def check_device(device): tvm.testing.assert_allclose(tvm_out.asnumpy(), np_result, rtol=1e-4) tvm_indices_out = tvm.nd.array(np.zeros(indices_dshape, dtype="int32"), ctx) - f = tvm.build(indices_s, [data, valid_count, indices, indices_out[0]], device) - f(tvm_data, tvm_valid_count, tvm_indices, 
tvm_indices_out) - # TODO (yongwww): add dynamic nms for gpu - # tvm.testing.assert_allclose(tvm_indices_out.asnumpy(), np_indices_result, rtol=1e-4) + if device == 'llvm': + f = tvm.build(indices_s, [data, valid_count, indices, indices_out[0]], device) + f(tvm_data, tvm_valid_count, tvm_indices, tvm_indices_out) + else: + f = tvm.build(indices_s, [data, valid_count, indices, indices_out], device) + f(tvm_data, tvm_valid_count, tvm_indices, tvm_indices_out) + tvm.testing.assert_allclose(tvm_indices_out.asnumpy(), np_indices_result, rtol=1e-4) for device in ['llvm', 'cuda', 'opencl']: check_device(device) @@ -175,24 +184,24 @@ def test_non_max_suppression(): [0, 0.4, 4, 21, 19, 40], [2, 0.9, 35, 61, 52, 79], [1, 0.5, 100, 60, 70, 110]]]).astype("float32") np_valid_count = np.array([4]).astype("int32") - np_indices = np.array([[0, 1, 3, 4, -1]]).astype("int32") + np_indices = np.array([[0, 1, 2, 3, 4]]).astype("int32") np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) - np_indices_result = np.array([[4, 0, -1, -1, -1]]) + np_indices_result = np.array([[3, 0, -1, -1, -1]]) - verify_non_max_suppression(np_data, np_valid_count, np_indices, np_result, np_indices_result, 0.6, True, 2, 2, 1, 0) + verify_non_max_suppression(np_data, np_valid_count, np_indices, np_result, np_indices_result, 0.7, True, 2, 2, 1, 0) np_data = np.array([[[0.8, 1, 20, 25, 45], [0.7, 30, 60, 50, 80], [0.4, 4, 21, 19, 40], [0.9, 35, 61, 52, 79], [0.5, 100, 60, 70, 110]]]).astype("float32") np_valid_count = np.array([4]).astype("int32") - np_indices = np.array([[0, 1, 3, 4, -1]]).astype("int32") + np_indices = np.array([[0, 1, 2, 3, 4]]).astype("int32") np_result = np.array([[[0.9, 35, 61, 52, 79], [0.8, 1, 20, 25, 45], [-1, -1, -1, -1, -1], [-1, -1, -1, -1, -1], [-1, -1, -1, -1, -1]]]) - np_indices_result = np.array([[4, 0, -1, -1, -1]]) - verify_non_max_suppression(np_data, np_valid_count, np_indices, np_result, np_indices_result, 0.6, False, 2, 1, 0, -1) + np_indices_result = np.array([[3, 0, -1, -1, -1]]) + verify_non_max_suppression(np_data, np_valid_count, np_indices, np_result, np_indices_result, 0.7, False, 2, 1, 0, -1) def verify_multibox_prior(dshape, sizes=(1,), ratios=(1,), steps=(-1, -1), offsets=(0.5, 0.5), clip=False): From 47faed9773d6757e64e356b018de4d78314c722c Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Mon, 18 Nov 2019 14:37:09 -0800 Subject: [PATCH 04/22] fix InferCorrectLayout --- python/tvm/relay/frontend/tensorflow.py | 6 +- python/tvm/relay/op/strategy/generic.py | 6 +- python/tvm/relay/op/vision/nms.py | 2 +- src/relay/op/tensor/transform.cc | 55 +++++++++++++------ .../transforms/combine_parallel_conv2d.cc | 15 +++-- src/relay/transforms/fuse_ops.cc | 17 ++++++ .../frontend/tensorflow/test_forward.py | 2 - tests/python/relay/test_any.py | 2 +- tests/python/relay/test_op_level5.py | 8 +-- .../python/relay/test_pass_alter_op_layout.py | 4 +- topi/python/topi/vision/nms.py | 24 ++++---- 11 files changed, 90 insertions(+), 51 deletions(-) diff --git a/python/tvm/relay/frontend/tensorflow.py b/python/tvm/relay/frontend/tensorflow.py index 59740d204744..d0dbdd7121b8 100644 --- a/python/tvm/relay/frontend/tensorflow.py +++ b/python/tvm/relay/frontend/tensorflow.py @@ -617,7 +617,7 @@ def _impl(inputs, attr, params, mod): return out def _nms(): - def _impl(inputs, attr, params): + def _impl(inputs, attr, params, mod): # Get parameter values max_output_size = 
int(np.atleast_1d(inputs[2].data.asnumpy().astype("int64"))[0]) iou_threshold = np.atleast_1d(inputs[3].data.asnumpy())[0] @@ -626,6 +626,7 @@ def _impl(inputs, attr, params): # Generate data with shape (1, num_anchors, 5) scores = AttrCvt(op_name="expand_dims", + ignores=['T_threshold'], extras={'axis': -1, 'num_newaxis': 1})([inputs[1]], attr) data = get_relay_op('concatenate')([scores, inputs[0]], -1) data = get_relay_op('expand_dims')(data, 0, 1) @@ -651,6 +652,7 @@ def _impl(inputs, attr, params): id_index=-1, return_indices=True, invalid_to_bottom=False) + # squeeze it, TF NMS is not batched end = get_relay_op("squeeze")(nms_ret[1], axis=[1]) data_slice = get_relay_op("squeeze")(nms_ret[0], axis=[0]) @@ -2531,7 +2533,7 @@ class LoopBound(ExprVisitor): .. code-block:: python i = tf.constant(0) - data = tf.compat.v1.placeholder(tf.float32, shape=(1024, 1024)) + data = tf.placeholder(tf.float32, shape=(1024, 1024)) slice = tf.strided_slice(data, 0, 512) def c(i): return tf.less(i, 10) def b(i): return [tf.add(i, 1), tf.add(i, 1) + slice] diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index 99439affee1a..0cedaa1c07f0 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -695,9 +695,9 @@ def _compute_nms(attrs, inputs, out_type): score_index = get_const_int(attrs.score_index) id_index = get_const_int(attrs.id_index) invalid_to_bottom = bool(get_const_int(attrs.invalid_to_bottom)) - return [topi_compute(inputs[0], inputs[1], max_output_size, iou_threshold, - force_suppress, top_k, coord_start, score_index, - id_index, return_indices, invalid_to_bottom)] + return [topi_compute(inputs[0], inputs[1], inputs[2], max_output_size, iou_threshold, + force_suppress, top_k, coord_start, score_index, id_index, + return_indices, invalid_to_bottom)] return _compute_nms @override_native_generic_func("non_max_suppression_strategy") diff --git a/python/tvm/relay/op/vision/nms.py b/python/tvm/relay/op/vision/nms.py index f2657f47c6b2..d599ea55d0ba 100644 --- a/python/tvm/relay/op/vision/nms.py +++ b/python/tvm/relay/op/vision/nms.py @@ -117,7 +117,7 @@ def non_max_suppression(data, ------- out : relay.Expr or relay.Tuple return relay.Expr if return_indices is disabled, a 3-D tensor - with shape [batch_size, num_anchors, 6] or [batch_size, num_anchors, 6]. + with shape [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5]. if return_indices is True, return relay.Tuple of two 2-D tensors, with shape [batch_size, num_anchors] and [batch_size, num_valid_anchors] respectively. 
""" diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index 4e1b05e04620..b464330c0bb9 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -1732,7 +1732,7 @@ int64_t* ToVector(const runtime::NDArray& array) { return rel_vec; } } - LOG(FATAL) << "Unknown data type: " << tvm::runtime::TVMType2String(array->dtype); + LOG(FATAL) << "Unknown data type: " << tvm::runtime::DLDataType2String(array->dtype); return rel_vec; } @@ -1825,7 +1825,7 @@ bool StridedSliceRel(const Array& types, oshape[i] = tir::make_const(dshape[i].dtype(), (slice_range + step - 1) / step); } } else { - for (size_t i = 0; i < num_axis; ++i) { + for (int64_t i = 0; i < num_axis; ++i) { oshape[i] = Any::make(); } } @@ -1850,7 +1850,7 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, auto layout = old_in_layouts[0]; if (layout.defined() && new_in_layouts.defined()) { - CHECK_EQ(new_in_layouts.size(), 1); + CHECK_GE(new_in_layouts.size(), 1); auto new_layout = new_in_layouts[0]; auto shape = old_in_shapes[0]; @@ -1907,25 +1907,44 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, new_end.push_back(tvm::Integer(ed / factor)); } } + layout = new_layout; + + DLContext ctx; + ctx.device_type = kDLCPU; + ctx.device_id = 0; + auto begin_ndarray = runtime::NDArray::Empty({int64_t(new_begin.size())}, + DataType::Int(64), ctx); + auto end_ndarray = runtime::NDArray::Empty({int64_t(new_begin.size())}, + DataType::Int(64), ctx); + auto strides_ndarray = runtime::NDArray::Empty({int64_t(new_begin.size())}, + DataType::Int(64), ctx); + int64_t* begin_data = static_cast(begin_ndarray->data); + int64_t* end_data = static_cast(end_ndarray->data); + for (size_t i = 0; i < new_begin.size(); ++i) { + begin_data[i] = new_begin[i]; + end_data[i] = new_end[i]; + } + params->begin = Constant(begin_ndarray); + params->end = Constant(end_ndarray); } - return {{layout}, {layout}}; + return {{layout, Layout("C"), Layout("C"), Layout("C")}, {layout}}; } -inline Tensor DynamicStridedSlice(const tvm::Tensor& input, - const tvm::Tensor& begin, - const tvm::Tensor& end, - const tvm::Tensor& strides, +inline te::Tensor DynamicStridedSlice(const te::Tensor& input, + const te::Tensor& begin, + const te::Tensor& end, + const te::Tensor& strides, std::string name = "T_strided_slice_dynamic", std::string tag = topi::kInjective) { int64_t src_tensor_dim = input->shape.size(); - Array out_shape; + Array out_shape; for (int64_t i = 0; i < src_tensor_dim; ++i) { - out_shape.push_back(tvm::Var("dim")); + out_shape.push_back(tvm::tir::Var("dim")); } - // TODO(yongwww): move the compute into topi after nnvm is removed - return tvm::compute(out_shape, [&](const Array& indices) { - Array real_indices; + // TODO(yongwww): move the compute into topi + return te::compute(out_shape, [&](const Array& indices) { + Array real_indices; for (int32_t i = 0; i < src_tensor_dim; ++i) { real_indices.push_back(indices[i] * strides(i) + begin(i)); } @@ -1958,12 +1977,12 @@ Array StridedSliceCompute(const Attrs& attrs, const Array{ + return Array{ DynamicStridedSlice(data, begin, end, strides) }; } diff --git a/src/relay/transforms/combine_parallel_conv2d.cc b/src/relay/transforms/combine_parallel_conv2d.cc index 58ac315d3234..6c53b6706fc9 100644 --- a/src/relay/transforms/combine_parallel_conv2d.cc +++ b/src/relay/transforms/combine_parallel_conv2d.cc @@ -184,9 +184,12 @@ class ParallelConv2DCombiner : public ParallelOpCombiner { DLContext ctx; ctx.device_type = kDLCPU; ctx.device_id = 0; - auto 
begin_ndarray = runtime::NDArray::Empty({1}, DataType::Int(64), ctx); - auto end_ndarray = runtime::NDArray::Empty({1}, DataType::Int(64), ctx); - auto strides_ndarray = runtime::NDArray::Empty({1}, DataType::Int(64), ctx); + auto begin_ndarray = runtime::NDArray::Empty({int64_t(begin.size())}, + DataType::Int(64), ctx); + auto end_ndarray = runtime::NDArray::Empty({int64_t(begin.size())}, + DataType::Int(64), ctx); + auto strides_ndarray = runtime::NDArray::Empty({int64_t(begin.size())}, + DataType::Int(64), ctx); int64_t* begin_data = static_cast(begin_ndarray->data); int64_t* end_data = static_cast(end_ndarray->data); @@ -196,9 +199,9 @@ class ParallelConv2DCombiner : public ParallelOpCombiner { } auto slice = MakeStridedSlice(data, - ConstantNode::make(begin_ndarray), - ConstantNode::make(end_ndarray), - ConstantNode::make(strides_ndarray)); + Constant(begin_ndarray), + Constant(end_ndarray), + Constant(strides_ndarray)); subst_map->insert({GetRef(branch[depth]), slice}); } } diff --git a/src/relay/transforms/fuse_ops.cc b/src/relay/transforms/fuse_ops.cc index 01f1eeea30b3..13bfa7bb28de 100644 --- a/src/relay/transforms/fuse_ops.cc +++ b/src/relay/transforms/fuse_ops.cc @@ -249,6 +249,23 @@ class IndexedForwardGraph::Creator : private ExprVisitor { this->Update(call->op, node, kOpaque); } + if (call->attrs.as()) { + bool is_dyn{false}; + for (auto arg : call->args) { + auto arg_tt = arg->checked_type().as(); + if (arg_tt) { + for (auto dim : arg_tt->shape) { + if (dim.as()) { + is_dyn = true; + } + } + } + if (is_dyn) break; + } + if (!is_dyn) { + op_pattern = kInjective; + } + } node->pattern = op_pattern; this->Update(call->op, nullptr, kOpaque); const auto* rtype = call->checked_type().as(); diff --git a/tests/python/frontend/tensorflow/test_forward.py b/tests/python/frontend/tensorflow/test_forward.py index 82c00e2db1d9..76d2fe13aa49 100644 --- a/tests/python/frontend/tensorflow/test_forward.py +++ b/tests/python/frontend/tensorflow/test_forward.py @@ -190,7 +190,6 @@ def name_without_num(name): target=device, out_names=out_name, num_output=len(out_name), opt_level=opt_level, mode=mode, cuda_layout=cuda_layout) - # since the names from tensorflow and relay runs are not exactly same, # first len(tf_output) will be compared for i in range(len(tf_output)): @@ -3321,7 +3320,6 @@ def test_forward_isfinite(): test_forward_space_to_batch_nd() test_forward_batch_to_space_nd() test_forward_dilation() - test_forward_nms_v3() # End to End test_forward_inception_v3() diff --git a/tests/python/relay/test_any.py b/tests/python/relay/test_any.py index e6a462bd737e..9464e865b4e1 100644 --- a/tests/python/relay/test_any.py +++ b/tests/python/relay/test_any.py @@ -645,7 +645,7 @@ def test_arange_with_dynamic_shape(): def verify_any_strided_slice(data_shape, begin_shape, end_shape, strides_shape, data_np_shape): - mod = relay.Module() + mod = tvm.IRModule() data = relay.var('data', shape=data_shape, dtype='float32') begin = relay.var('begin', shape=begin_shape, dtype="int32") end = relay.var('end', shape=end_shape, dtype="int32") diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py index df3091f3021d..0cee8a0f391f 100644 --- a/tests/python/relay/test_op_level5.py +++ b/tests/python/relay/test_op_level5.py @@ -288,13 +288,13 @@ def verify_nms(x0_data, x1_data, x2_data, dshape, ref_res, ref_indices_res, check_type_only=False): x0 = relay.var("x0", relay.ty.TensorType(dshape, "float32")) x1 = relay.var("x1", relay.ty.TensorType((dshape[0],), "int32")) - x2 = 
relay.var("x2", relay.ty.TensorType((dshape[0],dshape[1]), "int32")) + x2 = relay.var("x2", relay.ty.TensorType((dshape[0], dshape[1]), "int32")) z = relay.vision.non_max_suppression(x0, x1, x2, max_output_size=-1, \ iou_threshold=iou_threshold, force_suppress=force_suppress, \ top_k=top_k, return_indices=False) z_indices = relay.vision.non_max_suppression(x0, x1, x2, max_output_size=-1, \ iou_threshold=iou_threshold, force_suppress=force_suppress, \ - top_k=top_k) + top_k=top_k, return_indices=True) if isinstance(z_indices, relay.expr.TupleWrapper): z_indices = z_indices.astuple() assert "iou_threshold" in z.astext() @@ -340,7 +340,7 @@ def verify_nms(x0_data, x1_data, x2_data, dshape, ref_res, ref_indices_res, np_indices_result = np.array([[3, 0, -1, -1, -1]]) num_anchors = 5 - dshape = (tvm.var("n"), num_anchors, 6) + dshape = (te.size_var("n"), num_anchors, 6) verify_nms(np_data, np_valid_count, np_indices, dshape, np_result, np_indices_result, force_suppress=True, top_k=2, check_type_only=True) dshape = (1, num_anchors, 6) @@ -351,7 +351,7 @@ def verify_nms(x0_data, x1_data, x2_data, dshape, ref_res, ref_indices_res, [1, 0.7, 30, 60, 50, 80], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) np_indices_result = np.array([[3, 0, 1, -1, -1]]) - dshape = (tvm.var("n"), num_anchors, 6) + dshape = (te.size_var("n"), num_anchors, 6) verify_nms(np_data, np_valid_count, np_indices, dshape, np_result, np_indices_result, check_type_only=True) dshape = (1, num_anchors, 6) diff --git a/tests/python/relay/test_pass_alter_op_layout.py b/tests/python/relay/test_pass_alter_op_layout.py index bc0420f26d9b..c45b82f36602 100644 --- a/tests/python/relay/test_pass_alter_op_layout.py +++ b/tests/python/relay/test_pass_alter_op_layout.py @@ -620,7 +620,7 @@ def before(): x = relay.var("x", shape=(1, 32, 28, 28)) weight = relay.var('weight', shape=(32, 32, 3, 3)) y = relay.nn.conv2d(x, weight, channels=32, kernel_size=(3, 3), padding=(1, 1)) - y = relay.strided_slice(y, begin=[0, 16], end=[None, None]) + y = relay.strided_slice(y, begin=relay.const([0, 16], "int32"), end=relay.const([1, 32], "int32")) y = relay.Function(analysis.free_vars(y), y) return y @@ -636,7 +636,7 @@ def expected(): x = relay.layout_transform(x, "NCHW", "NCHW4c") y = relay.nn.conv2d(x, weight, channels=32, kernel_size=(3, 3), padding=(1, 1), data_layout="NCHW4c") - y = relay.strided_slice(y, begin=[0, 4], end=[None, 8]) + y = relay.strided_slice(y, begin=relay.const([0, 4], "int32"), end=relay.const([1, 8], "int32")) y = relay.layout_transform(y, "NCHW4c", "NCHW") y = relay.Function(analysis.free_vars(y), y) return y diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index fdf2430b3a02..247e6b58b709 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -455,20 +455,20 @@ def non_max_suppression(data, valid_count, indices, max_output_size=-1, sort_tensor, valid_count, indices, - tvm.const(max_output_size, dtype="int32"), - tvm.const(iou_threshold, dtype=data.dtype), - tvm.const(force_suppress, dtype="bool"), - tvm.const(top_k, dtype="int32"), - tvm.const(coord_start, dtype="int32"), - tvm.const(score_index, dtype="int32"), - tvm.const(id_index, dtype="int32"), - tvm.const(return_indices, dtype="bool"), - zero=tvm.const(0, dtype=data.dtype), - one=tvm.const(1, dtype=data.dtype)) + tvm.tir.const(max_output_size, dtype="int32"), + tvm.tir.const(iou_threshold, dtype=data.dtype), + tvm.tir.const(force_suppress, dtype="bool"), + tvm.tir.const(top_k, dtype="int32"), + 
tvm.tir.const(coord_start, dtype="int32"), + tvm.tir.const(score_index, dtype="int32"), + tvm.tir.const(id_index, dtype="int32"), + tvm.tir.const(return_indices, dtype="bool"), + zero=tvm.tir.const(0, dtype=data.dtype), + one=tvm.tir.const(1, dtype=data.dtype)) if return_indices: - box_indices, out_shape = hybrid_rearrange_out(box_indices, one=tvm.const(1, dtype="int32")) + box_indices, out_shape = hybrid_rearrange_out(box_indices, one=tvm.tir.const(1, dtype="int32")) return tuple([box_indices, out_shape]) if invalid_to_bottom: - out, out_shape = hybrid_rearrange_out(out, one=tvm.const(1, dtype=data.dtype)) + out, out_shape = hybrid_rearrange_out(out, one=tvm.tir.const(1, dtype=data.dtype)) return out From a4955cafa23ac5a782f004f2927cd343192474cc Mon Sep 17 00:00:00 2001 From: Yao Wang Date: Wed, 15 Apr 2020 21:28:52 +0000 Subject: [PATCH 05/22] Minor fix --- python/tvm/relay/op/strategy/generic.py | 11 +++++-- python/tvm/relay/op/vision/_vision.py | 40 +++++++++++++++++++++++++ topi/python/topi/vision/nms.py | 3 +- 3 files changed, 49 insertions(+), 5 deletions(-) diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index 0cedaa1c07f0..a843c33e3cf7 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -695,9 +695,14 @@ def _compute_nms(attrs, inputs, out_type): score_index = get_const_int(attrs.score_index) id_index = get_const_int(attrs.id_index) invalid_to_bottom = bool(get_const_int(attrs.invalid_to_bottom)) - return [topi_compute(inputs[0], inputs[1], inputs[2], max_output_size, iou_threshold, - force_suppress, top_k, coord_start, score_index, id_index, - return_indices, invalid_to_bottom)] + if return_indices: + return topi_compute(inputs[0], inputs[1], inputs[2], max_output_size, iou_threshold, + force_suppress, top_k, coord_start, score_index, id_index, + return_indices, invalid_to_bottom) + else: + return [topi_compute(inputs[0], inputs[1], inputs[2], max_output_size, iou_threshold, + force_suppress, top_k, coord_start, score_index, id_index, + return_indices, invalid_to_bottom)] return _compute_nms @override_native_generic_func("non_max_suppression_strategy") diff --git a/python/tvm/relay/op/vision/_vision.py b/python/tvm/relay/op/vision/_vision.py index 6e2008ad74c0..00d369c89c94 100644 --- a/python/tvm/relay/op/vision/_vision.py +++ b/python/tvm/relay/op/vision/_vision.py @@ -18,6 +18,10 @@ """Definition of vision ops""" from __future__ import absolute_import +import topi + +from tvm.te.hybrid import script + from .. import op as reg from .. 
import strategy from ..op import OpPattern @@ -40,3 +44,39 @@ # non-maximum suppression reg.register_strategy("vision.non_max_suppression", strategy.nms_strategy) reg.register_pattern("vision.non_max_suppression", OpPattern.OPAQUE) + +@script +def _get_valid_counts_shape_func(data_shape): + valid_counts_shape = output_tensor((1,), "int64") + out_tensor_shape = output_tensor((data_shape.shape[0],), "int64") + out_indices_shape = output_tensor((2,), "int64") + + valid_counts_shape[0] = data_shape[0] + for i in const_range(data_shape.shape[0]): + out_tensor_shape[i] = data_shape[i] + out_indices_shape[0] = data_shape[0] + out_indices_shape[1] = data_shape[1] + + return valid_counts_shape, out_tensor_shape, out_indices_shape + +@reg.register_shape_func("vision.get_valid_counts", False) +def get_valid_counts_shape_func(attrs, inputs, _): + return _get_valid_counts_shape_func(inputs[0]) + +@script +def _nms_shape_func(data_shape): + out_shape = output_tensor((2,), "int64") + count_shape = output_tensor((2,), "int64") + + out_shape[0] = data_shape[0] + out_shape[1] = data_shape[1] + count_shape[0] = data_shape[0] + count_shape[1] = int64(1) + return out_shape, count_shape + +@reg.register_shape_func("vision.non_max_suppression", False) +def nms_shape_func(attrs, inputs, _): + if attrs.return_indices: + return _nms_shape_func(inputs[0]) + else: + return [topi.math.identity(inputs[0])] diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index 247e6b58b709..96a89207d9fa 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -466,8 +466,7 @@ def non_max_suppression(data, valid_count, indices, max_output_size=-1, zero=tvm.tir.const(0, dtype=data.dtype), one=tvm.tir.const(1, dtype=data.dtype)) if return_indices: - box_indices, out_shape = hybrid_rearrange_out(box_indices, one=tvm.tir.const(1, dtype="int32")) - return tuple([box_indices, out_shape]) + return hybrid_rearrange_out(box_indices, one=tvm.tir.const(1, dtype="int32")) if invalid_to_bottom: out, out_shape = hybrid_rearrange_out(out, one=tvm.tir.const(1, dtype=data.dtype)) From e40216453b0e2bc53344c76c7280fbb1cb8ecef3 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Fri, 17 Apr 2020 00:32:01 +0800 Subject: [PATCH 06/22] fix for fuse --- python/tvm/relay/op/_transform.py | 16 +++++++++++++--- python/tvm/relay/op/strategy/generic.py | 7 +++---- python/tvm/relay/op/vision/_vision.py | 3 +-- python/tvm/relay/op/vision/nms.py | 2 +- src/relay/op/tensor/transform.cc | 6 ++++-- src/relay/transforms/fuse_ops.cc | 5 +++++ tests/python/relay/test_op_level5.py | 14 ++++++-------- topi/tests/python/test_topi_vision.py | 3 ++- 8 files changed, 35 insertions(+), 21 deletions(-) diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index 7d81ab13f6db..378911a27e58 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -102,11 +102,21 @@ def arange_shape_func(attrs, inputs, _): return [_arange_shape_func(*inputs)] @script -def _strided_slice_shape_func(data_shape, begin, end, strides): - ndim = len(data_shape.shape) +def _strided_slice_shape_func(data, begin, end, strides): + ndim = len(data.shape) out = output_tensor((ndim,), "int64") for i in const_range(ndim): - out[i] = int64(ceil_div((int64(end[i]) - int64(begin[i])), int64(strides[i]))) + cbegin = 0 + cend = data.shape[i] + cstride = 1 + if len(begin) > i: + cbegin = begin[i] + if len(end) > i: + cend = end[i] + if len(strides) > i: + cstride = strides[i] + assert cstride != 0, "Strides can't 
be zero." + out[i] = int64(ceil_div((int64(cend) - int64(cbegin)), int64(cstride))) return out @_reg.register_shape_func("strided_slice", True) diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index a843c33e3cf7..de808d1edbf4 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -699,10 +699,9 @@ def _compute_nms(attrs, inputs, out_type): return topi_compute(inputs[0], inputs[1], inputs[2], max_output_size, iou_threshold, force_suppress, top_k, coord_start, score_index, id_index, return_indices, invalid_to_bottom) - else: - return [topi_compute(inputs[0], inputs[1], inputs[2], max_output_size, iou_threshold, - force_suppress, top_k, coord_start, score_index, id_index, - return_indices, invalid_to_bottom)] + return [topi_compute(inputs[0], inputs[1], inputs[2], max_output_size, iou_threshold, + force_suppress, top_k, coord_start, score_index, id_index, + return_indices, invalid_to_bottom)] return _compute_nms @override_native_generic_func("non_max_suppression_strategy") diff --git a/python/tvm/relay/op/vision/_vision.py b/python/tvm/relay/op/vision/_vision.py index 00d369c89c94..094671c74284 100644 --- a/python/tvm/relay/op/vision/_vision.py +++ b/python/tvm/relay/op/vision/_vision.py @@ -78,5 +78,4 @@ def _nms_shape_func(data_shape): def nms_shape_func(attrs, inputs, _): if attrs.return_indices: return _nms_shape_func(inputs[0]) - else: - return [topi.math.identity(inputs[0])] + return [topi.math.identity(inputs[0])] diff --git a/python/tvm/relay/op/vision/nms.py b/python/tvm/relay/op/vision/nms.py index d599ea55d0ba..38dcbe5452be 100644 --- a/python/tvm/relay/op/vision/nms.py +++ b/python/tvm/relay/op/vision/nms.py @@ -134,5 +134,5 @@ def non_max_suppression(data, return_indices, invalid_to_bottom) if return_indices: - return TupleWrapper(out, 2) + return expr.TupleWrapper(out, 2) return out diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index b464330c0bb9..a6d7e3a6730a 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -1799,8 +1799,10 @@ bool StridedSliceRel(const Array& types, // Require concrete integer as symbolic inference of min/max // can get complicated and not very helpful. const int64_t* p_dim_size = tir::as_const_int(dshape[i]); - CHECK(p_dim_size) - << "strided_slice requires sliced dimension to be concrete int"; + if (!p_dim_size) { + oshape[i] = dshape[i]; + continue; + } int64_t dim_size = p_dim_size[0]; begin_v = (begin_v < 0) ? dim_size + begin_v : begin_v; end_v = (end_v < 0) ? 
dim_size + end_v : end_v; diff --git a/src/relay/transforms/fuse_ops.cc b/src/relay/transforms/fuse_ops.cc index 13bfa7bb28de..566f1424e3ea 100644 --- a/src/relay/transforms/fuse_ops.cc +++ b/src/relay/transforms/fuse_ops.cc @@ -252,6 +252,10 @@ class IndexedForwardGraph::Creator : private ExprVisitor { if (call->attrs.as()) { bool is_dyn{false}; for (auto arg : call->args) { + if (!arg.as()) { + is_dyn = true; + break; + } auto arg_tt = arg->checked_type().as(); if (arg_tt) { for (auto dim : arg_tt->shape) { @@ -266,6 +270,7 @@ class IndexedForwardGraph::Creator : private ExprVisitor { op_pattern = kInjective; } } + node->pattern = op_pattern; this->Update(call->op, nullptr, kOpaque); const auto* rtype = call->checked_type().as(); diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py index 0cee8a0f391f..c20a66729712 100644 --- a/tests/python/relay/test_op_level5.py +++ b/tests/python/relay/test_op_level5.py @@ -317,15 +317,13 @@ def verify_nms(x0_data, x1_data, x2_data, dshape, ref_res, ref_indices_res, intrp1 = relay.create_executor("graph", ctx=ctx, target=target) op_res1 = intrp1.evaluate(func)(x0_data, x1_data, x2_data) tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5) - if top_k == -1: - op_indices_res1 = intrp1.evaluate(func_indices)(x0_data, x1_data, x2_data) - tvm.testing.assert_allclose(op_indices_res1[0].asnumpy(), ref_indices_res, rtol=1e-5) + op_indices_res1 = intrp1.evaluate(func_indices)(x0_data, x1_data, x2_data) + tvm.testing.assert_allclose(op_indices_res1[0].asnumpy(), ref_indices_res, rtol=1e-5) intrp2 = relay.create_executor("debug", ctx=ctx, target=target) op_res2 = intrp2.evaluate(func)(x0_data, x1_data, x2_data) tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5) - if top_k == -1: - op_indices_res2 = intrp2.evaluate(func_indices)(x0_data, x1_data, x2_data) - tvm.testing.assert_allclose(op_indices_res2[0].asnumpy(), ref_indices_res, rtol=1e-5) + op_indices_res2 = intrp2.evaluate(func_indices)(x0_data, x1_data, x2_data) + tvm.testing.assert_allclose(op_indices_res2[0].asnumpy(), ref_indices_res, rtol=1e-5) np_data = np.array([[[0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80], [0, 0.4, 4, 21, 19, 40], [2, 0.9, 35, 61, 52, 79], @@ -337,7 +335,7 @@ def verify_nms(x0_data, x1_data, x2_data, dshape, ref_res, ref_indices_res, np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) - np_indices_result = np.array([[3, 0, -1, -1, -1]]) + np_indices_result = np.array([[4, 0, -1, -1, -1]]) num_anchors = 5 dshape = (te.size_var("n"), num_anchors, 6) @@ -350,7 +348,7 @@ def verify_nms(x0_data, x1_data, x2_data, dshape, ref_res, ref_indices_res, np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) - np_indices_result = np.array([[3, 0, 1, -1, -1]]) + np_indices_result = np.array([[4, 0, 1, -1, -1]]) dshape = (te.size_var("n"), num_anchors, 6) verify_nms(np_data, np_valid_count, np_indices, dshape, np_result, np_indices_result, check_type_only=True) diff --git a/topi/tests/python/test_topi_vision.py b/topi/tests/python/test_topi_vision.py index 072fa8eb7e32..d2331ee0c7f7 100644 --- a/topi/tests/python/test_topi_vision.py +++ b/topi/tests/python/test_topi_vision.py @@ -153,7 +153,8 @@ def check_device(device): coord_start=coord_start, score_index=score_index, id_index=id_index, return_indices=False) indices_out = 
fcompute(data, valid_count, indices, -1, iou_threshold, force_suppress, top_k, - coord_start=coord_start, score_index=score_index, id_index=id_index) + coord_start=coord_start, score_index=score_index, id_index=id_index, + return_indices=True) s = fschedule(out) indices_s = fschedule(indices_out) From b40a5cf5dba0ae674a6679cfbb7e30d209d3728c Mon Sep 17 00:00:00 2001 From: Yao Wang Date: Thu, 16 Apr 2020 17:35:04 +0000 Subject: [PATCH 07/22] Workaround to pass batch_size into hybrid function to handle dynamic shape --- topi/python/topi/vision/nms.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index 96a89207d9fa..f427b2276994 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -24,7 +24,7 @@ @hybrid.script -def hybrid_rearrange_out(data, one): +def hybrid_rearrange_out(data, one, batch_size): """Hybrid routine to rearrange nms output to move all valid entries to top. @@ -39,6 +39,10 @@ def hybrid_rearrange_out(data, one): one: tvm.tir.const Constant one with the same dtype as data. + batch_size: tvm.tir.IntImm or tvm.tir.Var + Batch size. We need to pass it in since hybrid script doesn't support + binding variable to symbolic dim. + Returns ------- output : tvm.te.Tensor or numpy NDArray @@ -51,7 +55,6 @@ def hybrid_rearrange_out(data, one): the valid number of boxes. """ ndim = len(data.shape) - batch_size = data.shape[0] num_anchors = data.shape[1] valid_box_count = output_tensor((batch_size, 1), "int32") output = output_tensor((batch_size, num_anchors), data.dtype) @@ -88,7 +91,7 @@ def hybrid_rearrange_out(data, one): @hybrid.script -def hybrid_get_valid_counts(data, score_threshold, id_index, score_index, one): +def hybrid_get_valid_counts(data, score_threshold, id_index, score_index, one, batch_size): """Hybrid routine to get valid count of bounding boxes given a score threshold. Also moves valid boxes to the top of input data. @@ -111,6 +114,10 @@ def hybrid_get_valid_counts(data, score_threshold, id_index, score_index, one): one: tvm.tir.const Constant one with the same dtype as data. + batch_size: tvm.tir.IntImm or tvm.tir.Var + Batch size. We need to pass it in since hybrid script doesn't support + binding variable to symbolic dim. + Returns ------- valid_count : tvm.te.Tensor or numpy NDArray @@ -122,7 +129,6 @@ def hybrid_get_valid_counts(data, score_threshold, id_index, score_index, one): out_indices: tvm.te.Tensor or numpy NDArray Related index in input data. """ - batch_size = data.shape[0] num_anchors = data.shape[1] box_data_length = data.shape[2] valid_count = output_tensor((batch_size,), "int32") @@ -183,11 +189,12 @@ def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): score_index_const = tvm.tir.const(score_index, "int32") return hybrid_get_valid_counts(data, score_threshold_const, id_index_const, score_index_const, - tvm.tir.const(1, data.dtype)) + tvm.tir.const(1, data.dtype), + data.shape[0]) @hybrid.script -def hybrid_nms(data, sorted_index, valid_count, indices, max_output_size, +def hybrid_nms(data, sorted_index, valid_count, indices, batch_size, max_output_size, iou_threshold, force_suppress, top_k, coord_start, score_index, id_index, return_indices, zero, one): """Hybrid routing for non-maximum suppression. 
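[Editor's note, not part of the diff] The extra `batch_size` argument added throughout this commit works around a hybrid script limitation: a symbolic first dimension (relay `Any`) read from `data.shape` cannot be bound as an output extent, so the extent is threaded in explicitly and bound at the call site (`data.shape[0]` in `get_valid_counts` above). A stripped-down sketch of the idiom; the function name is hypothetical:

    from tvm.te import hybrid

    @hybrid.script
    def copy_rows(data, batch_size):
        # batch_size arrives as tvm.tir.IntImm or tvm.tir.Var; data.shape[0]
        # may be symbolic, so it is not used as the extent directly.
        out = output_tensor((batch_size, data.shape[1]), data.dtype)
        for i in parallel(batch_size):
            for j in range(data.shape[1]):
                out[i, j] = data[i, j]
        return out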
@@ -208,6 +215,10 @@ def hybrid_nms(data, sorted_index, valid_count, indices, max_output_size, indices : tvm.te.Tensor or numpy.NDArray indices in original tensor, with shape [batch_size, num_anchors] + batch_size: tvm.tir.IntImm or tvm.tir.Var + Batch size. We need to pass it in since hybrid script doesn't support + binding variable to symbolic dim. + max_output_size : tvm.tir.const Max number of output valid boxes for each instance. By default all valid boxes are returned. @@ -249,7 +260,6 @@ def hybrid_nms(data, sorted_index, valid_count, indices, max_output_size, 2-D tensor with shape [batch_size, num_anchors]. """ - batch_size = data.shape[0] num_anchors = data.shape[1] box_data_length = data.shape[2] @@ -455,6 +465,7 @@ def non_max_suppression(data, valid_count, indices, max_output_size=-1, sort_tensor, valid_count, indices, + batch_size, tvm.tir.const(max_output_size, dtype="int32"), tvm.tir.const(iou_threshold, dtype=data.dtype), tvm.tir.const(force_suppress, dtype="bool"), @@ -466,8 +477,10 @@ def non_max_suppression(data, valid_count, indices, max_output_size=-1, zero=tvm.tir.const(0, dtype=data.dtype), one=tvm.tir.const(1, dtype=data.dtype)) if return_indices: - return hybrid_rearrange_out(box_indices, one=tvm.tir.const(1, dtype="int32")) + return hybrid_rearrange_out(box_indices, one=tvm.tir.const(1, dtype="int32"), + batch_size=batch_size) if invalid_to_bottom: - out, out_shape = hybrid_rearrange_out(out, one=tvm.tir.const(1, dtype=data.dtype)) + out, _ = hybrid_rearrange_out(out, one=tvm.tir.const(1, dtype=data.dtype), + batch_size=batch_size) return out From bd67550477e5209435e4e8e6266e39a112f9ad42 Mon Sep 17 00:00:00 2001 From: Yao Wang Date: Thu, 16 Apr 2020 19:10:32 +0000 Subject: [PATCH 08/22] Seperate rearrange --- topi/python/topi/vision/nms.py | 89 ++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 32 deletions(-) diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index f427b2276994..ff865ce988c0 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -22,9 +22,52 @@ from tvm.te import hybrid from ..sort import argsort +@hybrid.script +def hybrid_rearrange_box_out(data, one, batch_size): + """Hybrid routine to rearrange nms output to + move all valid entries to top. + + Parameters + ---------- + data : tvm.te.Tensor or numpy NDArray + NMS output. 3-D tensor with shape + [batch_size, num_anchors, 6]. + + one: tvm.tir.const + Constant one with the same dtype as data. + + batch_size: tvm.tir.IntImm or tvm.tir.Var + Batch size. We need to pass it in since hybrid script doesn't support + binding variable to symbolic dim. + + Returns + ------- + output : tvm.te.Tensor or numpy NDArray + Transformed NMS output. 3-D tensor with shape + [batch_size, num_anchors, 6]. + """ + num_anchors = data.shape[1] + elem_length = data.shape[2] + output = output_tensor((batch_size, + num_anchors, + elem_length), + data.dtype) + + for i in parallel(batch_size): + valid_idx = 0 + for j in range(num_anchors): + if data[i, j, 0] >= 0: + for k in range(elem_length): + output[i, valid_idx, k] = data[i, j, k] + valid_idx += 1 + if j >= valid_idx: + for k in range(elem_length): + output[i, j, k] = -one + return output + @hybrid.script -def hybrid_rearrange_out(data, one, batch_size): +def hybrid_rearrange_indices_out(data, one, batch_size): """Hybrid routine to rearrange nms output to move all valid entries to top. 
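[Editor's note, not part of the diff] PATCH 08 splits the rank-polymorphic rearrange routine into `hybrid_rearrange_box_out` (3-D boxes) and `hybrid_rearrange_indices_out` (2-D indices plus a valid-box count), so each hybrid function has a rank-static output. Hybrid script also runs in software emulation when handed numpy arrays, which makes the intended behavior easy to check; a sketch, assuming the emulator accepts plain Python ints for the scalar `one` and `batch_size` parameters:

    import numpy as np
    from topi.vision.nms import hybrid_rearrange_indices_out

    box_indices = np.array([[-1, 4, -1, 0, -1]], dtype="int32")
    out, count = hybrid_rearrange_indices_out(box_indices, 1, 1)
    # out   -> [[4, 0, -1, -1, -1]]  valid entries compacted to the top
    # count -> [[2]]                 number of valid boxes per batch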
@@ -46,45 +89,27 @@ def hybrid_rearrange_out(data, one, batch_size): Returns ------- output : tvm.te.Tensor or numpy NDArray - Transformed NMS output. 3-D tensor with shape - [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5], - or 2-D tensor with shape [batch_size, num_anchors]. + 2-D tensor with shape [batch_size, num_anchors]. valid_box_count : tvm.te.Tensor or numpy NDArray Tensor with shape [batch_size, 1], indicates the valid number of boxes. """ - ndim = len(data.shape) num_anchors = data.shape[1] valid_box_count = output_tensor((batch_size, 1), "int32") output = output_tensor((batch_size, num_anchors), data.dtype) - if ndim > 2: - output = output_tensor((batch_size, - num_anchors, - data.shape[2]), - data.dtype) for i in parallel(batch_size): valid_idx = 0 for j in range(num_anchors): - if ndim > 2: - elem_length = data.shape[2] - if data[i, j, 0] >= 0: - for k in range(elem_length): - output[i, valid_idx, k] = data[i, j, k] - valid_idx += 1 - if j >= valid_idx: - for k in range(elem_length): - output[i, j, k] = -one - else: - if data[i, j] >= 0: - output[i, valid_idx] = data[i, j] - valid_idx += 1 - if data[i, j] > num_anchors or data[i, j] < -num_anchors: - output[i, valid_idx] = 0 - valid_idx += 1 - if j >= valid_idx: - output[i, j] = -one + if data[i, j] >= 0: + output[i, valid_idx] = data[i, j] + valid_idx += 1 + if data[i, j] > num_anchors or data[i, j] < -num_anchors: + output[i, valid_idx] = 0 + valid_idx += 1 + if j >= valid_idx: + output[i, j] = -one valid_box_count[i, 0] = valid_idx return output, valid_box_count @@ -477,10 +502,10 @@ def non_max_suppression(data, valid_count, indices, max_output_size=-1, zero=tvm.tir.const(0, dtype=data.dtype), one=tvm.tir.const(1, dtype=data.dtype)) if return_indices: - return hybrid_rearrange_out(box_indices, one=tvm.tir.const(1, dtype="int32"), - batch_size=batch_size) + return hybrid_rearrange_indices_out(box_indices, one=tvm.tir.const(1, dtype="int32"), + batch_size=batch_size) if invalid_to_bottom: - out, _ = hybrid_rearrange_out(out, one=tvm.tir.const(1, dtype=data.dtype), - batch_size=batch_size) + out = hybrid_rearrange_box_out(out, one=tvm.tir.const(1, dtype=data.dtype), + batch_size=batch_size) return out From 70051b571279dc0126e1c18726470205d598245d Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Fri, 17 Apr 2020 05:37:22 +0800 Subject: [PATCH 09/22] fix lint --- topi/python/topi/cuda/nms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/topi/python/topi/cuda/nms.py b/topi/python/topi/cuda/nms.py index 255cf6fc30f7..2a206f6cbe68 100644 --- a/topi/python/topi/cuda/nms.py +++ b/topi/python/topi/cuda/nms.py @@ -448,4 +448,4 @@ def non_max_suppression(data, valid_count, indices, max_output_size=-1, if return_indices: return box_indices - return out \ No newline at end of file + return out From c142880eafc8d377de4f2eeae3221bdef1f1b8da Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Tue, 21 Apr 2020 05:19:28 +0800 Subject: [PATCH 10/22] fix ci, comments --- include/tvm/relay/attrs/transform.h | 3 + include/tvm/relay/attrs/vision.h | 3 - python/tvm/relay/frontend/tensorflow.py | 5 +- python/tvm/relay/op/_transform.py | 11 +-- python/tvm/relay/op/transform.py | 31 ++++++-- src/relay/op/tensor/transform.cc | 79 ++++++++++++------- src/relay/op/vision/nms.cc | 12 +-- .../transforms/combine_parallel_conv2d.cc | 15 ++-- src/relay/transforms/fuse_ops.cc | 22 ------ src/relay/transforms/pattern_util.h | 2 +- tests/python/relay/test_any.py | 5 +- tests/python/relay/test_op_level2.py | 2 +- 
tests/python/relay/test_op_level4.py | 26 +++--- .../python/relay/test_pass_alter_op_layout.py | 41 +++++++--- .../test_pass_combine_parallel_conv2d.py | 62 +++++++++++---- .../topi/testing/strided_slice_python.py | 7 +- topi/python/topi/vision/nms.py | 33 +++++--- 17 files changed, 221 insertions(+), 138 deletions(-) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index 4f0c90ec4f4a..a4c3d0194b22 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -213,6 +213,7 @@ struct StridedSliceAttrs : public tvm::AttrsNode { Expr begin; Expr end; Expr strides; + bool ignore_end; TVM_DECLARE_ATTRS(StridedSliceAttrs, "relay.attrs.StridedSliceAttrs") { TVM_ATTR_FIELD(begin) @@ -221,6 +222,8 @@ struct StridedSliceAttrs : public tvm::AttrsNode { .describe("Indices for end of slice, end index is exclusive"); TVM_ATTR_FIELD(strides) .describe("Stride values of the slice"); + TVM_ATTR_FIELD(ignore_end).set_default(false) + .describe("Whether to ignore the input end and infer value of end from input data"); } }; diff --git a/include/tvm/relay/attrs/vision.h b/include/tvm/relay/attrs/vision.h index 3edd23f34494..52669ea651ee 100644 --- a/include/tvm/relay/attrs/vision.h +++ b/include/tvm/relay/attrs/vision.h @@ -103,9 +103,6 @@ struct NonMaximumSuppressionAttrs : public tvm::AttrsNode 4 else None + score_threshold = np.atleast_1d(inputs[4].data.asnumpy())[0] if len(inputs) > 4 else 0.0 # Generate data with shape (1, num_anchors, 5) scores = AttrCvt(op_name="expand_dims", diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index 378911a27e58..40fa1caf8943 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -102,18 +102,18 @@ def arange_shape_func(attrs, inputs, _): return [_arange_shape_func(*inputs)] @script -def _strided_slice_shape_func(data, begin, end, strides): +def _strided_slice_shape_func(data, begin, end, strides, ignore_end): ndim = len(data.shape) out = output_tensor((ndim,), "int64") for i in const_range(ndim): cbegin = 0 cend = data.shape[i] cstride = 1 - if len(begin) > i: + if begin.shape[0] > i: cbegin = begin[i] - if len(end) > i: + if ignore_end != 0 or end.shape[0] > i: cend = end[i] - if len(strides) > i: + if strides.shape[0] > i: cstride = strides[i] assert cstride != 0, "Strides can't be zero." out[i] = int64(ceil_div((int64(cend) - int64(cbegin)), int64(cstride))) @@ -121,7 +121,8 @@ def _strided_slice_shape_func(data, begin, end, strides): @_reg.register_shape_func("strided_slice", True) def strided_slice_shape_func(attrs, inputs, _): - return [_strided_slice_shape_func(*inputs)] + ignore_end = attrs.ignore_end + return [_strided_slice_shape_func(*inputs, convert(get_const_int(ignore_end)))] @script def _concatenate_shape_func(inputs, axis): diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index 44b8c1c03f9d..6033aae3e960 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py @@ -611,7 +611,7 @@ def split(data, indices_or_sections, axis=0): return TupleWrapper(_make.split(data, indices_or_sections, axis), ret_size) -def strided_slice(data, begin, end, strides=None): +def strided_slice(data, begin, end, strides=None, ignore_end=False): """Strided slice of an array. Parameters @@ -619,23 +619,32 @@ def strided_slice(data, begin, end, strides=None): data : relay.Expr The source array to be sliced. 
- begin: relay.Expr + begin: relay.Expr or List[int] The indices to begin with in the slicing. - end: relay.Expr + end: relay.Expr or List[int] Indices indicating end of the slice. - strides: relay.Expr, optional + strides: relay.Expr or List[int], optional Specifies the stride values, it can be negative in that case, the input tensor will be reversed in that particular axis. + ignore_end: boolean, optional + Whether to ignore input end. + Returns ------- ret : relay.Expr The computed result. """ strides = strides or const([1], dtype="int32") - return _make.strided_slice(data, begin, end, strides) + if isinstance(begin, list): + begin = const(list(begin)) + if isinstance(end, list): + end = const(list(end)) + if isinstance(strides, list): + strides = const(list(strides)) + return _make.strided_slice(data, begin, end, strides, ignore_end) def strided_set(data, v, begin, end, strides=None): @@ -649,13 +658,13 @@ def strided_set(data, v, begin, end, strides=None): v : relay.Expr The data to be set. - begin: relay.Expr + begin: relay.Expr or List[int] The indices to begin with in the slicing. - end: relay.Expr + end: relay.Expr or List[int] Indices indicating end of the slice. - strides: relay.Expr, optional + strides: relay.Expr or List[int], optional Specifies the stride values, it can be negative in that case, the input tensor will be reversed in that particular axis. @@ -665,6 +674,12 @@ def strided_set(data, v, begin, end, strides=None): The computed result. """ strides = strides or const([1], dtype="int32") + if isinstance(begin, list): + begin = const(list(begin)) + if isinstance(end, list): + end = const(list(end)) + if isinstance(strides, list): + strides = const(list(strides)) return _make.strided_set(data, v, begin, end, strides) diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index a6d7e3a6730a..27c459e76b33 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -1096,11 +1096,11 @@ bool ArangeRel(const Array& types, int num_inputs, const Attrs& raw_attrs, } inline te::Tensor DynamicArange(const te::Tensor& start, - const te::Tensor& stop, - const te::Tensor& step, - tvm::DataType dtype, - std::string name = "T_arange_dynamic", - std::string tag = topi::kInjective) { + const te::Tensor& stop, + const te::Tensor& step, + tvm::DataType dtype, + std::string name = "T_arange_dynamic", + std::string tag = topi::kInjective) { tvm::PrimExpr num_elem = tvm::tir::Var("num_elem"); return te::compute( {num_elem}, @@ -1774,10 +1774,18 @@ bool StridedSliceRel(const Array& types, std::vector end_vec; int64_t* end_val = ToVector(cend->data); for (int64_t i = 0; i < cend->data.Shape().front(); ++i) { - end_vec.push_back(end_val[i]); + if (param->ignore_end) { + end_vec.push_back(max_range); + } else { + end_vec.push_back(end_val[i]); + } } for (int64_t i = end_vec.size(); i < num_axis; ++i) { - end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); + if (param->ignore_end) { + end_vec.push_back(max_range); + } else { + end_vec.push_back(stride_vec[i] < 0 ? 
0 : max_range); + } } for (int64_t i = 0; i < num_axis; ++i) { @@ -1810,7 +1818,7 @@ bool StridedSliceRel(const Array& types, int64_t slice_range, step; if (stride_v < 0) { if (end_v < -1) end_v = -1; - CHECK_LT(end_v, begin_v) + CHECK_LE(end_v, begin_v) << "strided_slice get empty slice at axis " << i; begin_v = std::min(dim_size - 1, begin_v); slice_range = begin_v - end_v; @@ -1861,9 +1869,10 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, CHECK(params != nullptr); Array begin, end, strides; const ConstantNode *cbegin, *cend, *cstrides; - if ((cbegin = params->begin.as()) && - (cend = params->end.as()) && - (cstrides = params->strides.as())) { + cbegin = params->begin.as(); + cend = params->end.as(); + cstrides = params->strides.as(); + if (cbegin && cend && cstrides) { int64_t* strides_val = ToVector(cstrides->data); for (int64_t i = 0; i < cstrides->data.Shape().front(); ++i) { strides.push_back(strides_val[i]); @@ -1923,22 +1932,25 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, DataType::Int(64), ctx); int64_t* begin_data = static_cast(begin_ndarray->data); int64_t* end_data = static_cast(end_ndarray->data); + int64_t* strides_data = static_cast(strides_ndarray->data); for (size_t i = 0; i < new_begin.size(); ++i) { begin_data[i] = new_begin[i]; end_data[i] = new_end[i]; + strides_data[i] = 1; } params->begin = Constant(begin_ndarray); params->end = Constant(end_ndarray); + params->strides = Constant(strides_ndarray); } return {{layout, Layout("C"), Layout("C"), Layout("C")}, {layout}}; } inline te::Tensor DynamicStridedSlice(const te::Tensor& input, - const te::Tensor& begin, - const te::Tensor& end, - const te::Tensor& strides, - std::string name = "T_strided_slice_dynamic", - std::string tag = topi::kInjective) { + const te::Tensor& begin, + const te::Tensor& end, + const te::Tensor& strides, + std::string name = "T_strided_slice_dynamic", + std::string tag = topi::kInjective) { int64_t src_tensor_dim = input->shape.size(); Array out_shape; for (int64_t i = 0; i < src_tensor_dim; ++i) { @@ -1984,6 +1996,11 @@ Array StridedSliceCompute(const Attrs& attrs, const Arrayshape[0].as()->value == data->shape.size() + && end->shape[0].as()->value == data->shape.size() + && strides->shape[0].as()->value == data->shape.size()) + << "begin, end, and strides are required to have the same length" + << " if they are non-constant."; return Array{ DynamicStridedSlice(data, begin, end, strides) }; @@ -1994,11 +2011,13 @@ Array StridedSliceCompute(const Attrs& attrs, const Array(); attrs->begin = begin; attrs->end = end; attrs->strides = strides; + attrs->ignore_end = ignore_end; static const Op& op = Op::Get("strided_slice"); return Call(op, {data, begin, end, strides}, Attrs(attrs), {}); } @@ -2031,19 +2050,19 @@ Examples:: [[ 5., 6.], [ 7., 8.]]] )code" TVM_ADD_FILELINE) -.set_num_inputs(4) -.add_argument("data", "Tensor", "The input tensor.") -.add_argument("begin", "Tensor", "The indices to begin with in the slicing.") -.add_argument("end", "Tensor", "Indices indicating end of the slice.") -.add_argument("strides", "Tensor", "The stride values.") -.set_support_level(4) -.set_attrs_type() -.add_type_rel("StridedSlice", StridedSliceRel) -.set_attr("FTVMCompute", StridedSliceCompute) -// TODO(@icemelon, @yongwww): Change to kOpaque because FuseOps doesn't consider dynamic shape -.set_attr("TOpPattern", kOpaque) -.set_attr("AnyCodegenStrategy", kVariableDimensions) -.set_attr("FInferCorrectLayout", StridedSliceInferCorrectLayout); + .set_num_inputs(4) + 
.add_argument("data", "Tensor", "The input tensor.") + .add_argument("begin", "Tensor", "The indices to begin with in the slicing.") + .add_argument("end", "Tensor", "Indices indicating end of the slice.") + .add_argument("strides", "Tensor", "The stride values.") + .add_argument("ignore_end", "Tensor", "Whether to ignore end.") + .set_support_level(4) + .set_attrs_type() + .add_type_rel("StridedSlice", StridedSliceRel) + .set_attr("FTVMCompute", StridedSliceCompute) + .set_attr("TOpPattern", kInjective) + .set_attr("AnyCodegenStrategy", kVariableDimensions) + .set_attr("FInferCorrectLayout", StridedSliceInferCorrectLayout); // strided_set bool StridedSetRel(const Array& types, int num_inputs, const Attrs& attrs, diff --git a/src/relay/op/vision/nms.cc b/src/relay/op/vision/nms.cc index 4002820d0c15..e51432931605 100644 --- a/src/relay/op/vision/nms.cc +++ b/src/relay/op/vision/nms.cc @@ -133,12 +133,12 @@ be in the format of [class_id, score, left, top, right, bottom] or [score, left, top, right, bottom]. Set id_index to be -1 to ignore class_id axis. )doc" TVM_ADD_FILELINE) -.set_num_inputs(3) -.add_argument("data", "Tensor", "Input data.") -.add_argument("valid_count", "Tensor", "Number of valid anchor boxes.") -.add_argument("indices", "Tensor", "Corresponding indices in original input tensor.") -.set_support_level(5) -.add_type_rel("NMS", NMSRel); + .set_num_inputs(3) + .add_argument("data", "Tensor", "Input data.") + .add_argument("valid_count", "Tensor", "Number of valid anchor boxes.") + .add_argument("indices", "Tensor", "Corresponding indices in original input tensor.") + .set_support_level(5) + .add_type_rel("NMS", NMSRel); } // namespace relay } // namespace tvm diff --git a/src/relay/transforms/combine_parallel_conv2d.cc b/src/relay/transforms/combine_parallel_conv2d.cc index 6c53b6706fc9..d17e2fcd9655 100644 --- a/src/relay/transforms/combine_parallel_conv2d.cc +++ b/src/relay/transforms/combine_parallel_conv2d.cc @@ -167,6 +167,7 @@ class ParallelConv2DCombiner : public ParallelOpCombiner { void UpdateGroupOutput(const Expr& data, const Group& branches, size_t depth, ExprSubstMap* subst_map) { int64_t index = 0; + for (const auto& branch : branches) { const CallNode* conv2d = branch[0]; int64_t channels = GetConv2DSuperChannelsDim(conv2d); @@ -174,13 +175,11 @@ class ParallelConv2DCombiner : public ParallelOpCombiner { Array end; for (size_t i = 0; i < channel_pos_; i++) { begin.push_back(0); - end.push_back(NullValue()); + end.push_back(channels); } begin.push_back(index); index += channels; end.push_back(index); - - DLContext ctx; ctx.device_type = kDLCPU; ctx.device_id = 0; @@ -190,18 +189,22 @@ class ParallelConv2DCombiner : public ParallelOpCombiner { DataType::Int(64), ctx); auto strides_ndarray = runtime::NDArray::Empty({int64_t(begin.size())}, DataType::Int(64), ctx); - int64_t* begin_data = static_cast(begin_ndarray->data); - int64_t* end_data = static_cast(end_ndarray->data); + + auto* begin_data = static_cast(begin_ndarray->data); + auto* end_data = static_cast(end_ndarray->data); + auto* strides_data = static_cast(strides_ndarray->data); for (size_t i = 0; i < begin.size(); ++i) { begin_data[i] = begin[i]; end_data[i] = end[i]; + strides_data[i] = 1; } auto slice = MakeStridedSlice(data, Constant(begin_ndarray), Constant(end_ndarray), - Constant(strides_ndarray)); + Constant(strides_ndarray), + false); subst_map->insert({GetRef(branch[depth]), slice}); } } diff --git a/src/relay/transforms/fuse_ops.cc b/src/relay/transforms/fuse_ops.cc index 
566f1424e3ea..01f1eeea30b3 100644 --- a/src/relay/transforms/fuse_ops.cc +++ b/src/relay/transforms/fuse_ops.cc @@ -249,28 +249,6 @@ class IndexedForwardGraph::Creator : private ExprVisitor { this->Update(call->op, node, kOpaque); } - if (call->attrs.as()) { - bool is_dyn{false}; - for (auto arg : call->args) { - if (!arg.as()) { - is_dyn = true; - break; - } - auto arg_tt = arg->checked_type().as(); - if (arg_tt) { - for (auto dim : arg_tt->shape) { - if (dim.as()) { - is_dyn = true; - } - } - } - if (is_dyn) break; - } - if (!is_dyn) { - op_pattern = kInjective; - } - } - node->pattern = op_pattern; this->Update(call->op, nullptr, kOpaque); const auto* rtype = call->checked_type().as(); diff --git a/src/relay/transforms/pattern_util.h b/src/relay/transforms/pattern_util.h index 8964959bfcfd..89f29fcc0cce 100644 --- a/src/relay/transforms/pattern_util.h +++ b/src/relay/transforms/pattern_util.h @@ -673,7 +673,7 @@ Expr MakeConcatenate(Expr data, int axis); Expr MakeRepeat(Expr data, int repeats, int axis); -Expr MakeStridedSlice(Expr data, Expr begin, Expr end, Expr strides); +Expr MakeStridedSlice(Expr data, Expr begin, Expr end, Expr strides, bool ignore_end); Expr MakeStack(Expr data, int axis); diff --git a/tests/python/relay/test_any.py b/tests/python/relay/test_any.py index 9464e865b4e1..e5a7521ce721 100644 --- a/tests/python/relay/test_any.py +++ b/tests/python/relay/test_any.py @@ -644,13 +644,13 @@ def test_arange_with_dynamic_shape(): tvm.testing.assert_allclose(result.asnumpy(), np.array(range(10)).astype("int32")+1) def verify_any_strided_slice(data_shape, begin_shape, end_shape, - strides_shape, data_np_shape): + strides_shape, data_np_shape, ignore_end=False): mod = tvm.IRModule() data = relay.var('data', shape=data_shape, dtype='float32') begin = relay.var('begin', shape=begin_shape, dtype="int32") end = relay.var('end', shape=end_shape, dtype="int32") strides = relay.var('strides', shape=strides_shape, dtype="int32") - y = relay.strided_slice(data, begin, end, strides) + y = relay.strided_slice(data, begin, end, strides, ignore_end) mod["main"] = relay.Function([data, begin, end, strides], y) # Generate random numpy input data @@ -670,6 +670,7 @@ def test_any_strided_slice(): verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (15, 17, 21)) verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (23, 29, 41)) verify_any_strided_slice(any_dims(4), (4,), (4,), (4,), (40, 50, 60, 70)) + verify_any_strided_slice(any_dims(4), (4,), (4,), (4,), (40, 50, 60, 70), ignore_end=True) def test_recursive_concat(): diff --git a/tests/python/relay/test_op_level2.py b/tests/python/relay/test_op_level2.py index 68eced328fa8..c9a19044ea33 100644 --- a/tests/python/relay/test_op_level2.py +++ b/tests/python/relay/test_op_level2.py @@ -780,7 +780,7 @@ def _test_pool2d_int(opfunc, reffunc, dtype): x = relay.var("x", shape=dshape, dtype=dtype) y = opfunc(x, pool_size=(2, 2), strides=(2, 2), padding=(0, 0)) func = relay.Function([x], y) - data = np.random.random_integers(low=-128, high=128, size=dshape) + data = np.random.randint(low=-128, high=128, size=dshape) ref_res = reffunc(data.reshape(1,3,14,2,14,2), axis=(3,5)).astype(dtype) for target, ctx in ctx_list(): intrp1 = relay.create_executor("graph", ctx=ctx, target=target) diff --git a/tests/python/relay/test_op_level4.py b/tests/python/relay/test_op_level4.py index ea5ebcd6c265..4b5e19223dca 100644 --- a/tests/python/relay/test_op_level4.py +++ b/tests/python/relay/test_op_level4.py @@ -58,11 +58,11 @@ def check_binary_op(opfunc, ref): 
def test_cmp_type(): for op, ref in ((relay.greater, np.greater), - (relay.greater_equal, np.greater_equal), - (relay.less, np.less), - (relay.less_equal, np.less_equal), - (relay.equal, np.equal), - (relay.not_equal, np.not_equal)): + (relay.greater_equal, np.greater_equal), + (relay.less, np.less), + (relay.less_equal, np.less_equal), + (relay.equal, np.equal), + (relay.not_equal, np.not_equal)): x = relay.var("x", relay.TensorType((10, 4), "float32")) y = relay.var("y", relay.TensorType((5, 10, 1), "float32")) z = op(x, y) @@ -296,7 +296,8 @@ def test_mean_var_std(): def test_strided_slice(): - def verify(dshape, begin, end, strides, output, test_ref=True, dtype="int32"): + def verify(dshape, begin, end, strides, output, + ignore_end=False, test_ref=True, dtype="int32"): x = relay.var("x", relay.TensorType(dshape, "float32")) ndim = len(dshape) begin = begin if begin else [0] * ndim @@ -308,11 +309,13 @@ def verify(dshape, begin, end, strides, output, test_ref=True, dtype="int32"): z = relay.strided_slice(x, begin=begin_expr, end=end_expr, - strides=strides_expr) + strides=strides_expr, + ignore_end=ignore_end) else: z = relay.strided_slice(x, begin=begin_expr, - end=end_expr) + end=end_expr, + ignore_end=ignore_end) func = relay.Function([x], z) func = run_infer_type(func) @@ -320,6 +323,7 @@ def verify(dshape, begin, end, strides, output, test_ref=True, dtype="int32"): assert "begin=" in text assert "end=" in text + if output: assert func.body.checked_type == relay.ty.TensorType(output, "float32") @@ -333,10 +337,12 @@ def verify(dshape, begin, end, strides, output, test_ref=True, dtype="int32"): op_res = intrp.evaluate(func)(x_data) tvm.testing.assert_allclose(op_res.asnumpy(), ref_res) - verify((1, 224, 224, 3), [0, 20, 20, 0], [1, 140, 140, 3], [1, 1, 1, 1], (1, 120, 120, 3), dtype="int64") + verify((1, 224, 224, 3), [0, 20, 20, 0], [1, 140, 140, 3], [1, 1, 1, 1], + (1, 120, 120, 3), dtype="int64") verify((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1], (1, 3, 3), dtype="int16") verify((3, 4, 3), [0, 0, 0], [4, -5, 4], [1, -1, 2], (3, 1, 2)) - verify((3, 4, 3), [1, 0, 0], [2, 2, 3], [1, 1, 2], (1, 2, 2)) + verify((3, 4, 3), [1, 0, 0], [2, 2, 3], [1, 1, 2], (2, 4, 2), + ignore_end=True, test_ref=False) verify((3, 4, 3), [1, 1, 0], [4, 4, 3], None, (2, 3, 3)) verify((3, 4, 3), [1, 1, 0], [4, 1000, 3], None, (2, 3, 3)) verify((3, 4, 3), [1, 1, 0], [4, 4], None, (2, 3, 3)) diff --git a/tests/python/relay/test_pass_alter_op_layout.py b/tests/python/relay/test_pass_alter_op_layout.py index c45b82f36602..ee4a27c20316 100644 --- a/tests/python/relay/test_pass_alter_op_layout.py +++ b/tests/python/relay/test_pass_alter_op_layout.py @@ -18,10 +18,11 @@ import pytest import tvm -from tvm import te from tvm import relay from tvm.relay import transform, analysis from tvm.relay.testing.temp_op_attr import TempOpAttr +from tvm.relay.testing import ctx_list, run_infer_type +import numpy as np def run_opt_pass(expr, passes): passes = passes if isinstance(passes, list) else [passes] @@ -620,7 +621,10 @@ def before(): x = relay.var("x", shape=(1, 32, 28, 28)) weight = relay.var('weight', shape=(32, 32, 3, 3)) y = relay.nn.conv2d(x, weight, channels=32, kernel_size=(3, 3), padding=(1, 1)) - y = relay.strided_slice(y, begin=relay.const([0, 16], "int32"), end=relay.const([1, 32], "int32")) + y = relay.strided_slice(y, + begin=relay.const([0, 16], "int32"), + end=relay.const([1, 33], "int32"), + strides=relay.const([1, 1], "int32")) y = relay.Function(analysis.free_vars(y), y) return y @@ -632,22 +636,41 @@ 
def alter_conv2d(attrs, inputs, tinfos, out_type): def expected(): x = relay.var("x", shape=(1, 32, 28, 28)) - weight = relay.var("weight") + weight = relay.var("weight", shape=(32, 32, 3, 3)) + weight = relay.layout_transform(weight, "OIHW", "OIHW4i4o") x = relay.layout_transform(x, "NCHW", "NCHW4c") - y = relay.nn.conv2d(x, weight, channels=32, kernel_size=(3, 3), padding=(1, 1), - data_layout="NCHW4c") - y = relay.strided_slice(y, begin=relay.const([0, 4], "int32"), end=relay.const([1, 8], "int32")) + y = relay.op.nn.contrib_conv2d_nchwc(x, weight, channels=32, kernel_size=(3, 3), padding=(1, 1), + data_layout="NCHW4c") + + y = relay.strided_slice(y, + begin=relay.const([0, 4], "int32"), + end=relay.const([1, 21], "int32"), # [1, 8] + strides=relay.const([1, 1], "int32")) + y = relay.layout_transform(y, "NCHW4c", "NCHW") y = relay.Function(analysis.free_vars(y), y) return y with TempOpAttr("nn.conv2d", "FTVMAlterOpLayout", alter_conv2d): a = before() - a = run_opt_pass(a, [transform.CanonicalizeOps(), - transform.AlterOpLayout()]) b = run_opt_pass(expected(), transform.InferType()) - assert tvm.ir.structural_equal(a, b), "Actual = \n" + str(a) + # Verify inference result + mod_before = tvm.IRModule() + mod_new = tvm.IRModule() + mod_before['main'] = a + mod_new['main'] = b + with relay.build_config(opt_level=3): + for target, ctx in ctx_list(): + for kind in ["graph", "debug", "vm"]: + ex_before = relay.create_executor(kind, mod=mod_before, ctx=ctx, target=target) + ex_new = relay.create_executor(kind, mod=mod_new, ctx=ctx, target=target) + np_data = np.random.uniform(size=(1, 32, 28, 28)).astype("float32") + np_weight = np.random.uniform(size=(32, 32, 3, 3)).astype("float32") + result_before = ex_before.evaluate()(np_data, np_weight) + result_new = ex_new.evaluate()(np_data, np_weight) + tvm.testing.assert_allclose(result_before.asnumpy(), result_new.asnumpy(), rtol=1e-5, atol=1e-5) + def test_alter_layout_depthwise_conv2d(): """Test depthwise_conv2d operator""" diff --git a/tests/python/relay/test_pass_combine_parallel_conv2d.py b/tests/python/relay/test_pass_combine_parallel_conv2d.py index 7f7f18598589..291000965be9 100644 --- a/tests/python/relay/test_pass_combine_parallel_conv2d.py +++ b/tests/python/relay/test_pass_combine_parallel_conv2d.py @@ -15,9 +15,9 @@ # specific language governing permissions and limitations # under the License. 
import tvm -from tvm import te from tvm import relay from tvm.relay import transform +import numpy as np def run_combine_parallel(expr, min_num_branches=3): @@ -50,17 +50,25 @@ def expected(x, w1, w2, w3, w4, channels1, channels2, channels3, channels4): args = [x, w1, w2, w3, w4] w = relay.concatenate((w1, w2, w4), axis=0) y = relay.nn.conv2d(x, w, channels=channels1 + channels2 + channels4) - y1 = relay.strided_slice(y, [0, 0], [None, channels1]) - y2 = relay.strided_slice(y, [0, channels1], [None, channels1 + channels2]) + y1 = relay.strided_slice(y, + begin=relay.const([0, 0], "int64"), + end=relay.const([1, channels1], "int64"), + strides=relay.const([2, 1], 'int64')) + y2 = relay.strided_slice(y, + begin=relay.const([0, channels1], "int64"), + end=relay.const([1, channels1 + channels2], "int64"), + strides=relay.const([2, 1], 'int64')) y3 = relay.nn.conv2d(x, w3) - y4 = relay.strided_slice(y, [0, channels1 + channels2], - [None, channels1 + channels2 + channels4]) + y4 = relay.strided_slice(y, + begin=relay.const([0, channels1 + channels2], "int64"), + end=relay.const([1, channels1 + channels2 + channels4], "int64"), + strides=relay.const([2, 1], 'int64')) y5 = relay.nn.max_pool2d(x) y = relay.Tuple((y1, y2, y3, y4, y5)) return relay.Function(args, y) def check(x_shape, channels1, channels2, channels3, channels4): - x = relay.var("x", shape=x_shape) + x = relay.var("x", shape=x_shape) in_c = x_shape[1] w1 = relay.var("w1", shape=(channels1, in_c, 1, 1)) w2 = relay.var("w2", shape=(channels2, in_c, 1, 1)) @@ -72,7 +80,8 @@ def check(x_shape, channels1, channels2, channels3, channels4): transform.CombineParallelConv2D(min_num_branches=2)) y_expected = expected(x, w1, w2, w3, w4, channels1, channels2, channels3, channels4) y_expected = run_opt_pass(y_expected, transform.InferType()) - assert tvm.ir.structural_equal(y, y_expected, map_free_vars=True) + np.testing.assert_string_equal(str(y), str(y_expected)),\ + "Actual = \n" + str(y) + "\nExpected = \n" + str(y_expected) check((1, 4, 16, 16), 4, 4, 4, 4) check((1, 4, 16, 16), 4, 8, 4, 7) @@ -99,8 +108,14 @@ def expected(x, w1, w2, scale1, scale2, bias, channels1, channels2): y = relay.nn.conv2d(x, w, channels=channels1 + channels2) y = relay.multiply(y, scale) y = relay.nn.relu(y) - y1 = relay.strided_slice(y, [0, 0], [None, channels1]) - y2 = relay.strided_slice(y, [0, channels1], [None, channels1 + channels2]) + y1 = relay.strided_slice(y, + begin=relay.const([0, 0], "int64"), + end=relay.const([1, channels1], "int64"), + strides=relay.const([2, 1], "int64")) + y2 = relay.strided_slice(y, + begin=relay.const([0, channels1], "int64"), + end=relay.const([1, channels1 + channels2], "int64"), + strides=relay.const([2, 1], "int64")) y2 = relay.add(y2, bias) y = relay.Tuple((y1, y2)) return relay.Function(args, y) @@ -118,7 +133,8 @@ def check(x_shape, channels1, channels2): transform.CombineParallelConv2D(min_num_branches=2)) y_expected = expected(x, w1, w2, scale1, scale2, bias, channels1, channels2) y_expected = run_opt_pass(y_expected, transform.InferType()) - assert tvm.ir.structural_equal(y, y_expected, map_free_vars=True) + np.testing.assert_string_equal(str(y), str(y_expected)),\ + "Actual = \n" + str(y) + "Expected = \n" + str(y_expected) check((1, 4, 16, 16), 4, 8) @@ -138,8 +154,14 @@ def expected(x, w1, w2, scale1, scale2, channels1, channels2): args = [x, w1, w2, scale1, scale2] w = relay.concatenate((w1, w2), axis=0) y = relay.nn.conv2d(x, w, channels=channels1 + channels2) - y1 = relay.strided_slice(y, [0, 0], [None, 
channels1]) - y2 = relay.strided_slice(y, [0, channels1], [None, channels1 + channels2]) + y1 = relay.strided_slice(y, + begin=relay.const([0, 0], "int64"), + end=relay.const([1, channels1], "int64"), + strides=relay.const([2, 1], "int64")) + y2 = relay.strided_slice(y, + begin=relay.const([0, channels1], "int64"), + end=relay.const([1, channels1 + channels2], "int64"), + strides=relay.const([2, 1], "int64")) y1 = relay.multiply(y1, scale1) y2 = relay.multiply(y2, scale2) y = relay.Tuple((y1, y2)) @@ -157,7 +179,8 @@ def check(x_shape, channels1, channels2): transform.CombineParallelConv2D(min_num_branches=2)) y_expected = expected(x, w1, w2, scale1, scale2, channels1, channels2) y_expected = run_opt_pass(y_expected, transform.InferType()) - assert tvm.ir.structural_equal(y, y_expected, map_free_vars=True) + np.testing.assert_string_equal(str(y), str(y_expected)),\ + "Actual = \n" + str(y) + "Expected = \n" + str(y_expected) check((1, 4, 16, 16), 4, 8) @@ -178,8 +201,14 @@ def expected(x, w, channels, repeat): for i in range(repeat): w_concat = relay.concatenate((w, w), axis=0) y = relay.nn.conv2d(y, w_concat, channels=channels*2) - y1 = relay.strided_slice(y, [0, 0], [None, channels]) - y2 = relay.strided_slice(y, [0, channels], [None, channels * 2]) + y1 = relay.strided_slice(y, + begin=relay.const([0, 0], "int64"), + end=relay.const([1, channels], "int64"), + strides=relay.const([2, 1], "int64")) + y2 = relay.strided_slice(y, + begin=relay.const([0, channels], "int64"), + end=relay.const([1, channels * 2], "int64"), + strides=relay.const([2, 1], "int64")) y = relay.concatenate((y1, y2), axis=1) return relay.Function(args, y) @@ -193,7 +222,8 @@ def check(x_shape, repeat): transform.CombineParallelConv2D(min_num_branches=2)) y_expected = expected(x, w, out_c, repeat) y_expected = run_opt_pass(y_expected, transform.InferType()) - assert tvm.ir.structural_equal(y, y_expected, map_free_vars=True) + np.testing.assert_string_equal(str(y), str(y_expected)),\ + "Actual = \n" + str(y) + "\nExpected = \n" + str(y_expected) check((1, 4, 16, 16), 4) diff --git a/topi/python/topi/testing/strided_slice_python.py b/topi/python/topi/testing/strided_slice_python.py index c1c899afe31f..b21c3fb87119 100644 --- a/topi/python/topi/testing/strided_slice_python.py +++ b/topi/python/topi/testing/strided_slice_python.py @@ -17,7 +17,7 @@ """strided_slice/set in python""" -def strided_slice_python(data, begin, end, strides): +def strided_slice_python(data, begin, end, strides, ignore_end=False): """Python version of strided slice operator. Parameters @@ -34,6 +34,9 @@ def strided_slice_python(data, begin, end, strides): strides : list The stride of each slice. + ignore_end : boolean + Whether to ignore input end + Returns ------- result : numpy.ndarray @@ -44,7 +47,7 @@ def strided_slice_python(data, begin, end, strides): for i in range(len(data.shape)): slices.append(slice( begin[i] if i < len(begin) else None, - end[i] if i < len(end) else None, + end[i] if i < len(end) and not ignore_end else None, strides[i] if i < len(strides) else None)) return data[tuple(slices)] diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index ff865ce988c0..1a2089683b62 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -23,7 +23,7 @@ from ..sort import argsort @hybrid.script -def hybrid_rearrange_box_out(data, one, batch_size): +def hybrid_rearrange_box_out(data, one, batch_size, num_anchors): """Hybrid routine to rearrange nms output to move all valid entries to top. 
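[Reviewer sketch, not part of the patch] Two related dynamic-shape accommodations land together in this commit. In nms.py, num_anchors is threaded through as an explicit argument because hybrid script cannot bind a variable to a symbolic dimension of data.shape, mirroring how batch_size is already handled. And in the strided_slice_python reference above, the new ignore_end flag simply drops the provided end indices so every axis slices through to its full extent. A minimal NumPy model of that reference semantics (ref_slice and the sample shapes are illustrative, assuming only what the hunk shows):

    import numpy as np

    def ref_slice(data, begin, end, strides, ignore_end=False):
        # Mirrors strided_slice_python: missing entries default to the whole
        # axis, and end is discarded entirely when ignore_end is requested.
        slices = [slice(begin[i] if i < len(begin) else None,
                        end[i] if i < len(end) and not ignore_end else None,
                        strides[i] if i < len(strides) else None)
                  for i in range(len(data.shape))]
        return data[tuple(slices)]

    x = np.arange(24).reshape(2, 3, 4)
    assert ref_slice(x, [0, 1], [1, 2], [1, 1]).shape == (1, 1, 4)
    assert ref_slice(x, [0, 1], [1, 2], [1, 1], ignore_end=True).shape == (2, 2, 4)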
@@ -40,13 +40,15 @@ def hybrid_rearrange_box_out(data, one, batch_size): Batch size. We need to pass it in since hybrid script doesn't support binding variable to symbolic dim. + num_anchors: tvm.tir.IntImm or tvm.tir.Var + Number of anchors. + Returns ------- output : tvm.te.Tensor or numpy NDArray Transformed NMS output. 3-D tensor with shape [batch_size, num_anchors, 6]. """ - num_anchors = data.shape[1] elem_length = data.shape[2] output = output_tensor((batch_size, num_anchors, @@ -67,7 +69,7 @@ def hybrid_rearrange_box_out(data, one, batch_size): @hybrid.script -def hybrid_rearrange_indices_out(data, one, batch_size): +def hybrid_rearrange_indices_out(data, one, batch_size, num_anchors): """Hybrid routine to rearrange nms output to move all valid entries to top. @@ -86,6 +88,9 @@ def hybrid_rearrange_indices_out(data, one, batch_size): Batch size. We need to pass it in since hybrid script doesn't support binding variable to symbolic dim. + num_anchors: tvm.tir.IntImm or tvm.tir.Var + Number of anchors. + Returns ------- output : tvm.te.Tensor or numpy NDArray @@ -95,7 +100,6 @@ def hybrid_rearrange_indices_out(data, one, batch_size): Tensor with shape [batch_size, 1], indicates the valid number of boxes. """ - num_anchors = data.shape[1] valid_box_count = output_tensor((batch_size, 1), "int32") output = output_tensor((batch_size, num_anchors), data.dtype) @@ -116,7 +120,8 @@ def hybrid_rearrange_indices_out(data, one, batch_size): @hybrid.script -def hybrid_get_valid_counts(data, score_threshold, id_index, score_index, one, batch_size): +def hybrid_get_valid_counts(data, score_threshold, id_index, score_index, + one, batch_size, num_anchors): """Hybrid routine to get valid count of bounding boxes given a score threshold. Also moves valid boxes to the top of input data. @@ -143,6 +148,9 @@ def hybrid_get_valid_counts(data, score_threshold, id_index, score_index, one, b Batch size. We need to pass it in since hybrid script doesn't support binding variable to symbolic dim. + num_anchors: tvm.tir.IntImm or tvm.tir.Var + Number of anchors. + Returns ------- valid_count : tvm.te.Tensor or numpy NDArray @@ -154,7 +162,6 @@ def hybrid_get_valid_counts(data, score_threshold, id_index, score_index, one, b out_indices: tvm.te.Tensor or numpy NDArray Related index in input data. """ - num_anchors = data.shape[1] box_data_length = data.shape[2] valid_count = output_tensor((batch_size,), "int32") out_tensor = output_tensor((batch_size, @@ -215,13 +222,13 @@ def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): return hybrid_get_valid_counts(data, score_threshold_const, id_index_const, score_index_const, tvm.tir.const(1, data.dtype), - data.shape[0]) + data.shape[0], data.shape[1]) @hybrid.script -def hybrid_nms(data, sorted_index, valid_count, indices, batch_size, max_output_size, - iou_threshold, force_suppress, top_k, coord_start, score_index, - id_index, return_indices, zero, one): +def hybrid_nms(data, sorted_index, valid_count, indices, batch_size, num_anchors, + max_output_size, iou_threshold, force_suppress, top_k, coord_start, + score_index, id_index, return_indices, zero, one): """Hybrid routing for non-maximum suppression. Parameters @@ -285,7 +292,6 @@ def hybrid_nms(data, sorted_index, valid_count, indices, batch_size, max_output_ 2-D tensor with shape [batch_size, num_anchors]. 
""" - num_anchors = data.shape[1] box_data_length = data.shape[2] # box_indices is the expected value, similar to TF & ONNX @@ -491,6 +497,7 @@ def non_max_suppression(data, valid_count, indices, max_output_size=-1, valid_count, indices, batch_size, + num_anchors, tvm.tir.const(max_output_size, dtype="int32"), tvm.tir.const(iou_threshold, dtype=data.dtype), tvm.tir.const(force_suppress, dtype="bool"), @@ -503,9 +510,9 @@ def non_max_suppression(data, valid_count, indices, max_output_size=-1, one=tvm.tir.const(1, dtype=data.dtype)) if return_indices: return hybrid_rearrange_indices_out(box_indices, one=tvm.tir.const(1, dtype="int32"), - batch_size=batch_size) + batch_size=batch_size, num_anchors=num_anchors) if invalid_to_bottom: out = hybrid_rearrange_box_out(out, one=tvm.tir.const(1, dtype=data.dtype), - batch_size=batch_size) + batch_size=batch_size, num_anchors=num_anchors) return out From 4e22eef8835e88105f12ca70c5f1593bf003aa38 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Wed, 20 May 2020 01:13:51 +0800 Subject: [PATCH 11/22] change attr to Optional --- include/tvm/relay/attrs/transform.h | 6 +- src/relay/op/tensor/transform.cc | 141 ++++++++---------- .../test_pass_combine_parallel_conv2d.py | 48 +++--- topi/include/topi/transform.h | 6 +- topi/python/topi/transform.py | 7 +- topi/src/transform.cc | 2 +- 6 files changed, 96 insertions(+), 114 deletions(-) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index a4c3d0194b22..b63c319a19aa 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -210,9 +210,9 @@ struct SplitAttrs : public tvm::AttrsNode { /*! \brief Attributes for StridedSlice operator */ struct StridedSliceAttrs : public tvm::AttrsNode { - Expr begin; - Expr end; - Expr strides; + Optional> begin; + Optional> end; + Optional> strides; bool ignore_end; TVM_DECLARE_ATTRS(StridedSliceAttrs, "relay.attrs.StridedSliceAttrs") { diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index 27c459e76b33..0f54c4d5cb7f 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -1750,42 +1750,39 @@ bool StridedSliceRel(const Array& types, // calculate output shape std::vector oshape(num_axis); - const ConstantNode *cbegin, *cend, *cstrides; - if ((cbegin = param->begin.as()) && - (cend = param->end.as()) && - (cstrides = param->strides.as())) { + if (param->begin && param->end && param->strides) { std::vector stride_vec; - int64_t* strides_val = ToVector(cstrides->data); - for (int64_t i = 0; i < cstrides->data.Shape().front(); ++i) { - stride_vec.push_back(strides_val[i]); + for (Integer i : param->strides.value()) { + CHECK(i.defined()); + stride_vec.push_back(i->value); } for (int64_t i = stride_vec.size(); i < num_axis; ++i) { stride_vec.push_back(1); } const int64_t max_range = std::numeric_limits::max(); std::vector begin_vec; - int64_t* begin_val = ToVector(cbegin->data); - for (int64_t i = 0; i < cbegin->data.Shape().front(); ++i) { - begin_vec.push_back(begin_val[i]); + for (size_t i = 0; i < param->begin.value().size(); ++i) { + if (!param->begin.value()[i].defined()) { + begin_vec.push_back(stride_vec[i] > 0 ? 0 : max_range); + } else { + begin_vec.push_back(param->begin.value()[i]->value); + } } - for (int64_t i = begin_vec.size(); i < num_axis; ++i) { + for (size_t i = begin_vec.size(); i < num_axis; ++i) { begin_vec.push_back(stride_vec[i] > 0 ? 
0 : max_range); } + std::vector end_vec; - int64_t* end_val = ToVector(cend->data); - for (int64_t i = 0; i < cend->data.Shape().front(); ++i) { - if (param->ignore_end) { - end_vec.push_back(max_range); + for (size_t i = 0; i < param->end.value().size(); ++i) { + // allow end to be None + if (param->ignore_end || (!param->end.value()[i].defined())) { + end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); } else { - end_vec.push_back(end_val[i]); + end_vec.push_back(param->end.value()[i]->value); } } - for (int64_t i = end_vec.size(); i < num_axis; ++i) { - if (param->ignore_end) { - end_vec.push_back(max_range); - } else { - end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); - } + for (size_t i = end_vec.size(); i < num_axis; ++i) { + end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); } for (int64_t i = 0; i < num_axis; ++i) { @@ -1839,6 +1836,7 @@ bool StridedSliceRel(const Array& types, oshape[i] = Any::make(); } } + reporter->Assign(types[4], TensorType(oshape, data->dtype)); return true; } @@ -1868,22 +1866,19 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, auto *params = const_cast(attrs.as()); CHECK(params != nullptr); Array begin, end, strides; - const ConstantNode *cbegin, *cend, *cstrides; - cbegin = params->begin.as(); - cend = params->end.as(); - cstrides = params->strides.as(); - if (cbegin && cend && cstrides) { - int64_t* strides_val = ToVector(cstrides->data); - for (int64_t i = 0; i < cstrides->data.Shape().front(); ++i) { - strides.push_back(strides_val[i]); + if (params->begin && params->end && params->strides) { + for (Integer i : params->strides.value()) { + CHECK(i.defined()); + strides.push_back(i->value); } - int64_t* begin_val = ToVector(cbegin->data); - for (int64_t i = 0; i < cbegin->data.Shape().front(); ++i) { - begin.push_back(begin_val[i]); + + for (Integer i : params->begin.value()) { + CHECK(i.defined()); + begin.push_back(i->value); } - int64_t* end_val = ToVector(cend->data); - for (int64_t i = 0; i < cend->data.Shape().front(); ++i) { - end.push_back(end_val[i]); + for (Integer i : params->end.value()) { + CHECK(i.defined()); + end.push_back(i->value); } } @@ -1920,27 +1915,8 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, } layout = new_layout; - - DLContext ctx; - ctx.device_type = kDLCPU; - ctx.device_id = 0; - auto begin_ndarray = runtime::NDArray::Empty({int64_t(new_begin.size())}, - DataType::Int(64), ctx); - auto end_ndarray = runtime::NDArray::Empty({int64_t(new_begin.size())}, - DataType::Int(64), ctx); - auto strides_ndarray = runtime::NDArray::Empty({int64_t(new_begin.size())}, - DataType::Int(64), ctx); - int64_t* begin_data = static_cast(begin_ndarray->data); - int64_t* end_data = static_cast(end_ndarray->data); - int64_t* strides_data = static_cast(strides_ndarray->data); - for (size_t i = 0; i < new_begin.size(); ++i) { - begin_data[i] = new_begin[i]; - end_data[i] = new_end[i]; - strides_data[i] = 1; - } - params->begin = Constant(begin_ndarray); - params->end = Constant(end_ndarray); - params->strides = Constant(strides_ndarray); + params->begin = new_begin; + params->end = new_end; } return {{layout, Layout("C"), Layout("C"), Layout("C")}, {layout}}; } @@ -1949,6 +1925,7 @@ inline te::Tensor DynamicStridedSlice(const te::Tensor& input, const te::Tensor& begin, const te::Tensor& end, const te::Tensor& strides, + const bool& ignore_end, std::string name = "T_strided_slice_dynamic", std::string tag = topi::kInjective) { int64_t src_tensor_dim = input->shape.size(); @@ -1970,25 +1947,13 @@ Array 
StridedSliceCompute(const Attrs& attrs, const Array(); CHECK(param != nullptr); - const ConstantNode *cbegin, *cend, *cstrides; - if ((cbegin = param->begin.as()) && - (cend = param->end.as()) && - (cstrides = param->strides.as())) { + if (param->begin && param->end && param->strides) { Array begin, end, strides; - int64_t* strides_val = ToVector(cstrides->data); - for (int64_t i = 0; i < cstrides->data.Shape().front(); ++i) { - strides.push_back(strides_val[i]); - } - int64_t* begin_val = ToVector(cbegin->data); - for (int64_t i = 0; i < cbegin->data.Shape().front(); ++i) { - begin.push_back(begin_val[i]); - } - int64_t* end_val = ToVector(cend->data); - for (int64_t i = 0; i < cend->data.Shape().front(); ++i) { - end.push_back(end_val[i]); - } + begin = param->begin.value(); + end = param->end.value(); + strides = param->strides.value(); return Array{ - topi::strided_slice(inputs[0], begin, end, strides) + topi::strided_slice(inputs[0], begin, end, strides, param->ignore_end) }; } else { te::Tensor data = inputs[0]; @@ -2002,7 +1967,7 @@ Array StridedSliceCompute(const Attrs& attrs, const Array{ - DynamicStridedSlice(data, begin, end, strides) + DynamicStridedSlice(data, begin, end, strides, param->ignore_end) }; } } @@ -2014,9 +1979,27 @@ Expr MakeStridedSlice(Expr data, Expr strides, bool ignore_end) { auto attrs = make_object(); - attrs->begin = begin; - attrs->end = end; - attrs->strides = strides; + const ConstantNode *cbegin, *cend, *cstrides; + if ((cbegin = begin.as()) && + (cend = end.as()) && + (cstrides = strides.as())) { + CHECK_EQ(cbegin->data->ndim, 1); + CHECK_EQ(cend->data->ndim, 1); + CHECK_EQ(cstrides->data->ndim, 1); + Array begin, end, strides; + for (int i = 0; i < cbegin->data->shape[0]; i++) { + begin.push_back(Integer(static_cast(ToScalar(cbegin->data, i)))); + } + for (int i = 0; i < cend->data->shape[0]; i++) { + end.push_back(Integer(static_cast(ToScalar(cend->data, i)))); + } + for (int i = 0; i < cstrides->data->shape[0]; i++) { + strides.push_back(Integer(static_cast(ToScalar(cstrides->data, i)))); + } + attrs->begin = begin; + attrs->end = end; + attrs->strides = strides; + } attrs->ignore_end = ignore_end; static const Op& op = Op::Get("strided_slice"); return Call(op, {data, begin, end, strides}, Attrs(attrs), {}); @@ -2315,7 +2298,7 @@ Array SliceLikeCompute(const Attrs& attrs, const Array& } } return Array{topi::strided_slice(inputs[0], GetIntArray(begin_idx), - GetIntArray(end_idx), GetIntArray(strides))}; + GetIntArray(end_idx), GetIntArray(strides), false)}; } TVM_REGISTER_GLOBAL("relay.op._make.slice_like").set_body_typed(MakeSliceLike); diff --git a/tests/python/relay/test_pass_combine_parallel_conv2d.py b/tests/python/relay/test_pass_combine_parallel_conv2d.py index 291000965be9..c32e9ea39ab3 100644 --- a/tests/python/relay/test_pass_combine_parallel_conv2d.py +++ b/tests/python/relay/test_pass_combine_parallel_conv2d.py @@ -52,17 +52,17 @@ def expected(x, w1, w2, w3, w4, channels1, channels2, channels3, channels4): y = relay.nn.conv2d(x, w, channels=channels1 + channels2 + channels4) y1 = relay.strided_slice(y, begin=relay.const([0, 0], "int64"), - end=relay.const([1, channels1], "int64"), - strides=relay.const([2, 1], 'int64')) + end=relay.const([channels1, channels1], "int64"), + strides=relay.const([1, 1], 'int64')) y2 = relay.strided_slice(y, begin=relay.const([0, channels1], "int64"), - end=relay.const([1, channels1 + channels2], "int64"), - strides=relay.const([2, 1], 'int64')) + end=relay.const([channels2, channels1 + channels2], "int64"), 
+ strides=relay.const([1, 1], 'int64')) y3 = relay.nn.conv2d(x, w3) y4 = relay.strided_slice(y, begin=relay.const([0, channels1 + channels2], "int64"), - end=relay.const([1, channels1 + channels2 + channels4], "int64"), - strides=relay.const([2, 1], 'int64')) + end=relay.const([channels4, channels1 + channels2 + channels4], "int64"), + strides=relay.const([1, 1], 'int64')) y5 = relay.nn.max_pool2d(x) y = relay.Tuple((y1, y2, y3, y4, y5)) return relay.Function(args, y) @@ -80,8 +80,7 @@ def check(x_shape, channels1, channels2, channels3, channels4): transform.CombineParallelConv2D(min_num_branches=2)) y_expected = expected(x, w1, w2, w3, w4, channels1, channels2, channels3, channels4) y_expected = run_opt_pass(y_expected, transform.InferType()) - np.testing.assert_string_equal(str(y), str(y_expected)),\ - "Actual = \n" + str(y) + "\nExpected = \n" + str(y_expected) + assert tvm.ir.structural_equal(y, y_expected, map_free_vars=True) check((1, 4, 16, 16), 4, 4, 4, 4) check((1, 4, 16, 16), 4, 8, 4, 7) @@ -110,12 +109,12 @@ def expected(x, w1, w2, scale1, scale2, bias, channels1, channels2): y = relay.nn.relu(y) y1 = relay.strided_slice(y, begin=relay.const([0, 0], "int64"), - end=relay.const([1, channels1], "int64"), - strides=relay.const([2, 1], "int64")) + end=relay.const([4, channels1], "int64"), + strides=relay.const([1, 1], "int64")) y2 = relay.strided_slice(y, begin=relay.const([0, channels1], "int64"), - end=relay.const([1, channels1 + channels2], "int64"), - strides=relay.const([2, 1], "int64")) + end=relay.const([8, channels1 + channels2], "int64"), + strides=relay.const([1, 1], "int64")) y2 = relay.add(y2, bias) y = relay.Tuple((y1, y2)) return relay.Function(args, y) @@ -133,8 +132,7 @@ def check(x_shape, channels1, channels2): transform.CombineParallelConv2D(min_num_branches=2)) y_expected = expected(x, w1, w2, scale1, scale2, bias, channels1, channels2) y_expected = run_opt_pass(y_expected, transform.InferType()) - np.testing.assert_string_equal(str(y), str(y_expected)),\ - "Actual = \n" + str(y) + "Expected = \n" + str(y_expected) + tvm.ir.structural_equal(y, y_expected, map_free_vars=True) check((1, 4, 16, 16), 4, 8) @@ -156,12 +154,12 @@ def expected(x, w1, w2, scale1, scale2, channels1, channels2): y = relay.nn.conv2d(x, w, channels=channels1 + channels2) y1 = relay.strided_slice(y, begin=relay.const([0, 0], "int64"), - end=relay.const([1, channels1], "int64"), - strides=relay.const([2, 1], "int64")) + end=relay.const([4, channels1], "int64"), + strides=relay.const([1, 1], "int64")) y2 = relay.strided_slice(y, begin=relay.const([0, channels1], "int64"), - end=relay.const([1, channels1 + channels2], "int64"), - strides=relay.const([2, 1], "int64")) + end=relay.const([8, channels1 + channels2], "int64"), + strides=relay.const([1, 1], "int64")) y1 = relay.multiply(y1, scale1) y2 = relay.multiply(y2, scale2) y = relay.Tuple((y1, y2)) @@ -179,8 +177,7 @@ def check(x_shape, channels1, channels2): transform.CombineParallelConv2D(min_num_branches=2)) y_expected = expected(x, w1, w2, scale1, scale2, channels1, channels2) y_expected = run_opt_pass(y_expected, transform.InferType()) - np.testing.assert_string_equal(str(y), str(y_expected)),\ - "Actual = \n" + str(y) + "Expected = \n" + str(y_expected) + tvm.ir.structural_equal(y, y_expected, map_free_vars=True) check((1, 4, 16, 16), 4, 8) @@ -203,12 +200,12 @@ def expected(x, w, channels, repeat): y = relay.nn.conv2d(y, w_concat, channels=channels*2) y1 = relay.strided_slice(y, begin=relay.const([0, 0], "int64"), - end=relay.const([1, 
channels], "int64"), - strides=relay.const([2, 1], "int64")) + end=relay.const([2, channels], "int64"), + strides=relay.const([1, 1], "int64")) y2 = relay.strided_slice(y, begin=relay.const([0, channels], "int64"), - end=relay.const([1, channels * 2], "int64"), - strides=relay.const([2, 1], "int64")) + end=relay.const([2, channels * 2], "int64"), + strides=relay.const([1, 1], "int64")) y = relay.concatenate((y1, y2), axis=1) return relay.Function(args, y) @@ -222,8 +219,7 @@ def check(x_shape, repeat): transform.CombineParallelConv2D(min_num_branches=2)) y_expected = expected(x, w, out_c, repeat) y_expected = run_opt_pass(y_expected, transform.InferType()) - np.testing.assert_string_equal(str(y), str(y_expected)),\ - "Actual = \n" + str(y) + "\nExpected = \n" + str(y_expected) + tvm.ir.structural_equal(y, y_expected, map_free_vars=True) check((1, 4, 16, 16), 4) diff --git a/topi/include/topi/transform.h b/topi/include/topi/transform.h index 400cd1edbdd3..ab5d33b81665 100644 --- a/topi/include/topi/transform.h +++ b/topi/include/topi/transform.h @@ -527,8 +527,8 @@ inline Array split(const Tensor& x, Array split_indices, int ax * \return A Tensor whose op member is the split operation */ inline Tensor strided_slice(const Tensor& x, const Array& begin, const Array& end, - const Array& strides, std::string name = "T_strided_slice", - std::string tag = kInjective) { + const Array& strides, const bool& ignore_end, + std::string name = "T_strided_slice", std::string tag = kInjective) { size_t src_tensor_dim = static_cast(x->shape.size()); // Setup the ranges. // NOTE: this code duplicates the shape inference logic relay.op @@ -559,7 +559,7 @@ inline Tensor strided_slice(const Tensor& x, const Array& begin, const std::vector end_vec; for (size_t i = 0; i < end.size(); ++i) { // allow end to be None - if (!end[i].defined()) { + if (ignore_end || (!end[i].defined())) { end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); } else { end_vec.push_back(end[i]->value); diff --git a/topi/python/topi/transform.py b/topi/python/topi/transform.py index ef5456095899..fe7ceb6015db 100644 --- a/topi/python/topi/transform.py +++ b/topi/python/topi/transform.py @@ -131,7 +131,7 @@ def flip(a, axis=0): """ return cpp.flip(a, axis) -def strided_slice(a, begin, end, strides=None): +def strided_slice(a, begin, end, strides=None, ignore_end=False): """Slice of an array. Parameters @@ -150,13 +150,16 @@ def strided_slice(a, begin, end, strides=None): in that case, the input tensor will be reversed in that particular axis. + ignore_end: boolean, optional + Specifies whether to ignore input end. 
+ Returns ------- ret : tvm.te.Tensor """ if strides is None: strides = [] - return cpp.strided_slice(a, begin, end, strides) + return cpp.strided_slice(a, begin, end, strides, ignore_end) @tvm.te.tag_scope(tag=tag.INJECTIVE+",strided_set") def strided_set(a, v, begin, end, strides=None): diff --git a/topi/src/transform.cc b/topi/src/transform.cc index fa27b995c365..aab20f53fc31 100644 --- a/topi/src/transform.cc +++ b/topi/src/transform.cc @@ -148,7 +148,7 @@ TVM_REGISTER_GLOBAL("topi.tensordot").set_body([](TVMArgs args, TVMRetValue* rv) }); TVM_REGISTER_GLOBAL("topi.strided_slice").set_body([](TVMArgs args, TVMRetValue* rv) { - *rv = strided_slice(args[0], args[1], args[2], args[3]); + *rv = strided_slice(args[0], args[1], args[2], args[3], args[4]); }); TVM_REGISTER_GLOBAL("topi.one_hot").set_body([](TVMArgs args, TVMRetValue* rv) { From 3af16825637b0cbb7d97c4f47888644b1de28134 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Wed, 20 May 2020 01:27:21 +0800 Subject: [PATCH 12/22] clang format --- include/tvm/relay/attrs/transform.h | 12 ++- include/tvm/relay/attrs/vision.h | 18 ++-- src/relay/op/tensor/transform.cc | 87 ++++++++----------- src/relay/op/vision/nms.cc | 21 ++--- .../transforms/combine_parallel_conv2d.cc | 26 ++---- topi/include/topi/transform.h | 1 + 6 files changed, 61 insertions(+), 104 deletions(-) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index b63c319a19aa..0485dbf914fb 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -216,13 +216,11 @@ struct StridedSliceAttrs : public tvm::AttrsNode { bool ignore_end; TVM_DECLARE_ATTRS(StridedSliceAttrs, "relay.attrs.StridedSliceAttrs") { - TVM_ATTR_FIELD(begin) - .describe("Indices for begin of slice, begin index is also inclusive"); - TVM_ATTR_FIELD(end) - .describe("Indices for end of slice, end index is exclusive"); - TVM_ATTR_FIELD(strides) - .describe("Stride values of the slice"); - TVM_ATTR_FIELD(ignore_end).set_default(false) + TVM_ATTR_FIELD(begin).describe("Indices for begin of slice, begin index is also inclusive"); + TVM_ATTR_FIELD(end).describe("Indices for end of slice, end index is exclusive"); + TVM_ATTR_FIELD(strides).describe("Stride values of the slice"); + TVM_ATTR_FIELD(ignore_end) + .set_default(false) .describe("Whether to ignore the input end and infer value of end from input data"); } }; diff --git a/include/tvm/relay/attrs/vision.h b/include/tvm/relay/attrs/vision.h index 52669ea651ee..550e24b8de26 100644 --- a/include/tvm/relay/attrs/vision.h +++ b/include/tvm/relay/attrs/vision.h @@ -101,26 +101,22 @@ struct NonMaximumSuppressionAttrs : public tvm::AttrsNode& types, int num_inputs, const Attrs& raw_attrs, } } -inline te::Tensor DynamicArange(const te::Tensor& start, - const te::Tensor& stop, - const te::Tensor& step, - tvm::DataType dtype, +inline te::Tensor DynamicArange(const te::Tensor& start, const te::Tensor& stop, + const te::Tensor& step, tvm::DataType dtype, std::string name = "T_arange_dynamic", std::string tag = topi::kInjective) { tvm::PrimExpr num_elem = tvm::tir::Var("num_elem"); @@ -1736,9 +1734,7 @@ int64_t* ToVector(const runtime::NDArray& array) { return rel_vec; } -bool StridedSliceRel(const Array& types, - int num_inputs, - const Attrs& attrs, +bool StridedSliceRel(const Array& types, int num_inputs, const Attrs& attrs, const TypeReporter& reporter) { CHECK_EQ(types.size(), 5); const StridedSliceAttrs* param = attrs.as(); @@ -1768,7 +1764,7 @@ bool StridedSliceRel(const Array& types, 
begin_vec.push_back(param->begin.value()[i]->value); } } - for (size_t i = begin_vec.size(); i < num_axis; ++i) { + for (int64_t i = begin_vec.size(); i < num_axis; ++i) { begin_vec.push_back(stride_vec[i] > 0 ? 0 : max_range); } @@ -1781,7 +1777,7 @@ bool StridedSliceRel(const Array& types, end_vec.push_back(param->end.value()[i]->value); } } - for (size_t i = end_vec.size(); i < num_axis; ++i) { + for (int64_t i = end_vec.size(); i < num_axis; ++i) { end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); } @@ -1790,12 +1786,8 @@ bool StridedSliceRel(const Array& types, int64_t begin_v = begin_vec[i]; int64_t end_v = end_vec[i]; - if ((stride_v == 1 && - begin_v == 0 && - end_v == max_range) || - (stride_v == -1 && - begin_v == max_range && - end_v == 0)) { + if ((stride_v == 1 && begin_v == 0 && end_v == max_range) || + (stride_v == -1 && begin_v == max_range && end_v == 0)) { // Quick path, do not slice this dimension. oshape[i] = dshape[i]; continue; @@ -1815,16 +1807,14 @@ bool StridedSliceRel(const Array& types, int64_t slice_range, step; if (stride_v < 0) { if (end_v < -1) end_v = -1; - CHECK_LE(end_v, begin_v) - << "strided_slice get empty slice at axis " << i; + CHECK_LE(end_v, begin_v) << "strided_slice get empty slice at axis " << i; begin_v = std::min(dim_size - 1, begin_v); slice_range = begin_v - end_v; step = -stride_v; } else { if (begin_v < 0) begin_v = 0; CHECK_GE(stride_v, 0); - CHECK_LT(begin_v, end_v) - << "strided_slice get empty slice at axis " << i; + CHECK_LT(begin_v, end_v) << "strided_slice get empty slice at axis " << i; end_v = std::min(dim_size, end_v); slice_range = end_v - begin_v; step = stride_v; @@ -1863,7 +1853,7 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, auto shape = old_in_shapes[0]; // NOTE: Discard "const" qualifier here. - auto *params = const_cast(attrs.as()); + auto* params = const_cast(attrs.as()); CHECK(params != nullptr); Array begin, end, strides; if (params->begin && params->end && params->strides) { @@ -1903,8 +1893,7 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, } } int64_t bg = begin[i].defined() ? begin[i]->value : 0; - int64_t ed = end[i].defined() ? end[i]->value : - shape[i].as()->value; + int64_t ed = end[i].defined() ? 
end[i]->value : shape[i].as()->value; if (bg % factor || ed % factor) { // transform to original layout return {{Layout::Undef()}, {Layout::Undef()}}; @@ -1921,10 +1910,8 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, return {{layout, Layout("C"), Layout("C"), Layout("C")}, {layout}}; } -inline te::Tensor DynamicStridedSlice(const te::Tensor& input, - const te::Tensor& begin, - const te::Tensor& end, - const te::Tensor& strides, +inline te::Tensor DynamicStridedSlice(const te::Tensor& input, const te::Tensor& begin, + const te::Tensor& end, const te::Tensor& strides, const bool& ignore_end, std::string name = "T_strided_slice_dynamic", std::string tag = topi::kInjective) { @@ -1934,13 +1921,16 @@ inline te::Tensor DynamicStridedSlice(const te::Tensor& input, out_shape.push_back(tvm::tir::Var("dim")); } // TODO(yongwww): move the compute into topi - return te::compute(out_shape, [&](const Array& indices) { - Array real_indices; - for (int32_t i = 0; i < src_tensor_dim; ++i) { - real_indices.push_back(indices[i] * strides(i) + begin(i)); - } - return input(real_indices); - }, name, tag); + return te::compute( + out_shape, + [&](const Array& indices) { + Array real_indices; + for (int32_t i = 0; i < src_tensor_dim; ++i) { + real_indices.push_back(indices[i] * strides(i) + begin(i)); + } + return input(real_indices); + }, + name, tag); } Array StridedSliceCompute(const Attrs& attrs, const Array& inputs, @@ -1953,35 +1943,28 @@ Array StridedSliceCompute(const Attrs& attrs, const Arrayend.value(); strides = param->strides.value(); return Array{ - topi::strided_slice(inputs[0], begin, end, strides, param->ignore_end) - }; + topi::strided_slice(inputs[0], begin, end, strides, param->ignore_end)}; } else { te::Tensor data = inputs[0]; te::Tensor begin = inputs[1]; te::Tensor end = inputs[2]; te::Tensor strides = inputs[3]; // Dynamic computation - CHECK(begin->shape[0].as()->value == data->shape.size() - && end->shape[0].as()->value == data->shape.size() - && strides->shape[0].as()->value == data->shape.size()) - << "begin, end, and strides are required to have the same length" - << " if they are non-constant."; - return Array{ - DynamicStridedSlice(data, begin, end, strides, param->ignore_end) - }; + int64_t attr_size = data->shape.size(); + CHECK(begin->shape[0].as()->value == attr_size && + end->shape[0].as()->value == attr_size && + strides->shape[0].as()->value == attr_size) + << "begin, end, and strides are required to have the same length" + << " if they are non-constant."; + return Array{DynamicStridedSlice(data, begin, end, strides, param->ignore_end)}; } } // Positional relay function to create StridedSlice operator used by frontend FFI. -Expr MakeStridedSlice(Expr data, - Expr begin, - Expr end, - Expr strides, - bool ignore_end) { +Expr MakeStridedSlice(Expr data, Expr begin, Expr end, Expr strides, bool ignore_end) { auto attrs = make_object(); const ConstantNode *cbegin, *cend, *cstrides; - if ((cbegin = begin.as()) && - (cend = end.as()) && + if ((cbegin = begin.as()) && (cend = end.as()) && (cstrides = strides.as())) { CHECK_EQ(cbegin->data->ndim, 1); CHECK_EQ(cend->data->ndim, 1); @@ -2005,9 +1988,7 @@ Expr MakeStridedSlice(Expr data, return Call(op, {data, begin, end, strides}, Attrs(attrs), {}); } -TVM_REGISTER_GLOBAL("relay.op._make.strided_slice") -.set_body_typed(MakeStridedSlice); - +TVM_REGISTER_GLOBAL("relay.op._make.strided_slice").set_body_typed(MakeStridedSlice); RELAY_REGISTER_OP("strided_slice") .describe(R"code(Strided slice of an array. 
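[Reviewer sketch, not part of the patch] The dynamic path above leaves every output extent symbolic and gathers each element from input(indices[i] * strides(i) + begin(i)); the matching shape function then fixes the extents to ceil((end - begin) / stride) per axis. A self-contained NumPy model of that rule for positive strides (dynamic_strided_slice is an illustrative stand-in, not TVM API):

    import numpy as np

    def dynamic_strided_slice(data, begin, end, strides):
        # Extent per axis is ceil((end - begin) / stride); each output element
        # is gathered from index i * stride + begin along every axis.
        out_shape = [-((e - b) // -s) for b, e, s in zip(begin, end, strides)]
        out = np.empty(out_shape, dtype=data.dtype)
        for idx in np.ndindex(*out_shape):
            out[idx] = data[tuple(i * s + b for i, b, s in zip(idx, begin, strides))]
        return out

    x = np.arange(12).reshape(3, 4)
    np.testing.assert_array_equal(dynamic_strided_slice(x, [0, 1], [3, 4], [2, 2]),
                                  x[0:3:2, 1:4:2])

Reading one element each of begin, end, and strides per axis is also why StridedSliceCompute checks that all three have length equal to the data rank when they are non-constant.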
diff --git a/src/relay/op/vision/nms.cc b/src/relay/op/vision/nms.cc index e51432931605..7486db790780 100644 --- a/src/relay/op/vision/nms.cc +++ b/src/relay/op/vision/nms.cc @@ -97,19 +97,9 @@ bool NMSRel(const Array& types, int num_inputs, const Attrs& attrs, return true; } - -Expr MakeNMS(Expr data, - Expr valid_count, - Expr indices, - int max_output_size, - double iou_threshold, - bool force_suppress, - int top_k, - int coord_start, - int score_index, - int id_index, - bool return_indices, - bool invalid_to_bottom) { +Expr MakeNMS(Expr data, Expr valid_count, Expr indices, int max_output_size, double iou_threshold, + bool force_suppress, int top_k, int coord_start, int score_index, int id_index, + bool return_indices, bool invalid_to_bottom) { auto attrs = make_object(); attrs->max_output_size = max_output_size; attrs->iou_threshold = iou_threshold; @@ -124,11 +114,10 @@ Expr MakeNMS(Expr data, return Call(op, {data, valid_count, indices}, Attrs(attrs), {}); } -TVM_REGISTER_GLOBAL("relay.op.vision._make.non_max_suppression") -.set_body_typed(MakeNMS); +TVM_REGISTER_GLOBAL("relay.op.vision._make.non_max_suppression").set_body_typed(MakeNMS); RELAY_REGISTER_OP("vision.non_max_suppression") -.describe(R"doc(Non-maximum suppression. The input boxes should + .describe(R"doc(Non-maximum suppression. The input boxes should be in the format of [class_id, score, left, top, right, bottom] or [score, left, top, right, bottom]. Set id_index to be -1 to ignore class_id axis. diff --git a/src/relay/transforms/combine_parallel_conv2d.cc b/src/relay/transforms/combine_parallel_conv2d.cc index d17e2fcd9655..fa8677372d35 100644 --- a/src/relay/transforms/combine_parallel_conv2d.cc +++ b/src/relay/transforms/combine_parallel_conv2d.cc @@ -71,15 +71,12 @@ class ParallelConv2DCombiner : public ParallelOpCombiner { const auto shape_b = tir::BijectiveLayout(Layout(attrs_b->kernel_layout), kOIHW).ForwardShape(tweight_b->shape); - return eq(attrs_a->strides, attrs_b->strides) && - eq(attrs_a->padding, attrs_b->padding) && - eq(attrs_a->dilation, attrs_b->dilation) && - eq(attrs_a->groups, attrs_b->groups) && + return eq(attrs_a->strides, attrs_b->strides) && eq(attrs_a->padding, attrs_b->padding) && + eq(attrs_a->dilation, attrs_b->dilation) && eq(attrs_a->groups, attrs_b->groups) && eq(attrs_a->data_layout, attrs_b->data_layout) && eq(attrs_a->kernel_layout, attrs_b->kernel_layout) && eq(attrs_a->out_dtype, attrs_b->out_dtype) && - eq(attrs_a->out_layout, attrs_b->out_layout) && - eq(shape_a[2], shape_b[2]) && + eq(attrs_a->out_layout, attrs_b->out_layout) && eq(shape_a[2], shape_b[2]) && eq(shape_a[3], shape_b[3]); } @@ -183,12 +180,10 @@ class ParallelConv2DCombiner : public ParallelOpCombiner { DLContext ctx; ctx.device_type = kDLCPU; ctx.device_id = 0; - auto begin_ndarray = runtime::NDArray::Empty({int64_t(begin.size())}, - DataType::Int(64), ctx); - auto end_ndarray = runtime::NDArray::Empty({int64_t(begin.size())}, - DataType::Int(64), ctx); - auto strides_ndarray = runtime::NDArray::Empty({int64_t(begin.size())}, - DataType::Int(64), ctx); + auto begin_ndarray = runtime::NDArray::Empty({int64_t(begin.size())}, DataType::Int(64), ctx); + auto end_ndarray = runtime::NDArray::Empty({int64_t(begin.size())}, DataType::Int(64), ctx); + auto strides_ndarray = + runtime::NDArray::Empty({int64_t(begin.size())}, DataType::Int(64), ctx); auto* begin_data = static_cast(begin_ndarray->data); auto* end_data = static_cast(end_ndarray->data); @@ -200,11 +195,8 @@ class ParallelConv2DCombiner : public 
ParallelOpCombiner { strides_data[i] = 1; } - auto slice = MakeStridedSlice(data, - Constant(begin_ndarray), - Constant(end_ndarray), - Constant(strides_ndarray), - false); + auto slice = MakeStridedSlice(data, Constant(begin_ndarray), Constant(end_ndarray), + Constant(strides_ndarray), false); subst_map->insert({GetRef(branch[depth]), slice}); } } diff --git a/topi/include/topi/transform.h b/topi/include/topi/transform.h index ab5d33b81665..2b26f75d09e3 100644 --- a/topi/include/topi/transform.h +++ b/topi/include/topi/transform.h @@ -520,6 +520,7 @@ inline Array split(const Tensor& x, Array split_indices, int ax * \param begin The indices to begin with in the slicing * \param end Indicies indicating end of the slice * \param strides Specifies the stride values, it can be negative + * \param ignore_end Specifies whether to ignore input end * in that case, the input tensor will be reversed in that particular axis * \param name The name of the operation * \param tag The tag to mark the operation From 5c5bc777fa330a716f5c28c50ee5143e32e2a469 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Wed, 20 May 2020 03:57:38 +0800 Subject: [PATCH 13/22] remove empty lines --- python/tvm/relay/frontend/tensorflow.py | 2 +- .../tvm/relay/frontend/tensorflow_parser.py | 4 +-- python/tvm/relay/op/vision/_vision.py | 2 -- python/tvm/relay/testing/tf.py | 2 +- .../frontend/tensorflow/test_control_flow.py | 26 +++++++++---------- .../frontend/tensorflow/test_debugging.py | 5 ---- .../frontend/tensorflow/test_forward.py | 2 +- tests/python/frontend/tflite/test_forward.py | 2 +- tests/python/relay/test_any.py | 1 - tests/python/relay/test_op_level4.py | 2 -- .../test_pass_combine_parallel_conv2d.py | 7 +++-- tutorials/frontend/from_tensorflow.py | 4 +-- 12 files changed, 24 insertions(+), 35 deletions(-) diff --git a/python/tvm/relay/frontend/tensorflow.py b/python/tvm/relay/frontend/tensorflow.py index a830018c8a9c..34fc4893773e 100644 --- a/python/tvm/relay/frontend/tensorflow.py +++ b/python/tvm/relay/frontend/tensorflow.py @@ -2530,7 +2530,7 @@ class LoopBound(ExprVisitor): .. code-block:: python i = tf.constant(0) - data = tf.placeholder(tf.float32, shape=(1024, 1024)) + data = tf.compat.v1.placeholder(tf.float32, shape=(1024, 1024)) slice = tf.strided_slice(data, 0, 512) def c(i): return tf.less(i, 10) def b(i): return [tf.add(i, 1), tf.add(i, 1) + slice] diff --git a/python/tvm/relay/frontend/tensorflow_parser.py b/python/tvm/relay/frontend/tensorflow_parser.py index 4e0f14c577cb..fdbb8768597f 100644 --- a/python/tvm/relay/frontend/tensorflow_parser.py +++ b/python/tvm/relay/frontend/tensorflow_parser.py @@ -80,14 +80,14 @@ def _get_output_names(self): "required to restore from saved model.") tags = self._get_tag_set() output_names = set() - with tf.compat.v1.Session() as sess: + with tf.Session() as sess: meta_graph_def = tf.saved_model.loader.load(sess, tags, self._model_dir) for sig_def in meta_graph_def.signature_def.values(): for output_tensor in sig_def.outputs.values(): output_names.add(output_tensor.name.replace(":0", "")) - tf.compat.v1.reset_default_graph() + tf.reset_default_graph() return ",".join(output_names) def _load_saved_model(self): diff --git a/python/tvm/relay/op/vision/_vision.py b/python/tvm/relay/op/vision/_vision.py index 094671c74284..f6c4f811f13d 100644 --- a/python/tvm/relay/op/vision/_vision.py +++ b/python/tvm/relay/op/vision/_vision.py @@ -19,9 +19,7 @@ from __future__ import absolute_import import topi - from tvm.te.hybrid import script - from .. import op as reg from .. 
import strategy from ..op import OpPattern diff --git a/python/tvm/relay/testing/tf.py b/python/tvm/relay/testing/tf.py index 567724d9d251..dc7937c0b346 100644 --- a/python/tvm/relay/testing/tf.py +++ b/python/tvm/relay/testing/tf.py @@ -77,7 +77,7 @@ def AddShapesToGraphDef(session, out_node): Parameters ---------- - session : tf.compat.v1.Session + session : tf.Session Tensorflow session out_node : String or List Final output node of the graph. diff --git a/tests/python/frontend/tensorflow/test_control_flow.py b/tests/python/frontend/tensorflow/test_control_flow.py index 95d5b797430c..9777a8dc4462 100644 --- a/tests/python/frontend/tensorflow/test_control_flow.py +++ b/tests/python/frontend/tensorflow/test_control_flow.py @@ -53,7 +53,7 @@ def b(i): return tf.add(i, 1) r = tf.while_loop(c, b, [i]) - with tf.compat.v1.Session() as sess: + with tf.Session() as sess: tf_out = sess.run(r) check_equal(graph, tf_out) @@ -70,7 +70,7 @@ def b(i): return tf.add(i, 1) r = tf.while_loop(c, b, [i]) - with tf.compat.v1.Session() as sess: + with tf.Session() as sess: tf_out = sess.run(r) check_equal(graph, tf_out) @@ -89,7 +89,7 @@ def b(i, j): return [tf.add(i, 1), j] i1, i2 = tf.while_loop(c, b, loop_vars=[i0, j0]) i1 += tf.constant(1337) - with tf.compat.v1.Session() as sess: + with tf.Session() as sess: tf_out = sess.run(i1) check_equal(graph, tf_out) @@ -107,7 +107,7 @@ def c(i, j, k): return i < 10 def b(i, j, k): return [i+1, j * k, k + i] r = tf.while_loop(c, b, loop_vars=[i0, j0, k0]) - with tf.compat.v1.Session() as sess: + with tf.Session() as sess: tf_out = sess.run(r) check_equal(graph, tf_out) @@ -127,7 +127,7 @@ def c(i, j, k): return \ def b(i, j, k): return [i+j, j+k, k+1] r = tf.while_loop(c, b, loop_vars=[i, j, k]) - with tf.compat.v1.Session() as sess: + with tf.Session() as sess: tf_out = sess.run(r) check_equal(graph, tf_out) @@ -147,7 +147,7 @@ def condition(x): return tf.reduce_sum(x) < 100 x = tf.constant(0, shape=[2, 2]) r = tf.while_loop(condition, body, [x]) - with tf.compat.v1.Session() as sess: + with tf.Session() as sess: tf_out = sess.run(r) check_equal(graph, tf_out) @@ -170,7 +170,7 @@ def condition(x): x = tf.constant(3) r = tf.while_loop(condition, body, loop_vars=[x]) - with tf.compat.v1.Session() as sess: + with tf.Session() as sess: tf_out = sess.run(r) check_equal(graph, tf_out) @@ -189,7 +189,7 @@ def f2(): return tf.add(4, 23) r = tf.cond(tf.less(i, j), f1, f2) - with tf.compat.v1.Session(graph=graph) as sess: + with tf.Session(graph=graph) as sess: tf_out = sess.run(r) check_equal(graph, tf_out) @@ -204,7 +204,7 @@ def test_multiple_cond_vars(): r = tf.cond(tf.less(tf.add(x1, x2), 10), lambda: tf.add(10, 2), lambda: tf.square(5)) - with tf.compat.v1.Session() as sess: + with tf.Session() as sess: tf_out = sess.run(r) check_equal(graph, tf_out) @@ -224,7 +224,7 @@ def fn2(x, y): k = tf.constant(3) r = tf.cond(tf.less(i, j), lambda: fn1(i, k), lambda: fn2(j, k)) - with tf.compat.v1.Session() as sess: + with tf.Session() as sess: tf_out = sess.run(r, feed_dict={i: 1, j: 2, k: 3}) check_equal(graph, tf_out) @@ -252,7 +252,7 @@ def fn2(a, b): pred = tf.less(x, y) r = tf.cond(pred, lambda: fn1(x, y), lambda: fn2(y, z)) - with tf.compat.v1.Session() as sess: + with tf.Session() as sess: tf_out = sess.run(r, feed_dict={x: 1, y: 2, z: 3, pred: True}) check_equal(graph, tf_out) @@ -279,7 +279,7 @@ def fn2(a, b): pred = tf.less(x, y) r = tf.cond(pred, lambda: fn1(x, y), lambda: fn2(y, z)) - with tf.compat.v1.Session() as sess: + with tf.Session() as sess: tf_out = 
sess.run(r, feed_dict={x: 1, y: 2, z: 3, pred: True}) check_equal(graph, tf_out) @@ -300,7 +300,7 @@ def condition(x): return tf.less(x, 100) r = tf.while_loop(condition, body, loop_vars=[x]) - with tf.compat.v1.Session() as sess: + with tf.Session() as sess: tf_out = sess.run(r) check_equal(graph, tf_out) diff --git a/tests/python/frontend/tensorflow/test_debugging.py b/tests/python/frontend/tensorflow/test_debugging.py index 8dac612b4879..a6df6ffb63a1 100644 --- a/tests/python/frontend/tensorflow/test_debugging.py +++ b/tests/python/frontend/tensorflow/test_debugging.py @@ -17,7 +17,6 @@ """Unit tests for converting TensorFlow debugging ops to Relay.""" try: import tensorflow.compat.v1 as tf - tf.disable_v2_behavior() except ImportError: import tensorflow as tf @@ -25,7 +24,6 @@ from tvm import relay from tvm.relay.frontend.tensorflow import from_tensorflow - def run_relay(graph, shape_dict=None, *vars): mod, params = from_tensorflow( graph.as_graph_def(add_shapes=True), @@ -33,7 +31,6 @@ def run_relay(graph, shape_dict=None, *vars): ex = relay.create_executor('debug', mod=mod) return ex.evaluate()(*vars) - def test_assert_true(): g = tf.Graph() shape = (1, 2) @@ -77,7 +74,6 @@ def test_assert_true_var_capture(): np.testing.assert_allclose(True, run_relay(g, None, x_value).asnumpy()) - def test_assert_false(): g = tf.Graph() with g.as_default(): @@ -96,7 +92,6 @@ def test_assert_false(): # argument is false. np.testing.assert_allclose(0, run_relay(g).asnumpy()) - if __name__ == "__main__": test_assert_true() test_assert_true_var_capture() diff --git a/tests/python/frontend/tensorflow/test_forward.py b/tests/python/frontend/tensorflow/test_forward.py index 76d2fe13aa49..b0409d7d0eac 100644 --- a/tests/python/frontend/tensorflow/test_forward.py +++ b/tests/python/frontend/tensorflow/test_forward.py @@ -3347,4 +3347,4 @@ def test_forward_isfinite(): test_read_variable_op() # Sharing params case using Mean ops - test_sharing_node() \ No newline at end of file + test_sharing_node() diff --git a/tests/python/frontend/tflite/test_forward.py b/tests/python/frontend/tflite/test_forward.py index eca5fb7d4b74..7a8437aaedd3 100644 --- a/tests/python/frontend/tflite/test_forward.py +++ b/tests/python/frontend/tflite/test_forward.py @@ -169,7 +169,7 @@ def compare_tflite_with_tvm(in_data, in_name, input_tensors, for i in range(len(in_name)): in_node[i] = in_name[i].split(':')[0] if ":" in in_name[i] else in_name[i] - with tf.compat.v1.Session() as sess: + with tf.Session() as sess: if init_global_variables: sess.run(variables.global_variables_initializer()) # convert to tflite model diff --git a/tests/python/relay/test_any.py b/tests/python/relay/test_any.py index e5a7521ce721..168db3768ea0 100644 --- a/tests/python/relay/test_any.py +++ b/tests/python/relay/test_any.py @@ -672,7 +672,6 @@ def test_any_strided_slice(): verify_any_strided_slice(any_dims(4), (4,), (4,), (4,), (40, 50, 60, 70)) verify_any_strided_slice(any_dims(4), (4,), (4,), (4,), (40, 50, 60, 70), ignore_end=True) - def test_recursive_concat(): """ fn @concat_loop(%i: int32, %st: (any, 1)) -> (any, 1) { diff --git a/tests/python/relay/test_op_level4.py b/tests/python/relay/test_op_level4.py index 4b5e19223dca..36dc91c53030 100644 --- a/tests/python/relay/test_op_level4.py +++ b/tests/python/relay/test_op_level4.py @@ -323,7 +323,6 @@ def verify(dshape, begin, end, strides, output, assert "begin=" in text assert "end=" in text - if output: assert func.body.checked_type == relay.ty.TensorType(output, "float32") @@ -400,4 +399,3 @@ def 
verify(dshape, begin, end, strides, vshape, test_ref=True): test_where() test_reduce_functions() test_mean_var_std() - diff --git a/tests/python/relay/test_pass_combine_parallel_conv2d.py b/tests/python/relay/test_pass_combine_parallel_conv2d.py index c32e9ea39ab3..4db643e2b6f8 100644 --- a/tests/python/relay/test_pass_combine_parallel_conv2d.py +++ b/tests/python/relay/test_pass_combine_parallel_conv2d.py @@ -17,7 +17,6 @@ import tvm from tvm import relay from tvm.relay import transform -import numpy as np def run_combine_parallel(expr, min_num_branches=3): @@ -132,7 +131,7 @@ def check(x_shape, channels1, channels2): transform.CombineParallelConv2D(min_num_branches=2)) y_expected = expected(x, w1, w2, scale1, scale2, bias, channels1, channels2) y_expected = run_opt_pass(y_expected, transform.InferType()) - tvm.ir.structural_equal(y, y_expected, map_free_vars=True) + assert tvm.ir.structural_equal(y, y_expected, map_free_vars=True) check((1, 4, 16, 16), 4, 8) @@ -177,7 +176,7 @@ def check(x_shape, channels1, channels2): transform.CombineParallelConv2D(min_num_branches=2)) y_expected = expected(x, w1, w2, scale1, scale2, channels1, channels2) y_expected = run_opt_pass(y_expected, transform.InferType()) - tvm.ir.structural_equal(y, y_expected, map_free_vars=True) + assert tvm.ir.structural_equal(y, y_expected, map_free_vars=True) check((1, 4, 16, 16), 4, 8) @@ -219,7 +218,7 @@ def check(x_shape, repeat): transform.CombineParallelConv2D(min_num_branches=2)) y_expected = expected(x, w, out_c, repeat) y_expected = run_opt_pass(y_expected, transform.InferType()) - tvm.ir.structural_equal(y, y_expected, map_free_vars=True) + assert tvm.ir.structural_equal(y, y_expected, map_free_vars=True) check((1, 4, 16, 16), 4) diff --git a/tutorials/frontend/from_tensorflow.py b/tutorials/frontend/from_tensorflow.py index 1b97c442dbca..0ebd733ef9aa 100644 --- a/tutorials/frontend/from_tensorflow.py +++ b/tutorials/frontend/from_tensorflow.py @@ -101,7 +101,7 @@ # Call the utility to import the graph definition into default graph. graph_def = tf_testing.ProcessGraphDefParam(graph_def) # Add shapes to the graph. - with tf.compat.v1.Session() as sess: + with tf_compat_v1.Session() as sess: graph_def = tf_testing.AddShapesToGraphDef(sess, 'softmax') ###################################################################### @@ -218,7 +218,7 @@ def run_inference_on_image(image): # Creates graph from saved GraphDef. 
create_graph() - with tf.compat.v1.Session() as sess: + with tf_compat_v1.Session() as sess: softmax_tensor = sess.graph.get_tensor_by_name('softmax:0') predictions = sess.run(softmax_tensor, {'DecodeJpeg/contents:0': image_data}) From cbdca347899137d994853668e4da6b8df503f3c4 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Wed, 27 May 2020 01:03:29 +0800 Subject: [PATCH 14/22] partial ignore for end of strided_slice --- python/tvm/relay/_parser.py | 2 +- python/tvm/relay/op/_transform.py | 37 +++++++++- src/relay/analysis/util.cc | 7 ++ src/relay/op/tensor/transform.cc | 69 ++----------------- .../transforms/combine_parallel_conv2d.cc | 4 +- tests/python/relay/test_any.py | 43 ++++++++---- tests/python/relay/test_op_level4.py | 43 +++++++----- .../test_pass_combine_parallel_conv2d.py | 45 +++++++----- topi/include/topi/transform.h | 2 +- .../topi/testing/strided_slice_python.py | 13 ++-- 10 files changed, 141 insertions(+), 124 deletions(-) diff --git a/python/tvm/relay/_parser.py b/python/tvm/relay/_parser.py index 49f2d4d51321..9de1a1fcb874 100644 --- a/python/tvm/relay/_parser.py +++ b/python/tvm/relay/_parser.py @@ -114,7 +114,7 @@ def convert(self, v): def __call__(self, args, attrs, type_args): if attrs is None: attrs = {} - if self.operator is op.reshape: + if self.operator in (op.reshape, op.strided_slice): x = self.operator(*args) elif self.operator in (op.zeros, op.ones, op.full, op.broadcast_to): x = self.operator(*args, dtype=attrs["dtype"]) diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index 40fa1caf8943..4d665fab5eb1 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -99,10 +99,14 @@ def _arange_shape_func(start, stop, step): @_reg.register_shape_func("arange", True) def arange_shape_func(attrs, inputs, _): + """ + Shape func for arange + """ return [_arange_shape_func(*inputs)] @script -def _strided_slice_shape_func(data, begin, end, strides, ignore_end): +def _strided_slice_shape_func_input_data(data, begin, end, strides, + ignore_end): ndim = len(data.shape) out = output_tensor((ndim,), "int64") for i in const_range(ndim): @@ -119,10 +123,37 @@ def _strided_slice_shape_func(data, begin, end, strides, ignore_end): out[i] = int64(ceil_div((int64(cend) - int64(cbegin)), int64(cstride))) return out +@script +def _strided_slice_shape_func_input_shape(data_shape, begin, end, strides, ignore_end): + ndim = data_shape.shape[0] + assert ndim == 2, "only rank-2 data is supported by this shape func for now" + out = output_tensor((ndim,), "int64") + for i in const_range(ndim): + cbegin = int64(0) + cend = int64(data_shape[i]) + cstride = int64(1) + if len(begin) > i: + cbegin = int64(begin[i]) + if len(end) > i: + cend = int64(end[i]) + if ignore_end != 0 and len(strides) > i: + cstride = int64(strides[i]) + assert cstride != 0, "Strides can't be zero."
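+ # Each output extent is ceil((cend - cbegin) / cstride); e.g. with
+ # begin=0, end=5 and stride=2 the slice covers indices 0, 2, 4,
+ # so ceil_div(5, 2) = 3 elements.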
+ out[i] = int64(ceil_div((int64(cend) - int64(cbegin)), int64(cstride))) + return out + + @_reg.register_shape_func("strided_slice", True) def strided_slice_shape_func(attrs, inputs, _): - ignore_end = attrs.ignore_end - return [_strided_slice_shape_func(*inputs, convert(get_const_int(ignore_end)))] + """ + Shape func for strided_slice + """ + ignore_end = convert(get_const_int(attrs.ignore_end)) + # data independent if begin, end and strides exist + if attrs.begin and attrs.end and attrs.strides: + return [_strided_slice_shape_func_input_shape(inputs[0], attrs.begin, attrs.end, + attrs.strides, ignore_end)] + return [_strided_slice_shape_func_input_data(*inputs, ignore_end)] @script def _concatenate_shape_func(inputs, axis): diff --git a/src/relay/analysis/util.cc b/src/relay/analysis/util.cc index 2853165df4ca..1d9f6a1d7181 100644 --- a/src/relay/analysis/util.cc +++ b/src/relay/analysis/util.cc @@ -458,6 +458,13 @@ bool IsDataDependant(const CallNode* call) { return false; } } + } else if (op->name == "strided_slice") { + if (const auto* attrs = call->attrs.as()) { + if (attrs->begin && attrs->end && attrs->strides) { + // not data dependant if begin, end and strides exist + return false; + } + } } return tshape_data_dependant[op]; diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index 3597e0235eb5..042541b12b54 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -1674,66 +1674,6 @@ Array GetIntArray(Array arr) { // strided_slice TVM_REGISTER_NODE_TYPE(StridedSliceAttrs); -int64_t* ToVector(const runtime::NDArray& array) { - size_t len = array.Shape().front(); - int64_t* rel_vec = new int64_t[len]; - if (array->dtype.code == kDLInt) { - if (array->dtype.bits == 8) { - int8_t* init_array = reinterpret_cast(array->data); - for (size_t i = 0; i < len; ++i) { - rel_vec[i] = int64_t(init_array[i]); - } - return rel_vec; - } else if (array->dtype.bits == 16) { - int16_t* init_array = reinterpret_cast(array->data); - for (size_t i = 0; i < len; ++i) { - rel_vec[i] = int64_t(init_array[i]); - } - return rel_vec; - } else if (array->dtype.bits == 32) { - int32_t* init_array = reinterpret_cast(array->data); - for (size_t i = 0; i < len; ++i) { - rel_vec[i] = int64_t(init_array[i]); - } - return rel_vec; - } else if (array->dtype.bits == 64) { - int64_t* init_array = reinterpret_cast(array->data); - for (size_t i = 0; i < len; ++i) { - rel_vec[i] = int64_t(init_array[i]); - } - return rel_vec; - } - } else if (array->dtype.code == kDLUInt) { - if (array->dtype.bits == 8) { - uint8_t* init_array = reinterpret_cast(array->data); - for (size_t i = 0; i < len; ++i) { - rel_vec[i] = int64_t(init_array[i]); - } - return rel_vec; - } else if (array->dtype.bits == 16) { - uint16_t* init_array = reinterpret_cast(array->data); - for (size_t i = 0; i < len; ++i) { - rel_vec[i] = int64_t(init_array[i]); - } - return rel_vec; - } else if (array->dtype.bits == 32) { - uint32_t* init_array = reinterpret_cast(array->data); - for (size_t i = 0; i < len; ++i) { - rel_vec[i] = int64_t(init_array[i]); - } - return rel_vec; - } else if (array->dtype.bits == 64) { - uint64_t* init_array = reinterpret_cast(array->data); - for (size_t i = 0; i < len; ++i) { - rel_vec[i] = int64_t(init_array[i]); - } - return rel_vec; - } - } - LOG(FATAL) << "Unknown data type: " << tvm::runtime::DLDataType2String(array->dtype); - return rel_vec; -} - bool StridedSliceRel(const Array& types, int num_inputs, const Attrs& attrs, const TypeReporter& reporter) { 
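// `types` holds [data, begin, end, strides, result]; begin, end and
// strides are relay inputs rather than static attributes after this
// change, hence the five entries.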
CHECK_EQ(types.size(), 5); @@ -1771,7 +1711,8 @@ bool StridedSliceRel(const Array& types, int num_inputs, const Attrs& attr std::vector end_vec; for (size_t i = 0; i < param->end.value().size(); ++i) { // allow end to be None - if (param->ignore_end || (!param->end.value()[i].defined())) { + if (!param->end.value()[i].defined() || + (param->ignore_end && param->end.value()[i]->value < 0)) { end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); } else { end_vec.push_back(param->end.value()[i]->value); @@ -1894,6 +1835,9 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, } int64_t bg = begin[i].defined() ? begin[i]->value : 0; int64_t ed = end[i].defined() ? end[i]->value : shape[i].as()->value; + if (params->ignore_end && end[i].defined() && end[i]->value < 0) { + ed = shape[i].as()->value; + } if (bg % factor || ed % factor) { // transform to original layout return {{Layout::Undef()}, {Layout::Undef()}}; @@ -1912,7 +1856,6 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, inline te::Tensor DynamicStridedSlice(const te::Tensor& input, const te::Tensor& begin, const te::Tensor& end, const te::Tensor& strides, - const bool& ignore_end, std::string name = "T_strided_slice_dynamic", std::string tag = topi::kInjective) { int64_t src_tensor_dim = input->shape.size(); @@ -1956,7 +1899,7 @@ Array StridedSliceCompute(const Attrs& attrs, const Arrayshape[0].as()->value == attr_size) << "begin, end, and strides are required to have the same length" << " if they are non-constant."; - return Array{DynamicStridedSlice(data, begin, end, strides, param->ignore_end)}; + return Array{DynamicStridedSlice(data, begin, end, strides)}; } } diff --git a/src/relay/transforms/combine_parallel_conv2d.cc b/src/relay/transforms/combine_parallel_conv2d.cc index fa8677372d35..04ed35b709e5 100644 --- a/src/relay/transforms/combine_parallel_conv2d.cc +++ b/src/relay/transforms/combine_parallel_conv2d.cc @@ -172,7 +172,7 @@ class ParallelConv2DCombiner : public ParallelOpCombiner { Array end; for (size_t i = 0; i < channel_pos_; i++) { begin.push_back(0); - end.push_back(channels); + end.push_back(-1); } begin.push_back(index); index += channels; @@ -196,7 +196,7 @@ class ParallelConv2DCombiner : public ParallelOpCombiner { } auto slice = MakeStridedSlice(data, Constant(begin_ndarray), Constant(end_ndarray), - Constant(strides_ndarray), false); + Constant(strides_ndarray), true); subst_map->insert({GetRef(branch[depth]), slice}); } } diff --git a/tests/python/relay/test_any.py b/tests/python/relay/test_any.py index 168db3768ea0..83c6244a7d72 100644 --- a/tests/python/relay/test_any.py +++ b/tests/python/relay/test_any.py @@ -643,27 +643,39 @@ def test_arange_with_dynamic_shape(): result = ex.evaluate()(data) tvm.testing.assert_allclose(result.asnumpy(), np.array(range(10)).astype("int32")+1) -def verify_any_strided_slice(data_shape, begin_shape, end_shape, - strides_shape, data_np_shape, ignore_end=False): +def verify_any_strided_slice(data_shape, begin_shape, end_shape, strides_shape, + data_np_shape, ignore_end=False, const_attrs=False, dtype="int32"): + # Generate random numpy input data + np_data = np.random.uniform(size=data_np_shape).astype('float32') + np_begin = np.random.randint(2, size=begin_shape, dtype=dtype) + np_end = np.random.randint(5, 15, size=end_shape, dtype=dtype) + np_strides = np.random.randint(1, 3, size=strides_shape, dtype=dtype) + # target numpy result + ref_res = topi.testing.strided_slice_python(np_data, np_begin, np_end, np_strides, ignore_end) + + # Relay Module mod = 
tvm.IRModule() data = relay.var('data', shape=data_shape, dtype='float32') - begin = relay.var('begin', shape=begin_shape, dtype="int32") - end = relay.var('end', shape=end_shape, dtype="int32") - strides = relay.var('strides', shape=strides_shape, dtype="int32") - y = relay.strided_slice(data, begin, end, strides, ignore_end) - mod["main"] = relay.Function([data, begin, end, strides], y) - - # Generate random numpy input data - data_np = np.random.uniform(size=data_np_shape).astype('float32') - begin_np = np.random.randint(2, size=begin_shape, dtype="int32") - end_np = np.random.randint(5, 15, size=end_shape, dtype="int32") - strides_np = np.random.randint(1, 3, size=strides_shape, dtype="int32") + if const_attrs: + begin = relay.const(np_begin, dtype) + end = relay.const(np_end, dtype) + strides = relay.const(np_strides, dtype) + args = [data] + np_inputs = [np_data] + else: + begin = relay.var('begin', shape=begin_shape, dtype=dtype) + end = relay.var('end', shape=end_shape, dtype=dtype) + strides = relay.var('strides', shape=strides_shape, dtype=dtype) + args = [data, begin, end, strides] + np_inputs = [np_data, np_begin, np_end, np_strides] - ref_res = topi.testing.strided_slice_python(data_np, begin_np, end_np, strides_np) + y = relay.strided_slice(data, begin=begin, end=end, + strides=strides, ignore_end=ignore_end) + mod["main"] = relay.Function(args, y) for kind in ["debug", "vm"]: ex = relay.create_executor(kind, mod=mod, ctx=tvm.cpu(), target="llvm") - result = ex.evaluate()(data_np, begin_np, end_np, strides_np) + result = ex.evaluate()(*np_inputs) tvm.testing.assert_allclose(result.asnumpy(), ref_res) def test_any_strided_slice(): @@ -671,6 +683,7 @@ def test_any_strided_slice(): verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (23, 29, 41)) verify_any_strided_slice(any_dims(4), (4,), (4,), (4,), (40, 50, 60, 70)) verify_any_strided_slice(any_dims(4), (4,), (4,), (4,), (40, 50, 60, 70), ignore_end=True) + verify_any_strided_slice(any_dims(2), (2,), (2,), (2,), (6, 7)) def test_recursive_concat(): """ diff --git a/tests/python/relay/test_op_level4.py b/tests/python/relay/test_op_level4.py index 36dc91c53030..081236badd02 100644 --- a/tests/python/relay/test_op_level4.py +++ b/tests/python/relay/test_op_level4.py @@ -296,25 +296,34 @@ def test_mean_var_std(): def test_strided_slice(): - def verify(dshape, begin, end, strides, output, - ignore_end=False, test_ref=True, dtype="int32"): + def verify(dshape, begin, end, strides, output, ignore_end=False, + attr_const=True, test_ref=True, dtype="int32"): x = relay.var("x", relay.TensorType(dshape, "float32")) ndim = len(dshape) begin = begin if begin else [0] * ndim end = end if end else list(dshape) - begin_expr = relay.const(begin, dtype=dtype) - end_expr = relay.const(end, dtype=dtype) + + # target numpy result + x_data = np.random.uniform(size=dshape).astype("float32") + ref_res = topi.testing.strided_slice_python( + x_data, begin, end, strides, ignore_end) + + if attr_const: + begin = relay.const(begin, dtype=dtype) + end = relay.const(end, dtype=dtype) + if strides: - strides_expr = relay.const(strides, dtype=dtype) + if attr_const: + strides = relay.const(strides, dtype=dtype) z = relay.strided_slice(x, - begin=begin_expr, - end=end_expr, - strides=strides_expr, + begin=begin, + end=end, + strides=strides, ignore_end=ignore_end) else: z = relay.strided_slice(x, - begin=begin_expr, - end=end_expr, + begin=begin, + end=end, ignore_end=ignore_end) func = relay.Function([x], z) @@ -328,26 +337,26 @@ def verify(dshape, 
begin, end, strides, output, if not test_ref: return - x_data = np.random.uniform(size=dshape).astype("float32") - ref_res = topi.testing.strided_slice_python( - x_data, begin, end, strides) for target, ctx in ctx_list(): intrp = relay.create_executor("graph", ctx=ctx, target=target) op_res = intrp.evaluate(func)(x_data) tvm.testing.assert_allclose(op_res.asnumpy(), ref_res) - verify((1, 224, 224, 3), [0, 20, 20, 0], [1, 140, 140, 3], [1, 1, 1, 1], - (1, 120, 120, 3), dtype="int64") + verify((1, 224, 224, 3), [0, 20, 20, 0], [1, 140, 140, 3], + [1, 1, 1, 1], (1, 120, 120, 3), dtype="int64") verify((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1], (1, 3, 3), dtype="int16") verify((3, 4, 3), [0, 0, 0], [4, -5, 4], [1, -1, 2], (3, 1, 2)) - verify((3, 4, 3), [1, 0, 0], [2, 2, 3], [1, 1, 2], (2, 4, 2), - ignore_end=True, test_ref=False) + verify((3, 4, 3), [0, 0, 0], [4, -5, 4], [1, -1, 2], (3, 1, 2), attr_const=False) verify((3, 4, 3), [1, 1, 0], [4, 4, 3], None, (2, 3, 3)) verify((3, 4, 3), [1, 1, 0], [4, 1000, 3], None, (2, 3, 3)) verify((3, 4, 3), [1, 1, 0], [4, 4], None, (2, 3, 3)) verify((3, 4, 3), [1, 1], [4, 4, 3], None, (2, 3, 3)) verify((3, 4, 3), [1, -1, 0], [4, -5, 3], [2, -1, 1], (1, 4, 3)) verify((3, 4, 3), [1, -1, 0], [2, -3, 3], [1, -1, 1], (1, 2, 3)) + verify((3, 4, 3), [1, 0, 0], [3, -1, 3], [1, 1, 2], + (2, 4, 2), ignore_end=True, test_ref=False) + verify((3, 4, 3), [1, 0, 0], [-1, 2, 3], [1, 1, 2], + (2, 2, 2), ignore_end=True, test_ref=True) def test_strided_set(): def verify(dshape, begin, end, strides, vshape, test_ref=True): diff --git a/tests/python/relay/test_pass_combine_parallel_conv2d.py b/tests/python/relay/test_pass_combine_parallel_conv2d.py index 4db643e2b6f8..28c9655808f5 100644 --- a/tests/python/relay/test_pass_combine_parallel_conv2d.py +++ b/tests/python/relay/test_pass_combine_parallel_conv2d.py @@ -51,17 +51,20 @@ def expected(x, w1, w2, w3, w4, channels1, channels2, channels3, channels4): y = relay.nn.conv2d(x, w, channels=channels1 + channels2 + channels4) y1 = relay.strided_slice(y, begin=relay.const([0, 0], "int64"), - end=relay.const([channels1, channels1], "int64"), - strides=relay.const([1, 1], 'int64')) + end=relay.const([-1, channels1], "int64"), + strides=relay.const([1, 1], 'int64'), + ignore_end=True) y2 = relay.strided_slice(y, begin=relay.const([0, channels1], "int64"), - end=relay.const([channels2, channels1 + channels2], "int64"), - strides=relay.const([1, 1], 'int64')) + end=relay.const([-1, channels1 + channels2], "int64"), + strides=relay.const([1, 1], 'int64'), + ignore_end=True) y3 = relay.nn.conv2d(x, w3) y4 = relay.strided_slice(y, begin=relay.const([0, channels1 + channels2], "int64"), - end=relay.const([channels4, channels1 + channels2 + channels4], "int64"), - strides=relay.const([1, 1], 'int64')) + end=relay.const([-1, channels1 + channels2 + channels4], "int64"), + strides=relay.const([1, 1], 'int64'), + ignore_end=True) y5 = relay.nn.max_pool2d(x) y = relay.Tuple((y1, y2, y3, y4, y5)) return relay.Function(args, y) @@ -108,12 +111,14 @@ def expected(x, w1, w2, scale1, scale2, bias, channels1, channels2): y = relay.nn.relu(y) y1 = relay.strided_slice(y, begin=relay.const([0, 0], "int64"), - end=relay.const([4, channels1], "int64"), - strides=relay.const([1, 1], "int64")) + end=relay.const([-1, channels1], "int64"), + strides=relay.const([1, 1], "int64"), + ignore_end=True) y2 = relay.strided_slice(y, begin=relay.const([0, channels1], "int64"), - end=relay.const([8, channels1 + channels2], "int64"), - strides=relay.const([1, 1], 
"int64")) + end=relay.const([-1, channels1 + channels2], "int64"), + strides=relay.const([1, 1], "int64"), + ignore_end=True) y2 = relay.add(y2, bias) y = relay.Tuple((y1, y2)) return relay.Function(args, y) @@ -153,12 +158,14 @@ def expected(x, w1, w2, scale1, scale2, channels1, channels2): y = relay.nn.conv2d(x, w, channels=channels1 + channels2) y1 = relay.strided_slice(y, begin=relay.const([0, 0], "int64"), - end=relay.const([4, channels1], "int64"), - strides=relay.const([1, 1], "int64")) + end=relay.const([-1, channels1], "int64"), + strides=relay.const([1, 1], "int64"), + ignore_end=True) y2 = relay.strided_slice(y, begin=relay.const([0, channels1], "int64"), - end=relay.const([8, channels1 + channels2], "int64"), - strides=relay.const([1, 1], "int64")) + end=relay.const([-1, channels1 + channels2], "int64"), + strides=relay.const([1, 1], "int64"), + ignore_end=True) y1 = relay.multiply(y1, scale1) y2 = relay.multiply(y2, scale2) y = relay.Tuple((y1, y2)) @@ -199,12 +206,14 @@ def expected(x, w, channels, repeat): y = relay.nn.conv2d(y, w_concat, channels=channels*2) y1 = relay.strided_slice(y, begin=relay.const([0, 0], "int64"), - end=relay.const([2, channels], "int64"), - strides=relay.const([1, 1], "int64")) + end=relay.const([-1, channels], "int64"), + strides=relay.const([1, 1], "int64"), + ignore_end=True) y2 = relay.strided_slice(y, begin=relay.const([0, channels], "int64"), - end=relay.const([2, channels * 2], "int64"), - strides=relay.const([1, 1], "int64")) + end=relay.const([-1, channels * 2], "int64"), + strides=relay.const([1, 1], "int64"), + ignore_end=True) y = relay.concatenate((y1, y2), axis=1) return relay.Function(args, y) diff --git a/topi/include/topi/transform.h b/topi/include/topi/transform.h index 2b26f75d09e3..9d268a9afc74 100644 --- a/topi/include/topi/transform.h +++ b/topi/include/topi/transform.h @@ -560,7 +560,7 @@ inline Tensor strided_slice(const Tensor& x, const Array& begin, const std::vector end_vec; for (size_t i = 0; i < end.size(); ++i) { // allow end to be None - if (ignore_end || (!end[i].defined())) { + if (!end[i].defined() || (ignore_end && end[i]->value < 0)) { end_vec.push_back(stride_vec[i] < 0 ? 
0 : max_range); } else { end_vec.push_back(end[i]->value); diff --git a/topi/python/topi/testing/strided_slice_python.py b/topi/python/topi/testing/strided_slice_python.py index b21c3fb87119..62aface75057 100644 --- a/topi/python/topi/testing/strided_slice_python.py +++ b/topi/python/topi/testing/strided_slice_python.py @@ -45,10 +45,15 @@ def strided_slice_python(data, begin, end, strides, ignore_end=False): strides = [] if strides is None else strides slices = [] for i in range(len(data.shape)): - slices.append(slice( - begin[i] if i < len(begin) else None, - end[i] if i < len(end) and not ignore_end else None, - strides[i] if i < len(strides) else None)) + bg = begin[i] if i < len(begin) else None + if i >= len(end) or (ignore_end and end[i] < 0): + ed = None + else: + ed = end[i] + sd = strides[i] if i < len(strides) else None + slices.append(slice(bg, + ed, + sd)) return data[tuple(slices)] From eb10cbe86ea2bf1bf0e4e9f8a8fa5b04fd1de016 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Wed, 27 May 2020 01:36:35 +0800 Subject: [PATCH 15/22] pylint --- include/tvm/relay/attrs/transform.h | 2 +- python/tvm/relay/op/transform.py | 3 ++- src/relay/op/tensor/transform.cc | 2 +- tests/python/relay/test_any.py | 6 ++++-- topi/include/topi/transform.h | 2 +- topi/python/topi/testing/strided_slice_python.py | 16 ++++++++-------- topi/python/topi/transform.py | 2 +- 7 files changed, 18 insertions(+), 15 deletions(-) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index 0485dbf914fb..c76b867f3b13 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -221,7 +221,7 @@ struct StridedSliceAttrs : public tvm::AttrsNode { TVM_ATTR_FIELD(strides).describe("Stride values of the slice"); TVM_ATTR_FIELD(ignore_end) .set_default(false) - .describe("Whether to ignore the input end and infer value of end from input data"); + .describe("Whether to ignore the negative elements in input end."); } }; diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index 6033aae3e960..8e10e7c7775b 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py @@ -630,7 +630,8 @@ def strided_slice(data, begin, end, strides=None, ignore_end=False): the input tensor will be reversed in that particular axis. ignore_end: boolean, optional - Whether to ignore input end. + Whether to ignore the negative elements in input end, + will slice to the end of data for the ignored element. 
Returns ------- diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index 042541b12b54..fe320a5e0d2b 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -1962,7 +1962,7 @@ Examples:: .add_argument("begin", "Tensor", "The indices to begin with in the slicing.") .add_argument("end", "Tensor", "Indices indicating end of the slice.") .add_argument("strides", "Tensor", "The stride values.") - .add_argument("ignore_end", "Tensor", "Whether to ignore end.") + .add_argument("ignore_end", "Tensor", "Whether to ignore negative elements of input end.") .set_support_level(4) .set_attrs_type() .add_type_rel("StridedSlice", StridedSliceRel) diff --git a/tests/python/relay/test_any.py b/tests/python/relay/test_any.py index 83c6244a7d72..4c131715d532 100644 --- a/tests/python/relay/test_any.py +++ b/tests/python/relay/test_any.py @@ -678,12 +678,14 @@ def verify_any_strided_slice(data_shape, begin_shape, end_shape, strides_shape, result = ex.evaluate()(*np_inputs) tvm.testing.assert_allclose(result.asnumpy(), ref_res) + def test_any_strided_slice(): + verify_any_strided_slice(any_dims(2), (2,), (2,), (2,), (15, 21)) verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (15, 17, 21)) verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (23, 29, 41)) verify_any_strided_slice(any_dims(4), (4,), (4,), (4,), (40, 50, 60, 70)) verify_any_strided_slice(any_dims(4), (4,), (4,), (4,), (40, 50, 60, 70), ignore_end=True) - verify_any_strided_slice(any_dims(2), (2,), (2,), (2,), (6, 7)) + def test_recursive_concat(): """ @@ -810,7 +812,7 @@ def test_mixed_input_type(): ex = relay.create_executor(kind, mod=mod, ctx=tvm.cpu(), target="llvm") result = ex.evaluate()([[data_np0, data_np0], data_np0], data_np1) assert result.asnumpy().shape == ref_out_shape, \ - "Shape mismatch: expect %s but got %s." % (str(ref_out_shape), str(ret.asnumpy().shape)) + "Shape mismatch: expect %s but got %s." % (str(ref_out_shape), str(result.asnumpy().shape)) if __name__ == "__main__": test_any_full() diff --git a/topi/include/topi/transform.h b/topi/include/topi/transform.h index 9d268a9afc74..8201a50335ef 100644 --- a/topi/include/topi/transform.h +++ b/topi/include/topi/transform.h @@ -520,7 +520,7 @@ inline Array split(const Tensor& x, Array split_indices, int ax * \param begin The indices to begin with in the slicing * \param end Indicies indicating end of the slice * \param strides Specifies the stride values, it can be negative - * \param ignore_end Specifies whether to ignore input end + * \param ignore_end Specifies whether to ignore negative elements of input end * in that case, the input tensor will be reversed in that particular axis * \param name The name of the operation * \param tag The tag to mark the operation diff --git a/topi/python/topi/testing/strided_slice_python.py b/topi/python/topi/testing/strided_slice_python.py index 62aface75057..c60f05fef66d 100644 --- a/topi/python/topi/testing/strided_slice_python.py +++ b/topi/python/topi/testing/strided_slice_python.py @@ -35,7 +35,7 @@ def strided_slice_python(data, begin, end, strides, ignore_end=False): The stride of each slice. 
ignore_end : boolean - Whether to ignore input end + Whether to ignore negative elements of input end Returns ------- @@ -45,15 +45,15 @@ def strided_slice_python(data, begin, end, strides, ignore_end=False): strides = [] if strides is None else strides slices = [] for i in range(len(data.shape)): - bg = begin[i] if i < len(begin) else None + new_begin = begin[i] if i < len(begin) else None if i >= len(end) or (ignore_end and end[i] < 0): - ed = None + new_end = None else: - ed = end[i] - sd = strides[i] if i < len(strides) else None - slices.append(slice(bg, - ed, - sd)) + new_end = end[i] + new_stride = strides[i] if i < len(strides) else None + slices.append(slice(new_begin, + new_end, + new_stride)) return data[tuple(slices)] diff --git a/topi/python/topi/transform.py b/topi/python/topi/transform.py index fe7ceb6015db..3e479db1c02b 100644 --- a/topi/python/topi/transform.py +++ b/topi/python/topi/transform.py @@ -151,7 +151,7 @@ def strided_slice(a, begin, end, strides=None, ignore_end=False): in that particular axis. ignore_end: boolean, optional - Specifies whether to ignore input end. + Specifies whether to ignore negative elements of input end. Returns ------- From f422d8e8605f6c8222f2682b5910d3c35bddabcc Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Thu, 28 May 2020 01:24:38 +0800 Subject: [PATCH 16/22] add out_indices for gpu get_valid_counts --- python/tvm/relay/frontend/tensorflow.py | 1 + python/tvm/relay/frontend/tflite.py | 2 +- .../frontend/tensorflow/test_forward.py | 3 ++- tests/python/relay/test_op_level5.py | 6 ++++-- .../python/relay/test_pass_alter_op_layout.py | 2 +- topi/python/topi/cuda/conv2d_alter_op.py | 3 ++- topi/python/topi/cuda/nms.py | 19 +++++++++++++------ 7 files changed, 24 insertions(+), 12 deletions(-) diff --git a/python/tvm/relay/frontend/tensorflow.py b/python/tvm/relay/frontend/tensorflow.py index 34fc4893773e..d4c658ab44e4 100644 --- a/python/tvm/relay/frontend/tensorflow.py +++ b/python/tvm/relay/frontend/tensorflow.py @@ -612,6 +612,7 @@ def _impl(inputs, attr, params, mod): out = _op.transpose(out, axes=(0, 2, 3, 4, 1)) return out + return _impl def _nms(): def _impl(inputs, attr, params, mod): diff --git a/python/tvm/relay/frontend/tflite.py b/python/tvm/relay/frontend/tflite.py index cb10ce5ee924..7868aefc2996 100644 --- a/python/tvm/relay/frontend/tflite.py +++ b/python/tvm/relay/frontend/tflite.py @@ -2439,7 +2439,7 @@ def convert_detection_postprocess(self, op): ret = _op.vision.multibox_transform_loc(cls_pred, loc_prob, anchor_expr, **multibox_transform_loc_attrs) - ret = _op.vision.non_max_suppression(ret[0], ret[1], **non_max_suppression_attrs) + ret = _op.vision.non_max_suppression(ret[0], ret[1], ret[1], **non_max_suppression_attrs) ret = _op.vision.get_valid_counts(ret, 0) valid_count = ret[0] # keep only the top 'max_detections' rows diff --git a/tests/python/frontend/tensorflow/test_forward.py b/tests/python/frontend/tensorflow/test_forward.py index b0409d7d0eac..403df7018c6f 100644 --- a/tests/python/frontend/tensorflow/test_forward.py +++ b/tests/python/frontend/tensorflow/test_forward.py @@ -469,7 +469,7 @@ def test_forward_convolution(): ####################################################################### # Convolution3D -# ----------- +# ------------- def _test_convolution3d(opname, tensor_in_sizes, filter_in_sizes, @@ -3313,6 +3313,7 @@ def test_forward_isfinite(): # NN test_forward_convolution() + test_forward_convolution3d() test_forward_pooling() test_forward_concat_v2() test_forward_lrn() diff --git 
a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py index c20a66729712..40842ebcfde2 100644 --- a/tests/python/relay/test_op_level5.py +++ b/tests/python/relay/test_op_level5.py @@ -317,11 +317,13 @@ def verify_nms(x0_data, x1_data, x2_data, dshape, ref_res, ref_indices_res, intrp1 = relay.create_executor("graph", ctx=ctx, target=target) op_res1 = intrp1.evaluate(func)(x0_data, x1_data, x2_data) tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5) - op_indices_res1 = intrp1.evaluate(func_indices)(x0_data, x1_data, x2_data) - tvm.testing.assert_allclose(op_indices_res1[0].asnumpy(), ref_indices_res, rtol=1e-5) intrp2 = relay.create_executor("debug", ctx=ctx, target=target) op_res2 = intrp2.evaluate(func)(x0_data, x1_data, x2_data) tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5) + if target == 'cuda': + return + op_indices_res1 = intrp1.evaluate(func_indices)(x0_data, x1_data, x2_data) + tvm.testing.assert_allclose(op_indices_res1[0].asnumpy(), ref_indices_res, rtol=1e-5) op_indices_res2 = intrp2.evaluate(func_indices)(x0_data, x1_data, x2_data) tvm.testing.assert_allclose(op_indices_res2[0].asnumpy(), ref_indices_res, rtol=1e-5) diff --git a/tests/python/relay/test_pass_alter_op_layout.py b/tests/python/relay/test_pass_alter_op_layout.py index ee4a27c20316..bbe10c773ff9 100644 --- a/tests/python/relay/test_pass_alter_op_layout.py +++ b/tests/python/relay/test_pass_alter_op_layout.py @@ -644,7 +644,7 @@ def expected(): y = relay.strided_slice(y, begin=relay.const([0, 4], "int32"), - end=relay.const([1, 21], "int32"), # [1, 8] + end=relay.const([1, 21], "int32"), strides=relay.const([1, 1], "int32")) y = relay.layout_transform(y, "NCHW4c", "NCHW") diff --git a/topi/python/topi/cuda/conv2d_alter_op.py b/topi/python/topi/cuda/conv2d_alter_op.py index c1e207cc2938..c2a19054434e 100644 --- a/topi/python/topi/cuda/conv2d_alter_op.py +++ b/topi/python/topi/cuda/conv2d_alter_op.py @@ -246,7 +246,8 @@ def _conv2d_legalize(attrs, inputs, arg_types): new_attrs['channels'] = new_out_channel out = tvm.relay.nn.conv2d(data, kernel, **new_attrs) original_out_shape = [x.value for x in output_tensor.shape] - out = relay.strided_slice(out, begin=(0, 0, 0, 0), end=original_out_shape) + out = relay.strided_slice(out, begin=relay.const([0, 0, 0, 0]), + end=relay.const(original_out_shape)) else: out = relay.nn.conv2d(data, kernel, **new_attrs) return out diff --git a/topi/python/topi/cuda/nms.py b/topi/python/topi/cuda/nms.py index 2a206f6cbe68..c72cdad0454c 100644 --- a/topi/python/topi/cuda/nms.py +++ b/topi/python/topi/cuda/nms.py @@ -43,7 +43,8 @@ def atomic_add(x, y): return tvm.tir.call_pure_intrin(y.dtype, "atomic_add", x, y) -def get_valid_counts_ir(data, valid_count, out, score_threshold, id_index, score_index): +def get_valid_counts_ir(data, valid_count, out, out_indices, + score_threshold, id_index, score_index): """Low level IR to get valid count of bounding boxes given a score threshold. Also prepares to move valid boxes to the top of input data. 
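For reference, the semantics being implemented here can be sketched in plain NumPy. The helper below is illustrative only (it is not part of the patch, and it ignores id_index for brevity); it shows the compacted layout the operator ultimately produces, with invalid slots filled with -1:

    import numpy as np

    def get_valid_counts_ref(data, score_threshold=0.0, score_index=1):
        # data: (batch, num_anchors, elem_length)
        batch = data.shape[0]
        valid_count = np.zeros(batch, dtype="int32")
        out = np.full_like(data, -1.0)
        out_indices = np.full(data.shape[:2], -1, dtype="int32")
        for b in range(batch):
            # a box is valid when its score exceeds the threshold
            keep = np.where(data[b, :, score_index] > score_threshold)[0]
            valid_count[b] = len(keep)
            out[b, :len(keep)] = data[b, keep]
            out_indices[b, :len(keep)] = keep.astype("int32")
        return valid_count, out, out_indices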
@@ -83,6 +84,7 @@ def get_valid_counts_ir(data, valid_count, out, score_threshold, id_index, score valid_count = ib.buffer_ptr(valid_count) out = ib.buffer_ptr(out) + out_indices = ib.buffer_ptr(out_indices) atomic_add_return = ib.allocate( valid_count.dtype, (1,), name='atomic_add_return', scope='local') one_count = tvm.tir.const(1, dtype=valid_count.dtype) @@ -115,9 +117,11 @@ def get_valid_counts_ir(data, valid_count, out, score_threshold, id_index, score valid_count[i]), one_count) with ib.for_range(0, elem_length) as k: out[tid * elem_length + k] = data[tid * elem_length + k] + out_indices[tid + k] = tid + k with ib.else_scope(): with ib.for_range(0, elem_length) as k: out[tid * elem_length + k] = -one + out_indices[tid + k] = -one_count return ib.get() @@ -149,24 +153,27 @@ def get_valid_counts(data, score_threshold=0, id_index=0, score_index=1): Rearranged data tensor. """ batch_size = data.shape[0] + num_anchors = data.shape[1] data_buf = tvm.tir.decl_buffer( data.shape, data.dtype, "data_buf", data_alignment=8) valid_count_buf = tvm.tir.decl_buffer( (batch_size,), "int32", "valid_count_buf", data_alignment=8) out_buf = tvm.tir.decl_buffer( data.shape, data.dtype, "out_buf", data_alignment=8) + out_indices_buf = tvm.tir.decl_buffer( + (batch_size, num_anchors), "int32", "out_buf", data_alignment=8) - valid_count, out = \ - te.extern([(batch_size,), data.shape], [data], + valid_count, out, out_indices = \ + te.extern([(batch_size,), data.shape, (batch_size, num_anchors)], [data], lambda ins, outs: get_valid_counts_ir( - ins[0], outs[0], outs[1], score_threshold, id_index, score_index), + ins[0], outs[0], outs[1], outs[2], score_threshold, id_index, score_index), dtype=["int32", data.dtype], in_buffers=[data_buf], - out_buffers=[valid_count_buf, out_buf], + out_buffers=[valid_count_buf, out_buf, out_indices_buf], name="get_valid_counts", tag="get_valid_counts_gpu") - return [valid_count, out] + return [valid_count, out, out_indices] def nms_ir(data, sorted_index, valid_count, out, box_indices, From 6caa7562a6ee52930841936cc848cb3267c0e2db Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Fri, 29 May 2020 00:22:13 +0800 Subject: [PATCH 17/22] change to slice_mode --- include/tvm/relay/attrs/transform.h | 4 +- python/tvm/relay/op/_transform.py | 40 ++++++++++++++----- python/tvm/relay/op/transform.py | 6 +-- src/relay/op/tensor/transform.cc | 32 +++++++++++---- src/relay/transforms/pattern_util.h | 2 +- tests/python/relay/test_any.py | 8 ++-- tests/python/relay/test_op_level4.py | 12 +++--- .../test_pass_combine_parallel_conv2d.py | 18 ++++----- topi/include/topi/transform.h | 15 +++++-- .../topi/testing/strided_slice_python.py | 17 ++++++-- topi/python/topi/transform.py | 6 +-- 11 files changed, 106 insertions(+), 54 deletions(-) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index c76b867f3b13..42e894bc0c8b 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -213,13 +213,13 @@ struct StridedSliceAttrs : public tvm::AttrsNode { Optional> begin; Optional> end; Optional> strides; - bool ignore_end; + bool slice_mode; TVM_DECLARE_ATTRS(StridedSliceAttrs, "relay.attrs.StridedSliceAttrs") { TVM_ATTR_FIELD(begin).describe("Indices for begin of slice, begin index is also inclusive"); TVM_ATTR_FIELD(end).describe("Indices for end of slice, end index is exclusive"); TVM_ATTR_FIELD(strides).describe("Stride values of the slice"); - TVM_ATTR_FIELD(ignore_end) + TVM_ATTR_FIELD(slice_mode) .set_default(false) 
.describe("Whether to ignore the negative elements in input end."); } diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index 4d665fab5eb1..3a22c16be3fa 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -106,25 +106,34 @@ def arange_shape_func(attrs, inputs, _): @script def _strided_slice_shape_func_input_data(data, begin, end, strides, - ignore_end): + slice_mode): ndim = len(data.shape) out = output_tensor((ndim,), "int64") for i in const_range(ndim): cbegin = 0 cend = data.shape[i] cstride = 1 + if strides.shape[0] > i: + cstride = strides[i] if begin.shape[0] > i: cbegin = begin[i] - if ignore_end != 0 or end.shape[0] > i: + if end.shape[0] <= i: + cend = data.shape[i] + elif slice_mode != 0: + if end[i] < 0: + cend = data.shape[i] + elif cstride < 0: + cend = cbegin - end[i] + else: + cend = cbegin + end[i] + else: cend = end[i] - if strides.shape[0] > i: - cstride = strides[i] assert cstride != 0, "Strides can't be zero." out[i] = int64(ceil_div((int64(cend) - int64(cbegin)), int64(cstride))) return out @script -def _strided_slice_shape_func_input_shape(data_shape, begin, end, strides, ignore_end): +def _strided_slice_shape_func_input_shape(data_shape, begin, end, strides, slice_mode): ndim = data_shape.shape[0] assert ndim == 2, "not correct" out = output_tensor((ndim,), "int64") @@ -132,12 +141,21 @@ def _strided_slice_shape_func_input_shape(data_shape, begin, end, strides, ignor cbegin = int64(0) cend = int64(data_shape[i]) cstride = int64(1) + if len(strides) > i: + cstride = int64(strides[i]) if len(begin) > i: cbegin = int64(begin[i]) - if len(end) > i: + if len(end) <= i: + cend = int64(data_shape[i]) + elif slice_mode != 0: + if end[i] < 0: + cend = int64(data_shape[i]) + elif cstride < 0: + cend = cbegin - int64(end[i]) + else: + cend = cbegin + int64(end[i]) + else: cend = int64(end[i]) - if ignore_end != 0 and len(strides) > i: - cstride = int64(strides[i]) assert cstride != 0, "Strides can't be zero." out[i] = int64(ceil_div((int64(cend) - int64(cbegin)), int64(cstride))) return out @@ -148,12 +166,12 @@ def strided_slice_shape_func(attrs, inputs, _): """ Shape func for strided_slice """ - ignore_end = convert(get_const_int(attrs.ignore_end)) + slice_mode = convert(get_const_int(attrs.slice_mode)) # data independent if begin, end and strides exist if attrs.begin and attrs.end and attrs.strides: return [_strided_slice_shape_func_input_shape(inputs[0], attrs.begin, attrs.end, - attrs.strides, ignore_end)] - return [_strided_slice_shape_func_input_data(*inputs, ignore_end)] + attrs.strides, slice_mode)] + return [_strided_slice_shape_func_input_data(*inputs, slice_mode)] @script def _concatenate_shape_func(inputs, axis): diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index 8e10e7c7775b..6e52f7896d2b 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py @@ -611,7 +611,7 @@ def split(data, indices_or_sections, axis=0): return TupleWrapper(_make.split(data, indices_or_sections, axis), ret_size) -def strided_slice(data, begin, end, strides=None, ignore_end=False): +def strided_slice(data, begin, end, strides=None, slice_mode=False): """Strided slice of an array. Parameters @@ -629,7 +629,7 @@ def strided_slice(data, begin, end, strides=None, ignore_end=False): Specifies the stride values, it can be negative in that case, the input tensor will be reversed in that particular axis. 
- ignore_end: boolean, optional + slice_mode: boolean, optional Whether to ignore the negative elements in input end, will slice to the end of data for the ignored element. @@ -645,7 +645,7 @@ def strided_slice(data, begin, end, strides=None, ignore_end=False): end = const(list(end)) if isinstance(strides, list): strides = const(list(strides)) - return _make.strided_slice(data, begin, end, strides, ignore_end) + return _make.strided_slice(data, begin, end, strides, slice_mode) def strided_set(data, v, begin, end, strides=None): diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index fe320a5e0d2b..a405aca73983 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -1711,9 +1711,16 @@ bool StridedSliceRel(const Array& types, int num_inputs, const Attrs& attr std::vector end_vec; for (size_t i = 0; i < param->end.value().size(); ++i) { // allow end to be None - if (!param->end.value()[i].defined() || - (param->ignore_end && param->end.value()[i]->value < 0)) { + if (!param->end.value()[i].defined()) { end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); + } else if (param->slice_mode) { + if (param->end.value()[i]->value < 0) { + end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); + } else if (stride_vec[i] < 0) { + end_vec.push_back(begin_vec[i] - param->end.value()[i]->value); + } else { + end_vec.push_back(begin_vec[i] + param->end.value()[i]->value); + } } else { end_vec.push_back(param->end.value()[i]->value); } @@ -1834,10 +1841,19 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, } } int64_t bg = begin[i].defined() ? begin[i]->value : 0; - int64_t ed = end[i].defined() ? end[i]->value : shape[i].as()->value; - if (params->ignore_end && end[i].defined() && end[i]->value < 0) { + int64_t ed; + if (!end[i].defined()) { ed = shape[i].as()->value; + } else if (params->slice_mode) { + if (end[i]->value < 0) { + ed = shape[i].as()->value; + } else { + ed = bg + end[i]->value; + } + } else { + ed = end[i]->value; } + if (bg % factor || ed % factor) { // transform to original layout return {{Layout::Undef()}, {Layout::Undef()}}; @@ -1886,7 +1902,7 @@ Array StridedSliceCompute(const Attrs& attrs, const Arrayend.value(); strides = param->strides.value(); return Array{ - topi::strided_slice(inputs[0], begin, end, strides, param->ignore_end)}; + topi::strided_slice(inputs[0], begin, end, strides, param->slice_mode)}; } else { te::Tensor data = inputs[0]; te::Tensor begin = inputs[1]; @@ -1904,7 +1920,7 @@ Array StridedSliceCompute(const Attrs& attrs, const Array(); const ConstantNode *cbegin, *cend, *cstrides; if ((cbegin = begin.as()) && (cend = end.as()) && @@ -1926,7 +1942,7 @@ Expr MakeStridedSlice(Expr data, Expr begin, Expr end, Expr strides, bool ignore attrs->end = end; attrs->strides = strides; } - attrs->ignore_end = ignore_end; + attrs->slice_mode = slice_mode; static const Op& op = Op::Get("strided_slice"); return Call(op, {data, begin, end, strides}, Attrs(attrs), {}); } @@ -1962,7 +1978,7 @@ Examples:: .add_argument("begin", "Tensor", "The indices to begin with in the slicing.") .add_argument("end", "Tensor", "Indices indicating end of the slice.") .add_argument("strides", "Tensor", "The stride values.") - .add_argument("ignore_end", "Tensor", "Whether to ignore negative elements of input end.") + .add_argument("slice_mode", "Tensor", "Whether to ignore negative elements of input end.") .set_support_level(4) .set_attrs_type() .add_type_rel("StridedSlice", StridedSliceRel) diff --git 
a/src/relay/transforms/pattern_util.h b/src/relay/transforms/pattern_util.h index 89f29fcc0cce..2e73c632230b 100644 --- a/src/relay/transforms/pattern_util.h +++ b/src/relay/transforms/pattern_util.h @@ -673,7 +673,7 @@ Expr MakeConcatenate(Expr data, int axis); Expr MakeRepeat(Expr data, int repeats, int axis); -Expr MakeStridedSlice(Expr data, Expr begin, Expr end, Expr strides, bool ignore_end); +Expr MakeStridedSlice(Expr data, Expr begin, Expr end, Expr strides, bool slice_mode); Expr MakeStack(Expr data, int axis); diff --git a/tests/python/relay/test_any.py b/tests/python/relay/test_any.py index 4c131715d532..e09a93b0744d 100644 --- a/tests/python/relay/test_any.py +++ b/tests/python/relay/test_any.py @@ -644,14 +644,14 @@ def test_arange_with_dynamic_shape(): tvm.testing.assert_allclose(result.asnumpy(), np.array(range(10)).astype("int32")+1) def verify_any_strided_slice(data_shape, begin_shape, end_shape, strides_shape, - data_np_shape, ignore_end=False, const_attrs=False, dtype="int32"): + data_np_shape, slice_mode=False, const_attrs=False, dtype="int32"): # Generate random numpy input data np_data = np.random.uniform(size=data_np_shape).astype('float32') np_begin = np.random.randint(2, size=begin_shape, dtype=dtype) np_end = np.random.randint(5, 15, size=end_shape, dtype=dtype) np_strides = np.random.randint(1, 3, size=strides_shape, dtype=dtype) # target numpy result - ref_res = topi.testing.strided_slice_python(np_data, np_begin, np_end, np_strides, ignore_end) + ref_res = topi.testing.strided_slice_python(np_data, np_begin, np_end, np_strides, slice_mode) # Relay Module mod = tvm.IRModule() @@ -670,7 +670,7 @@ def verify_any_strided_slice(data_shape, begin_shape, end_shape, strides_shape, np_inputs = [np_data, np_begin, np_end, np_strides] y = relay.strided_slice(data, begin=begin, end=end, - strides=strides, ignore_end=ignore_end) + strides=strides, slice_mode=slice_mode) mod["main"] = relay.Function(args, y) for kind in ["debug", "vm"]: @@ -684,7 +684,7 @@ def test_any_strided_slice(): verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (15, 17, 21)) verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (23, 29, 41)) verify_any_strided_slice(any_dims(4), (4,), (4,), (4,), (40, 50, 60, 70)) - verify_any_strided_slice(any_dims(4), (4,), (4,), (4,), (40, 50, 60, 70), ignore_end=True) + verify_any_strided_slice(any_dims(4), (4,), (4,), (4,), (40, 50, 60, 70), slice_mode=True) def test_recursive_concat(): diff --git a/tests/python/relay/test_op_level4.py b/tests/python/relay/test_op_level4.py index 081236badd02..4f04a3a53307 100644 --- a/tests/python/relay/test_op_level4.py +++ b/tests/python/relay/test_op_level4.py @@ -296,7 +296,7 @@ def test_mean_var_std(): def test_strided_slice(): - def verify(dshape, begin, end, strides, output, ignore_end=False, + def verify(dshape, begin, end, strides, output, slice_mode=False, attr_const=True, test_ref=True, dtype="int32"): x = relay.var("x", relay.TensorType(dshape, "float32")) ndim = len(dshape) @@ -306,7 +306,7 @@ def verify(dshape, begin, end, strides, output, ignore_end=False, # target numpy result x_data = np.random.uniform(size=dshape).astype("float32") ref_res = topi.testing.strided_slice_python( - x_data, begin, end, strides, ignore_end) + x_data, begin, end, strides, slice_mode) if attr_const: begin = relay.const(begin, dtype=dtype) @@ -319,12 +319,12 @@ def verify(dshape, begin, end, strides, output, ignore_end=False, begin=begin, end=end, strides=strides, - ignore_end=ignore_end) + slice_mode=slice_mode) else: z = 
relay.strided_slice(x, begin=begin, end=end, - ignore_end=ignore_end) + slice_mode=slice_mode) func = relay.Function([x], z) func = run_infer_type(func) @@ -354,9 +354,9 @@ def verify(dshape, begin, end, strides, output, ignore_end=False, verify((3, 4, 3), [1, -1, 0], [4, -5, 3], [2, -1, 1], (1, 4, 3)) verify((3, 4, 3), [1, -1, 0], [2, -3, 3], [1, -1, 1], (1, 2, 3)) verify((3, 4, 3), [1, 0, 0], [3, -1, 3], [1, 1, 2], - (2, 4, 2), ignore_end=True, test_ref=False) + (2, 4, 2), slice_mode=True, test_ref=False) verify((3, 4, 3), [1, 0, 0], [-1, 2, 3], [1, 1, 2], - (2, 2, 2), ignore_end=True, test_ref=True) + (2, 2, 2), slice_mode=True, test_ref=True) def test_strided_set(): def verify(dshape, begin, end, strides, vshape, test_ref=True): diff --git a/tests/python/relay/test_pass_combine_parallel_conv2d.py b/tests/python/relay/test_pass_combine_parallel_conv2d.py index 28c9655808f5..112791d83fb7 100644 --- a/tests/python/relay/test_pass_combine_parallel_conv2d.py +++ b/tests/python/relay/test_pass_combine_parallel_conv2d.py @@ -53,18 +53,18 @@ def expected(x, w1, w2, w3, w4, channels1, channels2, channels3, channels4): begin=relay.const([0, 0], "int64"), end=relay.const([-1, channels1], "int64"), strides=relay.const([1, 1], 'int64'), - ignore_end=True) + slice_mode=True) y2 = relay.strided_slice(y, begin=relay.const([0, channels1], "int64"), end=relay.const([-1, channels1 + channels2], "int64"), strides=relay.const([1, 1], 'int64'), - ignore_end=True) + slice_mode=True) y3 = relay.nn.conv2d(x, w3) y4 = relay.strided_slice(y, begin=relay.const([0, channels1 + channels2], "int64"), end=relay.const([-1, channels1 + channels2 + channels4], "int64"), strides=relay.const([1, 1], 'int64'), - ignore_end=True) + slice_mode=True) y5 = relay.nn.max_pool2d(x) y = relay.Tuple((y1, y2, y3, y4, y5)) return relay.Function(args, y) @@ -113,12 +113,12 @@ def expected(x, w1, w2, scale1, scale2, bias, channels1, channels2): begin=relay.const([0, 0], "int64"), end=relay.const([-1, channels1], "int64"), strides=relay.const([1, 1], "int64"), - ignore_end=True) + slice_mode=True) y2 = relay.strided_slice(y, begin=relay.const([0, channels1], "int64"), end=relay.const([-1, channels1 + channels2], "int64"), strides=relay.const([1, 1], "int64"), - ignore_end=True) + slice_mode=True) y2 = relay.add(y2, bias) y = relay.Tuple((y1, y2)) return relay.Function(args, y) @@ -160,12 +160,12 @@ def expected(x, w1, w2, scale1, scale2, channels1, channels2): begin=relay.const([0, 0], "int64"), end=relay.const([-1, channels1], "int64"), strides=relay.const([1, 1], "int64"), - ignore_end=True) + slice_mode=True) y2 = relay.strided_slice(y, begin=relay.const([0, channels1], "int64"), end=relay.const([-1, channels1 + channels2], "int64"), strides=relay.const([1, 1], "int64"), - ignore_end=True) + slice_mode=True) y1 = relay.multiply(y1, scale1) y2 = relay.multiply(y2, scale2) y = relay.Tuple((y1, y2)) @@ -208,12 +208,12 @@ def expected(x, w, channels, repeat): begin=relay.const([0, 0], "int64"), end=relay.const([-1, channels], "int64"), strides=relay.const([1, 1], "int64"), - ignore_end=True) + slice_mode=True) y2 = relay.strided_slice(y, begin=relay.const([0, channels], "int64"), end=relay.const([-1, channels * 2], "int64"), strides=relay.const([1, 1], "int64"), - ignore_end=True) + slice_mode=True) y = relay.concatenate((y1, y2), axis=1) return relay.Function(args, y) diff --git a/topi/include/topi/transform.h b/topi/include/topi/transform.h index 8201a50335ef..873eae5bc76f 100644 --- a/topi/include/topi/transform.h +++ 
b/topi/include/topi/transform.h @@ -520,7 +520,7 @@ inline Array split(const Tensor& x, Array split_indices, int ax * \param begin The indices to begin with in the slicing * \param end Indicies indicating end of the slice * \param strides Specifies the stride values, it can be negative - * \param ignore_end Specifies whether to ignore negative elements of input end + * \param slice_mode Specifies whether to ignore negative elements of input end * in that case, the input tensor will be reversed in that particular axis * \param name The name of the operation * \param tag The tag to mark the operation @@ -528,7 +528,7 @@ inline Array split(const Tensor& x, Array split_indices, int ax * \return A Tensor whose op member is the split operation */ inline Tensor strided_slice(const Tensor& x, const Array& begin, const Array& end, - const Array& strides, const bool& ignore_end, + const Array& strides, const bool& slice_mode, std::string name = "T_strided_slice", std::string tag = kInjective) { size_t src_tensor_dim = static_cast(x->shape.size()); // Setup the ranges. @@ -560,8 +560,17 @@ inline Tensor strided_slice(const Tensor& x, const Array& begin, const std::vector end_vec; for (size_t i = 0; i < end.size(); ++i) { // allow end to be None - if (!end[i].defined() || (ignore_end && end[i]->value < 0)) { + + if (!end[i].defined()) { end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); + } else if (slice_mode) { + if (end[i]->value < 0) { + end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); + } else if (stride_vec[i] > 0) { + end_vec.push_back(begin_vec[i] + end[i]->value); + } else { + end_vec.push_back(begin_vec[i] - end[i]->value); + } } else { end_vec.push_back(end[i]->value); } diff --git a/topi/python/topi/testing/strided_slice_python.py b/topi/python/topi/testing/strided_slice_python.py index c60f05fef66d..a72402ca4dc2 100644 --- a/topi/python/topi/testing/strided_slice_python.py +++ b/topi/python/topi/testing/strided_slice_python.py @@ -17,7 +17,7 @@ """strided_slice/set in python""" -def strided_slice_python(data, begin, end, strides, ignore_end=False): +def strided_slice_python(data, begin, end, strides, slice_mode=False): """Python version of strided slice operator. Parameters @@ -34,7 +34,7 @@ def strided_slice_python(data, begin, end, strides, ignore_end=False): strides : list The stride of each slice. - ignore_end : boolean + slice_mode : boolean Whether to ignore negative elements of input end Returns @@ -45,12 +45,21 @@ def strided_slice_python(data, begin, end, strides, ignore_end=False): strides = [] if strides is None else strides slices = [] for i in range(len(data.shape)): + new_stride = strides[i] if i < len(strides) else None + new_begin = begin[i] if i < len(begin) else None - if i >= len(end) or (ignore_end and end[i] < 0): + if i >= len(end): new_end = None + elif slice_mode: + if end[i] < 0: + new_end = None + elif new_stride and new_stride < 0: + new_end = new_begin - end[i] + else: + new_end = new_begin + end[i] else: new_end = end[i] - new_stride = strides[i] if i < len(strides) else None + slices.append(slice(new_begin, new_end, new_stride)) diff --git a/topi/python/topi/transform.py b/topi/python/topi/transform.py index 3e479db1c02b..209c4b2d837c 100644 --- a/topi/python/topi/transform.py +++ b/topi/python/topi/transform.py @@ -131,7 +131,7 @@ def flip(a, axis=0): """ return cpp.flip(a, axis) -def strided_slice(a, begin, end, strides=None, ignore_end=False): +def strided_slice(a, begin, end, strides=None, slice_mode=False): """Slice of an array. 
Parameters @@ -150,7 +150,7 @@ def strided_slice(a, begin, end, strides=None, ignore_end=False): in that case, the input tensor will be reversed in that particular axis. - ignore_end: boolean, optional + slice_mode: boolean, optional Specifies whether to ignore negative elements of input end. Returns @@ -159,7 +159,7 @@ def strided_slice(a, begin, end, strides=None, ignore_end=False): """ if strides is None: strides = [] - return cpp.strided_slice(a, begin, end, strides, ignore_end) + return cpp.strided_slice(a, begin, end, strides, slice_mode) @tvm.te.tag_scope(tag=tag.INJECTIVE+",strided_set") def strided_set(a, v, begin, end, strides=None): From 69a440e54b39c11e98a1d3f9cdda201b875469cc Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Sun, 31 May 2020 04:53:04 +0800 Subject: [PATCH 18/22] clang-format, fix comments --- include/tvm/relay/attrs/transform.h | 6 +++- python/tvm/relay/frontend/pytorch.py | 16 ++++++++-- python/tvm/relay/frontend/tensorflow.py | 22 ++++++------- python/tvm/relay/op/_transform.py | 6 ++-- python/tvm/relay/op/transform.py | 30 +++++++++-------- src/relay/op/tensor/transform.cc | 32 +++++++++---------- .../transforms/combine_parallel_conv2d.cc | 1 + tests/python/relay/test_any.py | 6 ++-- tests/python/relay/test_op_level4.py | 9 +++--- .../test_pass_combine_parallel_conv2d.py | 10 +++--- topi/include/topi/transform.h | 18 ++++------- .../topi/testing/strided_slice_python.py | 11 ++++--- topi/python/topi/transform.py | 5 ++- 13 files changed, 92 insertions(+), 80 deletions(-) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index 42e894bc0c8b..c1700ed14b79 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -221,7 +221,11 @@ struct StridedSliceAttrs : public tvm::AttrsNode { TVM_ATTR_FIELD(strides).describe("Stride values of the slice"); TVM_ATTR_FIELD(slice_mode) .set_default(false) - .describe("Whether to ignore the negative elements in input end."); + .describe( + "Specifies whether to enable slice mode. In slice mode," + "strides will be ignored, end indicates the size of a slice" + "starting at the location specified by begin. 
If end[i] is -1," + "all remaining elements in that dimension are included in the slice"); } }; diff --git a/python/tvm/relay/frontend/pytorch.py b/python/tvm/relay/frontend/pytorch.py index cc7cd4830cd4..c7c80de15a54 100644 --- a/python/tvm/relay/frontend/pytorch.py +++ b/python/tvm/relay/frontend/pytorch.py @@ -243,7 +243,11 @@ def _impl(inputs, input_types): end[dim] = inputs[3] strides.append(int(inputs[4])) - return _op.transform.strided_slice(data, begin, end, strides) + return _op.transform.strided_slice(data, + begin=_expr.const(begin), + end=_expr.const(end), + strides=_expr.const(strides), + slice_mode=True) return _impl def _split(): @@ -1233,7 +1237,10 @@ def _impl(inputs, input_types): end[axis] = i + unif_size stride = [1] * len(shape) - chunk_out = _op.transform.strided_slice(data, begin, end, stride) + chunk_out = _op.transform.strided_slice(data, + begin=_expr.const(begin), + end=_expr.const(end), + strides=_expr.const(stride)) chunks.append(chunk_out) if dim % num_chunks: @@ -1243,7 +1250,10 @@ def _impl(inputs, input_types): end[axis] = dim stride = [1] * len(shape) - chunk_out = _op.transform.strided_slice(data, begin, end, stride) + chunk_out = _op.transform.strided_slice(data, + begin=_expr.const(begin), + end=_expr.const(end), + strides=_expr.const(stride)) chunks.append(chunk_out) return chunks diff --git a/python/tvm/relay/frontend/tensorflow.py b/python/tvm/relay/frontend/tensorflow.py index d4c658ab44e4..002fb857e258 100644 --- a/python/tvm/relay/frontend/tensorflow.py +++ b/python/tvm/relay/frontend/tensorflow.py @@ -652,11 +652,12 @@ def _impl(inputs, attr, params, mod): invalid_to_bottom=False) # squeeze it, TF NMS is not batched - end = get_relay_op("squeeze")(nms_ret[1], axis=[1]) + size = get_relay_op("squeeze")(nms_ret[1], axis=[1]) data_slice = get_relay_op("squeeze")(nms_ret[0], axis=[0]) # slice to get the dynamic result - ret = get_relay_op("strided_slice")(data_slice, _expr.const([0]), end, _expr.const([1])) + ret = get_relay_op("strided_slice")(data_slice, begin=_expr.const([0]), + end=size, slice_mode=True) return ret return _impl @@ -1165,7 +1166,11 @@ def _impl(inputs, attr, params, mod): try: begin = _get_list_param(params, inputs[1]) except (IndexError, KeyError, AttributeError): - begin = _infer_value(inputs[1], params).asnumpy().tolist()[0] + # Handle symbolic begin + try: + begin = _infer_value(inputs[1], params).asnumpy().tolist()[0] + except Exception: + begin = inputs[1] try: size = _get_list_param(params, inputs[2]) except (IndexError, KeyError, AttributeError): @@ -1174,16 +1179,7 @@ def _impl(inputs, attr, params, mod): size = _infer_value(inputs[2], params).asnumpy().tolist()[0] except Exception: size = inputs[2] - data_shape = _infer_shape(inputs[0], mod) - data_dim = len(data_shape) - end = size - if not isinstance(end, (_expr.Call, _expr.Var)): - for i in range(data_dim): - if size[i] == -1: - end[i] = data_shape[i] - else: - end[i] += begin[i] - return _op.strided_slice(inputs[0], begin=begin, end=end) + return _op.strided_slice(inputs[0], begin=begin, end=size, slice_mode=True) return _impl diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index 3a22c16be3fa..1ddd335e4d9c 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -120,10 +120,9 @@ def _strided_slice_shape_func_input_data(data, begin, end, strides, if end.shape[0] <= i: cend = data.shape[i] elif slice_mode != 0: + cstride = 1 if end[i] < 0: cend = data.shape[i] - elif cstride < 0: - cend = cbegin - 
end[i] else: cend = cbegin + end[i] else: @@ -148,10 +147,9 @@ def _strided_slice_shape_func_input_shape(data_shape, begin, end, strides, slice if len(end) <= i: cend = int64(data_shape[i]) elif slice_mode != 0: + cstride = int64(1) if end[i] < 0: cend = int64(data_shape[i]) - elif cstride < 0: - cend = cbegin - int64(end[i]) else: cend = cbegin + int64(end[i]) else: diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index 6e52f7896d2b..38a17c41bd32 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py @@ -619,19 +619,21 @@ def strided_slice(data, begin, end, strides=None, slice_mode=False): data : relay.Expr The source array to be sliced. - begin: relay.Expr or List[int] + begin: relay.Expr, Tuple[int], or List[int] The indices to begin with in the slicing. - end: relay.Expr or List[int] + end: relay.Expr, Tuple[int], or List[int] Indices indicating end of the slice. - strides: relay.Expr or List[int], optional + strides: relay.Expr, Tuple[int], or List[int], optional Specifies the stride values, it can be negative in that case, the input tensor will be reversed in that particular axis. slice_mode: boolean, optional - Whether to ignore the negative elements in input end, - will slice to the end of data for the ignored element. + Specifies whether to enable slice mode. In slice mode, + strides will be ignored, end indicates the size of a slice + starting at the location specified by begin. If end[i] is -1, + all remaining elements in that dimension are included in the slice Returns ------- @@ -639,11 +641,11 @@ def strided_slice(data, begin, end, strides=None, slice_mode=False): The computed result. """ strides = strides or const([1], dtype="int32") - if isinstance(begin, list): + if isinstance(begin, (tuple, list)): begin = const(list(begin)) - if isinstance(end, list): + if isinstance(end, (tuple, list)): end = const(list(end)) - if isinstance(strides, list): + if isinstance(strides, (tuple, list)): strides = const(list(strides)) return _make.strided_slice(data, begin, end, strides, slice_mode) @@ -659,13 +661,13 @@ def strided_set(data, v, begin, end, strides=None): v : relay.Expr The data to be set. - begin: relay.Expr or List[int] + begin: relay.Expr, Tuple[int], or List[int] The indices to begin with in the slicing. - end: relay.Expr or List[int] + end: relay.Expr, Tuple[int], or List[int] Indices indicating end of the slice. - strides: relay.Expr or List[int], optional + strides: relay.Expr, Tuple[int], or List[int], optional Specifies the stride values, it can be negative in that case, the input tensor will be reversed in that particular axis. @@ -675,11 +677,11 @@ def strided_set(data, v, begin, end, strides=None): The computed result. 
""" strides = strides or const([1], dtype="int32") - if isinstance(begin, list): + if isinstance(begin, (tuple, list)): begin = const(list(begin)) - if isinstance(end, list): + if isinstance(end, (tuple, list)): end = const(list(end)) - if isinstance(strides, list): + if isinstance(strides, (tuple, list)): strides = const(list(strides)) return _make.strided_set(data, v, begin, end, strides) diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index a405aca73983..a2d4704dfa3b 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -1687,13 +1687,13 @@ bool StridedSliceRel(const Array& types, int num_inputs, const Attrs& attr // calculate output shape std::vector oshape(num_axis); if (param->begin && param->end && param->strides) { - std::vector stride_vec; - for (Integer i : param->strides.value()) { - CHECK(i.defined()); - stride_vec.push_back(i->value); - } - for (int64_t i = stride_vec.size(); i < num_axis; ++i) { - stride_vec.push_back(1); + // stride will be set as 1 if slice mode is enabled + std::vector stride_vec(num_axis, 1); + if (!param->slice_mode) { + for (size_t i = 0; i < param->strides.value().size(); ++i) { + CHECK(param->strides.value()[i].defined()); + stride_vec[i] = param->strides.value()[i]->value; + } } const int64_t max_range = std::numeric_limits::max(); std::vector begin_vec; @@ -1714,13 +1714,11 @@ bool StridedSliceRel(const Array& types, int num_inputs, const Attrs& attr if (!param->end.value()[i].defined()) { end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); } else if (param->slice_mode) { - if (param->end.value()[i]->value < 0) { - end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); - } else if (stride_vec[i] < 0) { - end_vec.push_back(begin_vec[i] - param->end.value()[i]->value); - } else { - end_vec.push_back(begin_vec[i] + param->end.value()[i]->value); - } + if (param->end.value()[i]->value < 0) { + end_vec.push_back(max_range); + } else { + end_vec.push_back(begin_vec[i] + param->end.value()[i]->value); + } } else { end_vec.push_back(param->end.value()[i]->value); } @@ -1762,7 +1760,7 @@ bool StridedSliceRel(const Array& types, int num_inputs, const Attrs& attr } else { if (begin_v < 0) begin_v = 0; CHECK_GE(stride_v, 0); - CHECK_LT(begin_v, end_v) << "strided_slice get empty slice at axis " << i; + CHECK_LE(begin_v, end_v) << "strided_slice get invalid slice at axis " << i; end_v = std::min(dim_size, end_v); slice_range = end_v - begin_v; step = stride_v; @@ -1807,7 +1805,7 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, if (params->begin && params->end && params->strides) { for (Integer i : params->strides.value()) { CHECK(i.defined()); - strides.push_back(i->value); + strides.push_back(params->slice_mode ? 
1 : i->value); } for (Integer i : params->begin.value()) { @@ -1978,7 +1976,7 @@ Examples:: .add_argument("begin", "Tensor", "The indices to begin with in the slicing.") .add_argument("end", "Tensor", "Indices indicating end of the slice.") .add_argument("strides", "Tensor", "The stride values.") - .add_argument("slice_mode", "Tensor", "Whether to ignore negative elements of input end.") + .add_argument("slice_mode", "Tensor", "Whether to enable slice mode.") .set_support_level(4) .set_attrs_type() .add_type_rel("StridedSlice", StridedSliceRel) diff --git a/src/relay/transforms/combine_parallel_conv2d.cc b/src/relay/transforms/combine_parallel_conv2d.cc index 04ed35b709e5..d680de49b005 100644 --- a/src/relay/transforms/combine_parallel_conv2d.cc +++ b/src/relay/transforms/combine_parallel_conv2d.cc @@ -192,6 +192,7 @@ class ParallelConv2DCombiner : public ParallelOpCombiner { for (size_t i = 0; i < begin.size(); ++i) { begin_data[i] = begin[i]; end_data[i] = end[i]; + end_data[i] -= begin_data[i]; strides_data[i] = 1; } diff --git a/tests/python/relay/test_any.py b/tests/python/relay/test_any.py index e09a93b0744d..27a2c6250dbe 100644 --- a/tests/python/relay/test_any.py +++ b/tests/python/relay/test_any.py @@ -648,8 +648,8 @@ def verify_any_strided_slice(data_shape, begin_shape, end_shape, strides_shape, # Generate random numpy input data np_data = np.random.uniform(size=data_np_shape).astype('float32') np_begin = np.random.randint(2, size=begin_shape, dtype=dtype) - np_end = np.random.randint(5, 15, size=end_shape, dtype=dtype) - np_strides = np.random.randint(1, 3, size=strides_shape, dtype=dtype) + np_end = np.random.randint(5, 10, size=end_shape, dtype=dtype) + np_strides = np.random.randint(1, 2 if slice_mode else 3, size=strides_shape, dtype=dtype) # target numpy result ref_res = topi.testing.strided_slice_python(np_data, np_begin, np_end, np_strides, slice_mode) @@ -684,7 +684,7 @@ def test_any_strided_slice(): verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (15, 17, 21)) verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (23, 29, 41)) verify_any_strided_slice(any_dims(4), (4,), (4,), (4,), (40, 50, 60, 70)) - verify_any_strided_slice(any_dims(4), (4,), (4,), (4,), (40, 50, 60, 70), slice_mode=True) + verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (15, 17, 21), slice_mode=True) def test_recursive_concat(): diff --git a/tests/python/relay/test_op_level4.py b/tests/python/relay/test_op_level4.py index 4f04a3a53307..dad1adebbf4e 100644 --- a/tests/python/relay/test_op_level4.py +++ b/tests/python/relay/test_op_level4.py @@ -342,6 +342,7 @@ def verify(dshape, begin, end, strides, output, slice_mode=False, op_res = intrp.evaluate(func)(x_data) tvm.testing.assert_allclose(op_res.asnumpy(), ref_res) + verify((1, 3, 10, 10), [0, 0, 0, 0], [-1, 3, 10, 10], [1], (0, 3, 10, 10), dtype="int64") verify((1, 224, 224, 3), [0, 20, 20, 0], [1, 140, 140, 3], [1, 1, 1, 1], (1, 120, 120, 3), dtype="int64") verify((3, 4, 3), [1, 1, 0], [4, 4, 3], [2, 1, 1], (1, 3, 3), dtype="int16") @@ -353,10 +354,10 @@ def verify(dshape, begin, end, strides, output, slice_mode=False, verify((3, 4, 3), [1, 1], [4, 4, 3], None, (2, 3, 3)) verify((3, 4, 3), [1, -1, 0], [4, -5, 3], [2, -1, 1], (1, 4, 3)) verify((3, 4, 3), [1, -1, 0], [2, -3, 3], [1, -1, 1], (1, 2, 3)) - verify((3, 4, 3), [1, 0, 0], [3, -1, 3], [1, 1, 2], - (2, 4, 2), slice_mode=True, test_ref=False) - verify((3, 4, 3), [1, 0, 0], [-1, 2, 3], [1, 1, 2], - (2, 2, 2), slice_mode=True, test_ref=True) + verify((3, 4, 3), [1, 0, 0], [3, 
-1, 3], [1, 1, 1], + (2, 4, 3), slice_mode=True, test_ref=False) + verify((3, 4, 3), [1, 0, 0], [-1, 2, 3], [1, 1, 1], + (2, 2, 3), slice_mode=True, test_ref=True) def test_strided_set(): def verify(dshape, begin, end, strides, vshape, test_ref=True): diff --git a/tests/python/relay/test_pass_combine_parallel_conv2d.py b/tests/python/relay/test_pass_combine_parallel_conv2d.py index 112791d83fb7..429fd620e09e 100644 --- a/tests/python/relay/test_pass_combine_parallel_conv2d.py +++ b/tests/python/relay/test_pass_combine_parallel_conv2d.py @@ -56,13 +56,13 @@ def expected(x, w1, w2, w3, w4, channels1, channels2, channels3, channels4): slice_mode=True) y2 = relay.strided_slice(y, begin=relay.const([0, channels1], "int64"), - end=relay.const([-1, channels1 + channels2], "int64"), + end=relay.const([-1, channels2], "int64"), strides=relay.const([1, 1], 'int64'), slice_mode=True) y3 = relay.nn.conv2d(x, w3) y4 = relay.strided_slice(y, begin=relay.const([0, channels1 + channels2], "int64"), - end=relay.const([-1, channels1 + channels2 + channels4], "int64"), + end=relay.const([-1, channels4], "int64"), strides=relay.const([1, 1], 'int64'), slice_mode=True) y5 = relay.nn.max_pool2d(x) @@ -116,7 +116,7 @@ def expected(x, w1, w2, scale1, scale2, bias, channels1, channels2): slice_mode=True) y2 = relay.strided_slice(y, begin=relay.const([0, channels1], "int64"), - end=relay.const([-1, channels1 + channels2], "int64"), + end=relay.const([-1, channels2], "int64"), strides=relay.const([1, 1], "int64"), slice_mode=True) y2 = relay.add(y2, bias) @@ -163,7 +163,7 @@ def expected(x, w1, w2, scale1, scale2, channels1, channels2): slice_mode=True) y2 = relay.strided_slice(y, begin=relay.const([0, channels1], "int64"), - end=relay.const([-1, channels1 + channels2], "int64"), + end=relay.const([-1, channels2], "int64"), strides=relay.const([1, 1], "int64"), slice_mode=True) y1 = relay.multiply(y1, scale1) @@ -211,7 +211,7 @@ def expected(x, w, channels, repeat): slice_mode=True) y2 = relay.strided_slice(y, begin=relay.const([0, channels], "int64"), - end=relay.const([-1, channels * 2], "int64"), + end=relay.const([-1, channels], "int64"), strides=relay.const([1, 1], "int64"), slice_mode=True) y = relay.concatenate((y1, y2), axis=1) diff --git a/topi/include/topi/transform.h b/topi/include/topi/transform.h index 873eae5bc76f..cacde55eb703 100644 --- a/topi/include/topi/transform.h +++ b/topi/include/topi/transform.h @@ -520,7 +520,7 @@ inline Array split(const Tensor& x, Array split_indices, int ax * \param begin The indices to begin with in the slicing * \param end Indicies indicating end of the slice * \param strides Specifies the stride values, it can be negative - * \param slice_mode Specifies whether to ignore negative elements of input end + * \param slice_mode Specifies whether to enable slice mode * in that case, the input tensor will be reversed in that particular axis * \param name The name of the operation * \param tag The tag to mark the operation @@ -534,14 +534,12 @@ inline Tensor strided_slice(const Tensor& x, const Array& begin, const // Setup the ranges. // NOTE: this code duplicates the shape inference logic relay.op // Consider to refactor in the future. 
- std::vector stride_vec; - for (Integer i : strides) { - CHECK(i.defined()); - stride_vec.push_back(i->value); - } - for (size_t i = stride_vec.size(); i < src_tensor_dim; ++i) { - stride_vec.push_back(1); + std::vector stride_vec(src_tensor_dim, 1); + for (size_t i = 0; i < strides.size(); ++i) { + CHECK(strides[i].defined()); + stride_vec[i] = strides[i]->value; } + const int64_t max_range = std::numeric_limits::max(); std::vector begin_vec; @@ -566,10 +564,8 @@ inline Tensor strided_slice(const Tensor& x, const Array& begin, const } else if (slice_mode) { if (end[i]->value < 0) { end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); - } else if (stride_vec[i] > 0) { - end_vec.push_back(begin_vec[i] + end[i]->value); } else { - end_vec.push_back(begin_vec[i] - end[i]->value); + end_vec.push_back(begin_vec[i] + end[i]->value); } } else { end_vec.push_back(end[i]->value); diff --git a/topi/python/topi/testing/strided_slice_python.py b/topi/python/topi/testing/strided_slice_python.py index a72402ca4dc2..8d68fc1c1492 100644 --- a/topi/python/topi/testing/strided_slice_python.py +++ b/topi/python/topi/testing/strided_slice_python.py @@ -35,7 +35,10 @@ def strided_slice_python(data, begin, end, strides, slice_mode=False): The stride of each slice. slice_mode : boolean - Whether to ignore negative elements of input end + Specifies whether to enable slice mode. + In slice mode, strides will be ignored, + end indicates the size of a slice starting + at the location specified by begin. Returns ------- @@ -45,7 +48,9 @@ def strided_slice_python(data, begin, end, strides, slice_mode=False): strides = [] if strides is None else strides slices = [] for i in range(len(data.shape)): - new_stride = strides[i] if i < len(strides) else None + new_stride = None + if not slice_mode and i < len(strides): + new_stride = strides[i] new_begin = begin[i] if i < len(begin) else None if i >= len(end): @@ -53,8 +58,6 @@ def strided_slice_python(data, begin, end, strides, slice_mode=False): elif slice_mode: if end[i] < 0: new_end = None - elif new_stride and new_stride < 0: - new_end = new_begin - end[i] else: new_end = new_begin + end[i] else: diff --git a/topi/python/topi/transform.py b/topi/python/topi/transform.py index 209c4b2d837c..a5a564b106f1 100644 --- a/topi/python/topi/transform.py +++ b/topi/python/topi/transform.py @@ -151,7 +151,10 @@ def strided_slice(a, begin, end, strides=None, slice_mode=False): in that particular axis. slice_mode: boolean, optional - Specifies whether to ignore negative elements of input end. + Specifies whether to enable slice mode. In slice mode, + strides will be ignored, end indicates the size of a slice + starting at the location specified by begin. 
If end[i] is -1, + all remaining elements in that dimension are included in the slice Returns ------- From 4b7a92476f5d8d2bbbf717e07350d42a0aea3bb8 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Thu, 4 Jun 2020 02:16:28 +0800 Subject: [PATCH 19/22] fix comment --- python/tvm/relay/frontend/tensorflow.py | 21 +++++++++++++++------ python/tvm/relay/op/_transform.py | 1 - src/relay/op/tensor/transform.cc | 12 +++--------- tests/python/relay/test_any.py | 22 ++++++++++++---------- 4 files changed, 30 insertions(+), 26 deletions(-) diff --git a/python/tvm/relay/frontend/tensorflow.py b/python/tvm/relay/frontend/tensorflow.py index 002fb857e258..784e86b2f290 100644 --- a/python/tvm/relay/frontend/tensorflow.py +++ b/python/tvm/relay/frontend/tensorflow.py @@ -617,7 +617,16 @@ def _impl(inputs, attr, params, mod): def _nms(): def _impl(inputs, attr, params, mod): # Get parameter values - max_output_size = int(np.atleast_1d(inputs[2].data.asnumpy().astype("int64"))[0]) + # TODO(yongwww) change nms in relay to support symbolic max_output_size + try: + max_output_size = int(np.atleast_1d(inputs[2].data.asnumpy() + .astype("int64"))[0]) + except Exception: + try: + max_output_size = _infer_value(inputs[2], params, + mod).asnumpy().astype("int64").tolist()[0] + except Exception: + max_output_size = -1 iou_threshold = np.atleast_1d(inputs[3].data.asnumpy())[0] # score_threshold was introduced from V3 score_threshold = np.atleast_1d(inputs[4].data.asnumpy())[0] if len(inputs) > 4 else 0.0 @@ -1168,7 +1177,7 @@ def _impl(inputs, attr, params, mod): except (IndexError, KeyError, AttributeError): # Handle symbolic begin try: - begin = _infer_value(inputs[1], params).asnumpy().tolist()[0] + begin = _infer_value(inputs[1], params).asnumpy().tolist() except Exception: begin = inputs[1] try: @@ -1176,7 +1185,7 @@ def _impl(inputs, attr, params, mod): except (IndexError, KeyError, AttributeError): # Handle symbolic size try: - size = _infer_value(inputs[2], params).asnumpy().tolist()[0] + size = _infer_value(inputs[2], params).asnumpy().tolist() except Exception: size = inputs[2] return _op.strided_slice(inputs[0], begin=begin, end=size, slice_mode=True) @@ -1509,9 +1518,9 @@ def _transform_mask(stride_dim, ellipsis_mask): if begin_mask or end_mask or ellipsis_mask or new_axis_mask or shrink_axis_mask: begin, end, stride, fshape_indices = _transform_mask(stride_dim, ellipsis_mask) out = _op.strided_slice(inputs[0], - begin=_expr.const(begin), - end=_expr.const(end), - strides=_expr.const(stride)) + begin=begin, + end=end, + strides=stride) out_shape = _infer_shape(out, mod=mod) if not fshape_indices: fshape_indices = range(len(out_shape)) diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index 1ddd335e4d9c..32be4929a971 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -134,7 +134,6 @@ def _strided_slice_shape_func_input_data(data, begin, end, strides, @script def _strided_slice_shape_func_input_shape(data_shape, begin, end, strides, slice_mode): ndim = data_shape.shape[0] - assert ndim == 2, "not correct" out = output_tensor((ndim,), "int64") for i in const_range(ndim): cbegin = int64(0) diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index a2d4704dfa3b..e78f89b1d91c 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -1927,15 +1927,9 @@ Expr MakeStridedSlice(Expr data, Expr begin, Expr end, Expr strides, bool slice_ CHECK_EQ(cend->data->ndim, 1); 
CHECK_EQ(cstrides->data->ndim, 1); Array begin, end, strides; - for (int i = 0; i < cbegin->data->shape[0]; i++) { - begin.push_back(Integer(static_cast(ToScalar(cbegin->data, i)))); - } - for (int i = 0; i < cend->data->shape[0]; i++) { - end.push_back(Integer(static_cast(ToScalar(cend->data, i)))); - } - for (int i = 0; i < cstrides->data->shape[0]; i++) { - strides.push_back(Integer(static_cast(ToScalar(cstrides->data, i)))); - } + begin = ToVector(cbegin->data); + end = ToVector(cend->data); + strides = ToVector(cstrides->data); attrs->begin = begin; attrs->end = end; attrs->strides = strides; diff --git a/tests/python/relay/test_any.py b/tests/python/relay/test_any.py index 27a2c6250dbe..8fd6aed70a75 100644 --- a/tests/python/relay/test_any.py +++ b/tests/python/relay/test_any.py @@ -644,12 +644,12 @@ def test_arange_with_dynamic_shape(): tvm.testing.assert_allclose(result.asnumpy(), np.array(range(10)).astype("int32")+1) def verify_any_strided_slice(data_shape, begin_shape, end_shape, strides_shape, - data_np_shape, slice_mode=False, const_attrs=False, dtype="int32"): + data_np_shape, slice_mode=False, const_attrs=False): # Generate random numpy input data np_data = np.random.uniform(size=data_np_shape).astype('float32') - np_begin = np.random.randint(2, size=begin_shape, dtype=dtype) - np_end = np.random.randint(5, 10, size=end_shape, dtype=dtype) - np_strides = np.random.randint(1, 2 if slice_mode else 3, size=strides_shape, dtype=dtype) + np_begin = np.random.randint(2, size=begin_shape, dtype="int32") + np_end = np.random.randint(5, 10, size=end_shape, dtype="int32") + np_strides = np.random.randint(1, 2 if slice_mode else 3, size=strides_shape, dtype="int32") # target numpy result ref_res = topi.testing.strided_slice_python(np_data, np_begin, np_end, np_strides, slice_mode) @@ -657,15 +657,16 @@ def verify_any_strided_slice(data_shape, begin_shape, end_shape, strides_shape, mod = tvm.IRModule() data = relay.var('data', shape=data_shape, dtype='float32') if const_attrs: - begin = relay.const(np_begin, dtype) - end = relay.const(np_end, dtype) - strides = relay.const(np_strides, dtype) + data = relay.var('data', shape=data_np_shape, dtype='float32') + begin = relay.const(np_begin) + end = relay.const(np_end) + strides = relay.const(np_strides) args = [data] np_inputs = [np_data] else: - begin = relay.var('begin', shape=begin_shape, dtype=dtype) - end = relay.var('end', shape=end_shape, dtype=dtype) - strides = relay.var('strides', shape=strides_shape, dtype=dtype) + begin = relay.var('begin', shape=begin_shape, dtype="int32") + end = relay.var('end', shape=end_shape, dtype="int32") + strides = relay.var('strides', shape=strides_shape, dtype="int32") args = [data, begin, end, strides] np_inputs = [np_data, np_begin, np_end, np_strides] @@ -685,6 +686,7 @@ def test_any_strided_slice(): verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (23, 29, 41)) verify_any_strided_slice(any_dims(4), (4,), (4,), (4,), (40, 50, 60, 70)) verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (15, 17, 21), slice_mode=True) + verify_any_strided_slice(any_dims(2), (2,), (2,), (2,), (15, 21), const_attrs=True) def test_recursive_concat(): From f37574c68cf84b62ab7a2a7b72c513adc44aea44 Mon Sep 17 00:00:00 2001 From: Yong Wu Date: Sun, 7 Jun 2020 00:03:39 +0800 Subject: [PATCH 20/22] change slice_mode to string --- include/tvm/relay/attrs/transform.h | 14 +++++---- python/tvm/relay/frontend/pytorch.py | 2 +- python/tvm/relay/frontend/tensorflow.py | 4 +-- python/tvm/relay/op/_transform.py | 2 +- 
python/tvm/relay/op/transform.py | 13 ++++---- src/relay/op/tensor/transform.cc | 18 ++++++----- .../transforms/combine_parallel_conv2d.cc | 30 ++++++------------- src/relay/transforms/pattern_util.h | 2 +- tests/python/relay/test_any.py | 6 ++-- tests/python/relay/test_op_level4.py | 6 ++-- .../test_pass_combine_parallel_conv2d.py | 18 +++++------ topi/include/topi/transform.h | 6 ++-- .../topi/testing/strided_slice_python.py | 18 ++++++----- topi/python/topi/transform.py | 19 ++++++------ 14 files changed, 77 insertions(+), 81 deletions(-) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index c1700ed14b79..052bc608affc 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -213,18 +213,20 @@ struct StridedSliceAttrs : public tvm::AttrsNode { Optional> begin; Optional> end; Optional> strides; - bool slice_mode; + std::string slice_mode; TVM_DECLARE_ATTRS(StridedSliceAttrs, "relay.attrs.StridedSliceAttrs") { TVM_ATTR_FIELD(begin).describe("Indices for begin of slice, begin index is also inclusive"); TVM_ATTR_FIELD(end).describe("Indices for end of slice, end index is exclusive"); - TVM_ATTR_FIELD(strides).describe("Stride values of the slice"); + TVM_ATTR_FIELD(strides).describe( + "Stride values of the slice, a stride can be negative, which causes a reverse slice."); TVM_ATTR_FIELD(slice_mode) - .set_default(false) + .set_default("end") .describe( - "Specifies whether to enable slice mode. In slice mode," - "strides will be ignored, end indicates the size of a slice" - "starting at the location specified by begin. If end[i] is -1," + "The slice mode [end, size]." + "end - The default slice mode, ending indices for the slice." + "size - The input strides will be ignored, input end in this mode indicates the size" + "of a slice starting at the location specified by begin. 
If end[i] is -1," "all remaining elements in that dimension are included in the slice"); } }; diff --git a/python/tvm/relay/frontend/pytorch.py b/python/tvm/relay/frontend/pytorch.py index c7c80de15a54..d1cf8748315b 100644 --- a/python/tvm/relay/frontend/pytorch.py +++ b/python/tvm/relay/frontend/pytorch.py @@ -247,7 +247,7 @@ def _impl(inputs, input_types): begin=_expr.const(begin), end=_expr.const(end), strides=_expr.const(strides), - slice_mode=True) + slice_mode="size") return _impl def _split(): diff --git a/python/tvm/relay/frontend/tensorflow.py b/python/tvm/relay/frontend/tensorflow.py index 784e86b2f290..a30f6f0fda2d 100644 --- a/python/tvm/relay/frontend/tensorflow.py +++ b/python/tvm/relay/frontend/tensorflow.py @@ -666,7 +666,7 @@ def _impl(inputs, attr, params, mod): # slice to get the dynamic result ret = get_relay_op("strided_slice")(data_slice, begin=_expr.const([0]), - end=size, slice_mode=True) + end=size, slice_mode="size") return ret return _impl @@ -1188,7 +1188,7 @@ def _impl(inputs, attr, params, mod): size = _infer_value(inputs[2], params).asnumpy().tolist() except Exception: size = inputs[2] - return _op.strided_slice(inputs[0], begin=begin, end=size, slice_mode=True) + return _op.strided_slice(inputs[0], begin=begin, end=size, slice_mode="size") return _impl diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index 32be4929a971..a409fd44fc24 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -163,7 +163,7 @@ def strided_slice_shape_func(attrs, inputs, _): """ Shape func for strided_slice """ - slice_mode = convert(get_const_int(attrs.slice_mode)) + slice_mode = convert(0 if attrs.slice_mode == "end" else 1) # data independent if begin, end and strides exist if attrs.begin and attrs.end and attrs.strides: return [_strided_slice_shape_func_input_shape(inputs[0], attrs.begin, attrs.end, diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index 38a17c41bd32..fab6e2c37454 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py @@ -611,7 +611,7 @@ def split(data, indices_or_sections, axis=0): return TupleWrapper(_make.split(data, indices_or_sections, axis), ret_size) -def strided_slice(data, begin, end, strides=None, slice_mode=False): +def strided_slice(data, begin, end, strides=None, slice_mode="end"): """Strided slice of an array. Parameters @@ -629,11 +629,12 @@ def strided_slice(data, begin, end, strides=None, slice_mode=False): Specifies the stride values, it can be negative in that case, the input tensor will be reversed in that particular axis. - slice_mode: boolean, optional - Specifies whether to enable slice mode. In slice mode, - strides will be ignored, end indicates the size of a slice - starting at the location specified by begin. If end[i] is -1, - all remaining elements in that dimension are included in the slice + slice_mode: str, optional + The slice mode [end, size]. + end: The ending indices for the slice [default]. + size: The input strides will be ignored, input end in this mode indicates + the size of a slice starting at the location specified by begin. If end[i] + is -1, all remaining elements in that dimension are included in the slice. 
Returns ------- diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index e78f89b1d91c..55f2fced3e07 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -1689,7 +1689,7 @@ bool StridedSliceRel(const Array& types, int num_inputs, const Attrs& attr if (param->begin && param->end && param->strides) { // stride will be set as 1 if slice mode is enabled std::vector stride_vec(num_axis, 1); - if (!param->slice_mode) { + if (param->slice_mode == "end") { for (size_t i = 0; i < param->strides.value().size(); ++i) { CHECK(param->strides.value()[i].defined()); stride_vec[i] = param->strides.value()[i]->value; @@ -1713,14 +1713,16 @@ bool StridedSliceRel(const Array& types, int num_inputs, const Attrs& attr // allow end to be None if (!param->end.value()[i].defined()) { end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range); - } else if (param->slice_mode) { + } else if (param->slice_mode == "size") { if (param->end.value()[i]->value < 0) { end_vec.push_back(max_range); } else { end_vec.push_back(begin_vec[i] + param->end.value()[i]->value); } - } else { + } else if (param->slice_mode == "end") { end_vec.push_back(param->end.value()[i]->value); + } else { + LOG(FATAL) << "Unsupported slice mode: " << param->slice_mode; } } for (int64_t i = end_vec.size(); i < num_axis; ++i) { @@ -1805,7 +1807,7 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, if (params->begin && params->end && params->strides) { for (Integer i : params->strides.value()) { CHECK(i.defined()); - strides.push_back(params->slice_mode ? 1 : i->value); + strides.push_back(params->slice_mode == "size" ? 1 : i->value); } for (Integer i : params->begin.value()) { @@ -1842,7 +1844,7 @@ Array> StridedSliceInferCorrectLayout(const Attrs& attrs, int64_t ed; if (!end[i].defined()) { ed = shape[i].as()->value; - } else if (params->slice_mode) { + } else if (params->slice_mode == "size") { if (end[i]->value < 0) { ed = shape[i].as()->value; } else { @@ -1918,7 +1920,7 @@ Array StridedSliceCompute(const Attrs& attrs, const Array(); const ConstantNode *cbegin, *cend, *cstrides; if ((cbegin = begin.as()) && (cend = end.as()) && @@ -1970,7 +1972,7 @@ Examples:: .add_argument("begin", "Tensor", "The indices to begin with in the slicing.") .add_argument("end", "Tensor", "Indices indicating end of the slice.") .add_argument("strides", "Tensor", "The stride values.") - .add_argument("slice_mode", "Tensor", "Whether to enable slice mode.") + .add_argument("slice_mode", "Tensor", "The slice mode.") .set_support_level(4) .set_attrs_type() .add_type_rel("StridedSlice", StridedSliceRel) @@ -2230,7 +2232,7 @@ Array SliceLikeCompute(const Attrs& attrs, const Array& } } return Array{topi::strided_slice(inputs[0], GetIntArray(begin_idx), - GetIntArray(end_idx), GetIntArray(strides), false)}; + GetIntArray(end_idx), GetIntArray(strides), "end")}; } TVM_REGISTER_GLOBAL("relay.op._make.slice_like").set_body_typed(MakeSliceLike); diff --git a/src/relay/transforms/combine_parallel_conv2d.cc b/src/relay/transforms/combine_parallel_conv2d.cc index d680de49b005..0bf9e7fd38a6 100644 --- a/src/relay/transforms/combine_parallel_conv2d.cc +++ b/src/relay/transforms/combine_parallel_conv2d.cc @@ -168,8 +168,8 @@ class ParallelConv2DCombiner : public ParallelOpCombiner { for (const auto& branch : branches) { const CallNode* conv2d = branch[0]; int64_t channels = GetConv2DSuperChannelsDim(conv2d); - Array begin; - Array end; + std::vector begin; + std::vector end; for (size_t i = 0; i < channel_pos_; i++) 
{ begin.push_back(0); end.push_back(-1); @@ -177,27 +177,15 @@ class ParallelConv2DCombiner : public ParallelOpCombiner { begin.push_back(index); index += channels; end.push_back(index); - DLContext ctx; - ctx.device_type = kDLCPU; - ctx.device_id = 0; - auto begin_ndarray = runtime::NDArray::Empty({int64_t(begin.size())}, DataType::Int(64), ctx); - auto end_ndarray = runtime::NDArray::Empty({int64_t(begin.size())}, DataType::Int(64), ctx); - auto strides_ndarray = - runtime::NDArray::Empty({int64_t(begin.size())}, DataType::Int(64), ctx); - - auto* begin_data = static_cast(begin_ndarray->data); - auto* end_data = static_cast(end_ndarray->data); - auto* strides_data = static_cast(strides_ndarray->data); - + std::vector strides(begin.size(), 1); for (size_t i = 0; i < begin.size(); ++i) { - begin_data[i] = begin[i]; - end_data[i] = end[i]; - end_data[i] -= begin_data[i]; - strides_data[i] = 1; + end[i] -= begin[i]; } - - auto slice = MakeStridedSlice(data, Constant(begin_ndarray), Constant(end_ndarray), - Constant(strides_ndarray), true); + std::vector ndarray_shape = {static_cast(begin.size())}; + Constant begin_const = MakeConstantTensor(DataType::Int(64), ndarray_shape, begin); + Constant end_const = MakeConstantTensor(DataType::Int(64), ndarray_shape, end); + Constant strides_const = MakeConstantTensor(DataType::Int(64), ndarray_shape, strides); + auto slice = MakeStridedSlice(data, begin_const, end_const, strides_const, "size"); subst_map->insert({GetRef(branch[depth]), slice}); } } diff --git a/src/relay/transforms/pattern_util.h b/src/relay/transforms/pattern_util.h index 2e73c632230b..7518eb9ac81a 100644 --- a/src/relay/transforms/pattern_util.h +++ b/src/relay/transforms/pattern_util.h @@ -673,7 +673,7 @@ Expr MakeConcatenate(Expr data, int axis); Expr MakeRepeat(Expr data, int repeats, int axis); -Expr MakeStridedSlice(Expr data, Expr begin, Expr end, Expr strides, bool slice_mode); +Expr MakeStridedSlice(Expr data, Expr begin, Expr end, Expr strides, String slice_mode); Expr MakeStack(Expr data, int axis); diff --git a/tests/python/relay/test_any.py b/tests/python/relay/test_any.py index 8fd6aed70a75..8e535a692b88 100644 --- a/tests/python/relay/test_any.py +++ b/tests/python/relay/test_any.py @@ -644,12 +644,12 @@ def test_arange_with_dynamic_shape(): tvm.testing.assert_allclose(result.asnumpy(), np.array(range(10)).astype("int32")+1) def verify_any_strided_slice(data_shape, begin_shape, end_shape, strides_shape, - data_np_shape, slice_mode=False, const_attrs=False): + data_np_shape, slice_mode="end", const_attrs=False): # Generate random numpy input data np_data = np.random.uniform(size=data_np_shape).astype('float32') np_begin = np.random.randint(2, size=begin_shape, dtype="int32") np_end = np.random.randint(5, 10, size=end_shape, dtype="int32") - np_strides = np.random.randint(1, 2 if slice_mode else 3, size=strides_shape, dtype="int32") + np_strides = np.random.randint(1, 2 if slice_mode == "size" else 3, size=strides_shape, dtype="int32") # target numpy result ref_res = topi.testing.strided_slice_python(np_data, np_begin, np_end, np_strides, slice_mode) @@ -685,7 +685,7 @@ def test_any_strided_slice(): verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (15, 17, 21)) verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (23, 29, 41)) verify_any_strided_slice(any_dims(4), (4,), (4,), (4,), (40, 50, 60, 70)) - verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (15, 17, 21), slice_mode=True) + verify_any_strided_slice(any_dims(3), (3,), (3,), (3,), (15, 17, 21), 
slice_mode="size") verify_any_strided_slice(any_dims(2), (2,), (2,), (2,), (15, 21), const_attrs=True) diff --git a/tests/python/relay/test_op_level4.py b/tests/python/relay/test_op_level4.py index dad1adebbf4e..74231cb0d5a1 100644 --- a/tests/python/relay/test_op_level4.py +++ b/tests/python/relay/test_op_level4.py @@ -296,7 +296,7 @@ def test_mean_var_std(): def test_strided_slice(): - def verify(dshape, begin, end, strides, output, slice_mode=False, + def verify(dshape, begin, end, strides, output, slice_mode="end", attr_const=True, test_ref=True, dtype="int32"): x = relay.var("x", relay.TensorType(dshape, "float32")) ndim = len(dshape) @@ -355,9 +355,9 @@ def verify(dshape, begin, end, strides, output, slice_mode=False, verify((3, 4, 3), [1, -1, 0], [4, -5, 3], [2, -1, 1], (1, 4, 3)) verify((3, 4, 3), [1, -1, 0], [2, -3, 3], [1, -1, 1], (1, 2, 3)) verify((3, 4, 3), [1, 0, 0], [3, -1, 3], [1, 1, 1], - (2, 4, 3), slice_mode=True, test_ref=False) + (2, 4, 3), slice_mode="size", test_ref=False) verify((3, 4, 3), [1, 0, 0], [-1, 2, 3], [1, 1, 1], - (2, 2, 3), slice_mode=True, test_ref=True) + (2, 2, 3), slice_mode="size", test_ref=True) def test_strided_set(): def verify(dshape, begin, end, strides, vshape, test_ref=True): diff --git a/tests/python/relay/test_pass_combine_parallel_conv2d.py b/tests/python/relay/test_pass_combine_parallel_conv2d.py index 429fd620e09e..68e7fece7e98 100644 --- a/tests/python/relay/test_pass_combine_parallel_conv2d.py +++ b/tests/python/relay/test_pass_combine_parallel_conv2d.py @@ -53,18 +53,18 @@ def expected(x, w1, w2, w3, w4, channels1, channels2, channels3, channels4): begin=relay.const([0, 0], "int64"), end=relay.const([-1, channels1], "int64"), strides=relay.const([1, 1], 'int64'), - slice_mode=True) + slice_mode="size") y2 = relay.strided_slice(y, begin=relay.const([0, channels1], "int64"), end=relay.const([-1, channels2], "int64"), strides=relay.const([1, 1], 'int64'), - slice_mode=True) + slice_mode="size") y3 = relay.nn.conv2d(x, w3) y4 = relay.strided_slice(y, begin=relay.const([0, channels1 + channels2], "int64"), end=relay.const([-1, channels4], "int64"), strides=relay.const([1, 1], 'int64'), - slice_mode=True) + slice_mode="size") y5 = relay.nn.max_pool2d(x) y = relay.Tuple((y1, y2, y3, y4, y5)) return relay.Function(args, y) @@ -113,12 +113,12 @@ def expected(x, w1, w2, scale1, scale2, bias, channels1, channels2): begin=relay.const([0, 0], "int64"), end=relay.const([-1, channels1], "int64"), strides=relay.const([1, 1], "int64"), - slice_mode=True) + slice_mode="size") y2 = relay.strided_slice(y, begin=relay.const([0, channels1], "int64"), end=relay.const([-1, channels2], "int64"), strides=relay.const([1, 1], "int64"), - slice_mode=True) + slice_mode="size") y2 = relay.add(y2, bias) y = relay.Tuple((y1, y2)) return relay.Function(args, y) @@ -160,12 +160,12 @@ def expected(x, w1, w2, scale1, scale2, channels1, channels2): begin=relay.const([0, 0], "int64"), end=relay.const([-1, channels1], "int64"), strides=relay.const([1, 1], "int64"), - slice_mode=True) + slice_mode="size") y2 = relay.strided_slice(y, begin=relay.const([0, channels1], "int64"), end=relay.const([-1, channels2], "int64"), strides=relay.const([1, 1], "int64"), - slice_mode=True) + slice_mode="size") y1 = relay.multiply(y1, scale1) y2 = relay.multiply(y2, scale2) y = relay.Tuple((y1, y2)) @@ -208,12 +208,12 @@ def expected(x, w, channels, repeat): begin=relay.const([0, 0], "int64"), end=relay.const([-1, channels], "int64"), strides=relay.const([1, 1], "int64"), - slice_mode=True) + 
slice_mode="size")
         y2 = relay.strided_slice(y,
                                  begin=relay.const([0, channels], "int64"),
                                  end=relay.const([-1, channels], "int64"),
                                  strides=relay.const([1, 1], "int64"),
-                                 slice_mode=True)
+                                 slice_mode="size")
         y = relay.concatenate((y1, y2), axis=1)
         return relay.Function(args, y)

diff --git a/topi/include/topi/transform.h b/topi/include/topi/transform.h
index cacde55eb703..d46caff39e9a 100644
--- a/topi/include/topi/transform.h
+++ b/topi/include/topi/transform.h
@@ -520,15 +520,15 @@ inline Array split(const Tensor& x, Array split_indices, int ax
  * \param begin The indices to begin with in the slicing
  * \param end Indicies indicating end of the slice
  * \param strides Specifies the stride values, it can be negative
- * \param slice_mode Specifies whether to enable slice mode
  * in that case, the input tensor will be reversed in that particular axis
+ * \param slice_mode Specifies the slice mode
  * \param name The name of the operation
  * \param tag The tag to mark the operation
  *
  * \return A Tensor whose op member is the split operation
  */
 inline Tensor strided_slice(const Tensor& x, const Array& begin, const Array& end,
-                            const Array& strides, const bool& slice_mode,
+                            const Array& strides, std::string slice_mode = "end",
                             std::string name = "T_strided_slice", std::string tag = kInjective) {
   size_t src_tensor_dim = static_cast(x->shape.size());
   // Setup the ranges.
@@ -561,7 +561,7 @@ inline Tensor strided_slice(const Tensor& x, const Array& begin, const
 
     if (!end[i].defined()) {
       end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range);
-    } else if (slice_mode) {
+    } else if (slice_mode == "size") {
       if (end[i]->value < 0) {
         end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range);
       } else {
diff --git a/topi/python/topi/testing/strided_slice_python.py b/topi/python/topi/testing/strided_slice_python.py
index 8d68fc1c1492..970e1dedd8c9 100644
--- a/topi/python/topi/testing/strided_slice_python.py
+++ b/topi/python/topi/testing/strided_slice_python.py
@@ -17,7 +17,7 @@
 """strided_slice/set in python"""
 
 
-def strided_slice_python(data, begin, end, strides, slice_mode=False):
+def strided_slice_python(data, begin, end, strides, slice_mode="end"):
     """Python version of strided slice operator.
 
     Parameters
@@ -34,11 +34,13 @@ def strided_slice_python(data, begin, end, strides, slice_mode="end"):
     strides : list
         The stride of each slice.
 
-    slice_mode : boolean
-        Specifies whether to enable slice mode.
-        In slice mode, strides will be ignored,
-        end indicates the size of a slice starting
-        at the location specified by begin.
+    slice_mode : str, optional
+        The slice mode [end, size].
+        end: The default slice mode, ending indices for the slice.
+        size: The input strides will be ignored, input end in this mode indicates
+        the size of a slice starting at the location specified by begin. If end[i] is -1,
+        all remaining elements in that dimension are included in the slice.
+
 
     Returns
     -------
@@ -49,13 +51,13 @@ def strided_slice_python(data, begin, end, strides, slice_mode="end"):
     strides = [] if strides is None else strides
     slices = []
     for i in range(len(data.shape)):
         new_stride = None
-        if not slice_mode and i < len(strides):
+        if slice_mode == "end" and i < len(strides):
             new_stride = strides[i]
         new_begin = begin[i] if i < len(begin) else None
         if i >= len(end):
             new_end = None
-        elif slice_mode:
+        elif slice_mode == "size":
             if end[i] < 0:
                 new_end = None
             else:
diff --git a/topi/python/topi/transform.py b/topi/python/topi/transform.py
index a5a564b106f1..e1984458d677 100644
--- a/topi/python/topi/transform.py
+++ b/topi/python/topi/transform.py
@@ -131,7 +131,7 @@ def flip(a, axis=0):
     """
     return cpp.flip(a, axis)
 
-def strided_slice(a, begin, end, strides=None, slice_mode=False):
+def strided_slice(a, begin, end, strides=None, slice_mode="end"):
     """Slice of an array.
 
     Parameters
@@ -139,22 +139,23 @@ def strided_slice(a, begin, end, strides=None, slice_mode="end"):
     a : tvm.te.Tensor
         The tensor to be sliced.
 
-    begin: list of int
+    begin : list of int
         The indices to begin with in the slicing.
 
-    end: list of int
+    end : list of int
         Indicies indicating end of the slice.
 
-    strides: list of int, optional
+    strides : list of int, optional
         Specifies the stride values, it can be negative
         in that case, the input tensor will be reversed
         in that particular axis.
 
-    slice_mode: boolean, optional
-        Specifies whether to enable slice mode. In slice mode,
-        strides will be ignored, end indicates the size of a slice
-        starting at the location specified by begin. If end[i] is -1,
-        all remaining elements in that dimension are included in the slice
+    slice_mode : str, optional
+        The slice mode [end, size].
+        end - The default slice mode, ending indices for the slice.
+        size - The input strides will be ignored, input end in this mode indicates
+        the size of a slice starting at the location specified by begin. If end[i]
+        is -1, all remaining elements in that dimension are included in the slice.
 
     Returns
     -------
From 5e32d734235fe489f90cd96452e325fe1fc5a8fc Mon Sep 17 00:00:00 2001
From: Yong Wu
Date: Sun, 7 Jun 2020 06:53:43 +0800
Subject: [PATCH 21/22] fix CI

---
 python/tvm/relay/op/transform.py | 12 ++++++------
 topi/python/topi/transform.py    |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py
index fab6e2c37454..6b93bbf11114 100644
--- a/python/tvm/relay/op/transform.py
+++ b/python/tvm/relay/op/transform.py
@@ -619,22 +619,22 @@ def strided_slice(data, begin, end, strides=None, slice_mode="end"):
     data : relay.Expr
         The source array to be sliced.
 
-    begin: relay.Expr, Tuple[int], or List[int]
+    begin : relay.Expr, Tuple[int], or List[int]
         The indices to begin with in the slicing.
 
-    end: relay.Expr, Tuple[int], or List[int]
+    end : relay.Expr, Tuple[int], or List[int]
         Indices indicating end of the slice.
 
-    strides: relay.Expr, Tuple[int], or List[int], optional
+    strides : relay.Expr, Tuple[int], or List[int], optional
         Specifies the stride values, it can be negative in that case,
         the input tensor will be reversed in that particular axis.
 
-    slice_mode: str, optional
+    slice_mode : str, optional
         The slice mode [end, size].
         end: The ending indices for the slice [default].
         size: The input strides will be ignored, input end in this mode indicates
-            the size of a slice starting at the location specified by begin. If end[i]
-            is -1, all remaining elements in that dimension are included in the slice.
+        the size of a slice starting at the location specified by begin. If end[i]
+        is -1, all remaining elements in that dimension are included in the slice.
 
     Returns
     -------
diff --git a/topi/python/topi/transform.py b/topi/python/topi/transform.py
index e1984458d677..f9a7c7796b67 100644
--- a/topi/python/topi/transform.py
+++ b/topi/python/topi/transform.py
@@ -152,10 +152,10 @@ def strided_slice(a, begin, end, strides=None, slice_mode="end"):
 
     slice_mode : str, optional
         The slice mode [end, size].
-        end - The default slice mode, ending indices for the slice.
+        end - The ending indices for the slice [default].
         size - The input strides will be ignored, input end in this mode indicates
-            the size of a slice starting at the location specified by begin. If end[i]
-            is -1, all remaining elements in that dimension are included in the slice.
+        the size of a slice starting at the location specified by begin. If end[i]
+        is -1, all remaining elements in that dimension are included in the slice.
 
     Returns
     -------
From 5b5dda5389171c934b94b6b4ef664caf74488b8e Mon Sep 17 00:00:00 2001
From: Yong Wu
Date: Tue, 9 Jun 2020 10:35:53 +0800
Subject: [PATCH 22/22] update docstring

---
 python/tvm/relay/op/vision/nms.py | 12 +++++++++---
 topi/python/topi/cuda/nms.py      | 10 ++++++++--
 topi/python/topi/vision/nms.py    | 12 +++++++++---
 3 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/python/tvm/relay/op/vision/nms.py b/python/tvm/relay/op/vision/nms.py
index 38dcbe5452be..b60b49ab0ccd 100644
--- a/python/tvm/relay/op/vision/nms.py
+++ b/python/tvm/relay/op/vision/nms.py
@@ -77,13 +77,19 @@ def non_max_suppression(data,
         or [batch_size, num_anchors, 5]. The last dimension should be in format of
         [class_id, score, box_left, box_top, box_right, box_bottom]
-        or [score, box_left, box_top, box_right, box_bottom].
+        or [score, box_left, box_top, box_right, box_bottom]. It could
+        be the second output out_tensor of get_valid_counts.
 
     valid_count : relay.Expr
-        1-D tensor for valid number of boxes.
+        1-D tensor for valid number of boxes. It could be the output
+        valid_count of get_valid_counts.
 
     indices: relay.Expr
-        2-D tensor with shape [batch_size, num_anchors]
+        2-D tensor with shape [batch_size, num_anchors], represents
+        the index of box in original data. It could be the third
+        output out_indices of get_valid_counts. The values in the
+        second dimension are like the output of arange(num_anchors)
+        if get_valid_counts is not used before non_max_suppression.
 
     max_output_size : int, optional
         Max number of output valid boxes for each instance.
diff --git a/topi/python/topi/cuda/nms.py b/topi/python/topi/cuda/nms.py
index c72cdad0454c..f2c1143b5fb8 100644
--- a/topi/python/topi/cuda/nms.py
+++ b/topi/python/topi/cuda/nms.py
@@ -354,12 +354,18 @@ def non_max_suppression(data, valid_count, indices, max_output_size=-1,
         3-D tensor with shape [batch_size, num_anchors, elem_length].
         The last dimension should be in format of
         [class_id, score, box_left, box_top, box_right, box_bottom].
+        It could be the second output out_tensor of get_valid_counts.
 
     valid_count : tvm.te.Tensor
-        1-D tensor for valid number of boxes.
+        1-D tensor for valid number of boxes. It could be the output
+        valid_count of get_valid_counts.
 
     indices : tvm.te.Tensor
-        2-D tensor with shape [batch_size, num_anchors].
+        2-D tensor with shape [batch_size, num_anchors], represents
+        the index of box in original data. It could be the third
+        output out_indices of get_valid_counts. The values in the
+        second dimension are like the output of arange(num_anchors)
+        if get_valid_counts is not used before non_max_suppression.
 
     max_output_size : optional, int
         Max number of output valid boxes for each instance.
diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py
index 1a2089683b62..269c876d647e 100644
--- a/topi/python/topi/vision/nms.py
+++ b/topi/python/topi/vision/nms.py
@@ -235,17 +235,23 @@ def hybrid_nms(data, sorted_index, valid_count, indices, batch_size, num_anchors
     ----------
     data: tvm.te.Tensor or numpy NDArray
         Bounding boxes with class and score. 3-D tensor with shape
-        [batch_size, num_anchors, 6].
+        [batch_size, num_anchors, 6]. It could be the second output
+        out_tensor of get_valid_counts.
 
     sorted_index : tvm.te.Tensor or numpy NDArray
         Bounding box indexes sorted by score, with shape
         [batch_size, num_anchors].
 
     valid_count : tvm.te.Tensor or numpy NDArray
-        1-D tensor for valid number of boxes.
+        1-D tensor for valid number of boxes. It could be the output
+        valid_count of get_valid_counts.
 
     indices : tvm.te.Tensor or numpy.NDArray
-        indices in original tensor, with shape [batch_size, num_anchors]
+        indices in original tensor, with shape [batch_size, num_anchors],
+        represents the index of box in original data. It could be the third
+        output out_indices of get_valid_counts. The values in the second
+        dimension are like the output of arange(num_anchors) if get_valid_counts
+        is not used before non_max_suppression.
 
     batch_size: tvm.tir.IntImm or tvm.tir.Var
        Batch size. We need to pass it in since hybrid script doesn't support
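
The slice_mode semantics that patches 18-22 converge on are easier to see outside the diffs. The following NumPy-only sketch mirrors the behavior of the patched topi.testing.strided_slice_python helper shown above; the helper name strided_slice_ref and the sample shapes are illustrative only and not part of the patch series.

import numpy as np

def strided_slice_ref(data, begin, end, strides=None, slice_mode="end"):
    # "end" mode: end holds ending indices and strides are honored.
    # "size" mode: strides are ignored (treated as 1) and end[i] is the
    # length of the slice starting at begin[i]; -1 means "rest of the axis".
    strides = [] if strides is None else strides
    slices = []
    for i in range(len(data.shape)):
        new_stride = strides[i] if slice_mode == "end" and i < len(strides) else None
        new_begin = begin[i] if i < len(begin) else None
        if i >= len(end):
            new_end = None
        elif slice_mode == "size":
            new_end = None if end[i] < 0 else new_begin + end[i]
        else:
            new_end = end[i]
        slices.append(slice(new_begin, new_end, new_stride))
    return data[tuple(slices)]

x = np.arange(36).reshape(3, 4, 3)
# "end" mode: begin/end/strides behave like ordinary Python slicing.
print(strided_slice_ref(x, [1, 0, 0], [3, 4, 3], [1, 2, 1]).shape)  # (2, 2, 3)
# "size" mode: mirrors verify((3, 4, 3), [1, 0, 0], [3, -1, 3], [1, 1, 1],
# (2, 4, 3), slice_mode="size") from test_op_level4 above.
print(strided_slice_ref(x, [1, 0, 0], [3, -1, 3], slice_mode="size").shape)  # (2, 4, 3)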
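
Patch 22's docstrings also pin down how the three outputs of get_valid_counts line up with the data, valid_count, and indices arguments of non_max_suppression. Below is a minimal Relay sketch of that wiring under the post-patch API; the input shape, layout, thresholds, and variable names are illustrative assumptions, not code from the series.

import tvm
from tvm import relay

# One batch of 2500 anchors in [class_id, score, x1, y1, x2, y2] layout.
boxes = relay.var("boxes", shape=(1, 2500, 6), dtype="float32")

# valid[0] = valid_count, valid[1] = out_tensor, valid[2] = out_indices,
# matching the docstrings updated in patch 22.
valid = relay.vision.get_valid_counts(boxes, score_threshold=0.0,
                                      id_index=0, score_index=1)

# return_indices=False returns the surviving boxes directly.
nms = relay.vision.non_max_suppression(valid[1], valid[0], valid[2],
                                       max_output_size=-1, iou_threshold=0.5,
                                       force_suppress=False, top_k=-1,
                                       return_indices=False)

mod = tvm.IRModule.from_expr(relay.Function([boxes], nms))
print(mod)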