From dbec9a159a2a80e2012f89706591eb85f3b016f6 Mon Sep 17 00:00:00 2001 From: barry-jin Date: Tue, 28 Sep 2021 18:31:10 -0700 Subject: [PATCH 01/19] [API] Add floor_divide --- python/mxnet/ndarray/numpy/_op.py | 40 ++- python/mxnet/numpy/multiarray.py | 62 ++++- .../numpy/np_elemwise_broadcast_op.cc | 9 + src/operator/mshadow_op.h | 90 +++++++ src/operator/numpy/np_floor_divide-inl.h | 240 ++++++++++++++++++ src/operator/numpy/np_floor_divide.cc | 137 ++++++++++ src/operator/numpy/np_floor_divide.cu | 40 +++ 7 files changed, 616 insertions(+), 2 deletions(-) create mode 100644 src/operator/numpy/np_floor_divide-inl.h create mode 100644 src/operator/numpy/np_floor_divide.cc create mode 100644 src/operator/numpy/np_floor_divide.cu diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py index a7465865b707..53d425d8f961 100644 --- a/python/mxnet/ndarray/numpy/_op.py +++ b/python/mxnet/ndarray/numpy/_op.py @@ -50,7 +50,8 @@ 'true_divide', 'nonzero', 'quantile', 'percentile', 'shares_memory', 'may_share_memory', 'interp', 'diff', 'ediff1d', 'resize', 'polyval', 'nan_to_num', 'isnan', 'isinf', 'isposinf', 'isneginf', 'isfinite', 'atleast_1d', 'atleast_2d', 'atleast_3d', 'fill_diagonal', 'squeeze', - 'where', 'bincount', 'rollaxis', 'diagflat', 'repeat', 'prod', 'pad', 'cumsum', 'sum', 'diag', 'diagonal'] + 'where', 'bincount', 'rollaxis', 'diagflat', 'repeat', 'prod', 'pad', 'cumsum', 'sum', 'diag', 'diagonal', + 'floor_divide'] @set_module('mxnet.ndarray.numpy') @@ -1167,6 +1168,43 @@ def true_divide(x1, x2, out=None): return _api_internal.true_divide(x1, x2, out) +@set_module('mxnet.ndarray.numpy') +def floor_divide(x1, x2, out=None): + """Return the largest integer smaller or equal to the division of the inputs. + It is equivalent to the Python // operator and pairs with the Python % (remainder), + function so that a = a % b + b * (a // b) up to roundoff. + + Parameters + ---------- + x1 : ndarray or scalar + Dividend array. + x2 : ndarray or scalar + Divisor array. + out : ndarray + A location into which the result is stored. If provided, it must have a shape + that the inputs broadcast to. If not provided or None, a freshly-allocated array + is returned. + + Returns + ------- + out : ndarray or scalar + This is a scalar if both x1 and x2 are scalars. + + .. note:: + + This operator now supports automatic type promotion. The resulting type will be determined + according to the following rules: + + * If both inputs are of floating number types, the output is the more precise type. + * If only one of the inputs is floating number type, the result is that type. 
+ * If both inputs are of integer types (including boolean), the output is the more + precise type + """ + if isinstance(x1, numeric_types) and isinstance(x2, numeric_types): + return _np.floor_divide(x1, x2, out=out) + return _api_internal.floor_divide(x1, x2, out) + + @set_module('mxnet.ndarray.numpy') @wrap_np_binary_func def mod(x1, x2, out=None, **kwargs): diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py index c2d9db95f471..82bf9c0e1d6b 100644 --- a/python/mxnet/numpy/multiarray.py +++ b/python/mxnet/numpy/multiarray.py @@ -80,7 +80,8 @@ 'quantile', 'percentile', 'shares_memory', 'may_share_memory', 'diff', 'ediff1d', 'resize', 'matmul', 'nan_to_num', 'isnan', 'isinf', 'isposinf', 'isneginf', 'isfinite', 'polyval', 'where', 'bincount', 'atleast_1d', 'atleast_2d', 'atleast_3d', 'fill_diagonal', 'squeeze', - 'diagflat', 'repeat', 'prod', 'pad', 'cumsum', 'sum', 'rollaxis', 'diag', 'diagonal'] + 'diagflat', 'repeat', 'prod', 'pad', 'cumsum', 'sum', 'rollaxis', 'diag', 'diagonal', + 'floor_divide'] __all__ += fallback.__all__ @@ -1093,6 +1094,23 @@ def __mul__(self, other): """x.__mul__(y) <=> x * y""" return multiply(self, other) + @wrap_mxnp_np_ufunc + def __floordiv__(self, other): + """x.__floordiv__(y) <=> x // y""" + return floor_divide(self, other) + + @wrap_mxnp_np_ufunc + def __ifloordiv__(self, other): + """x.__ifloordiv__(y) <=> x //= y""" + if not self.writable: + raise ValueError('trying to divide from a readonly ndarray') + return floor_divide(self, other, out=self) + + @wrap_mxnp_np_ufunc + def __rfloordiv__(self, other): + """x.__rfloordiv__(y) <=> y // x""" + return floor_divide(other, self) + def __neg__(self): return negative(self) @@ -3399,6 +3417,48 @@ def true_divide(x1, x2, out=None): return _mx_nd_np.true_divide(x1, x2, out=out) +@set_module('mxnet.numpy') +def floor_divide(x1, x2, out=None): + """Return the largest integer smaller or equal to the division of the inputs. + It is equivalent to the Python // operator and pairs with the Python % (remainder), + function so that a = a % b + b * (a // b) up to roundoff. + + Parameters + ---------- + x1 : ndarray or scalar + Dividend array. + x2 : ndarray or scalar + Divisor array. + out : ndarray + A location into which the result is stored. If provided, it must have a shape + that the inputs broadcast to. If not provided or None, a freshly-allocated array + is returned. + + Returns + ------- + out : ndarray or scalar + This is a scalar if both x1 and x2 are scalars. + + .. note:: + + This operator now supports automatic type promotion. The resulting type will be determined + according to the following rules: + + * If both inputs are of floating number types, the output is the more precise type. + * If only one of the inputs is floating number type, the result is that type. 
+ * If both inputs are of integer types (including boolean), the output is the more + precise type + + Examples + -------- + >>> np.floor_divide(7,3) + 2 + >>> np.floor_divide([1., 2., 3., 4.], 2.5) + array([ 0., 0., 1., 1.]) + """ + return _mx_nd_np.floor_divide(x1, x2, out=out) + + @set_module('mxnet.numpy') @wrap_np_binary_func def mod(x1, x2, out=None, **kwargs): diff --git a/src/api/operator/numpy/np_elemwise_broadcast_op.cc b/src/api/operator/numpy/np_elemwise_broadcast_op.cc index 184a4e241eff..2161a421cff5 100644 --- a/src/api/operator/numpy/np_elemwise_broadcast_op.cc +++ b/src/api/operator/numpy/np_elemwise_broadcast_op.cc @@ -61,6 +61,15 @@ MXNET_REGISTER_API("_npi.true_divide") UFuncHelper(args, ret, op, op_scalar, op_rscalar); }); +MXNET_REGISTER_API("_npi.floor_divide") + .set_body([](runtime::MXNetArgs args, runtime::MXNetRetValue* ret) { + using namespace runtime; + const nnvm::Op* op = Op::Get("_npi_floor_divide"); + const nnvm::Op* op_scalar = Op::Get("_npi_floor_divide_scalar"); + const nnvm::Op* op_rscalar = Op::Get("_npi_rfloor_divide_scalar"); + UFuncHelper(args, ret, op, op_scalar, op_rscalar); + }); + MXNET_REGISTER_API("_npi.mod").set_body([](runtime::MXNetArgs args, runtime::MXNetRetValue* ret) { using namespace runtime; const nnvm::Op* op = Op::Get("_npi_mod"); diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h index eee928452f50..091b3481494a 100644 --- a/src/operator/mshadow_op.h +++ b/src/operator/mshadow_op.h @@ -221,6 +221,96 @@ struct rtrue_divide : public mxnet_op::tunable { } }; +struct floor_divide : public mxnet_op::tunable { + template + MSHADOW_XINLINE static DType Map(DType a, DType b) { + DType c = static_cast(::floor(a / b)); + if ((c * b != a) && (a < DType(0)) != (b < DType(0))) { + return DType(c - 1); + } else { + return c; + } + } + + template ::value, int>::type = 0> + MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) { + mshadow::half::half_t a_half = static_cast(a); + mshadow::half::half_t c = static_cast(::floor(a_half / b)); + if ((c * b != a_half) && (a < DType(0)) != (b < mshadow::half::half_t(0))) { + return mshadow::half::half_t(c - 1); + } else { + return c; + } + } + + template ::value, int>::type = 0> + MSHADOW_XINLINE static float Map(DType a, float b) { + float a_float = static_cast(a); + float c = ::floorf(a_float / b); + if ((c * b != a_float) && (a < DType(0)) != (b < float(0))) { + return float(c - 1); + } else { + return c; + } + } + + template ::value, int>::type = 0> + MSHADOW_XINLINE static double Map(DType a, double b) { + double a_double = static_cast(a); + double c = ::floor(a_double / b); + if ((c * b != a_double) && (a < DType(0)) != (b < double(0))) { + return double(c - 1); + } else { + return c; + } + } +}; + +struct rfloor_divide : public mxnet_op::tunable { + template + MSHADOW_XINLINE static DType Map(DType a, DType b) { + DType c = static_cast(::floor(b / a)); + if ((c * a != b) && (a < DType(0)) != (b < DType(0))) { + return DType(c - 1); + } else { + return c; + } + } + + template ::value, int>::type = 0> + MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) { + mshadow::half::half_t a_half = static_cast(a); + mshadow::half::half_t c = static_cast(::floor(b / a_half)); + if ((c * a_half != b) && (a < DType(0)) != (b < mshadow::half::half_t(0))) { + return mshadow::half::half_t(c - 1); + } else { + return c; + } + } + + template ::value, int>::type = 0> + MSHADOW_XINLINE static float Map(DType a, float b) { + float a_float = 
static_cast(a); + float c = ::floorf(b / a_float); + if ((c * a_float != b) && (a < DType(0)) != (b < float(0))) { + return float(c - 1); + } else { + return c; + } + } + + template ::value, int>::type = 0> + MSHADOW_XINLINE static double Map(DType a, double b) { + double a_double = static_cast(a); + double c = ::floor(b / a_double); + if ((c * a_double != b) && (a < DType(0)) != (b < double(0))) { + return double(c - 1); + } else { + return c; + } + } +}; + MXNET_BINARY_MATH_OP_NC(left, a); MXNET_BINARY_MATH_OP_NC(right, b); diff --git a/src/operator/numpy/np_floor_divide-inl.h b/src/operator/numpy/np_floor_divide-inl.h new file mode 100644 index 000000000000..070f5bb862cd --- /dev/null +++ b/src/operator/numpy/np_floor_divide-inl.h @@ -0,0 +1,240 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_floor_divide-inl.h + * \brief Function definitions of floor_divide operator + */ + +#ifndef MXNET_OPERATOR_NUMPY_NP_FLOOR_DIVIDE_INL_H_ +#define MXNET_OPERATOR_NUMPY_NP_FLOOR_DIVIDE_INL_H_ + +#include +#include "../../common/utils.h" +#include "../tensor/elemwise_binary_broadcast_op.h" +#include "../numpy/np_elemwise_broadcast_op.h" + +namespace mxnet { +namespace op { + +template +void FloorDivideScalarCompute(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + CHECK_EQ(inputs.size(), 1U); + CHECK_EQ(outputs.size(), 1U); + if (req[0] == kNullOp || outputs[0].Size() == 0U) + return; + using namespace mshadow; + using namespace mxnet_op; + using namespace mshadow::expr; + Stream* s = ctx.get_stream(); + const NumpyBinaryScalarParam& param = nnvm::get(attrs.parsed); + const double alpha = param.scalar; + const TBlob& data = inputs[0]; + const TBlob& out = outputs[0]; + if (common::is_int(out.type_flag_)) { + MXNET_INT_TYPE_SWITCH(outputs[0].type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { + Kernel, xpu>::Launch( + s, data.Size(), out.dptr(), data.dptr(), static_cast(alpha)); + }); + }); + } else { + CHECK(out.type_flag_ == mshadow::kFloat32 || out.type_flag_ == mshadow::kFloat64) + << "floor_divide only supports float32 and float64" + " output when input's dtype is " + << type_string(inputs[0].type_flag_); + if (common::is_int(data.type_flag_)) { + MSHADOW_REAL_TYPE_SWITCH(out.type_flag_, ODType, { + MXNET_INT_TYPE_SWITCH(inputs[0].type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { + Kernel, xpu>::Launch( + s, data.Size(), out.dptr(), data.dptr(), static_cast(alpha)); + }); + }); + }); + } else { + MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { + Kernel, xpu>::Launch( + s, data.Size(), out.dptr(), data.dptr(), DType(alpha)); + }); + }); + } + } +} + 
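[editorial note on the kernels above] The subtract-one branch that appears in the floor_divide / rfloor_divide functors and in FloorDivideScalarCompute is what turns C++'s truncate-toward-zero integer division into Python-style floor division (for floating-point inputs ::floor already rounds down). A minimal Python model of that rule follows; the name floor_div_ref is illustrative only and is not part of the patch:

    def floor_div_ref(a, b):
        # Reference semantics targeted by the kernels: Python's // operator.
        c = int(a / b)                          # truncates toward zero, like integral '/' in C++
        if c * b != a and (a < 0) != (b < 0):
            c -= 1                              # inexact quotient with opposite signs: round down
        return c

    assert floor_div_ref(7, 3) == 7 // 3 == 2
    assert floor_div_ref(-7, 3) == -7 // 3 == -3
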
+template +void FloorDivideElemwiseCompute(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mxnet_op; + if (req[0] == kNullOp || outputs[0].Size() == 0U) + return; + Stream* s = ctx.get_stream(); + CHECK_EQ(inputs.size(), 2U); + CHECK_EQ(outputs.size(), 1U); + + const TBlob& lhs = inputs[0]; + const TBlob& rhs = inputs[1]; + const TBlob& out = outputs[0]; + if (lhs.type_flag_ == rhs.type_flag_) { + MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { + MSHADOW_TYPE_SWITCH(out.type_flag_, DType, { + Kernel, xpu>::Launch( + s, out.Size(), out.dptr(), lhs.dptr(), rhs.dptr()); + }); + }); + } else { + // Case when types of the 2 input tensors are different + if (common::is_float(lhs.type_flag_) && common::is_float(rhs.type_flag_)) { + // both lhs and rhs are float types, output type is the more precise one + LOG(FATAL) << "not implemented yet..."; + } else if (common::is_float(lhs.type_flag_) || common::is_float(rhs.type_flag_)) { + // one is float type, the other is integer type, the output type should be the same as float + CHECK_EQ(out.type_flag_, common::is_float(lhs.type_flag_) ? lhs.type_flag_ : rhs.type_flag_) + << "This case out type should be same as the float type"; + if (common::is_float(lhs.type_flag_)) { + // lhs is the float one + MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { + MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, LType, { + MXNET_INT_TYPE_SWITCH(rhs.type_flag_, RType, { + Kernel, xpu>::Launch( + s, out.Size(), out.dptr(), rhs.dptr(), lhs.dptr()); + }); + }); + }); + } else { + // rhs is the float one + MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { + MXNET_INT_TYPE_SWITCH(lhs.type_flag_, LType, { + MSHADOW_REAL_TYPE_SWITCH(rhs.type_flag_, RType, { + Kernel, xpu>::Launch( + s, out.Size(), out.dptr(), lhs.dptr(), rhs.dptr()); + }); + }); + }); + } + } else { + // lhs is integer type, rhs is integer type, output type should be float + LOG(FATAL) << "not implemented yet..."; + } + } +} + +template +void FloorDivideBroadcastCompute(const nnvm::NodeAttrs& attrs, + const OpContext& ctx, + const std::vector& inputs, + const std::vector& req, + const std::vector& outputs) { + using namespace mxnet_op; + if (outputs[0].shape_.Size() == 0U) + return; + CHECK_EQ(inputs.size(), 2U); + mxnet::TShape new_lshape, new_rshape, new_oshape; + int ndim = BinaryBroadcastShapeCompact( + inputs[0].shape_, inputs[1].shape_, outputs[0].shape_, &new_lshape, &new_rshape, &new_oshape); + if (!ndim) { + FloorDivideElemwiseCompute(attrs, ctx, inputs, req, outputs); + } else { + if (req[0] == kNullOp) + return; + mshadow::Stream* s = ctx.get_stream(); + const TBlob& lhs = inputs[0]; + const TBlob& rhs = inputs[1]; + const TBlob& out = outputs[0]; + BROADCAST_NDIM_SWITCH(ndim, NDim, { + mshadow::Shape oshape = new_oshape.get(); + mshadow::Shape lstride = calc_stride(new_lshape.get()); + mshadow::Shape rstride = calc_stride(new_rshape.get()); + if (lhs.type_flag_ == rhs.type_flag_) { + MSHADOW_TYPE_SWITCH(lhs.type_flag_, DType, { + Kernel, xpu>::template LaunchEx( + s, + new_oshape.Size(), + req[0], + lstride, + rstride, + oshape, + lhs.dptr(), + rhs.dptr(), + out.dptr()); + }); + } else { + if (common::is_float(lhs.type_flag_) && common::is_float(rhs.type_flag_)) { + // lhs and rhs have different float types, the output is the more precise one + LOG(FATAL) << "not implemented yet..."; + } else if (common::is_float(lhs.type_flag_) || common::is_float(rhs.type_flag_)) { + // one of lhs and rhs is float, the output is the same 
type as the float one + if (common::is_float(lhs.type_flag_)) { + // lhs is float type, output will be the same float type + CHECK_EQ(lhs.type_flag_, out.type_flag_) + << "lhs should have the same type as out, infer type broken?"; + MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, LType, { + MXNET_INT_TYPE_SWITCH(rhs.type_flag_, RType, { + Kernel, + xpu>::template LaunchEx(s, + new_oshape.Size(), + req[0], + rstride, + lstride, + oshape, + rhs.dptr(), + lhs.dptr(), + out.dptr()); + }); + }); + } else { + // rhs is float type, output will be the same float type + CHECK_EQ(rhs.type_flag_, out.type_flag_) + << "rhs should have the same type as out, infer type broken?"; + MXNET_INT_TYPE_SWITCH(lhs.type_flag_, LType, { + MSHADOW_REAL_TYPE_SWITCH(rhs.type_flag_, RType, { + Kernel, + xpu>::template LaunchEx(s, + new_oshape.Size(), + req[0], + lstride, + rstride, + oshape, + lhs.dptr(), + rhs.dptr(), + out.dptr()); + }); + }); + } + } else { + // lhs and rhs have different integer types, the output is float type + LOG(FATAL) << "not implemented yet..."; + } + } + }); + } +} + +} // namespace op +} // namespace mxnet + +#endif // MXNET_OPERATOR_NUMPY_NP_FLOOR_DIVIDE_INL_H_ diff --git a/src/operator/numpy/np_floor_divide.cc b/src/operator/numpy/np_floor_divide.cc new file mode 100644 index 000000000000..11489fb246dd --- /dev/null +++ b/src/operator/numpy/np_floor_divide.cc @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_floor_divide.cc + * \brief CPU Implementation of floor_divide operator. + */ + +#include "./np_floor_divide-inl.h" + +namespace mxnet { +namespace op { + +int FloorDivideOutType(int ltype, int rtype) { + if (common::is_float(ltype) && common::is_float(rtype)) { + // If both inputs are float, return the one with the higher precision + return common::get_more_precise_type(ltype, rtype); + } else if (common::is_float(ltype) || common::is_float(rtype)) { + // If only one of the inputs is float, return that float type + return (common::is_float(ltype)) ? 
ltype : rtype; + } + // If neither of the inputs is float, return the higher precision int dtype + return common::get_more_precise_type(ltype, rtype); +} + +bool FloorDivideType(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_GT(in_attrs->size(), 0U); + CHECK_EQ(out_attrs->size(), 1U); + + for (const int dtype : *in_attrs) { + if (dtype == -1) + return false; + } + + const int lhs_dtype = in_attrs->at(0); + const int rhs_dtype = in_attrs->at(1); + TYPE_ASSIGN_CHECK(*out_attrs, 0, FloorDivideOutType(lhs_dtype, rhs_dtype)); + return true; +} + +bool FloorDivideScalarType(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + CHECK_GT(in_attrs->size(), 0U); + CHECK_EQ(out_attrs->size(), 1U); + + for (const int dtype : *in_attrs) { + if (dtype == -1) + return false; + } + + const NumpyBinaryScalarParam& param = nnvm::get(attrs.parsed); + bool is_int = param.is_int; + const int lhs_dtype = in_attrs->at(0); + const int rhs_dtype = + common::is_float(lhs_dtype) + ? lhs_dtype + : (is_int ? mxnet::common::GetDefaultIntType() : mxnet::common::GetDefaultDtype()); + TYPE_ASSIGN_CHECK(*out_attrs, 0, FloorDivideOutType(lhs_dtype, rhs_dtype)); + return true; +} + +NNVM_REGISTER_OP(_npi_floor_divide) + .set_num_inputs(2) + .set_num_outputs(1) + .set_attr("FListInputNames", + [](const NodeAttrs& attrs) { + return std::vector{"lhs", "rhs"}; + }) + .set_attr("FInferShape", BinaryBroadcastShape) + .set_attr("FInferType", FloorDivideType) + .set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 0}, {1, 0}}; + }) + .set_attr("FCompute", FloorDivideBroadcastCompute) + .set_attr("FGradient", MakeZeroGradNodes) + .add_argument("lhs", "NDArray-or-Symbol", "Dividend array") + .add_argument("rhs", "NDArray-or-Symbol", "Divisor array"); + + +NNVM_REGISTER_OP(_npi_floor_divide_scalar) + .set_num_inputs(1) + .set_num_outputs(1) + .set_attr_parser(ParamParser) + .set_attr("FInferShape", ElemwiseShape<1, 1>) + .set_attr("FInferType", FloorDivideScalarType) + .set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 0}}; + }) + .set_attr("FCompute", FloorDivideScalarCompute) + .set_attr("FGradient", MakeZeroGradNodes) + .add_argument("data", "NDArray-or-Symbol", "source input") + .add_arguments(NumpyBinaryScalarParam::__FIELDS__()); + +NNVM_REGISTER_OP(_npi_rfloor_divide_scalar) + .set_num_inputs(1) + .set_num_outputs(1) + .set_attr_parser(ParamParser) + .set_attr("FInferShape", ElemwiseShape<1, 1>) + .set_attr("FInferType", FloorDivideType) + .set_attr("FInplaceOption", + [](const NodeAttrs& attrs) { + return std::vector >{{0, 0}}; + }) +#ifdef _WIN32 + .set_attr("FResourceRequest", + [](const NodeAttrs& attrs) { + return std::vector{ResourceRequest::kTempSpace}; + }) +#endif + .set_attr("FCompute", FloorDivideScalarCompute) + .set_attr("FGradient", MakeZeroGradNodes) + .add_argument("data", "NDArray-or-Symbol", "source input") + .add_arguments(NumpyBinaryScalarParam::__FIELDS__()); + +} // namespace op +} // namespace mxnet diff --git a/src/operator/numpy/np_floor_divide.cu b/src/operator/numpy/np_floor_divide.cu new file mode 100644 index 000000000000..b6d01b706460 --- /dev/null +++ b/src/operator/numpy/np_floor_divide.cu @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file np_floor_divide.cu + * \brief GPU Implementation of floor_divide operator. + */ + +#include "./np_floor_divide-inl.h" + +namespace mxnet { +namespace op { + +NNVM_REGISTER_OP(_npi_floor_divide) + .set_attr("FCompute", FloorDivideBroadcastCompute); + +NNVM_REGISTER_OP(_npi_floor_divide_scalar) + .set_attr("FCompute", FloorDivideScalarCompute); + +NNVM_REGISTER_OP(_npi_rfloor_divide_scalar) + .set_attr("FCompute", FloorDivideScalarCompute); + +} // namespace op +} // namespace mxnet From 6e1d0805d81062065fac7d5e72dfae18484b95c5 Mon Sep 17 00:00:00 2001 From: barry-jin Date: Sun, 3 Oct 2021 21:55:31 -0700 Subject: [PATCH 02/19] fix lint --- src/operator/mshadow_op.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h index 418d0e331f2e..02c46b278043 100644 --- a/src/operator/mshadow_op.h +++ b/src/operator/mshadow_op.h @@ -224,7 +224,7 @@ struct floor_divide : public mxnet_op::tunable { template MSHADOW_XINLINE static DType Map(DType a, DType b) { DType c = static_cast(::floor(a / b)); - if ((c * b != a) && (a < DType(0)) != (b < DType(0))) { + if ((c * b != a) && (a < 0) != (b < 0)) { return DType(c - 1); } else { return c; @@ -235,7 +235,7 @@ struct floor_divide : public mxnet_op::tunable { MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) { mshadow::half::half_t a_half = static_cast(a); mshadow::half::half_t c = static_cast(::floor(a_half / b)); - if ((c * b != a_half) && (a < DType(0)) != (b < mshadow::half::half_t(0))) { + if ((c * b != a_half) && (a < 0) != (b < 0)) { return mshadow::half::half_t(c - 1); } else { return c; @@ -246,7 +246,7 @@ struct floor_divide : public mxnet_op::tunable { MSHADOW_XINLINE static float Map(DType a, float b) { float a_float = static_cast(a); float c = ::floorf(a_float / b); - if ((c * b != a_float) && (a < DType(0)) != (b < float(0))) { + if ((c * b != a_float) && (a < 0) != (b < 0)) { return float(c - 1); } else { return c; @@ -257,7 +257,7 @@ struct floor_divide : public mxnet_op::tunable { MSHADOW_XINLINE static double Map(DType a, double b) { double a_double = static_cast(a); double c = ::floor(a_double / b); - if ((c * b != a_double) && (a < DType(0)) != (b < double(0))) { + if ((c * b != a_double) && (a < 0) != (b < 0)) { return double(c - 1); } else { return c; @@ -269,7 +269,7 @@ struct rfloor_divide : public mxnet_op::tunable { template MSHADOW_XINLINE static DType Map(DType a, DType b) { DType c = static_cast(::floor(b / a)); - if ((c * a != b) && (a < DType(0)) != (b < DType(0))) { + if ((c * a != b) && (a < 0) != (b < 0)) { return DType(c - 1); } else { return c; @@ -280,7 +280,7 @@ struct rfloor_divide : public mxnet_op::tunable { MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) { mshadow::half::half_t a_half = static_cast(a); mshadow::half::half_t c = static_cast(::floor(b / a_half)); - if 
((c * a_half != b) && (a < DType(0)) != (b < mshadow::half::half_t(0))) { + if ((c * a_half != b) && (a < 0) != (b < 0)) { return mshadow::half::half_t(c - 1); } else { return c; @@ -291,7 +291,7 @@ struct rfloor_divide : public mxnet_op::tunable { MSHADOW_XINLINE static float Map(DType a, float b) { float a_float = static_cast(a); float c = ::floorf(b / a_float); - if ((c * a_float != b) && (a < DType(0)) != (b < float(0))) { + if ((c * a_float != b) && (a < 0) != (b < 0)) { return float(c - 1); } else { return c; @@ -302,7 +302,7 @@ struct rfloor_divide : public mxnet_op::tunable { MSHADOW_XINLINE static double Map(DType a, double b) { double a_double = static_cast(a); double c = ::floor(b / a_double); - if ((c * a_double != b) && (a < DType(0)) != (b < double(0))) { + if ((c * a_double != b) && (a < 0) != (b < 0)) { return double(c - 1); } else { return c; From db91b410761b7b416e817ef972ffb2f9a672fdcb Mon Sep 17 00:00:00 2001 From: barry-jin Date: Thu, 7 Oct 2021 15:42:47 -0700 Subject: [PATCH 03/19] fix sanity --- src/operator/mshadow_op.h | 8 ++++---- src/operator/numpy/np_floor_divide.cc | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h index 02c46b278043..31ad36670628 100644 --- a/src/operator/mshadow_op.h +++ b/src/operator/mshadow_op.h @@ -247,7 +247,7 @@ struct floor_divide : public mxnet_op::tunable { float a_float = static_cast(a); float c = ::floorf(a_float / b); if ((c * b != a_float) && (a < 0) != (b < 0)) { - return float(c - 1); + return c - 1.0f; } else { return c; } @@ -258,7 +258,7 @@ struct floor_divide : public mxnet_op::tunable { double a_double = static_cast(a); double c = ::floor(a_double / b); if ((c * b != a_double) && (a < 0) != (b < 0)) { - return double(c - 1); + return c - 1.0; } else { return c; } @@ -292,7 +292,7 @@ struct rfloor_divide : public mxnet_op::tunable { float a_float = static_cast(a); float c = ::floorf(b / a_float); if ((c * a_float != b) && (a < 0) != (b < 0)) { - return float(c - 1); + return c - 1.0f; } else { return c; } @@ -303,7 +303,7 @@ struct rfloor_divide : public mxnet_op::tunable { double a_double = static_cast(a); double c = ::floor(b / a_double); if ((c * a_double != b) && (a < 0) != (b < 0)) { - return double(c - 1); + return c - 1.0; } else { return c; } diff --git a/src/operator/numpy/np_floor_divide.cc b/src/operator/numpy/np_floor_divide.cc index 11489fb246dd..f89d580c6f9c 100644 --- a/src/operator/numpy/np_floor_divide.cc +++ b/src/operator/numpy/np_floor_divide.cc @@ -107,7 +107,8 @@ NNVM_REGISTER_OP(_npi_floor_divide_scalar) [](const NodeAttrs& attrs) { return std::vector >{{0, 0}}; }) - .set_attr("FCompute", FloorDivideScalarCompute) + .set_attr("FCompute", + FloorDivideScalarCompute) .set_attr("FGradient", MakeZeroGradNodes) .add_argument("data", "NDArray-or-Symbol", "source input") .add_arguments(NumpyBinaryScalarParam::__FIELDS__()); From d14c2e1a614bd649f22f7e7fab22c130f5afe48d Mon Sep 17 00:00:00 2001 From: barry-jin Date: Sun, 17 Oct 2021 18:57:53 -0700 Subject: [PATCH 04/19] update implementation --- python/mxnet/ndarray/numpy/_op.py | 1 + python/mxnet/numpy/multiarray.py | 1 + src/common/cuda/rtc/forward_functions-inl.h | 22 ++ src/operator/mshadow_op.h | 51 +++-- src/operator/numpy/np_floor_divide-inl.h | 240 -------------------- src/operator/numpy/np_floor_divide.cc | 114 +--------- src/operator/numpy/np_floor_divide.cu | 8 +- tests/python/unittest/test_numpy_op.py | 2 + 8 files changed, 69 insertions(+), 370 deletions(-) delete mode 
100644 src/operator/numpy/np_floor_divide-inl.h diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py index 53d425d8f961..f9c7b553f3fc 100644 --- a/python/mxnet/ndarray/numpy/_op.py +++ b/python/mxnet/ndarray/numpy/_op.py @@ -1169,6 +1169,7 @@ def true_divide(x1, x2, out=None): @set_module('mxnet.ndarray.numpy') +@wrap_np_binary_func def floor_divide(x1, x2, out=None): """Return the largest integer smaller or equal to the division of the inputs. It is equivalent to the Python // operator and pairs with the Python % (remainder), diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py index 82bf9c0e1d6b..3f7cc1c2efb0 100644 --- a/python/mxnet/numpy/multiarray.py +++ b/python/mxnet/numpy/multiarray.py @@ -3418,6 +3418,7 @@ def true_divide(x1, x2, out=None): @set_module('mxnet.numpy') +@wrap_np_binary_func def floor_divide(x1, x2, out=None): """Return the largest integer smaller or equal to the division of the inputs. It is equivalent to the Python // operator and pairs with the Python % (remainder), diff --git a/src/common/cuda/rtc/forward_functions-inl.h b/src/common/cuda/rtc/forward_functions-inl.h index 7a886a0a9aec..cb93de8c49a6 100644 --- a/src/common/cuda/rtc/forward_functions-inl.h +++ b/src/common/cuda/rtc/forward_functions-inl.h @@ -259,6 +259,28 @@ rsub(const DType a, const DType2 b) { return b - a; } +template +__device__ inline mixed_type +floor_divide(const DType a, const DType2 b) { + mixed_type c = op::floor(op::div(a, b)); + if ((c * b != a) && (a < 0) != (b < 0)) { + return mixed_type(c - 1); + } else { + return c; + } +} + +template +__device__ inline mixed_type +rfloor_divide(const DType a, const DType2 b) { + mixed_type c = op::floor(op::div(b, a)); + if ((c * a != b) && (a < 0) != (b < 0)) { + return mixed_type(c - 1); + } else { + return c; + } +} + template __device__ inline mixed_type mul(const DType a, const DType2 b) { diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h index 31ad36670628..cd12f0166847 100644 --- a/src/operator/mshadow_op.h +++ b/src/operator/mshadow_op.h @@ -220,6 +220,8 @@ struct rtrue_divide : public mxnet_op::tunable { } }; +/***** floor_divide ******/ + struct floor_divide : public mxnet_op::tunable { template MSHADOW_XINLINE static DType Map(DType a, DType b) { @@ -230,7 +232,21 @@ struct floor_divide : public mxnet_op::tunable { return c; } } +}; + +struct rfloor_divide : public mxnet_op::tunable { + template + MSHADOW_XINLINE static DType Map(DType a, DType b) { + DType c = static_cast(::floor(b / a)); + if ((c * a != b) && (a < 0) != (b < 0)) { + return DType(c - 1); + } else { + return c; + } + } +}; +struct mixed_floor_divide { template ::value, int>::type = 0> MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) { mshadow::half::half_t a_half = static_cast(a); @@ -242,7 +258,10 @@ struct floor_divide : public mxnet_op::tunable { } } - template ::value, int>::type = 0> + template ::value || + std::is_integral::value, + int>::type = 0> MSHADOW_XINLINE static float Map(DType a, float b) { float a_float = static_cast(a); float c = ::floorf(a_float / b); @@ -253,8 +272,11 @@ struct floor_divide : public mxnet_op::tunable { } } - template ::value, int>::type = 0> - MSHADOW_XINLINE static double Map(DType a, double b) { + template ::value || + std::is_same::value || + std::is_integral::value, + int>::type = 0> MSHADOW_XINLINE static double Map(DType a, double b) { double a_double = static_cast(a); double c = ::floor(a_double / b); if ((c * b != 
a_double) && (a < 0) != (b < 0)) { @@ -265,17 +287,7 @@ struct floor_divide : public mxnet_op::tunable { } }; -struct rfloor_divide : public mxnet_op::tunable { - template - MSHADOW_XINLINE static DType Map(DType a, DType b) { - DType c = static_cast(::floor(b / a)); - if ((c * a != b) && (a < 0) != (b < 0)) { - return DType(c - 1); - } else { - return c; - } - } - +struct mixed_rfloor_divide { template ::value, int>::type = 0> MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) { mshadow::half::half_t a_half = static_cast(a); @@ -287,7 +299,10 @@ struct rfloor_divide : public mxnet_op::tunable { } } - template ::value, int>::type = 0> + template ::value || + std::is_integral::value, + int>::type = 0> MSHADOW_XINLINE static float Map(DType a, float b) { float a_float = static_cast(a); float c = ::floorf(b / a_float); @@ -298,7 +313,11 @@ struct rfloor_divide : public mxnet_op::tunable { } } - template ::value, int>::type = 0> + template ::value || + std::is_same::value || + std::is_integral::value, + int>::type = 0> MSHADOW_XINLINE static double Map(DType a, double b) { double a_double = static_cast(a); double c = ::floor(b / a_double); diff --git a/src/operator/numpy/np_floor_divide-inl.h b/src/operator/numpy/np_floor_divide-inl.h deleted file mode 100644 index 070f5bb862cd..000000000000 --- a/src/operator/numpy/np_floor_divide-inl.h +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! 
- * \file np_floor_divide-inl.h - * \brief Function definitions of floor_divide operator - */ - -#ifndef MXNET_OPERATOR_NUMPY_NP_FLOOR_DIVIDE_INL_H_ -#define MXNET_OPERATOR_NUMPY_NP_FLOOR_DIVIDE_INL_H_ - -#include -#include "../../common/utils.h" -#include "../tensor/elemwise_binary_broadcast_op.h" -#include "../numpy/np_elemwise_broadcast_op.h" - -namespace mxnet { -namespace op { - -template -void FloorDivideScalarCompute(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - CHECK_EQ(inputs.size(), 1U); - CHECK_EQ(outputs.size(), 1U); - if (req[0] == kNullOp || outputs[0].Size() == 0U) - return; - using namespace mshadow; - using namespace mxnet_op; - using namespace mshadow::expr; - Stream* s = ctx.get_stream(); - const NumpyBinaryScalarParam& param = nnvm::get(attrs.parsed); - const double alpha = param.scalar; - const TBlob& data = inputs[0]; - const TBlob& out = outputs[0]; - if (common::is_int(out.type_flag_)) { - MXNET_INT_TYPE_SWITCH(outputs[0].type_flag_, DType, { - MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { - Kernel, xpu>::Launch( - s, data.Size(), out.dptr(), data.dptr(), static_cast(alpha)); - }); - }); - } else { - CHECK(out.type_flag_ == mshadow::kFloat32 || out.type_flag_ == mshadow::kFloat64) - << "floor_divide only supports float32 and float64" - " output when input's dtype is " - << type_string(inputs[0].type_flag_); - if (common::is_int(data.type_flag_)) { - MSHADOW_REAL_TYPE_SWITCH(out.type_flag_, ODType, { - MXNET_INT_TYPE_SWITCH(inputs[0].type_flag_, DType, { - MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { - Kernel, xpu>::Launch( - s, data.Size(), out.dptr(), data.dptr(), static_cast(alpha)); - }); - }); - }); - } else { - MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, { - MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { - Kernel, xpu>::Launch( - s, data.Size(), out.dptr(), data.dptr(), DType(alpha)); - }); - }); - } - } -} - -template -void FloorDivideElemwiseCompute(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - using namespace mxnet_op; - if (req[0] == kNullOp || outputs[0].Size() == 0U) - return; - Stream* s = ctx.get_stream(); - CHECK_EQ(inputs.size(), 2U); - CHECK_EQ(outputs.size(), 1U); - - const TBlob& lhs = inputs[0]; - const TBlob& rhs = inputs[1]; - const TBlob& out = outputs[0]; - if (lhs.type_flag_ == rhs.type_flag_) { - MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { - MSHADOW_TYPE_SWITCH(out.type_flag_, DType, { - Kernel, xpu>::Launch( - s, out.Size(), out.dptr(), lhs.dptr(), rhs.dptr()); - }); - }); - } else { - // Case when types of the 2 input tensors are different - if (common::is_float(lhs.type_flag_) && common::is_float(rhs.type_flag_)) { - // both lhs and rhs are float types, output type is the more precise one - LOG(FATAL) << "not implemented yet..."; - } else if (common::is_float(lhs.type_flag_) || common::is_float(rhs.type_flag_)) { - // one is float type, the other is integer type, the output type should be the same as float - CHECK_EQ(out.type_flag_, common::is_float(lhs.type_flag_) ? 
lhs.type_flag_ : rhs.type_flag_) - << "This case out type should be same as the float type"; - if (common::is_float(lhs.type_flag_)) { - // lhs is the float one - MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { - MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, LType, { - MXNET_INT_TYPE_SWITCH(rhs.type_flag_, RType, { - Kernel, xpu>::Launch( - s, out.Size(), out.dptr(), rhs.dptr(), lhs.dptr()); - }); - }); - }); - } else { - // rhs is the float one - MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { - MXNET_INT_TYPE_SWITCH(lhs.type_flag_, LType, { - MSHADOW_REAL_TYPE_SWITCH(rhs.type_flag_, RType, { - Kernel, xpu>::Launch( - s, out.Size(), out.dptr(), lhs.dptr(), rhs.dptr()); - }); - }); - }); - } - } else { - // lhs is integer type, rhs is integer type, output type should be float - LOG(FATAL) << "not implemented yet..."; - } - } -} - -template -void FloorDivideBroadcastCompute(const nnvm::NodeAttrs& attrs, - const OpContext& ctx, - const std::vector& inputs, - const std::vector& req, - const std::vector& outputs) { - using namespace mxnet_op; - if (outputs[0].shape_.Size() == 0U) - return; - CHECK_EQ(inputs.size(), 2U); - mxnet::TShape new_lshape, new_rshape, new_oshape; - int ndim = BinaryBroadcastShapeCompact( - inputs[0].shape_, inputs[1].shape_, outputs[0].shape_, &new_lshape, &new_rshape, &new_oshape); - if (!ndim) { - FloorDivideElemwiseCompute(attrs, ctx, inputs, req, outputs); - } else { - if (req[0] == kNullOp) - return; - mshadow::Stream* s = ctx.get_stream(); - const TBlob& lhs = inputs[0]; - const TBlob& rhs = inputs[1]; - const TBlob& out = outputs[0]; - BROADCAST_NDIM_SWITCH(ndim, NDim, { - mshadow::Shape oshape = new_oshape.get(); - mshadow::Shape lstride = calc_stride(new_lshape.get()); - mshadow::Shape rstride = calc_stride(new_rshape.get()); - if (lhs.type_flag_ == rhs.type_flag_) { - MSHADOW_TYPE_SWITCH(lhs.type_flag_, DType, { - Kernel, xpu>::template LaunchEx( - s, - new_oshape.Size(), - req[0], - lstride, - rstride, - oshape, - lhs.dptr(), - rhs.dptr(), - out.dptr()); - }); - } else { - if (common::is_float(lhs.type_flag_) && common::is_float(rhs.type_flag_)) { - // lhs and rhs have different float types, the output is the more precise one - LOG(FATAL) << "not implemented yet..."; - } else if (common::is_float(lhs.type_flag_) || common::is_float(rhs.type_flag_)) { - // one of lhs and rhs is float, the output is the same type as the float one - if (common::is_float(lhs.type_flag_)) { - // lhs is float type, output will be the same float type - CHECK_EQ(lhs.type_flag_, out.type_flag_) - << "lhs should have the same type as out, infer type broken?"; - MSHADOW_REAL_TYPE_SWITCH(lhs.type_flag_, LType, { - MXNET_INT_TYPE_SWITCH(rhs.type_flag_, RType, { - Kernel, - xpu>::template LaunchEx(s, - new_oshape.Size(), - req[0], - rstride, - lstride, - oshape, - rhs.dptr(), - lhs.dptr(), - out.dptr()); - }); - }); - } else { - // rhs is float type, output will be the same float type - CHECK_EQ(rhs.type_flag_, out.type_flag_) - << "rhs should have the same type as out, infer type broken?"; - MXNET_INT_TYPE_SWITCH(lhs.type_flag_, LType, { - MSHADOW_REAL_TYPE_SWITCH(rhs.type_flag_, RType, { - Kernel, - xpu>::template LaunchEx(s, - new_oshape.Size(), - req[0], - lstride, - rstride, - oshape, - lhs.dptr(), - rhs.dptr(), - out.dptr()); - }); - }); - } - } else { - // lhs and rhs have different integer types, the output is float type - LOG(FATAL) << "not implemented yet..."; - } - } - }); - } -} - -} // namespace op -} // namespace mxnet - -#endif // MXNET_OPERATOR_NUMPY_NP_FLOOR_DIVIDE_INL_H_ diff --git 
a/src/operator/numpy/np_floor_divide.cc b/src/operator/numpy/np_floor_divide.cc index f89d580c6f9c..df00f8483110 100644 --- a/src/operator/numpy/np_floor_divide.cc +++ b/src/operator/numpy/np_floor_divide.cc @@ -22,117 +22,17 @@ * \brief CPU Implementation of floor_divide operator. */ -#include "./np_floor_divide-inl.h" +#include "./np_elemwise_broadcast_op.h" namespace mxnet { namespace op { -int FloorDivideOutType(int ltype, int rtype) { - if (common::is_float(ltype) && common::is_float(rtype)) { - // If both inputs are float, return the one with the higher precision - return common::get_more_precise_type(ltype, rtype); - } else if (common::is_float(ltype) || common::is_float(rtype)) { - // If only one of the inputs is float, return that float type - return (common::is_float(ltype)) ? ltype : rtype; - } - // If neither of the inputs is float, return the higher precision int dtype - return common::get_more_precise_type(ltype, rtype); -} - -bool FloorDivideType(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_GT(in_attrs->size(), 0U); - CHECK_EQ(out_attrs->size(), 1U); - - for (const int dtype : *in_attrs) { - if (dtype == -1) - return false; - } - - const int lhs_dtype = in_attrs->at(0); - const int rhs_dtype = in_attrs->at(1); - TYPE_ASSIGN_CHECK(*out_attrs, 0, FloorDivideOutType(lhs_dtype, rhs_dtype)); - return true; -} - -bool FloorDivideScalarType(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - CHECK_GT(in_attrs->size(), 0U); - CHECK_EQ(out_attrs->size(), 1U); - - for (const int dtype : *in_attrs) { - if (dtype == -1) - return false; - } - - const NumpyBinaryScalarParam& param = nnvm::get(attrs.parsed); - bool is_int = param.is_int; - const int lhs_dtype = in_attrs->at(0); - const int rhs_dtype = - common::is_float(lhs_dtype) - ? lhs_dtype - : (is_int ? 
mxnet::common::GetDefaultIntType() : mxnet::common::GetDefaultDtype()); - TYPE_ASSIGN_CHECK(*out_attrs, 0, FloorDivideOutType(lhs_dtype, rhs_dtype)); - return true; -} - -NNVM_REGISTER_OP(_npi_floor_divide) - .set_num_inputs(2) - .set_num_outputs(1) - .set_attr("FListInputNames", - [](const NodeAttrs& attrs) { - return std::vector{"lhs", "rhs"}; - }) - .set_attr("FInferShape", BinaryBroadcastShape) - .set_attr("FInferType", FloorDivideType) - .set_attr("FInplaceOption", - [](const NodeAttrs& attrs) { - return std::vector >{{0, 0}, {1, 0}}; - }) - .set_attr("FCompute", FloorDivideBroadcastCompute) - .set_attr("FGradient", MakeZeroGradNodes) - .add_argument("lhs", "NDArray-or-Symbol", "Dividend array") - .add_argument("rhs", "NDArray-or-Symbol", "Divisor array"); - - -NNVM_REGISTER_OP(_npi_floor_divide_scalar) - .set_num_inputs(1) - .set_num_outputs(1) - .set_attr_parser(ParamParser) - .set_attr("FInferShape", ElemwiseShape<1, 1>) - .set_attr("FInferType", FloorDivideScalarType) - .set_attr("FInplaceOption", - [](const NodeAttrs& attrs) { - return std::vector >{{0, 0}}; - }) - .set_attr("FCompute", - FloorDivideScalarCompute) - .set_attr("FGradient", MakeZeroGradNodes) - .add_argument("data", "NDArray-or-Symbol", "source input") - .add_arguments(NumpyBinaryScalarParam::__FIELDS__()); - -NNVM_REGISTER_OP(_npi_rfloor_divide_scalar) - .set_num_inputs(1) - .set_num_outputs(1) - .set_attr_parser(ParamParser) - .set_attr("FInferShape", ElemwiseShape<1, 1>) - .set_attr("FInferType", FloorDivideType) - .set_attr("FInplaceOption", - [](const NodeAttrs& attrs) { - return std::vector >{{0, 0}}; - }) -#ifdef _WIN32 - .set_attr("FResourceRequest", - [](const NodeAttrs& attrs) { - return std::vector{ResourceRequest::kTempSpace}; - }) -#endif - .set_attr("FCompute", FloorDivideScalarCompute) - .set_attr("FGradient", MakeZeroGradNodes) - .add_argument("data", "NDArray-or-Symbol", "source input") - .add_arguments(NumpyBinaryScalarParam::__FIELDS__()); +MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_floor_divide) +.set_attr( + "FCompute", + NumpyBinaryBroadcastComputeWithBool) +.set_attr("FGradient", MakeZeroGradNodes); } // namespace op } // namespace mxnet diff --git a/src/operator/numpy/np_floor_divide.cu b/src/operator/numpy/np_floor_divide.cu index b6d01b706460..0ce87a089f19 100644 --- a/src/operator/numpy/np_floor_divide.cu +++ b/src/operator/numpy/np_floor_divide.cu @@ -28,13 +28,7 @@ namespace mxnet { namespace op { NNVM_REGISTER_OP(_npi_floor_divide) - .set_attr("FCompute", FloorDivideBroadcastCompute); - -NNVM_REGISTER_OP(_npi_floor_divide_scalar) - .set_attr("FCompute", FloorDivideScalarCompute); - -NNVM_REGISTER_OP(_npi_rfloor_divide_scalar) - .set_attr("FCompute", FloorDivideScalarCompute); +.set_attr("FCompute", BinaryBroadcastRTCCompute{"floor_divide"}); } // namespace op } // namespace mxnet diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 488f1a80285d..750035d3f1f9 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -3069,6 +3069,8 @@ def forward(self, a, b, *args, **kwargs): [lambda y, x1, x2: onp.broadcast_to(x1, y.shape)]), 'divide': (0.1, 1.0, [lambda y, x1, x2: onp.ones(y.shape) / x2], [lambda y, x1, x2: -x1 / (x2 * x2)]), + 'floor_divide': (0.1, 1.0, [lambda y, x1, x2: onp.zeros(y.shape)], + [lambda y, x1, x2: onp.zeros(y.shape)]), 'mod': (1.0, 10.0, [lambda y, x1, x2: onp.ones(y.shape), lambda y, x1, x2: onp.zeros(y.shape)], From a56a1d1ee455d1c534c00efc1b65ae7554f66c5b Mon Sep 17 
00:00:00 2001 From: barry-jin Date: Sun, 17 Oct 2021 22:12:29 -0700 Subject: [PATCH 05/19] fix lint --- src/operator/mshadow_op.h | 15 ++++++++------- src/operator/numpy/np_floor_divide.cc | 11 ++++++----- src/operator/numpy/np_floor_divide.cu | 2 +- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h index 3397862435f8..32a846419989 100644 --- a/src/operator/mshadow_op.h +++ b/src/operator/mshadow_op.h @@ -261,7 +261,7 @@ struct mixed_floor_divide { template ::value, int>::type = 0> MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) { mshadow::half::half_t a_half = static_cast(a); - mshadow::half::half_t c = static_cast(::floor(a_half / b)); + mshadow::half::half_t c = static_cast(::floor(a_half / b)); if ((c * b != a_half) && (a < 0) != (b < 0)) { return mshadow::half::half_t(c - 1); } else { @@ -275,7 +275,7 @@ struct mixed_floor_divide { int>::type = 0> MSHADOW_XINLINE static float Map(DType a, float b) { float a_float = static_cast(a); - float c = ::floorf(a_float / b); + float c = ::floorf(a_float / b); if ((c * b != a_float) && (a < 0) != (b < 0)) { return c - 1.0f; } else { @@ -287,9 +287,10 @@ struct mixed_floor_divide { typename std::enable_if::value || std::is_same::value || std::is_integral::value, - int>::type = 0> MSHADOW_XINLINE static double Map(DType a, double b) { + int>::type = 0> + MSHADOW_XINLINE static double Map(DType a, double b) { double a_double = static_cast(a); - double c = ::floor(a_double / b); + double c = ::floor(a_double / b); if ((c * b != a_double) && (a < 0) != (b < 0)) { return c - 1.0; } else { @@ -302,7 +303,7 @@ struct mixed_rfloor_divide { template ::value, int>::type = 0> MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) { mshadow::half::half_t a_half = static_cast(a); - mshadow::half::half_t c = static_cast(::floor(b / a_half)); + mshadow::half::half_t c = static_cast(::floor(b / a_half)); if ((c * a_half != b) && (a < 0) != (b < 0)) { return mshadow::half::half_t(c - 1); } else { @@ -316,7 +317,7 @@ struct mixed_rfloor_divide { int>::type = 0> MSHADOW_XINLINE static float Map(DType a, float b) { float a_float = static_cast(a); - float c = ::floorf(b / a_float); + float c = ::floorf(b / a_float); if ((c * a_float != b) && (a < 0) != (b < 0)) { return c - 1.0f; } else { @@ -331,7 +332,7 @@ struct mixed_rfloor_divide { int>::type = 0> MSHADOW_XINLINE static double Map(DType a, double b) { double a_double = static_cast(a); - double c = ::floor(b / a_double); + double c = ::floor(b / a_double); if ((c * a_double != b) && (a < 0) != (b < 0)) { return c - 1.0; } else { diff --git a/src/operator/numpy/np_floor_divide.cc b/src/operator/numpy/np_floor_divide.cc index df00f8483110..78f6cf58ec7a 100644 --- a/src/operator/numpy/np_floor_divide.cc +++ b/src/operator/numpy/np_floor_divide.cc @@ -28,11 +28,12 @@ namespace mxnet { namespace op { MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_floor_divide) -.set_attr( - "FCompute", - NumpyBinaryBroadcastComputeWithBool) -.set_attr("FGradient", MakeZeroGradNodes); + .set_attr("FCompute", + NumpyBinaryBroadcastComputeWithBool) + .set_attr("FGradient", MakeZeroGradNodes); } // namespace op } // namespace mxnet diff --git a/src/operator/numpy/np_floor_divide.cu b/src/operator/numpy/np_floor_divide.cu index 0ce87a089f19..81596d8d21f2 100644 --- a/src/operator/numpy/np_floor_divide.cu +++ b/src/operator/numpy/np_floor_divide.cu @@ -28,7 +28,7 @@ namespace mxnet { namespace op { 
NNVM_REGISTER_OP(_npi_floor_divide) -.set_attr("FCompute", BinaryBroadcastRTCCompute{"floor_divide"}); + .set_attr("FCompute", BinaryBroadcastRTCCompute{"floor_divide"}); } // namespace op } // namespace mxnet From 61546b2dea27c175e3f3da5b4d055e9677b694b3 Mon Sep 17 00:00:00 2001 From: barry-jin Date: Mon, 18 Oct 2021 09:47:41 -0700 Subject: [PATCH 06/19] update operator_tune.cc --- src/operator/operator_tune.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/operator/operator_tune.cc b/src/operator/operator_tune.cc index f83885da1ba1..ddb5eebea63a 100644 --- a/src/operator/operator_tune.cc +++ b/src/operator/operator_tune.cc @@ -362,17 +362,21 @@ IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::plus); IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::minus); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::mul); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::div); // NOLINT() +IMPLEMENT_BINARY_WORKLOAD_FWD_WITH_BOOL(mxnet::op::mshadow_op::floor_divide); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::true_divide); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::minus_sign); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rminus); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rdiv); // NOLINT() +IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rfloor_divide); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::plus); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::minus); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::mul); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::div); // NOLINT() +IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::floor_divide); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::minus_sign); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::rminus); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rdiv); // NOLINT() +IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rfloor_divide); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::rtrue_divide); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_FWD(mxnet::op::mshadow_op::div_grad); // NOLINT() IMPLEMENT_BINARY_WORKLOAD_BWD(mxnet::op::mshadow_op::div_grad); // NOLINT() From 09e92d61ea0276644548e4b96f656fc57fc6cd29 Mon Sep 17 00:00:00 2001 From: barry-jin Date: Tue, 19 Oct 2021 16:00:31 -0700 Subject: [PATCH 07/19] fix --- src/operator/mshadow_op.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h index 32a846419989..e23fe728ba8c 100644 --- a/src/operator/mshadow_op.h +++ b/src/operator/mshadow_op.h @@ -237,7 +237,7 @@ struct floor_divide : public mxnet_op::tunable { template MSHADOW_XINLINE static DType Map(DType a, DType b) { DType c = static_cast(::floor(a / b)); - if ((c * b != a) && (a < 0) != (b < 0)) { + if ((c * b != a) && ((a < 0) != (b < 0))) { return DType(c - 1); } else { return c; @@ -249,7 +249,7 @@ struct rfloor_divide : public mxnet_op::tunable { template MSHADOW_XINLINE static DType Map(DType a, DType b) { DType c = static_cast(::floor(b / a)); - if ((c * a != b) && (a < 0) != (b < 0)) { + if ((c * a != b) && ((a < 0) != (b < 0))) { return DType(c - 1); } else { return c; @@ -262,7 +262,7 @@ struct mixed_floor_divide { MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) { mshadow::half::half_t a_half = 
static_cast(a); mshadow::half::half_t c = static_cast(::floor(a_half / b)); - if ((c * b != a_half) && (a < 0) != (b < 0)) { + if ((c * b != a_half) && ((a < 0) != (b < 0))) { return mshadow::half::half_t(c - 1); } else { return c; @@ -276,7 +276,7 @@ struct mixed_floor_divide { MSHADOW_XINLINE static float Map(DType a, float b) { float a_float = static_cast(a); float c = ::floorf(a_float / b); - if ((c * b != a_float) && (a < 0) != (b < 0)) { + if ((c * b != a_float) && ((a < 0) != (b < 0))) { return c - 1.0f; } else { return c; @@ -291,7 +291,7 @@ struct mixed_floor_divide { MSHADOW_XINLINE static double Map(DType a, double b) { double a_double = static_cast(a); double c = ::floor(a_double / b); - if ((c * b != a_double) && (a < 0) != (b < 0)) { + if ((c * b != a_double) && ((a < 0) != (b < 0))) { return c - 1.0; } else { return c; @@ -304,7 +304,7 @@ struct mixed_rfloor_divide { MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) { mshadow::half::half_t a_half = static_cast(a); mshadow::half::half_t c = static_cast(::floor(b / a_half)); - if ((c * a_half != b) && (a < 0) != (b < 0)) { + if ((c * a_half != b) && ((a < 0) != (b < 0))) { return mshadow::half::half_t(c - 1); } else { return c; @@ -318,7 +318,7 @@ struct mixed_rfloor_divide { MSHADOW_XINLINE static float Map(DType a, float b) { float a_float = static_cast(a); float c = ::floorf(b / a_float); - if ((c * a_float != b) && (a < 0) != (b < 0)) { + if ((c * a_float != b) && ((a < 0) != (b < 0))) { return c - 1.0f; } else { return c; @@ -333,7 +333,7 @@ struct mixed_rfloor_divide { MSHADOW_XINLINE static double Map(DType a, double b) { double a_double = static_cast(a); double c = ::floor(b / a_double); - if ((c * a_double != b) && (a < 0) != (b < 0)) { + if ((c * a_double != b) && ((a < 0) != (b < 0))) { return c - 1.0; } else { return c; From ff6100b2e3dc557b435f034c9b4c7858855f1525 Mon Sep 17 00:00:00 2001 From: barry-jin Date: Wed, 20 Oct 2021 15:28:05 -0700 Subject: [PATCH 08/19] fix lint --- src/operator/numpy/np_elemwise_broadcast_op_scalar.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc index 366a8efcf123..4fd1f2c84070 100644 --- a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc +++ b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc @@ -62,12 +62,13 @@ MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rpower_scalar) .set_attr("FGradient", ElemwiseGradUseOut{"_backward_rpower_scalar"}); MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_floor_divide_scalar) -.set_attr("FCompute", BinaryScalarOp::Compute) -.set_attr("FGradient", MakeZeroGradNodes); + .set_attr("FCompute", BinaryScalarOp::Compute) + .set_attr("FGradient", MakeZeroGradNodes); MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rfloor_divide_scalar) -.set_attr("FCompute", BinaryScalarOp::Compute) -.set_attr("FGradient", MakeZeroGradNodes); + .set_attr("FCompute", + BinaryScalarOp::Compute) + .set_attr("FGradient", MakeZeroGradNodes); } // namespace op } // namespace mxnet From 0d56b5abb36d4c68f81b60b0570f2a6fd3c501dd Mon Sep 17 00:00:00 2001 From: barry-jin Date: Wed, 20 Oct 2021 16:25:22 -0700 Subject: [PATCH 09/19] fix build --- src/operator/mshadow_op.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h index a6df1466879c..2e83d73ecb3b 100644 --- a/src/operator/mshadow_op.h +++ b/src/operator/mshadow_op.h @@ -272,7 

From ff6100b2e3dc557b435f034c9b4c7858855f1525 Mon Sep 17 00:00:00 2001
From: barry-jin
Date: Wed, 20 Oct 2021 15:28:05 -0700
Subject: [PATCH 08/19] fix lint

---
 src/operator/numpy/np_elemwise_broadcast_op_scalar.cc | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc
index 366a8efcf123..4fd1f2c84070 100644
--- a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc
+++ b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc
@@ -62,12 +62,13 @@ MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rpower_scalar)
 .set_attr("FGradient", ElemwiseGradUseOut{"_backward_rpower_scalar"});
 
 MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_floor_divide_scalar)
-.set_attr("FCompute", BinaryScalarOp::Compute)
-.set_attr("FGradient", MakeZeroGradNodes);
+    .set_attr("FCompute", BinaryScalarOp::Compute)
+    .set_attr("FGradient", MakeZeroGradNodes);
 
 MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rfloor_divide_scalar)
-.set_attr("FCompute", BinaryScalarOp::Compute)
-.set_attr("FGradient", MakeZeroGradNodes);
+    .set_attr("FCompute",
+              BinaryScalarOp::Compute)
+    .set_attr("FGradient", MakeZeroGradNodes);
 
 } // namespace op
 } // namespace mxnet

From 0d56b5abb36d4c68f81b60b0570f2a6fd3c501dd Mon Sep 17 00:00:00 2001
From: barry-jin
Date: Wed, 20 Oct 2021 16:25:22 -0700
Subject: [PATCH 09/19] fix build

---
 src/operator/mshadow_op.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h
index a6df1466879c..2e83d73ecb3b 100644
--- a/src/operator/mshadow_op.h
+++ b/src/operator/mshadow_op.h
@@ -272,7 +272,7 @@ struct mixed_floor_divide {
   MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) {
     mshadow::half::half_t a_half = static_cast(a);
     mshadow::half::half_t c = static_cast(::floor(a_half / b));
-    if ((c * b != a_half) && ((a < 0) != (b < 0))) {
+    if ((c * b != a_half) && ((a_half < 0) != (b < 0))) {
       return mshadow::half::half_t(c - 1);
     } else {
       return c;
@@ -286,7 +286,7 @@ struct mixed_floor_divide {
   MSHADOW_XINLINE static float Map(DType a, float b) {
     float a_float = static_cast(a);
     float c = ::floorf(a_float / b);
-    if ((c * b != a_float) && ((a < 0) != (b < 0))) {
+    if ((c * b != a_float) && ((a_float < 0) != (b < 0))) {
       return c - 1.0f;
     } else {
       return c;
@@ -301,7 +301,7 @@ struct mixed_floor_divide {
   MSHADOW_XINLINE static double Map(DType a, double b) {
     double a_double = static_cast(a);
     double c = ::floor(a_double / b);
-    if ((c * b != a_double) && ((a < 0) != (b < 0))) {
+    if ((c * b != a_double) && ((a_double < 0) != (b < 0))) {
       return c - 1.0;
     } else {
       return c;
@@ -314,7 +314,7 @@ struct mixed_rfloor_divide {
   MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) {
     mshadow::half::half_t a_half = static_cast(a);
     mshadow::half::half_t c = static_cast(::floor(b / a_half));
-    if ((c * a_half != b) && ((a < 0) != (b < 0))) {
+    if ((c * a_half != b) && ((a_half < 0) != (b < 0))) {
       return mshadow::half::half_t(c - 1);
     } else {
       return c;
@@ -328,7 +328,7 @@ struct mixed_rfloor_divide {
   MSHADOW_XINLINE static float Map(DType a, float b) {
     float a_float = static_cast(a);
     float c = ::floorf(b / a_float);
-    if ((c * a_float != b) && ((a < 0) != (b < 0))) {
+    if ((c * a_float != b) && ((a_float < 0) != (b < 0))) {
       return c - 1.0f;
     } else {
       return c;
@@ -343,7 +343,7 @@ struct mixed_rfloor_divide {
   MSHADOW_XINLINE static double Map(DType a, double b) {
     double a_double = static_cast(a);
     double c = ::floor(b / a_double);
-    if ((c * a_double != b) && ((a < 0) != (b < 0))) {
+    if ((c * a_double != b) && ((a_double < 0) != (b < 0))) {
       return c - 1.0;
     } else {
       return c;
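The "fix build" change above swaps the raw `DType` operand `a` for its converted value (`a_half`, `a_float`, `a_double`) in the sign test. A plausible reason, stated here as an assumption since the commit message only says "fix build", is that `a < 0` is problematic when `DType` is unsigned or `bool`: the comparison is always false and typically draws a compiler warning, which fails `-Werror` builds. Comparing the converted value gives the same answer for signed types and is unobjectionable for the rest. A small sketch of the safe form:

```cpp
#include <iostream>

// Illustrative only. With an unsigned element type, comparing the raw value
// against 0 is always false (and usually warns, e.g. under -Wtype-limits,
// which breaks -Werror builds). Comparing the value after conversion to the
// output type behaves consistently for every element type.
template <typename DType>
bool sign_differs(DType a, float b) {
  float a_float = static_cast<float>(a);
  return (a_float < 0) != (b < 0);   // well-defined for signed, unsigned, bool
}

int main() {
  std::cout << sign_differs<int>(-3, 2.0f) << "\n";       // 1: signs differ
  std::cout << sign_differs<unsigned>(3u, -2.0f) << "\n"; // 1: signs differ
  std::cout << sign_differs<bool>(true, 2.0f) << "\n";    // 0: both non-negative
  return 0;
}
```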

From 0b77f03c6450682f64f30b7826f5de6ca4bb6861 Mon Sep 17 00:00:00 2001
From: barry-jin
Date: Thu, 21 Oct 2021 10:29:39 -0700
Subject: [PATCH 10/19] fix include

---
 src/operator/numpy/np_floor_divide.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/operator/numpy/np_floor_divide.cu b/src/operator/numpy/np_floor_divide.cu
index 81596d8d21f2..54fbd9d36642 100644
--- a/src/operator/numpy/np_floor_divide.cu
+++ b/src/operator/numpy/np_floor_divide.cu
@@ -22,7 +22,7 @@
  * \brief GPU Implementation of floor_divide operator.
  */
 
-#include "./np_floor_divide-inl.h"
+#include "./np_elemwise_broadcast_op.h"
 
 namespace mxnet {
 namespace op {

From 2ff4918c3b33d5e6336c4ba5e2046ed2e97a89be Mon Sep 17 00:00:00 2001
From: barry-jin
Date: Thu, 21 Oct 2021 17:48:48 -0700
Subject: [PATCH 11/19] fix rtc functions

---
 src/common/cuda/rtc/forward_functions-inl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/common/cuda/rtc/forward_functions-inl.h b/src/common/cuda/rtc/forward_functions-inl.h
index cb93de8c49a6..8b29009520a8 100644
--- a/src/common/cuda/rtc/forward_functions-inl.h
+++ b/src/common/cuda/rtc/forward_functions-inl.h
@@ -262,7 +262,7 @@ rsub(const DType a, const DType2 b) {
 template
 __device__ inline mixed_type
 floor_divide(const DType a, const DType2 b) {
-  mixed_type c = op::floor(op::div(a, b));
+  mixed_type c = ::floor(a / b);
   if ((c * b != a) && (a < 0) != (b < 0)) {
     return mixed_type(c - 1);
   } else {
@@ -273,7 +273,7 @@ floor_divide(const DType a, const DType2 b) {
 template
 __device__ inline mixed_type
 rfloor_divide(const DType a, const DType2 b) {
-  mixed_type c = op::floor(op::div(b, a));
+  mixed_type c = ::floor(b / a);
   if ((c * a != b) && (a < 0) != (b < 0)) {
     return mixed_type(c - 1);
   } else {

From 062ae01a019dc924417e664a3bca6f68207babb3 Mon Sep 17 00:00:00 2001
From: barry-jin
Date: Thu, 21 Oct 2021 20:28:01 -0700
Subject: [PATCH 12/19] add amp list

---
 python/mxnet/amp/lists/symbol_fp16.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/mxnet/amp/lists/symbol_fp16.py b/python/mxnet/amp/lists/symbol_fp16.py
index b561b335d9a7..ca3955b1d236 100644
--- a/python/mxnet/amp/lists/symbol_fp16.py
+++ b/python/mxnet/amp/lists/symbol_fp16.py
@@ -265,6 +265,9 @@
     '_npi_multinomial',
     '_npi_multiply',
     '_npi_multiply_scalar',
+    '_npi_floor_divide',
+    '_npi_floor_divide_scalar',
+    '_npi_rfloor_divide_scalar',
     '_npi_nan_to_num',
     '_npi_negative',
     '_npi_normal',

From 3b9a3f071a9929d50dc40221f1c3e573e0f2ab59 Mon Sep 17 00:00:00 2001
From: barry-jin
Date: Fri, 22 Oct 2021 13:39:59 -0700
Subject: [PATCH 13/19] add floor_divide in GPU

---
 src/operator/numpy/np_elemwise_broadcast_op_scalar.cu | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu
index 024d02a21d65..e21bac115ab4 100644
--- a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu
+++ b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu
@@ -51,5 +51,11 @@ NNVM_REGISTER_OP(_npi_power_scalar)
 NNVM_REGISTER_OP(_npi_rpower_scalar)
 .set_attr("FCompute", BinaryScalarRTCCompute{"rpow"});
 
+NNVM_REGISTER_OP(_npi_floor_divide_scalar)
+.set_attr("FCompute", BinaryScalarRTCCompute{"floor_divide"});
+
+NNVM_REGISTER_OP(_npi_rfloor_divide_scalar)
+.set_attr("FCompute", BinaryScalarRTCCompute{"rfloor_divide"});
+
 } // namespace op
 } // namespace mxnet
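Two scalar kernels are registered above because the scalar can sit on either side of `//`: `_npi_floor_divide_scalar` covers `tensor // scalar`, while `_npi_rfloor_divide_scalar` covers the reflected form `scalar // tensor`. A small host-side sketch of the two element-wise maps (ordinary C++ rather than the RTC kernel strings, and not the MXNet code itself):

```cpp
#include <cmath>
#include <cstdio>

// Illustrative host-side equivalents of the two scalar variants:
// floor_divide(x, s)  computes x // s  (scalar is the divisor)
// rfloor_divide(x, s) computes s // x  (scalar is the dividend, the reflected form)
double floor_divide(double x, double s)  { return std::floor(x / s); }
double rfloor_divide(double x, double s) { return std::floor(s / x); }

int main() {
  const double data[] = {1.0, -2.0, -3.0, 4.0, -5.0};
  for (double x : data) {
    std::printf("x = %4.1f   x // 3 = %5.1f   3 // x = %5.1f\n",
                x, floor_divide(x, 3.0), rfloor_divide(x, 3.0));
  }
  return 0;
}
```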

From 876c2fb7d73245242cbcb45f2269234eea12597c Mon Sep 17 00:00:00 2001
From: barry-jin
Date: Fri, 22 Oct 2021 13:47:12 -0700
Subject: [PATCH 14/19] fix lint

---
 src/operator/numpy/np_elemwise_broadcast_op_scalar.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu
index e21bac115ab4..c7bbeefb4445 100644
--- a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu
+++ b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu
@@ -52,10 +52,10 @@ NNVM_REGISTER_OP(_npi_rpower_scalar)
 .set_attr("FCompute", BinaryScalarRTCCompute{"rpow"});
 
 NNVM_REGISTER_OP(_npi_floor_divide_scalar)
-.set_attr("FCompute", BinaryScalarRTCCompute{"floor_divide"});
+    .set_attr("FCompute", BinaryScalarRTCCompute{"floor_divide"});
 
 NNVM_REGISTER_OP(_npi_rfloor_divide_scalar)
-.set_attr("FCompute", BinaryScalarRTCCompute{"rfloor_divide"});
+    .set_attr("FCompute", BinaryScalarRTCCompute{"rfloor_divide"});
 
 } // namespace op
 } // namespace mxnet

From 18549fa6c3081f730135bdaedad5c46301fda81a Mon Sep 17 00:00:00 2001
From: barry-jin
Date: Mon, 25 Oct 2021 09:24:13 -0700
Subject: [PATCH 15/19] fix docstring

---
 python/mxnet/ndarray/numpy/_op.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py
index 477eb860a29b..22f43e0275eb 100644
--- a/python/mxnet/ndarray/numpy/_op.py
+++ b/python/mxnet/ndarray/numpy/_op.py
@@ -1200,6 +1200,7 @@ def floor_divide(x1, x2, out=None):
         * If only one of the inputs is floating number type, the result is that type.
         * If both inputs are of integer types (including boolean), the output is the more
           precise type
+
     """
     if isinstance(x1, numeric_types) and isinstance(x2, numeric_types):
         return _np.floor_divide(x1, x2, out=out)

From c1b564ffc7ed41c90c04b39730bc43554226a9bd Mon Sep 17 00:00:00 2001
From: barry-jin
Date: Mon, 25 Oct 2021 12:56:22 -0700
Subject: [PATCH 16/19] Fix docstring

---
 python/mxnet/numpy/multiarray.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py
index c0ccacb11921..0b6a0c02c688 100644
--- a/python/mxnet/numpy/multiarray.py
+++ b/python/mxnet/numpy/multiarray.py
@@ -3454,6 +3454,7 @@ def true_divide(x1, x2, out=None):
 @wrap_np_binary_func
 def floor_divide(x1, x2, out=None):
     """Return the largest integer smaller or equal to the division of the inputs.
+
     It is equivalent to the Python // operator and pairs with the Python % (remainder),
     function so that a = a % b + b * (a // b) up to roundoff.
 
@@ -3481,7 +3482,7 @@ def floor_divide(x1, x2, out=None):
         * If both inputs are of floating number types, the output is the more precise type.
         * If only one of the inputs is floating number type, the result is that type.
         * If both inputs are of integer types (including boolean), the output is the more
-          precise type
+          precise type
 
     Examples
     --------

From d6c2f65ca7a8e38e1edcaa3ff137a40a81b947c1 Mon Sep 17 00:00:00 2001
From: barry-jin
Date: Wed, 27 Oct 2021 14:37:13 -0700
Subject: [PATCH 17/19] fix lint

---
 src/operator/mshadow_op.h | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h
index 3d331a3cc112..34f852ddaa02 100644
--- a/src/operator/mshadow_op.h
+++ b/src/operator/mshadow_op.h
@@ -234,9 +234,10 @@ struct rtrue_divide : public mxnet_op::tunable {
 /***** floor_divide ******/
 
 struct floor_divide : public mxnet_op::tunable {
-  template ::value &&
-            std::is_integral::value, int>::type = 0>
+  template <
+      typename DType,
+      typename std::enable_if::value && std::is_integral::value,
+      int>::type = 0>
   MSHADOW_XINLINE static DType Map(DType a, DType b) {
     DType c = static_cast(::floor(a / b));
     if ((c * a != b) && ((a < 0) != (b < 0))) {
@@ -250,9 +251,10 @@ struct floor_divide : public mxnet_op::tunable {
     return static_cast(::floor(a / b));
   }
 
-  template ::value &&
-            !std::is_same::value, int>::type = 0>
+  template <
+      typename DType,
+      typename std::enable_if::value && !std::is_same::value,
+      int>::type = 0>
   MSHADOW_XINLINE static DType Map(DType a, DType b) {
     return ::floor(a / b);
   }
@@ -263,9 +265,10 @@
 };
 
 struct rfloor_divide : public mxnet_op::tunable {
-  template ::value &&
-            std::is_integral::value, int>::type = 0>
+  template <
+      typename DType,
+      typename std::enable_if::value && std::is_integral::value,
+      int>::type = 0>
   MSHADOW_XINLINE static DType Map(DType a, DType b) {
     DType c = static_cast(::floor(b / a));
     if ((c * a != b) && ((a < 0) != (b < 0))) {
@@ -279,9 +282,10 @@
     return static_cast(::floor(b / a));
   }
 
-  template ::value &&
-            !std::is_same::value, int>::type = 0>
+  template <
+      typename DType,
+      typename std::enable_if::value && !std::is_same::value,
+      int>::type = 0>
   MSHADOW_XINLINE static DType Map(DType a, DType b) {
     return ::floor(b / a);
   }
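The `floor_divide` / `rfloor_divide` functors above dispatch between overloads with `std::enable_if`, so integer inputs get the exactness-and-sign correction while other types fall through to a plain `::floor`. The exact `enable_if` conditions are not fully legible in the hunks above, so the sketch below uses `std::is_integral` as a stand-in condition; it shows the dispatch shape, not the precise MXNet predicates:

```cpp
#include <cassert>
#include <cmath>
#include <type_traits>

// Illustrative enable_if dispatch in the same spirit as the functors above.
// The conditions here are assumptions (std::is_integral), not MXNet's exact ones.
template <typename DType,
          typename std::enable_if<std::is_integral<DType>::value, int>::type = 0>
DType floor_div(DType a, DType b) {
  DType c = a / b;                              // truncating integer division
  if ((c * b != a) && ((a < 0) != (b < 0))) {
    return c - 1;                               // correct toward negative infinity
  }
  return c;
}

template <typename DType,
          typename std::enable_if<!std::is_integral<DType>::value, int>::type = 0>
DType floor_div(DType a, DType b) {
  return std::floor(a / b);                     // floating point: floor is enough
}

int main() {
  assert(floor_div(-7, 2) == -4);               // integral overload
  assert(floor_div(-7.0, 2.0) == -4.0);         // floating-point overload
  return 0;
}
```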

From 8c94120f9b58af77708b8feeb73f3fa1a0fc40ff Mon Sep 17 00:00:00 2001
From: barry-jin
Date: Wed, 27 Oct 2021 16:47:58 -0700
Subject: [PATCH 18/19] update rtc

---
 src/common/cuda/rtc/forward_functions-inl.h | 26 ++++++++++++++-------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/src/common/cuda/rtc/forward_functions-inl.h b/src/common/cuda/rtc/forward_functions-inl.h
index 76fa4dc0234a..cf82e2c70fce 100644
--- a/src/common/cuda/rtc/forward_functions-inl.h
+++ b/src/common/cuda/rtc/forward_functions-inl.h
@@ -262,22 +262,32 @@ rsub(const DType a, const DType2 b) {
 template
 __device__ inline mixed_type
 floor_divide(const DType a, const DType2 b) {
-  mixed_type c = ::floor(a / b);
-  if ((c * b != a) && (a < 0) != (b < 0)) {
-    return mixed_type(c - 1);
+  if (type_util::is_integral::value &&
+      type_util::is_integral::value) {
+    mixed_type c = ::floor(a / b);
+    if ((c * b != a) && (a < 0) != (b < 0)) {
+      return mixed_type(c - 1);
+    } else {
+      return c;
+    }
   } else {
-    return c;
+    return ::floor(a / b);
   }
 }
 
 template
 __device__ inline mixed_type
 rfloor_divide(const DType a, const DType2 b) {
-  mixed_type c = ::floor(b / a);
-  if ((c * a != b) && (a < 0) != (b < 0)) {
-    return mixed_type(c - 1);
+  if (type_util::is_integral::value &&
+      type_util::is_integral::value) {
+    mixed_type c = ::floor(b / a);
+    if ((c * a != b) && (a < 0) != (b < 0)) {
+      return mixed_type(c - 1);
+    } else {
+      return c;
+    }
   } else {
-    return c;
+    return ::floor(b / a);
   }
 }

From c93df5db5cffa3b5031db7a102e2f2290e4a4e85 Mon Sep 17 00:00:00 2001
From: barry-jin
Date: Thu, 28 Oct 2021 11:59:09 -0700
Subject: [PATCH 19/19] fix rtc

---
 src/common/cuda/rtc/forward_functions-inl.h    | 24 +++++--------------
 .../unittest/test_numpy_interoperability.py   |  1 +
 2 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/src/common/cuda/rtc/forward_functions-inl.h b/src/common/cuda/rtc/forward_functions-inl.h
index cf82e2c70fce..2b457092b3c8 100644
--- a/src/common/cuda/rtc/forward_functions-inl.h
+++ b/src/common/cuda/rtc/forward_functions-inl.h
@@ -262,32 +262,20 @@ rsub(const DType a, const DType2 b) {
 template
 __device__ inline mixed_type
 floor_divide(const DType a, const DType2 b) {
-  if (type_util::is_integral::value &&
-      type_util::is_integral::value) {
-    mixed_type c = ::floor(a / b);
-    if ((c * b != a) && (a < 0) != (b < 0)) {
-      return mixed_type(c - 1);
-    } else {
-      return c;
-    }
+  if (type_util::has_double_or_integral::value) {
+    return ::floor((double)a / (double)b);
   } else {
-    return ::floor(a / b);
+    return ::floorf((float)a / (float)b);
   }
 }
 
 template
 __device__ inline mixed_type
 rfloor_divide(const DType a, const DType2 b) {
-  if (type_util::is_integral::value &&
-      type_util::is_integral::value) {
-    mixed_type c = ::floor(b / a);
-    if ((c * a != b) && (a < 0) != (b < 0)) {
-      return mixed_type(c - 1);
-    } else {
-      return c;
-    }
+  if (type_util::has_double_or_integral::value) {
+    return ::floor((double)b / (double)a);
   } else {
-    return ::floor(b / a);
+    return ::floorf((float)b / (float)a);
   }
 }
diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py
index 06b8871fc3d5..c8edad6b59f3 100644
--- a/tests/python/unittest/test_numpy_interoperability.py
+++ b/tests/python/unittest/test_numpy_interoperability.py
@@ -1594,6 +1594,7 @@ def _add_workload_floor_divide(array_pool):
     OpArgMngr.add_workload('floor_divide', np.array([-1, -2, -3], np.float32), 1.9999)
     OpArgMngr.add_workload('floor_divide', np.array([1000, -200, -3], np.int64), 3)
    OpArgMngr.add_workload('floor_divide', np.array([1, -2, -3, 4, -5], np.int32), 2.0001)
+    OpArgMngr.add_workload('floor_divide', np.array([1, -50, -0.2, 40000, 0], np.float64), -7)
 
 
 def _add_workload_remainder():
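The final revision drops the sign-correction branch from the RTC helpers entirely: once both operands are promoted to floating point, `floor(a / b)` already rounds toward negative infinity, so the kernel only has to choose between double and float precision (`type_util::has_double_or_integral` is the RTC-side helper used above for that choice). A standalone sketch of the idea, checked against the kind of values the new test workload feeds in; the expected results in the comment are hand-computed, not taken from the test suite:

```cpp
#include <cmath>
#include <cstdio>
#include <type_traits>

// Illustrative only: pick the wider precision for the division, then floor.
// The condition below approximates "has a double or an integral operand" with
// standard type traits; it is not MXNet's type_util implementation.
template <typename A, typename B>
double floor_divide(A a, B b) {
  const bool wide = std::is_integral<A>::value || std::is_integral<B>::value ||
                    std::is_same<A, double>::value || std::is_same<B, double>::value;
  if (wide) {
    return std::floor(static_cast<double>(a) / static_cast<double>(b));
  }
  return std::floor(static_cast<float>(a) / static_cast<float>(b));
}

int main() {
  const double x[] = {1, -50, -0.2, 40000, 0};
  for (double v : x) {
    // Hand-computed examples: 1 // -7 == -1, -50 // -7 == 7, 40000 // -7 == -5715
    std::printf("%g // -7 = %g\n", v, floor_divide(v, -7));
  }
  return 0;
}
```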