From 84d697f98b423dae47061062d971bd985aceb779 Mon Sep 17 00:00:00 2001 From: Eduardo Ponce Date: Thu, 13 May 2021 23:06:36 -0400 Subject: [PATCH 1/3] rebase with master --- cpp/src/arrow/compute/api_scalar.cc | 1 + cpp/src/arrow/compute/api_scalar.h | 12 ++ .../compute/kernels/scalar_arithmetic.cc | 78 +++++++++++++ .../compute/kernels/scalar_arithmetic_test.cc | 108 +++++++++++++++++- cpp/src/arrow/util/int_util_internal.h | 6 +- docs/source/cpp/compute.rst | 4 + 6 files changed, 204 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index c7c049af980..9f4ad42fecb 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -47,6 +47,7 @@ namespace compute { return CallFunction(func_name, {arg}, ctx); \ } +SCALAR_ARITHMETIC_UNARY(AbsoluteValue, "abs", "abs_checked") SCALAR_ARITHMETIC_UNARY(Negate, "negate", "negate_checked") #define SCALAR_ARITHMETIC_BINARY(NAME, REGISTRY_NAME, REGISTRY_CHECKED_NAME) \ diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 3e390df47e7..3a007e06567 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -160,6 +160,18 @@ struct ARROW_EXPORT ProjectOptions : public FunctionOptions { /// @} +/// \brief Get the absolute value of a value. Array values can be of arbitrary +/// length. If argument is null the result will be null. +/// +/// \param[in] arg the value transformed +/// \param[in] options arithmetic options (overflow handling), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise absolute value +ARROW_EXPORT +Result AbsoluteValue(const Datum& arg, + ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + /// \brief Add two values together. Array values must be the same length. If /// either addend is null the result will be null. 
/// diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc index f6f7555ab61..743d2e3fc0e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc @@ -16,6 +16,7 @@ // under the License. #include +#include #include "arrow/compute/kernels/common.h" #include "arrow/type_traits.h" @@ -66,6 +67,47 @@ constexpr Unsigned to_unsigned(T signed_) { return static_cast(signed_); } +struct AbsoluteValue { + template + static constexpr enable_if_floating_point Call(KernelContext*, T arg, Status*) { + return std::fabs(arg); + } + + template + static constexpr enable_if_unsigned_integer Call(KernelContext*, T arg, Status*) { + return arg; + } + + template + static constexpr enable_if_signed_integer Call(KernelContext*, T arg, Status* st) { + return (arg < 0) ? arrow::internal::SafeSignedNegate(arg) : arg; + } +}; + +struct AbsoluteValueChecked { + template + static enable_if_signed_integer Call(KernelContext*, Arg arg, Status* st) { + static_assert(std::is_same::value, ""); + if (arg == std::numeric_limits::min()) { + *st = Status::Invalid("overflow"); + return arg; + } + return std::abs(arg); + } + + template + static enable_if_unsigned_integer Call(KernelContext* ctx, Arg arg, Status* st) { + static_assert(std::is_same::value, ""); + return arg; + } + + template + static constexpr enable_if_floating_point Call(KernelContext*, Arg arg, Status* st) { + static_assert(std::is_same::value, ""); + return std::fabs(arg); + } +}; + struct Add { template static constexpr enable_if_floating_point Call(KernelContext*, T left, T right, @@ -446,6 +488,19 @@ std::shared_ptr MakeUnaryArithmeticFunction(std::string name, return func; } +// Like MakeUnaryArithmeticFunction, but for arithmetic ops that need to run +// only on non-null output. 
+template +std::shared_ptr MakeUnaryArithmeticFunctionNotNull( + std::string name, const FunctionDoc* doc) { + auto func = std::make_shared(name, Arity::Unary(), doc); + for (const auto& ty : NumericTypes()) { + auto exec = ArithmeticExecFromOp(ty); + DCHECK_OK(func->AddKernel({ty}, ty, exec)); + } + return func; +} + // Like MakeUnaryArithmeticFunction, but for signed arithmetic ops that need to run // only on non-null output. template @@ -461,6 +516,19 @@ std::shared_ptr MakeUnarySignedArithmeticFunctionNotNull( return func; } +const FunctionDoc absolute_value_doc{ + "Calculate the absolute value of the argument element-wise", + ("Results will wrap around on integer overflow.\n" + "Use function \"abs_checked\" if you want overflow\n" + "to return an error."), + {"x"}}; + +const FunctionDoc absolute_value_checked_doc{ + "Calculate the absolute value of the argument element-wise", + ("This function returns an error on overflow. For a variant that\n" + "doesn't fail on overflow, use function \"abs\"."), + {"x"}}; + const FunctionDoc add_doc{"Add the arguments element-wise", ("Results will wrap around on integer overflow.\n" "Use function \"add_checked\" if you want overflow\n" @@ -537,6 +605,16 @@ const FunctionDoc pow_checked_doc{ } // namespace void RegisterScalarArithmetic(FunctionRegistry* registry) { + // ---------------------------------------------------------------------- + auto absolute_value = + MakeUnaryArithmeticFunction("abs", &absolute_value_doc); + DCHECK_OK(registry->AddFunction(std::move(absolute_value))); + + // ---------------------------------------------------------------------- + auto absolute_value_checked = MakeUnaryArithmeticFunctionNotNull( + "abs_checked", &absolute_value_checked_doc); + DCHECK_OK(registry->AddFunction(std::move(absolute_value_checked))); + // ---------------------------------------------------------------------- auto add = MakeArithmeticFunction("add", &add_doc); DCHECK_OK(registry->AddFunction(std::move(add))); diff 
--git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc index fafba4b331b..ff66fcf1d12 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc @@ -933,7 +933,7 @@ TEST(TestBinaryArithmetic, AddWithImplicitCastsUint64EdgeCase) { } TEST(TestUnaryArithmetic, DispatchBest) { - for (std::string name : {"negate"}) { + for (std::string name : {"negate", "abs", "abs_checked"}) { for (const auto& ty : {int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(), uint64(), float32(), float64()}) { CheckDispatchBest(name, {ty}, {ty}); @@ -948,7 +948,7 @@ TEST(TestUnaryArithmetic, DispatchBest) { } } - for (std::string name : {"negate", "negate_checked"}) { + for (std::string name : {"negate", "negate_checked", "abs", "abs_checked"}) { CheckDispatchFails(name, {null()}); } } @@ -1057,5 +1057,109 @@ TYPED_TEST(TestUnaryArithmeticFloating, Negate) { } } +TYPED_TEST(TestUnaryArithmeticSigned, AbsoluteValue) { + using CType = typename TestFixture::CType; + + auto min = std::numeric_limits::min(); + auto max = std::numeric_limits::max(); + + for (auto check_overflow : {false, true}) { + this->SetOverflowCheck(check_overflow); + // Empty array + this->AssertUnaryOp(AbsoluteValue, "[]", "[]"); + // Scalar/arrays with nulls + this->AssertUnaryOp(AbsoluteValue, "[null]", "[null]"); + this->AssertUnaryOp(AbsoluteValue, "[1, null, -10]", "[1, null, 10]"); + this->AssertUnaryOp(AbsoluteValue, this->MakeNullScalar(), this->MakeNullScalar()); + // Scalar/arrays with zeros + this->AssertUnaryOp(AbsoluteValue, "[0, -0]", "[0, 0]"); + this->AssertUnaryOp(AbsoluteValue, -0, 0); + this->AssertUnaryOp(AbsoluteValue, 0, 0); + // Ordinary scalar/arrays (positive inputs) + this->AssertUnaryOp(AbsoluteValue, "[1, 10, 127]", "[1, 10, 127]"); + this->AssertUnaryOp(AbsoluteValue, 1, 1); + this->AssertUnaryOp(AbsoluteValue, this->MakeScalar(1), 
this->MakeScalar(1)); + // Ordinary scalar/arrays (negative inputs) + this->AssertUnaryOp(AbsoluteValue, "[-1, -10, -127]", "[1, 10, 127]"); + this->AssertUnaryOp(AbsoluteValue, -1, 1); + this->AssertUnaryOp(AbsoluteValue, MakeArray(-1), "[1]"); + // Min/max + this->AssertUnaryOp(AbsoluteValue, max, max); + if (check_overflow) { + this->AssertUnaryOpRaises(AbsoluteValue, MakeArray(min), "overflow"); + } else { + this->AssertUnaryOp(AbsoluteValue, min, min); + } + } + + // Overflow should not be checked on underlying value slots when output would be null + this->SetOverflowCheck(true); + auto arg = ArrayFromJSON(this->type_singleton(), MakeArray(-1, max, min)); + arg = TweakValidityBit(arg, 1, false); + arg = TweakValidityBit(arg, 2, false); + this->AssertUnaryOp(AbsoluteValue, arg, "[1, null, null]"); +} + +TYPED_TEST(TestUnaryArithmeticUnsigned, AbsoluteValue) { + using CType = typename TestFixture::CType; + + auto min = std::numeric_limits::min(); + auto max = std::numeric_limits::max(); + + for (auto check_overflow : {false, true}) { + this->SetOverflowCheck(check_overflow); + // Empty arrays + this->AssertUnaryOp(AbsoluteValue, "[]", "[]"); + // Array with nulls + this->AssertUnaryOp(AbsoluteValue, "[null]", "[null]"); + this->AssertUnaryOp(AbsoluteValue, this->MakeNullScalar(), this->MakeNullScalar()); + // Ordinary arrays + this->AssertUnaryOp(AbsoluteValue, "[0, 1, 10, 127]", "[0, 1, 10, 127]"); + // Min/max + this->AssertUnaryOp(AbsoluteValue, min, min); + this->AssertUnaryOp(AbsoluteValue, max, max); + } +} + +TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) { + using CType = typename TestFixture::CType; + + auto min = std::numeric_limits::lowest(); + auto max = std::numeric_limits::max(); + + for (auto check_overflow : {false, true}) { + this->SetOverflowCheck(check_overflow); + // Empty array + this->AssertUnaryOp(AbsoluteValue, "[]", "[]"); + // Scalar/arrays with nulls + this->AssertUnaryOp(AbsoluteValue, "[null]", "[null]"); + 
this->AssertUnaryOp(AbsoluteValue, "[1.3, null, -10.80]", "[1.3, null, 10.80]"); + this->AssertUnaryOp(AbsoluteValue, this->MakeNullScalar(), this->MakeNullScalar()); + // Scalars/arrays with zeros + this->AssertUnaryOp(AbsoluteValue, "[0.0, -0.0]", "[0.0, 0.0]"); + this->AssertUnaryOp(AbsoluteValue, -0.0F, 0.0F); + this->AssertUnaryOp(AbsoluteValue, 0.0F, 0.0F); + // Ordinary scalars/arrays (positive inputs) + this->AssertUnaryOp(AbsoluteValue, "[1.3, 10.80, 12748.001]", + "[1.3, 10.80, 12748.001]"); + this->AssertUnaryOp(AbsoluteValue, 1.3F, 1.3F); + this->AssertUnaryOp(AbsoluteValue, this->MakeScalar(1.3F), this->MakeScalar(1.3F)); + // Ordinary scalars/arrays (negative inputs) + this->AssertUnaryOp(AbsoluteValue, "[-1.3, -10.80, -12748.001]", + "[1.3, 10.80, 12748.001]"); + this->AssertUnaryOp(AbsoluteValue, -1.3F, 1.3F); + this->AssertUnaryOp(AbsoluteValue, MakeArray(-1.3F), "[1.3]"); + // Arrays with infinites + this->AssertUnaryOp(AbsoluteValue, "[Inf, -Inf]", "[Inf, Inf]"); + // Arrays with NaNs + this->SetNansEqual(true); + this->AssertUnaryOp(AbsoluteValue, "[NaN]", "[NaN]"); + this->AssertUnaryOp(AbsoluteValue, "[-NaN]", "[NaN]"); + // Min/max + this->AssertUnaryOp(AbsoluteValue, min, max); + this->AssertUnaryOp(AbsoluteValue, max, max); + } +} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/util/int_util_internal.h b/cpp/src/arrow/util/int_util_internal.h index 162f1d92a2b..c40374e00e6 100644 --- a/cpp/src/arrow/util/int_util_internal.h +++ b/cpp/src/arrow/util/int_util_internal.h @@ -63,9 +63,9 @@ OPS_WITH_OVERFLOW(DivideWithOverflow, div) #undef OP_WITH_OVERFLOW #undef OPS_WITH_OVERFLOW -// Define functions NegateWithOverflow with the signature `bool(T u, T* out)` -// where T is a signed integer type. On overflow, these functions return true. 
-// Otherwise, false is returned and `out` is updated with the result of the +// Define functions NegateWithOverflow with the signature +// `bool(T u, T* out)` where T is a signed integer type. On overflow, these functions +// return true. Otherwise, false is returned and `out` is updated with the result of the // operation. #define UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, _type, _psnip_type) \ diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 592dc4ec1b0..d34eeee526f 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -260,6 +260,10 @@ an ``Invalid`` :class:`Status` when overflow is detected. +--------------------------+------------+--------------------+---------------------+ | Function name | Arity | Input types | Output type | +==========================+============+====================+=====================+ +| abs | Unary | Numeric | Numeric | ++--------------------------+------------+--------------------+---------------------+ +| abs_checked | Unary | Numeric | Numeric | ++--------------------------+------------+--------------------+---------------------+ | add | Binary | Numeric | Numeric | +--------------------------+------------+--------------------+---------------------+ | add_checked | Binary | Numeric | Numeric | From b519ecbc245e8163616fad1835a73f52efa022c9 Mon Sep 17 00:00:00 2001 From: Eduardo Ponce Date: Thu, 13 May 2021 23:15:39 -0400 Subject: [PATCH 2/3] add compute function to Python doc --- docs/source/python/api/compute.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index da16ccdfa29..56ccb4ae1ef 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -45,6 +45,8 @@ throws an ``ArrowInvalid`` exception when overflow is detected. .. 
autosummary:: :toctree: ../generated/ + abs + abs_checked add add_checked divide From 4294ba5197416b39c2a3c00d3bf5e7945ac2ebf0 Mon Sep 17 00:00:00 2001 From: Eduardo Ponce Date: Thu, 13 May 2021 23:24:49 -0400 Subject: [PATCH 3/3] fix comment of NegateWithOverflow --- cpp/src/arrow/util/int_util_internal.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/int_util_internal.h b/cpp/src/arrow/util/int_util_internal.h index c40374e00e6..4136706629f 100644 --- a/cpp/src/arrow/util/int_util_internal.h +++ b/cpp/src/arrow/util/int_util_internal.h @@ -63,9 +63,9 @@ OPS_WITH_OVERFLOW(DivideWithOverflow, div) #undef OP_WITH_OVERFLOW #undef OPS_WITH_OVERFLOW -// Define functions NegateWithOverflow with the signature -// `bool(T u, T* out)` where T is a signed integer type. On overflow, these functions -// return true. Otherwise, false is returned and `out` is updated with the result of the +// Define functions NegateWithOverflow with the signature `bool(T u, T* out)` +// where T is a signed integer type. On overflow, these functions return true. +// Otherwise, false is returned and `out` is updated with the result of the // operation. #define UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, _type, _psnip_type) \