From e85cb4f40cbc16e43b9c6c466da52e07b7098e60 Mon Sep 17 00:00:00 2001 From: Eduardo Ponce Date: Thu, 9 Sep 2021 05:14:36 -0400 Subject: [PATCH 01/10] testing waters with StructScalar and arithmetic infrastructure --- cpp/src/arrow/compute/api_scalar.cc | 5 +-- cpp/src/arrow/compute/api_scalar.h | 17 +++++++-- .../kernels/base_arithmetic_internal.h | 36 +++++++++++++++++++ .../compute/kernels/scalar_arithmetic.cc | 28 +++++++++++++-- 4 files changed, 79 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index 3bdff691778..e044b6bcb41 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -672,13 +672,14 @@ Result RoundToMultiple(const Datum& arg, RoundToMultipleOptions options, } SCALAR_ARITHMETIC_BINARY(Add, "add", "add_checked") +SCALAR_ARITHMETIC_BINARY(Subtract, "subtract", "subtract_checked") +SCALAR_ARITHMETIC_BINARY(Multiply, "multiply", "multiply_checked") SCALAR_ARITHMETIC_BINARY(Divide, "divide", "divide_checked") +SCALAR_ARITHMETIC_BINARY(Divmod, "divmod", "divmod_checked") SCALAR_ARITHMETIC_BINARY(Logb, "logb", "logb_checked") -SCALAR_ARITHMETIC_BINARY(Multiply, "multiply", "multiply_checked") SCALAR_ARITHMETIC_BINARY(Power, "power", "power_checked") SCALAR_ARITHMETIC_BINARY(ShiftLeft, "shift_left", "shift_left_checked") SCALAR_ARITHMETIC_BINARY(ShiftRight, "shift_right", "shift_right_checked") -SCALAR_ARITHMETIC_BINARY(Subtract, "subtract", "subtract_checked") SCALAR_EAGER_BINARY(Atan2, "atan2") SCALAR_EAGER_UNARY(Floor, "floor") SCALAR_EAGER_UNARY(Ceil, "ceil") diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 7d86a555ec8..cbb187df1ce 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -571,8 +571,7 @@ Result Multiply(const Datum& left, const Datum& right, ExecContext* ctx = NULLPTR); /// \brief Divide two values. Array values must be the same length. If either -/// argument is null the result will be null. For integer types, if there is -/// a zero divisor, an error will be raised. +/// argument is null the result will be null. If divisor is zero, an error will be raised. /// /// \param[in] left the dividend /// \param[in] right the divisor @@ -584,6 +583,20 @@ Result Divide(const Datum& left, const Datum& right, ArithmeticOptions options = ArithmeticOptions(), ExecContext* ctx = NULLPTR); +/// \brief Calculate the quotient and remainder between two values. Array values must be +/// the same length. If either argument is null the result will be null. If divisor is +/// zero, an error will be raised. +/// +/// \param[in] left the dividend +/// \param[in] right the divisor +/// \param[in] options arithmetic options (enable/disable overflow checking), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise quotient and remainder +ARROW_EXPORT +Result Divmod(const Datum& left, const Datum& right, + ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + /// \brief Negate values. /// /// If argument is null the result will be null. diff --git a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h index f416881ccb8..95fff62a430 100644 --- a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h +++ b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h @@ -425,6 +425,42 @@ struct DivideChecked { } }; +template +const std::shared_ptr& DivmodType() { + // TODO(edponce): Need to find a way of mapping T to DataType. + static auto type = struct_({field("quotient", int64()), field("remainder", int64())}); + return type; +} + +struct Divmod { + template + static enable_if_floating_point Call(KernelContext*, Arg0 left, + Arg1 right, Status*) { + auto quotient = std::floor(left / right); + auto remainder = std::fmod(left, right); + ScalarVector values = {std::make_shared(quotient), std::make_shared(remainder)}; + return StructScalar(std::move(values), DivmodType()); + } + + template + static enable_if_c_integer Call(KernelContext*, Arg0 left, Arg1 right, + Status* st) { + T quotient; + T remainder = 0; + if (ARROW_PREDICT_FALSE(DivideWithOverflow(left, right, "ient))) { + if (right == 0) { + *st = Status::Invalid("divide by zero"); + } else { + quotient = 0; + } + } else { + remainder = left % right; + } + ScalarVector values = {std::make_shared(quotient), std::make_shared(remainder)}; + return StructScalar(std::move(values), DivmodType()); + } +}; + struct Negate { template static constexpr enable_if_floating_value Call(KernelContext*, Arg arg, Status*) { diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc index 984c3b56538..d189518b8c0 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -79,9 +80,6 @@ bool IsPositive(const Scalar& scalar) { return visitor.result; } -// N.B. take care not to conflict with type_traits.h as that can cause surprises in a -// unity build - // Bitwise operations struct BitWiseNot { @@ -1373,6 +1371,17 @@ std::shared_ptr MakeArithmeticFunctionNotNull(std::string name, return func; } +template +std::shared_ptr MakeArithmeticFunctionNotNullStructOutType( + std::string name, const FunctionDoc* doc, std::shared_ptr out_type) { + auto func = std::make_shared(name, Arity::Binary(), doc); + for (const auto& ty : NumericTypes()) { + auto exec = ArithmeticExecFromOp(ty); + DCHECK_OK(func->AddKernel({ty, ty}, out_type, exec)); + } + return func; +} + template std::shared_ptr MakeUnaryArithmeticFunction(std::string name, FunctionDoc doc) { @@ -1699,6 +1708,14 @@ const FunctionDoc div_checked_doc{ "integer overflow is encountered."), {"dividend", "divisor"}}; +const FunctionDoc divmod_doc{ + "Calculate the quotient and remainder", + ("Integer division by zero returns an error. However, integer overflow\n" + "wraps around, and floating-point division by zero returns an infinite.\n" + "Use function \"divmod_checked\" if you want to get an error\n" + "in all the aforementioned cases."), + {"dividend", "divisor"}}; + const FunctionDoc negate_doc{"Negate the argument element-wise", ("Results will wrap around on integer overflow.\n" "Use function \"negate_checked\" if you want overflow\n" @@ -2201,6 +2218,11 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunction(std::move(divide_checked))); + // ---------------------------------------------------------------------- + auto divmod = MakeArithmeticFunctionNotNullStructOutType("divmod", divmod_doc, + DivmodType()); + DCHECK_OK(registry->AddFunction(std::move(divmod))); + // ---------------------------------------------------------------------- auto negate = MakeUnaryArithmeticFunction("negate", negate_doc); AddDecimalUnaryKernels(negate.get()); From 68554d820ee3fa1b4f6f4f2d3ede6bef85acbb4f Mon Sep 17 00:00:00 2001 From: Eduardo Ponce Date: Tue, 21 Sep 2021 05:10:10 -0400 Subject: [PATCH 02/10] update placeholder kernels and add visitor support for StructScalar --- .../kernels/base_arithmetic_internal.h | 60 +++++++++++-------- .../arrow/compute/kernels/codegen_internal.h | 49 ++++++++++++++- .../compute/kernels/scalar_arithmetic.cc | 51 ++++++++++++++-- 3 files changed, 129 insertions(+), 31 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h index 95fff62a430..625e00e3030 100644 --- a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h +++ b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h @@ -425,39 +425,49 @@ struct DivideChecked { } }; -template -const std::shared_ptr& DivmodType() { - // TODO(edponce): Need to find a way of mapping T to DataType. - static auto type = struct_({field("quotient", int64()), field("remainder", int64())}); - return type; +std::shared_ptr DivmodType(const std::shared_ptr& ty) { + std::vector> fields{field("quotient", ty), + field("remainder", ty)}; + return struct_(fields); } struct Divmod { + // TODO(edponce): mod() should support integer types too. template - static enable_if_floating_point Call(KernelContext*, Arg0 left, - Arg1 right, Status*) { + static enable_if_floating_point Call(KernelContext*, Arg0 left, + Arg1 right, Status*) { auto quotient = std::floor(left / right); auto remainder = std::fmod(left, right); - ScalarVector values = {std::make_shared(quotient), std::make_shared(remainder)}; - return StructScalar(std::move(values), DivmodType()); + ScalarVector values = {MakeScalar(quotient), MakeScalar(remainder)}; + // TODO(edponce): How to only call DivmodType once (before processing)? + // This is a static method so there is not state. + auto ty = CTypeTraits::type_singleton(); + return StructScalar(std::move(values), DivmodType(ty)); } +}; - template - static enable_if_c_integer Call(KernelContext*, Arg0 left, Arg1 right, - Status* st) { - T quotient; - T remainder = 0; - if (ARROW_PREDICT_FALSE(DivideWithOverflow(left, right, "ient))) { - if (right == 0) { - *st = Status::Invalid("divide by zero"); - } else { - quotient = 0; - } - } else { - remainder = left % right; - } - ScalarVector values = {std::make_shared(quotient), std::make_shared(remainder)}; - return StructScalar(std::move(values), DivmodType()); +struct DivmodChecked { + template + static enable_if_floating_point Call(KernelContext*, Arg0 left, + Arg1 right, Status* st) { + // Arg0 quotient; + // Arg0 remainder = 0; + // if (ARROW_PREDICT_FALSE(DivideWithOverflow(left, right, "ient))) { + // if (right == 0) { + // *st = Status::Invalid("divide by zero"); + // } else { + // quotient = 0; + // } + // } else { + // remainder = left % right; + // } + auto quotient = std::floor(left / right); + auto remainder = std::fmod(left, right); + ScalarVector values = {MakeScalar(quotient), MakeScalar(remainder)}; + // TODO(edponce): How to only call DivmodType once (before processing)? + // This is a static method so there is not state. + auto ty = CTypeTraits::type_singleton(); + return StructScalar(std::move(values), DivmodType(ty)); } }; diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h index a6ede14176c..0a8cbb4bd3c 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.h +++ b/cpp/src/arrow/compute/kernels/codegen_internal.h @@ -189,6 +189,11 @@ struct GetOutputType { using T = Decimal256; }; +template <> +struct GetOutputType { + using T = StructScalar; +}; + // ---------------------------------------------------------------------- // enable_if helpers for C types @@ -322,6 +327,24 @@ struct OutputArrayWriter> { } }; +template +struct OutputArrayWriter> { + using T = typename TypeTraits::ScalarType; + T* values; + + explicit OutputArrayWriter(ArrayData* data) : values(data->GetMutableValues(1)) {} + + void Write(T value) { *values++ = value; } + + // Note that this doesn't write the null bitmap, which should be consistent + // with Write / WriteNull calls + void WriteNull() { *values++ = T(null()); } + + void WriteAllNull(int64_t length) { + std::memset(static_cast(values), 0, sizeof(T) * length); + } +}; + // (Un)box Scalar to / from C++ value template @@ -400,7 +423,16 @@ struct BoxScalar { static void Box(T val, Scalar* out) { checked_cast(out)->value = val; } }; -// A VisitArraySpanInline variant that calls its visitor function with logical +template <> +struct BoxScalar { + using T = StructScalar; + using ScalarType = T; + static void Box(T val, Scalar* out) { + checked_cast(out)->value = std::move(val.value); + } +}; + +// A VisitArrayDataInline variant that calls its visitor function with logical // values, such as Decimal128 rather than util::string_view. template @@ -555,6 +587,21 @@ struct OutputAdapter> { } }; +template +struct OutputAdapter> { + using T = typename TypeTraits::ScalarType; + + template + static Status Write(KernelContext*, Datum* out, Generator&& generator) { + ArrayData* out_arr = out->mutable_array(); + auto out_data = out_arr->GetMutableValues(1); + for (int64_t i = 0; i < out_arr->length; ++i) { + *out_data++ = generator(); + } + return Status::OK(); + } +}; + // A kernel exec generator for unary functions that addresses both array and // scalar inputs and dispatches input iteration and output writing to other // templates diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc index d189518b8c0..c23cc5b0af6 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc @@ -1061,6 +1061,19 @@ ArrayKernelExec GenerateArithmeticFloatingPoint(detail::GetTypeId get_id) { } } +template