diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h index 306d861b09f..d4725dc851f 100644 --- a/cpp/src/arrow/array/builder_nested.h +++ b/cpp/src/arrow/array/builder_nested.h @@ -371,14 +371,14 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder { /// \brief Builder class for fixed-length list array value types class ARROW_EXPORT FixedSizeListBuilder : public ArrayBuilder { public: - /// Use this constructor to define the built array's type explicitly. If value_builder - /// has indeterminate type, this builder will also. + /// Use this constructor to infer the built array's type. If value_builder has + /// indeterminate type, this builder will also infer it. FixedSizeListBuilder(MemoryPool* pool, std::shared_ptr const& value_builder, int32_t list_size); - /// Use this constructor to infer the built array's type. If value_builder has - /// indeterminate type, this builder will also. + /// Use this constructor to define the built array's type explicitly. If value_builder + /// has indeterminate type, this builder will also infer it. FixedSizeListBuilder(MemoryPool* pool, std::shared_ptr const& value_builder, const std::shared_ptr& type); @@ -401,7 +401,7 @@ class ARROW_EXPORT FixedSizeListBuilder : public ArrayBuilder { /// \brief Vector append /// - /// If passed, valid_bytes wil be read and any zero byte + /// If passed, valid_bytes will be read and any zero byte /// will cause the corresponding slot to be null /// /// This function affects only the validity bitmap; the child values must be appended diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc index 3bdff691778..d9d62e06240 100644 --- a/cpp/src/arrow/compute/api_scalar.cc +++ b/cpp/src/arrow/compute/api_scalar.cc @@ -648,9 +648,9 @@ SCALAR_ARITHMETIC_UNARY(Ln, "ln", "ln_checked") SCALAR_ARITHMETIC_UNARY(Log10, "log10", "log10_checked") SCALAR_ARITHMETIC_UNARY(Log1p, "log1p", "log1p_checked") SCALAR_ARITHMETIC_UNARY(Log2, "log2", "log2_checked") -SCALAR_ARITHMETIC_UNARY(Sqrt, "sqrt", "sqrt_checked") SCALAR_ARITHMETIC_UNARY(Negate, "negate", "negate_checked") SCALAR_ARITHMETIC_UNARY(Sin, "sin", "sin_checked") +SCALAR_ARITHMETIC_UNARY(Sqrt, "sqrt", "sqrt_checked") SCALAR_ARITHMETIC_UNARY(Tan, "tan", "tan_checked") SCALAR_EAGER_UNARY(Atan, "atan") SCALAR_EAGER_UNARY(Sign, "sign") @@ -673,6 +673,7 @@ Result RoundToMultiple(const Datum& arg, RoundToMultipleOptions options, SCALAR_ARITHMETIC_BINARY(Add, "add", "add_checked") SCALAR_ARITHMETIC_BINARY(Divide, "divide", "divide_checked") +SCALAR_ARITHMETIC_BINARY(Divmod, "divmod", "divmod_checked") SCALAR_ARITHMETIC_BINARY(Logb, "logb", "logb_checked") SCALAR_ARITHMETIC_BINARY(Multiply, "multiply", "multiply_checked") SCALAR_ARITHMETIC_BINARY(Power, "power", "power_checked") @@ -680,8 +681,8 @@ SCALAR_ARITHMETIC_BINARY(ShiftLeft, "shift_left", "shift_left_checked") SCALAR_ARITHMETIC_BINARY(ShiftRight, "shift_right", "shift_right_checked") SCALAR_ARITHMETIC_BINARY(Subtract, "subtract", "subtract_checked") SCALAR_EAGER_BINARY(Atan2, "atan2") -SCALAR_EAGER_UNARY(Floor, "floor") SCALAR_EAGER_UNARY(Ceil, "ceil") +SCALAR_EAGER_UNARY(Floor, "floor") SCALAR_EAGER_UNARY(Trunc, "trunc") Result MaxElementWise(const std::vector& args, diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 7d86a555ec8..5b4f6f83168 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -584,6 +584,22 @@ Result Divide(const Datum& left, const Datum& right, ArithmeticOptions options = ArithmeticOptions(), ExecContext* ctx = NULLPTR); +/// \brief Calculate the quotient and remainder between two values. +/// +/// Array values must be the same length. If either argument is null, +/// then the result will be null. If divisor is zero, an error will be raised. +/// +/// \param[in] dividend the dividend +/// \param[in] divisor the divisor +/// \param[in] options arithmetic options (enable/disable overflow checking), optional +/// \param[in] ctx the function execution context, optional +/// \return the elementwise quotient and remainder as an array of +/// struct +ARROW_EXPORT +Result Divmod(const Datum& dividend, const Datum& divisor, + ArithmeticOptions options = ArithmeticOptions(), + ExecContext* ctx = NULLPTR); + /// \brief Negate values. /// /// If argument is null the result will be null. diff --git a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h index f416881ccb8..d1900150dd2 100644 --- a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h +++ b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h @@ -383,7 +383,7 @@ struct Divide { static enable_if_decimal_value Call(KernelContext*, Arg0 left, Arg1 right, Status* st) { if (right == Arg1()) { - *st = Status::Invalid("Divide by zero"); + *st = Status::Invalid("divide by zero"); return T(); } else { return left / right; diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h index a6ede14176c..9515302666a 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.h +++ b/cpp/src/arrow/compute/kernels/codegen_internal.h @@ -189,6 +189,11 @@ struct GetOutputType { using T = Decimal256; }; +template <> +struct GetOutputType { + using T = StructScalar; +}; + // ---------------------------------------------------------------------- // enable_if helpers for C types @@ -221,6 +226,9 @@ using enable_if_decimal_value = enable_if_t::value || std::is_same::value, R>; +template +using enable_if_c_number = enable_if_t::value && !is_boolean_type::value, R>; + // ---------------------------------------------------------------------- // Iteration / value access utilities @@ -322,6 +330,24 @@ struct OutputArrayWriter> { } }; +template +struct OutputArrayWriter> { + using T = typename TypeTraits::ScalarType; + T* values; + + explicit OutputArrayWriter(ArrayData* data) : values(data->GetMutableValues(1)) {} + + void Write(T value) { *values++ = value; } + + // Note that this doesn't write the null bitmap, which should be consistent + // with Write / WriteNull calls + void WriteNull() { *values++ = T(null()); } + + void WriteAllNull(int64_t length) { + std::memset(static_cast(values), 0, sizeof(T) * length); + } +}; + // (Un)box Scalar to / from C++ value template @@ -400,7 +426,16 @@ struct BoxScalar { static void Box(T val, Scalar* out) { checked_cast(out)->value = val; } }; -// A VisitArraySpanInline variant that calls its visitor function with logical +template <> +struct BoxScalar { + using T = StructScalar; + using ScalarType = StructScalar; + static void Box(T val, Scalar* out) { + checked_cast(out)->value = std::move(val.value); + } +}; + +// A VisitArrayDataInline variant that calls its visitor function with logical // values, such as Decimal128 rather than util::string_view. template @@ -555,6 +590,21 @@ struct OutputAdapter> { } }; +template +struct OutputAdapter> { + using T = typename TypeTraits::ScalarType; + + template + static Status Write(KernelContext*, Datum* out, Generator&& generator) { + ArrayData* out_arr = out->mutable_array(); + auto out_data = out_arr->GetMutableValues(1); + for (int64_t i = 0; i < out_arr->length; ++i) { + *out_data++ = generator(); + } + return Status::OK(); + } +}; + // A kernel exec generator for unary functions that addresses both array and // scalar inputs and dispatches input iteration and output writing to other // templates @@ -591,7 +641,7 @@ struct ScalarUnary { } }; -// An alternative to ScalarUnary that Applies a scalar operation with state on +// An alternative to ScalarUnary that applies a scalar operation with state on // only the not-null values of a single array template struct ScalarUnaryNotNullStateful { diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc index 984c3b56538..aab7f08c3ed 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc @@ -65,11 +65,13 @@ struct IsPositiveVisitor { result = scalar.value > 0; return Status::OK(); } + template Status Visit(const DecimalScalar& scalar) { result = scalar.value > 0; return Status::OK(); } + Status Visit(const Scalar& scalar) { return Status::OK(); } }; @@ -79,9 +81,6 @@ bool IsPositive(const Scalar& scalar) { return visitor.result; } -// N.B. take care not to conflict with type_traits.h as that can cause surprises in a -// unity build - // Bitwise operations struct BitWiseNot { @@ -647,7 +646,7 @@ struct RoundOptionsWrapper : public OptionsWrapper { explicit RoundOptionsWrapper(OptionsType options) : OptionsWrapper(std::move(options)) { // Only positive exponents for powers of 10 are used because combining // multiply and division operations produced more stable rounding than - // using multiply-only. Refer to NumPy's round implementation: + // using multiply-only. Refer to NumPy's round implementation: // https://github.com/numpy/numpy/blob/7b2f20b406d27364c812f7a81a9c901afbd3600c/numpy/core/src/multiarray/calculation.c#L589 pow10 = RoundUtil::Pow10(std::abs(options.ndigits)); } @@ -707,7 +706,7 @@ struct RoundOptionsWrapper } }; -template +template struct Round { using CType = typename TypeTraits::CType; using State = RoundOptionsWrapper; @@ -729,13 +728,13 @@ struct Round { auto frac = round_val - std::floor(round_val); if (frac != T(0)) { // Use std::round() if in tie-breaking mode and scaled value is not 0.5. - if ((RndMode >= RoundMode::HALF_DOWN) && (frac != T(0.5))) { + if ((kRoundMode >= RoundMode::HALF_DOWN) && (frac != T(0.5))) { round_val = std::round(round_val); } else { - round_val = RoundImpl::Round(round_val); + round_val = RoundImpl::Round(round_val); } // Equality check is ommitted so that the common case of 10^0 (integer rounding) - // uses multiply-only + // uses multiply-only. round_val = ndigits > 0 ? (round_val / pow10) : (round_val * pow10); if (!std::isfinite(round_val)) { *st = Status::Invalid("overflow occurred during rounding"); @@ -900,6 +899,7 @@ struct RoundToMultiple> { std::pair pair; *st = arg.Divide(multiple).Value(&pair); if (!st->ok()) return arg; + auto& quotient = pair.first; const auto& remainder = pair.second; if (remainder == 0) return arg; if (kRoundMode >= RoundMode::HALF_DOWN) { @@ -910,25 +910,25 @@ struct RoundToMultiple> { // decimal value, but rather manipulating the multiple switch (kRoundMode) { case RoundMode::HALF_DOWN: - if (remainder.Sign() < 0) pair.first -= 1; + if (remainder.Sign() < 0) quotient -= 1; break; case RoundMode::HALF_UP: - if (remainder.Sign() >= 0) pair.first += 1; + if (remainder.Sign() >= 0) quotient += 1; break; case RoundMode::HALF_TOWARDS_ZERO: // Do nothing break; case RoundMode::HALF_TOWARDS_INFINITY: - pair.first += remainder.Sign() >= 0 ? 1 : -1; + quotient += remainder.Sign() >= 0 ? 1 : -1; break; case RoundMode::HALF_TO_EVEN: - if (pair.first.low_bits() % 2 != 0) { - pair.first += remainder.Sign() >= 0 ? 1 : -1; + if (quotient.low_bits() % 2 != 0) { + quotient += remainder.Sign() >= 0 ? 1 : -1; } break; case RoundMode::HALF_TO_ODD: - if (pair.first.low_bits() % 2 == 0) { - pair.first += remainder.Sign() >= 0 ? 1 : -1; + if (quotient.low_bits() % 2 == 0) { + quotient += remainder.Sign() >= 0 ? 1 : -1; } break; default: @@ -937,12 +937,12 @@ struct RoundToMultiple> { } else if (remainder.Sign() >= 0) { // Positive, round up/down if (remainder > half_multiple) { - pair.first += 1; + quotient += 1; } } else { // Negative, round up/down if (remainder < neg_half_multiple) { - pair.first -= 1; + quotient -= 1; } } } else { @@ -950,22 +950,22 @@ struct RoundToMultiple> { // decimal value, but rather manipulating the multiple switch (kRoundMode) { case RoundMode::DOWN: - if (remainder.Sign() < 0) pair.first -= 1; + if (remainder.Sign() < 0) quotient -= 1; break; case RoundMode::UP: - if (remainder.Sign() >= 0) pair.first += 1; + if (remainder.Sign() >= 0) quotient += 1; break; case RoundMode::TOWARDS_ZERO: // Do nothing break; case RoundMode::TOWARDS_INFINITY: - pair.first += remainder.Sign() >= 0 ? 1 : -1; + quotient += remainder.Sign() >= 0 ? 1 : -1; break; default: DCHECK(false); } } - CType round_val = pair.first * multiple; + CType round_val = quotient * multiple; if (!round_val.FitsInPrecision(ty.precision())) { *st = Status::Invalid("Rounded value ", round_val.ToString(ty.scale()), " does not fit in precision of ", ty); @@ -1002,6 +1002,102 @@ struct Trunc { } }; +// TODO(edponce): Move builders to function state and reset. +struct Divmod { + template + static enable_if_floating_value Call(KernelContext* ctx, Arg0 dividend, Arg1 divisor, Status* st) { + static_assert(std::is_same::value, ""); + + T quotient = std::floor(dividend / divisor); + T remainder = dividend - quotient * divisor; + + NumericBuilder builder; + builder.Append(quotient); + builder.Append(remainder); + std::shared_ptr array; + builder.Finish(&array); + FixedSizeListScalar list(array); + return list; + } + + template + static enable_if_integer_value Call(KernelContext* ctx, Arg0 dividend, Arg1 divisor, Status* st) { + T quotient = dividend / divisor; + T remainder = dividend - quotient * divisor; + + NumericBuilder builder; + builder.Append(quotient); + builder.Append(remainder); + std::shared_ptr array; + builder.Finish(&array); + FixedSizeListScalar list(array); + return list; + } + + // template + // static enable_if_decimal_value Call(KernelContext* ctx, Arg0 dividend, Arg1 divisor, Status* st) { + // std::pair pair; + // *st = dividend.Divide(divisor).Value(&pair); + // const auto& quotient = pair.first; + // const auto& remainder = pair.second; + // return {quotient, remainder}; + // } +}; + +struct DivmodChecked { + template + static enable_if_floating_value Call(KernelContext* ctx, Arg0 dividend, Arg1 divisor, Status* st) { + T quotient = std::floor(DivideChecked::Call(ctx, dividend, divisor, st)); + T remainder; + if (!st->ok()) { + quotient = 0; + remainder = 0; + } else { + remainder = dividend - quotient * divisor; + } + + NumericBuilder builder; + builder.Append(quotient); + builder.Append(remainder); + std::shared_ptr array; + builder.Finish(&array); + FixedSizeListScalar list(array); + return list; + } + + template + static enable_if_integer_value Call(KernelContext* ctx, Arg0 dividend, Arg1 divisor, Status* st) { + T quotient = DivideChecked::Call(ctx, dividend, divisor, st); + T remainder; + if (!st->ok()) { + return {0, 0}; + } else { + remainder = dividend - quotient * divisor; + } + + NumericBuilder builder; + builder.Append(quotient); + builder.Append(remainder); + std::shared_ptr array; + builder.Finish(&array); + FixedSizeListScalar list(array); + return list; + } + + // template + // static enable_if_decimal_value Call(KernelContext* ctx, Arg0 dividend, Arg1 divisor, Status* st) { + // std::pair pair; + // *st = dividend.Divide(divisor).Value(&pair); + // if (!st->ok()) { + // *st = Status::Invalid("division error"); + // return {T(), T()}; + // } + // const auto& quotient = pair.first; + // const auto& remainder = pair.second; + // return {quotient, remainder}; + // } +}; + // Generate a kernel given a bitwise arithmetic functor. Assumes the // functor treats all integer types of equal width identically template