diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index ff1d6619905..7edee94a767 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -161,6 +161,17 @@ static auto kCumulativeSumOptionsType = GetFunctionOptionsType( + DataMember("start", &CumulativeProductOptions::start), + DataMember("skip_nulls", &CumulativeProductOptions::skip_nulls), + DataMember("check_overflow", &CumulativeProductOptions::check_overflow)); +static auto kCumulativeMinOptionsType = GetFunctionOptionsType( + DataMember("start", &CumulativeMinOptions::start), + DataMember("skip_nulls", &CumulativeMinOptions::skip_nulls)); +static auto kCumulativeMaxOptionsType = GetFunctionOptionsType( + DataMember("start", &CumulativeMaxOptions::start), + DataMember("skip_nulls", &CumulativeMaxOptions::skip_nulls)); static auto kRankOptionsType = GetFunctionOptionsType( DataMember("sort_keys", &RankOptions::sort_keys), DataMember("null_placement", &RankOptions::null_placement), @@ -218,6 +229,38 @@ CumulativeSumOptions::CumulativeSumOptions(std::shared_ptr start, bool s check_overflow(check_overflow) {} constexpr char CumulativeSumOptions::kTypeName[]; +CumulativeProductOptions::CumulativeProductOptions(double start, bool skip_nulls, + bool check_overflow) + : CumulativeProductOptions(std::make_shared(start), skip_nulls, + check_overflow) {} +CumulativeProductOptions::CumulativeProductOptions(std::shared_ptr start, + bool skip_nulls, bool check_overflow) + : FunctionOptions(internal::kCumulativeProductOptionsType), + start(std::move(start)), + skip_nulls(skip_nulls), + check_overflow(check_overflow) {} +constexpr char CumulativeProductOptions::kTypeName[]; + +CumulativeMinOptions::CumulativeMinOptions(bool skip_nulls) + : FunctionOptions(internal::kCumulativeMinOptionsType), skip_nulls(skip_nulls) {} +CumulativeMinOptions::CumulativeMinOptions(double start, bool skip_nulls) + : CumulativeMinOptions(std::make_shared(start), skip_nulls) {} +CumulativeMinOptions::CumulativeMinOptions(std::shared_ptr start, bool skip_nulls) + : FunctionOptions(internal::kCumulativeMinOptionsType), + start(std::move(start)), + skip_nulls(skip_nulls) {} +constexpr char CumulativeMinOptions::kTypeName[]; + +CumulativeMaxOptions::CumulativeMaxOptions(bool skip_nulls) + : FunctionOptions(internal::kCumulativeMaxOptionsType), skip_nulls(skip_nulls) {} +CumulativeMaxOptions::CumulativeMaxOptions(double start, bool skip_nulls) + : CumulativeMaxOptions(std::make_shared(start), skip_nulls) {} +CumulativeMaxOptions::CumulativeMaxOptions(std::shared_ptr start, bool skip_nulls) + : FunctionOptions(internal::kCumulativeMaxOptionsType), + start(std::move(start)), + skip_nulls(skip_nulls) {} +constexpr char CumulativeMaxOptions::kTypeName[]; + RankOptions::RankOptions(std::vector sort_keys, NullPlacement null_placement, RankOptions::Tiebreaker tiebreaker) : FunctionOptions(internal::kRankOptionsType), @@ -236,6 +279,9 @@ void RegisterVectorOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kPartitionNthOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kSelectKOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeSumOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeProductOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeMinOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeMaxOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kRankOptionsType)); } } // namespace internal @@ -379,8 +425,27 @@ Result> DropNull(const Array& values, ExecContext* ctx) { Result CumulativeSum(const Datum& values, const CumulativeSumOptions& options, ExecContext* ctx) { - auto func_name = (options.check_overflow) ? "cumulative_sum_checked" : "cumulative_sum"; - return CallFunction(func_name, {Datum(values)}, &options, ctx); + return CallFunction( + options.check_overflow ? "cumulative_sum_checked" : "cumulative_sum", + {Datum(values)}, &options, ctx); +} + +Result CumulativeProduct(const Datum& values, + const CumulativeProductOptions& options, + ExecContext* ctx) { + return CallFunction( + options.check_overflow ? "cumulative_product_checked" : "cumulative_product", + {Datum(values)}, &options, ctx); +} + +Result CumulativeMin(const Datum& values, const CumulativeMinOptions& options, + ExecContext* ctx) { + return CallFunction("cumulative_min", {Datum(values)}, &options, ctx); +} + +Result CumulativeMax(const Datum& values, const CumulativeMaxOptions& options, + ExecContext* ctx) { + return CallFunction("cumulative_max", {Datum(values)}, &options, ctx); } // ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 88331b6e592..7e45172c824 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -235,7 +235,10 @@ class ARROW_EXPORT CumulativeSumOptions : public FunctionOptions { static constexpr char const kTypeName[] = "CumulativeSumOptions"; static CumulativeSumOptions Defaults() { return CumulativeSumOptions(); } - /// Optional starting value for cumulative operation computation + const bool is_minmax = false; + const bool is_max = false; + + /// Optional starting value for cumulative sum std::shared_ptr start; /// If true, nulls in the input are ignored and produce a corresponding null output. @@ -246,6 +249,70 @@ class ARROW_EXPORT CumulativeSumOptions : public FunctionOptions { bool check_overflow = false; }; +/// \brief Options for cumulative product function +class ARROW_EXPORT CumulativeProductOptions : public FunctionOptions { + public: + explicit CumulativeProductOptions(double start = 1, bool skip_nulls = false, + bool check_overflow = false); + explicit CumulativeProductOptions(std::shared_ptr start, + bool skip_nulls = false, bool check_overflow = false); + static constexpr char const kTypeName[] = "CumulativeProductOptions"; + static CumulativeProductOptions Defaults() { return CumulativeProductOptions(); } + + const bool is_minmax = false; + const bool is_max = false; + + /// Optional starting value for cumulative product + std::shared_ptr start; + + /// If true, nulls in the input are ignored and produce a corresponding null output. + /// When false, the first null encountered is propagated through the remaining output. + bool skip_nulls = false; + + /// When true, returns an Invalid Status when overflow is detected + bool check_overflow = false; +}; + +/// \brief Options for cumulative min functions +class ARROW_EXPORT CumulativeMinOptions : public FunctionOptions { + public: + explicit CumulativeMinOptions(bool skip_nulls = false); + explicit CumulativeMinOptions(double start, bool skip_nulls = false); + explicit CumulativeMinOptions(std::shared_ptr start, bool skip_nulls = false); + static constexpr char const kTypeName[] = "CumulativeMinOptions"; + static CumulativeMinOptions Defaults() { return CumulativeMinOptions(); } + + const bool is_minmax = true; + const bool is_max = false; + + /// Optional starting value for cumulative min + std::shared_ptr start; + + /// If true, nulls in the input are ignored and produce a corresponding null output. + /// When false, the first null encountered is propagated through the remaining output. + bool skip_nulls = false; +}; + +/// \brief Options for cumulative max functions +class ARROW_EXPORT CumulativeMaxOptions : public FunctionOptions { + public: + explicit CumulativeMaxOptions(bool skip_nulls = false); + explicit CumulativeMaxOptions(double start, bool skip_nulls = false); + explicit CumulativeMaxOptions(std::shared_ptr start, bool skip_nulls = false); + static constexpr char const kTypeName[] = "CumulativeMaxOptions"; + static CumulativeMaxOptions Defaults() { return CumulativeMaxOptions(); } + + const bool is_minmax = true; + const bool is_max = true; + + /// Optional starting value for cumulative max + std::shared_ptr start; + + /// If true, nulls in the input are ignored and produce a corresponding null output. + /// When false, the first null encountered is propagated through the remaining output. + bool skip_nulls = false; +}; + /// @} /// \brief Filter with a boolean selection filter @@ -586,6 +653,24 @@ Result CumulativeSum( const CumulativeSumOptions& options = CumulativeSumOptions::Defaults(), ExecContext* ctx = NULLPTR); +ARROW_EXPORT +Result CumulativeProduct( + const Datum& values, + const CumulativeProductOptions& options = CumulativeProductOptions::Defaults(), + ExecContext* ctx = NULLPTR); + +ARROW_EXPORT +Result CumulativeMin( + const Datum& values, + const CumulativeMinOptions& options = CumulativeMinOptions::Defaults(), + ExecContext* ctx = NULLPTR); + +ARROW_EXPORT +Result CumulativeMax( + const Datum& values, + const CumulativeMaxOptions& options = CumulativeMaxOptions::Defaults(), + ExecContext* ctx = NULLPTR); + // ---------------------------------------------------------------------- // Deprecated functions diff --git a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h index f416881ccb8..49da6117b89 100644 --- a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h +++ b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h @@ -597,6 +597,55 @@ struct Sign { } }; +struct Min { + template + static constexpr T Call(KernelContext*, Arg0 left, Arg1 right, Status*) { + return (left < right) ? left : right; + } +}; +struct Max { + template + static constexpr T Call(KernelContext*, Arg0 left, Arg1 right, Status*) { + return (left > right) ? left : right; + } +}; + +template +struct AntiExtrema { + static constexpr CType anti_min() { return std::numeric_limits::max(); } + static constexpr CType anti_max() { return std::numeric_limits::min(); } +}; + +template <> +struct AntiExtrema { + static constexpr bool anti_min() { return true; } + static constexpr bool anti_max() { return false; } +}; + +template <> +struct AntiExtrema { + static constexpr float anti_min() { return std::numeric_limits::infinity(); } + static constexpr float anti_max() { return -std::numeric_limits::infinity(); } +}; + +template <> +struct AntiExtrema { + static constexpr double anti_min() { return std::numeric_limits::infinity(); } + static constexpr double anti_max() { return -std::numeric_limits::infinity(); } +}; + +template <> +struct AntiExtrema { + static constexpr Decimal128 anti_min() { return BasicDecimal128::GetMaxSentinel(); } + static constexpr Decimal128 anti_max() { return BasicDecimal128::GetMinSentinel(); } +}; + +template <> +struct AntiExtrema { + static constexpr Decimal256 anti_min() { return BasicDecimal256::GetMaxSentinel(); } + static constexpr Decimal256 anti_max() { return BasicDecimal256::GetMinSentinel(); } +}; + } // namespace internal } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc index 49c88324a91..cb9d935b072 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc @@ -36,6 +36,7 @@ #include "arrow/compute/kernel.h" #include "arrow/compute/kernels/aggregate_internal.h" #include "arrow/compute/kernels/aggregate_var_std_internal.h" +#include "arrow/compute/kernels/base_arithmetic_internal.h" #include "arrow/compute/kernels/common.h" #include "arrow/compute/kernels/row_encoder.h" #include "arrow/compute/kernels/util_internal.h" @@ -1199,43 +1200,6 @@ HashAggregateKernel MakeApproximateMedianKernel(HashAggregateFunction* tdigest_f // ---------------------------------------------------------------------- // MinMax implementation - -template -struct AntiExtrema { - static constexpr CType anti_min() { return std::numeric_limits::max(); } - static constexpr CType anti_max() { return std::numeric_limits::min(); } -}; - -template <> -struct AntiExtrema { - static constexpr bool anti_min() { return true; } - static constexpr bool anti_max() { return false; } -}; - -template <> -struct AntiExtrema { - static constexpr float anti_min() { return std::numeric_limits::infinity(); } - static constexpr float anti_max() { return -std::numeric_limits::infinity(); } -}; - -template <> -struct AntiExtrema { - static constexpr double anti_min() { return std::numeric_limits::infinity(); } - static constexpr double anti_max() { return -std::numeric_limits::infinity(); } -}; - -template <> -struct AntiExtrema { - static constexpr Decimal128 anti_min() { return BasicDecimal128::GetMaxSentinel(); } - static constexpr Decimal128 anti_max() { return BasicDecimal128::GetMinSentinel(); } -}; - -template <> -struct AntiExtrema { - static constexpr Decimal256 anti_min() { return BasicDecimal256::GetMaxSentinel(); } - static constexpr Decimal256 anti_max() { return BasicDecimal256::GetMinSentinel(); } -}; - template struct GroupedMinMaxImpl final : public GroupedAggregator { using CType = typename TypeTraits::CType; diff --git a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc index fb221aa9fe6..4450999ab09 100644 --- a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc +++ b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc @@ -33,9 +33,10 @@ namespace internal { namespace { -template +template struct CumulativeOptionsWrapper : public OptionsWrapper { - using State = CumulativeOptionsWrapper; + using State = CumulativeOptionsWrapper; + using CType = typename TypeTraits::CType; explicit CumulativeOptionsWrapper(OptionsType options) : OptionsWrapper(std::move(options)) {} @@ -50,6 +51,14 @@ struct CumulativeOptionsWrapper : public OptionsWrapper { const auto& start = options->start; if (!start || !start->is_valid) { + if (options->is_minmax) { + auto new_scalar = std::make_shared>( + (options->is_max) ? AntiExtrema::anti_max() + : AntiExtrema::anti_min()); + + OptionsType new_options(new_scalar, options->skip_nulls); + return ::arrow::internal::make_unique(new_options); + } return Status::Invalid("Cumulative `start` option must be non-null and valid"); } @@ -119,7 +128,7 @@ struct Accumulator { template struct CumulativeKernel { static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { - const auto& options = CumulativeOptionsWrapper::Get(ctx); + const auto& options = CumulativeOptionsWrapper::Get(ctx); Accumulator accumulator(ctx); accumulator.current_value = UnboxScalar::Unbox(*(options.start)); accumulator.skip_nulls = options.skip_nulls; @@ -137,7 +146,7 @@ struct CumulativeKernel { template struct CumulativeKernelChunked { static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { - const auto& options = CumulativeOptionsWrapper::Get(ctx); + const auto& options = CumulativeOptionsWrapper::Get(ctx); Accumulator accumulator(ctx); accumulator.current_value = UnboxScalar::Unbox(*(options.start)); accumulator.skip_nulls = options.skip_nulls; @@ -172,8 +181,57 @@ const FunctionDoc cumulative_sum_checked_doc{ "function \"cumulative_sum\"."), {"values"}, "CumulativeSumOptions"}; + +const FunctionDoc cumulative_product_doc{ + "Compute the cumulative product over a numeric input", + ("`values` must be numeric. Return an array/chunked array which is the\n" + "cumulative product computed over `values`. Results will wrap around on\n" + "integer overflow. Use function \"cumulative_product_checked\" if you want\n" + "overflow to return an error."), + {"values"}, + "CumulativeProductOptions"}; + +const FunctionDoc cumulative_product_checked_doc{ + "Compute the cumulative product over a numeric input", + ("`values` must be numeric. Return an array/chunked array which is the\n" + "cumulative product computed over `values`. This function returns an error\n" + "on overflow. For a variant that doesn't fail on overflow, use\n" + "function \"cumulative_product\"."), + {"values"}, + "CumulativeProductOptions"}; + +const FunctionDoc cumulative_min_doc{ + "Compute the cumulative min over a numeric input", + ("`values` must be numeric. Return an array/chunked array which is the\n" + "cumulative min computed over `values`."), + {"values"}, + "CumulativeMinOptions"}; + +const FunctionDoc cumulative_max_doc{ + "Compute the cumulative max over a numeric input", + ("`values` must be numeric. Return an array/chunked array which is the\n" + "cumulative max computed over `values`."), + {"values"}, + "CumulativeMaxOptions"}; } // namespace +template +void AddCumulativeVectorKernel(std::shared_ptr& func) { + VectorKernel kernel; + auto ty = TypeTraits::type_singleton(); + + kernel.can_execute_chunkwise = false; + kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE; + kernel.mem_allocation = MemAllocation::type::NO_PREALLOCATE; + kernel.signature = KernelSignature::Make({InputType(ty)}, OutputType(ty)); + kernel.exec = + ArithmeticExecFromOp(ty); + kernel.exec_chunked = ArithmeticExecFromOp(ty); + kernel.init = CumulativeOptionsWrapper::Init; + DCHECK_OK(func->AddKernel(std::move(kernel))); +} + template void MakeVectorCumulativeFunction(FunctionRegistry* registry, const std::string func_name, const FunctionDoc doc) { @@ -181,32 +239,35 @@ void MakeVectorCumulativeFunction(FunctionRegistry* registry, const std::string auto func = std::make_shared(func_name, Arity::Unary(), doc, &kDefaultOptions); - std::vector> types; - types.insert(types.end(), NumericTypes().begin(), NumericTypes().end()); - - for (const auto& ty : types) { - VectorKernel kernel; - kernel.can_execute_chunkwise = false; - kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE; - kernel.mem_allocation = MemAllocation::type::NO_PREALLOCATE; - kernel.signature = KernelSignature::Make({ty}, OutputType(ty)); - kernel.exec = - ArithmeticExecFromOp(ty); - kernel.exec_chunked = - ArithmeticExecFromOp(ty); - kernel.init = CumulativeOptionsWrapper::Init; - DCHECK_OK(func->AddKernel(std::move(kernel))); - } + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); DCHECK_OK(registry->AddFunction(std::move(func))); } -void RegisterVectorCumulativeSum(FunctionRegistry* registry) { +void RegisterVectorCumulativeOps(FunctionRegistry* registry) { MakeVectorCumulativeFunction(registry, "cumulative_sum", cumulative_sum_doc); MakeVectorCumulativeFunction( registry, "cumulative_sum_checked", cumulative_sum_checked_doc); + + MakeVectorCumulativeFunction( + registry, "cumulative_product", cumulative_product_doc); + MakeVectorCumulativeFunction( + registry, "cumulative_product_checked", cumulative_product_checked_doc); + + MakeVectorCumulativeFunction(registry, "cumulative_min", + cumulative_min_doc); + MakeVectorCumulativeFunction(registry, "cumulative_max", + cumulative_max_doc); } } // namespace internal diff --git a/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc b/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc index 9ec287b537d..fed447f10f8 100644 --- a/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc @@ -34,77 +34,34 @@ namespace arrow { namespace compute { -TEST(TestCumulativeSum, Empty) { - CumulativeSumOptions options; +void TestEmpty(std::string func, FunctionOptions* options) { for (auto ty : NumericTypes()) { auto empty_arr = ArrayFromJSON(ty, "[]"); auto empty_chunked = ChunkedArrayFromJSON(ty, {"[]"}); - CheckVectorUnary("cumulative_sum", empty_arr, empty_arr, &options); - CheckVectorUnary("cumulative_sum_checked", empty_arr, empty_arr, &options); - - CheckVectorUnary("cumulative_sum", empty_chunked, empty_chunked, &options); - CheckVectorUnary("cumulative_sum_checked", empty_chunked, empty_chunked, &options); + CheckVectorUnary(func, empty_arr, empty_arr, options); + CheckVectorUnary(func, empty_chunked, empty_chunked, options); } } -TEST(TestCumulativeSum, AllNulls) { - CumulativeSumOptions options; +void TestAllNulls(std::string func, FunctionOptions* options) { for (auto ty : NumericTypes()) { auto nulls_arr = ArrayFromJSON(ty, "[null, null, null]"); auto nulls_one_chunk = ChunkedArrayFromJSON(ty, {"[null, null, null]"}); auto nulls_three_chunks = ChunkedArrayFromJSON(ty, {"[null]", "[null]", "[null]"}); - CheckVectorUnary("cumulative_sum", nulls_arr, nulls_arr, &options); - CheckVectorUnary("cumulative_sum_checked", nulls_arr, nulls_arr, &options); - - CheckVectorUnary("cumulative_sum", nulls_one_chunk, nulls_one_chunk, &options); - CheckVectorUnary("cumulative_sum_checked", nulls_one_chunk, nulls_one_chunk, - &options); - - CheckVectorUnary("cumulative_sum", nulls_three_chunks, nulls_one_chunk, &options); - CheckVectorUnary("cumulative_sum_checked", nulls_three_chunks, nulls_one_chunk, - &options); + CheckVectorUnary(func, nulls_arr, nulls_arr, options); + CheckVectorUnary(func, nulls_one_chunk, nulls_one_chunk, options); + CheckVectorUnary(func, nulls_three_chunks, nulls_one_chunk, options); } } -TEST(TestCumulativeSum, ScalarInput) { - CumulativeSumOptions no_start_no_skip; - CumulativeSumOptions no_start_do_skip(0, true); - CumulativeSumOptions has_start_no_skip(10); - CumulativeSumOptions has_start_do_skip(10, true); - - for (auto ty : NumericTypes()) { - CheckVectorUnary("cumulative_sum", ScalarFromJSON(ty, "10"), - ArrayFromJSON(ty, "[10]"), &no_start_no_skip); - CheckVectorUnary("cumulative_sum_checked", ScalarFromJSON(ty, "10"), - ArrayFromJSON(ty, "[10]"), &no_start_no_skip); - - CheckVectorUnary("cumulative_sum", ScalarFromJSON(ty, "10"), - ArrayFromJSON(ty, "[20]"), &has_start_no_skip); - CheckVectorUnary("cumulative_sum_checked", ScalarFromJSON(ty, "10"), - ArrayFromJSON(ty, "[20]"), &has_start_no_skip); - - CheckVectorUnary("cumulative_sum", ScalarFromJSON(ty, "null"), - ArrayFromJSON(ty, "[null]"), &no_start_no_skip); - CheckVectorUnary("cumulative_sum_checked", ScalarFromJSON(ty, "null"), - ArrayFromJSON(ty, "[null]"), &no_start_no_skip); - CheckVectorUnary("cumulative_sum", ScalarFromJSON(ty, "null"), - ArrayFromJSON(ty, "[null]"), &has_start_no_skip); - CheckVectorUnary("cumulative_sum_checked", ScalarFromJSON(ty, "null"), - ArrayFromJSON(ty, "[null]"), &has_start_no_skip); +using testing::HasSubstr; - CheckVectorUnary("cumulative_sum", ScalarFromJSON(ty, "null"), - ArrayFromJSON(ty, "[null]"), &no_start_do_skip); - CheckVectorUnary("cumulative_sum_checked", ScalarFromJSON(ty, "null"), - ArrayFromJSON(ty, "[null]"), &no_start_do_skip); - CheckVectorUnary("cumulative_sum", ScalarFromJSON(ty, "null"), - ArrayFromJSON(ty, "[null]"), &has_start_do_skip); - CheckVectorUnary("cumulative_sum_checked", ScalarFromJSON(ty, "null"), - ArrayFromJSON(ty, "[null]"), &has_start_do_skip); - } +void TestScalarNotSupported(std::string func, FunctionOptions* options) { + EXPECT_RAISES_WITH_MESSAGE_THAT( + NotImplemented, HasSubstr("no kernel"), + CallFunction(func, {std::make_shared(5)}, options)); } -using testing::HasSubstr; - template void CheckCumulativeSumUnsignedOverflow() { using CType = typename TypeTraits::CType; @@ -116,17 +73,17 @@ void CheckCumulativeSumUnsignedOverflow() { BuilderType builder; std::shared_ptr max_arr; - std::shared_ptr min_arr; + std::shared_ptr overflow_arr; ASSERT_OK(builder.Append(max)); ASSERT_OK(builder.Finish(&max_arr)); builder.Reset(); ASSERT_OK(builder.Append(min)); - ASSERT_OK(builder.Finish(&min_arr)); + ASSERT_OK(builder.Finish(&overflow_arr)); EXPECT_RAISES_WITH_MESSAGE_THAT( Invalid, HasSubstr("overflow"), CallFunction("cumulative_sum_checked", {max_arr}, &pos_overflow)); - CheckVectorUnary("cumulative_sum", max_arr, min_arr, &pos_overflow); + CheckVectorUnary("cumulative_sum", max_arr, overflow_arr, &pos_overflow); } template @@ -141,20 +98,166 @@ void CheckCumulativeSumSignedOverflow() { auto min = std::numeric_limits::lowest(); BuilderType builder; - std::shared_ptr max_arr; std::shared_ptr min_arr; + std::shared_ptr overflow_arr; + ASSERT_OK(builder.Append(min)); + ASSERT_OK(builder.Finish(&min_arr)); + builder.Reset(); + ASSERT_OK(builder.Append(max)); + ASSERT_OK(builder.Finish(&overflow_arr)); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, HasSubstr("overflow"), + CallFunction("cumulative_sum_checked", {min_arr}, &neg_overflow)); + CheckVectorUnary("cumulative_sum", min_arr, overflow_arr, &neg_overflow); +} + +template +void CheckCumulativeProductUnsignedOverflow() { + using CType = typename TypeTraits::CType; + using BuilderType = typename TypeTraits::BuilderType; + + CumulativeProductOptions pos_overflow(2); + auto max = std::numeric_limits::max(); + + BuilderType builder; + std::shared_ptr max_arr; + std::shared_ptr overflow_arr; ASSERT_OK(builder.Append(max)); ASSERT_OK(builder.Finish(&max_arr)); builder.Reset(); + ASSERT_OK(builder.Append(max << 1)); + ASSERT_OK(builder.Finish(&overflow_arr)); + + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, HasSubstr("overflow"), + CallFunction("cumulative_product_checked", {max_arr}, &pos_overflow)); + CheckVectorUnary("cumulative_product", max_arr, overflow_arr, &pos_overflow); +} + +template +void CheckCumulativeProductSignedOverflow() { + using CType = typename TypeTraits::CType; + using BuilderType = typename TypeTraits::BuilderType; + + CheckCumulativeProductUnsignedOverflow(); + + CumulativeProductOptions neg_overflow(2); + auto min = std::numeric_limits::lowest(); + + BuilderType builder; + std::shared_ptr min_arr; + std::shared_ptr overflow_arr; ASSERT_OK(builder.Append(min)); ASSERT_OK(builder.Finish(&min_arr)); + builder.Reset(); + ASSERT_OK(builder.Append(0)); + ASSERT_OK(builder.Finish(&overflow_arr)); EXPECT_RAISES_WITH_MESSAGE_THAT( Invalid, HasSubstr("overflow"), - CallFunction("cumulative_sum_checked", {min_arr}, &neg_overflow)); - CheckVectorUnary("cumulative_sum", min_arr, max_arr, &neg_overflow); + CallFunction("cumulative_product_checked", {min_arr}, &neg_overflow)); + CheckVectorUnary("cumulative_product", min_arr, overflow_arr, &neg_overflow); +} + +TEST(TestCumulativeOps, Empty) { + CumulativeSumOptions sum_options; + TestEmpty("cumulative_sum", &sum_options); + TestEmpty("cumulative_sum_checked", &sum_options); + + CumulativeProductOptions product_options; + TestEmpty("cumulative_product", &product_options); + TestEmpty("cumulative_product_checked", &product_options); + + CumulativeMinOptions min_options; + TestEmpty("cumulative_min", &min_options); + + CumulativeMaxOptions max_options; + TestEmpty("cumulative_max", &max_options); +} + +TEST(TestCumulativeOps, AllNulls) { + CumulativeSumOptions sum_options; + TestAllNulls("cumulative_sum", &sum_options); + TestAllNulls("cumulative_sum_checked", &sum_options); + + CumulativeProductOptions product_options; + TestAllNulls("cumulative_product", &product_options); + TestAllNulls("cumulative_product_checked", &product_options); + + CumulativeMinOptions min_options; + TestAllNulls("cumulative_min", &min_options); + + CumulativeMaxOptions max_options; + TestAllNulls("cumulative_max", &max_options); +} + +TEST(TestCumulativeSum, ScalarInput) { + CumulativeSumOptions no_start; + CumulativeSumOptions has_start(10); + + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_sum", ScalarFromJSON(ty, "10"), + ArrayFromJSON(ty, "[10]"), &no_start); + CheckVectorUnary("cumulative_sum_checked", ScalarFromJSON(ty, "10"), + ArrayFromJSON(ty, "[10]"), &no_start); + + CheckVectorUnary("cumulative_sum", ScalarFromJSON(ty, "10"), + ArrayFromJSON(ty, "[20]"), &has_start); + CheckVectorUnary("cumulative_sum_checked", ScalarFromJSON(ty, "10"), + ArrayFromJSON(ty, "[20]"), &has_start); + + CheckVectorUnary("cumulative_sum", ScalarFromJSON(ty, "null"), + ArrayFromJSON(ty, "[null]"), &no_start); + CheckVectorUnary("cumulative_sum_checked", ScalarFromJSON(ty, "null"), + ArrayFromJSON(ty, "[null]"), &no_start); + } } -TEST(TestCumulativeSum, IntegerOverflow) { +TEST(TestCumulativeProduct, ScalarInput) { + CumulativeProductOptions no_start; + CumulativeProductOptions has_start(2); + + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_product", ScalarFromJSON(ty, "10"), + ArrayFromJSON(ty, "[10]"), &no_start); + CheckVectorUnary("cumulative_product_checked", ScalarFromJSON(ty, "10"), + ArrayFromJSON(ty, "[10]"), &no_start); + + CheckVectorUnary("cumulative_product", ScalarFromJSON(ty, "10"), + ArrayFromJSON(ty, "[20]"), &has_start); + CheckVectorUnary("cumulative_product_checked", ScalarFromJSON(ty, "10"), + ArrayFromJSON(ty, "[20]"), &has_start); + + CheckVectorUnary("cumulative_product", ScalarFromJSON(ty, "null"), + ArrayFromJSON(ty, "[null]"), &no_start); + CheckVectorUnary("cumulative_product_checked", ScalarFromJSON(ty, "null"), + ArrayFromJSON(ty, "[null]"), &no_start); + } +} + +TEST(TestCumulativeMinMax, ScalarInput) { + CumulativeMinOptions min_no_start; + CumulativeMinOptions min_has_start(1.0); + + CumulativeMaxOptions max_no_start; + CumulativeMaxOptions max_has_start(20.0); + + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_min", ScalarFromJSON(ty, "10"), + ArrayFromJSON(ty, "[10]"), &min_no_start); + CheckVectorUnary("cumulative_max", ScalarFromJSON(ty, "10"), + ArrayFromJSON(ty, "[10]"), &max_no_start); + + CheckVectorUnary("cumulative_min", ScalarFromJSON(ty, "10"), ArrayFromJSON(ty, "[1]"), + &min_has_start); + + CheckVectorUnary("cumulative_min", ScalarFromJSON(ty, "null"), + ArrayFromJSON(ty, "[null]"), &min_no_start); + CheckVectorUnary("cumulative_max", ScalarFromJSON(ty, "null"), + ArrayFromJSON(ty, "[null]"), &max_no_start); + } +} + +TEST(TestCumulativeOps, IntegerOverflow) { CheckCumulativeSumUnsignedOverflow(); CheckCumulativeSumUnsignedOverflow(); CheckCumulativeSumUnsignedOverflow(); @@ -163,6 +266,15 @@ TEST(TestCumulativeSum, IntegerOverflow) { CheckCumulativeSumSignedOverflow(); CheckCumulativeSumSignedOverflow(); CheckCumulativeSumSignedOverflow(); + + CheckCumulativeProductUnsignedOverflow(); + CheckCumulativeProductUnsignedOverflow(); + CheckCumulativeProductUnsignedOverflow(); + CheckCumulativeProductUnsignedOverflow(); + CheckCumulativeProductSignedOverflow(); + CheckCumulativeProductSignedOverflow(); + CheckCumulativeProductSignedOverflow(); + CheckCumulativeProductSignedOverflow(); } TEST(TestCumulativeSum, NoStartNoSkip) { @@ -210,6 +322,96 @@ TEST(TestCumulativeSum, NoStartNoSkip) { } } +TEST(TestCumulativeProduct, NoStartNoSkip) { + CumulativeProductOptions options; + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[1, 2, 3, 4, 5]"), + ArrayFromJSON(ty, "[1, 2, 6, 24, 120]"), &options); + CheckVectorUnary("cumulative_product_checked", ArrayFromJSON(ty, "[1, 2, 3, 4, 5]"), + ArrayFromJSON(ty, "[1, 2, 6, 24, 120]"), &options); + + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[1, 2, null, 4, null]"), + ArrayFromJSON(ty, "[1, 2, null, null, null]"), &options); + CheckVectorUnary("cumulative_product_checked", + ArrayFromJSON(ty, "[1, 2, null, 4, null]"), + ArrayFromJSON(ty, "[1, 2, null, null, null]"), &options); + + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[null, 2, null, 4, null]"), + ArrayFromJSON(ty, "[null, null, null, null, null]"), &options); + CheckVectorUnary("cumulative_product_checked", + ArrayFromJSON(ty, "[null, 2, null, 4, null]"), + ArrayFromJSON(ty, "[null, null, null, null, null]"), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, 6, 24, 120]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, 6, 24, 120]"}), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, null, null, null]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, null, null, null]"}), &options); + + CheckVectorUnary( + "cumulative_product", ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[null, null, null, null, null]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[null, null, null, null, null]"}), + &options); + } +} + +TEST(TestCumulativeMinMax, NoStartNoSkip) { + CumulativeMinOptions min_options; + CumulativeMaxOptions max_options; + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[9, 24, 7, 123, 5, 74]"), + ArrayFromJSON(ty, "[9, 9, 7, 7, 5, 5]"), &min_options); + CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[9, 24, 7, 123, 5, 74]"), + ArrayFromJSON(ty, "[9, 24, 24, 123, 123, 123]"), &max_options); + + CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[9, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[9, 9, null, null, null, null]"), &min_options); + CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[9, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[9, 24, null, null, null, null]"), &max_options); + + CheckVectorUnary( + "cumulative_min", ArrayFromJSON(ty, "[null, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &min_options); + CheckVectorUnary( + "cumulative_max", ArrayFromJSON(ty, "[null, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &max_options); + + CheckVectorUnary("cumulative_min", + ChunkedArrayFromJSON(ty, {"[9, 24, 7]", "[123, 5, 74]"}), + ChunkedArrayFromJSON(ty, {"[9, 9, 7, 7, 5, 5]"}), &min_options); + CheckVectorUnary( + "cumulative_max", ChunkedArrayFromJSON(ty, {"[9, 24, 7]", "[123, 5, 74]"}), + ChunkedArrayFromJSON(ty, {"[9, 24, 24, 123, 123, 123]"}), &max_options); + + CheckVectorUnary( + "cumulative_min", ChunkedArrayFromJSON(ty, {"[9, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[9, 9, null, null, null, null]"}), &min_options); + CheckVectorUnary( + "cumulative_max", ChunkedArrayFromJSON(ty, {"[9, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[9, 24, null, null, null, null]"}), &max_options); + + CheckVectorUnary("cumulative_min", + ChunkedArrayFromJSON(ty, {"[null, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}), + &min_options); + CheckVectorUnary("cumulative_max", + ChunkedArrayFromJSON(ty, {"[null, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}), + &max_options); + } +} + TEST(TestCumulativeSum, NoStartDoSkip) { CumulativeSumOptions options(0, true); for (auto ty : NumericTypes()) { @@ -254,6 +456,95 @@ TEST(TestCumulativeSum, NoStartDoSkip) { } } +TEST(TestCumulativeProduct, NoStartDoSkip) { + CumulativeProductOptions options(1, true); + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[1, 2, 3, 4, 5]"), + ArrayFromJSON(ty, "[1, 2, 6, 24, 120]"), &options); + CheckVectorUnary("cumulative_product_checked", ArrayFromJSON(ty, "[1, 2, 3, 4, 5]"), + ArrayFromJSON(ty, "[1, 2, 6, 24, 120]"), &options); + + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[1, 2, null, 4, null]"), + ArrayFromJSON(ty, "[1, 2, null, 8, null]"), &options); + CheckVectorUnary("cumulative_product_checked", + ArrayFromJSON(ty, "[1, 2, null, 4, null]"), + ArrayFromJSON(ty, "[1, 2, null, 8, null]"), &options); + + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[null, 2, null, 4, null]"), + ArrayFromJSON(ty, "[null, 2, null, 8, null]"), &options); + CheckVectorUnary("cumulative_product_checked", + ArrayFromJSON(ty, "[null, 2, null, 4, null]"), + ArrayFromJSON(ty, "[null, 2, null, 8, null]"), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, 6, 24, 120]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, 6, 24, 120]"}), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, null, 8, null]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, null, 8, null]"}), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[null, 2, null, 8, null]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[null, 2, null, 8, null]"}), &options); + } +} + +TEST(TestCumulativeMinMax, NoStartDoSkip) { + CumulativeMinOptions min_options(true); + CumulativeMaxOptions max_options(true); + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[9, 24, 7, 123, 5, 74]"), + ArrayFromJSON(ty, "[9, 9, 7, 7, 5, 5]"), &min_options); + CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[9, 24, 7, 123, 5, 74]"), + ArrayFromJSON(ty, "[9, 24, 24, 123, 123, 123]"), &max_options); + + CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[9, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[9, 9, null, 9, null, 9]"), &min_options); + CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[9, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[9, 24, null, 123, null, 123]"), &max_options); + + CheckVectorUnary("cumulative_min", + ArrayFromJSON(ty, "[null, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[null, 24, null, 24, null, 24]"), &min_options); + CheckVectorUnary("cumulative_max", + ArrayFromJSON(ty, "[null, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[null, 24, null, 123, null, 123]"), &max_options); + + CheckVectorUnary("cumulative_min", + ChunkedArrayFromJSON(ty, {"[9, 24, 7]", "[123, 5, 74]"}), + ChunkedArrayFromJSON(ty, {"[9, 9, 7, 7, 5, 5]"}), &min_options); + CheckVectorUnary( + "cumulative_max", ChunkedArrayFromJSON(ty, {"[9, 24, 7]", "[123, 5, 74]"}), + ChunkedArrayFromJSON(ty, {"[9, 24, 24, 123, 123, 123]"}), &max_options); + + CheckVectorUnary( + "cumulative_min", ChunkedArrayFromJSON(ty, {"[9, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[9, 9, null, 9, null, 9]"}), &min_options); + CheckVectorUnary( + "cumulative_max", ChunkedArrayFromJSON(ty, {"[9, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[9, 24, null, 123, null, 123]"}), &max_options); + + CheckVectorUnary("cumulative_min", + ChunkedArrayFromJSON(ty, {"[null, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[null, 24, null, 24, null, 24]"}), + &min_options); + CheckVectorUnary("cumulative_max", + ChunkedArrayFromJSON(ty, {"[null, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[null, 24, null, 123, null, 123]"}), + &max_options); + } +} + TEST(TestCumulativeSum, HasStartNoSkip) { CumulativeSumOptions options(10); for (auto ty : NumericTypes()) { @@ -299,6 +590,93 @@ TEST(TestCumulativeSum, HasStartNoSkip) { } } +TEST(TestCumulativeProduct, HasStartNoSkip) { + CumulativeProductOptions options(5); + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[1, 2, 3, 4]"), + ArrayFromJSON(ty, "[5, 10, 30, 120]"), &options); + CheckVectorUnary("cumulative_product_checked", ArrayFromJSON(ty, "[1, 2, 3, 4]"), + ArrayFromJSON(ty, "[5, 10, 30, 120]"), &options); + + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[1, 2, null, 4]"), + ArrayFromJSON(ty, "[5, 10, null, null]"), &options); + CheckVectorUnary("cumulative_product_checked", ArrayFromJSON(ty, "[1, 2, null, 4]"), + ArrayFromJSON(ty, "[5, 10, null, null]"), &options); + + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[null, 2, null, 4]"), + ArrayFromJSON(ty, "[null, null, null, null]"), &options); + CheckVectorUnary("cumulative_product_checked", + ArrayFromJSON(ty, "[null, 2, null, 4]"), + ArrayFromJSON(ty, "[null, null, null, null]"), &options); + + CheckVectorUnary("cumulative_product", ChunkedArrayFromJSON(ty, {"[1, 2]", "[3, 4]"}), + ChunkedArrayFromJSON(ty, {"[5, 10, 30, 120]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[1, 2]", "[3, 4]"}), + ChunkedArrayFromJSON(ty, {"[5, 10, 30, 120]"}), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[1, 2]", "[null, 4]"}), + ChunkedArrayFromJSON(ty, {"[5, 10, null, null]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[1, 2]", "[null, 4]"}), + ChunkedArrayFromJSON(ty, {"[5, 10, null, null]"}), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[null, 2]", "[null, 4]"}), + ChunkedArrayFromJSON(ty, {"[null, null, null, null]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[null, 2]", "[null, 4]"}), + ChunkedArrayFromJSON(ty, {"[null, null, null, null]"}), &options); + } +} + +TEST(TestCumulativeMinMax, HasStartNoSkip) { + CumulativeMinOptions min_options(8, false); + CumulativeMaxOptions max_options(32, false); + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[9, 24, 7, 123, 5, 74]"), + ArrayFromJSON(ty, "[8, 8, 7, 7, 5, 5]"), &min_options); + CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[9, 24, 7, 123, 5, 74]"), + ArrayFromJSON(ty, "[32, 32, 32, 123, 123, 123]"), &max_options); + + CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[9, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[8, 8, null, null, null, null]"), &min_options); + CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[9, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[32, 32, null, null, null, null]"), &max_options); + + CheckVectorUnary( + "cumulative_min", ArrayFromJSON(ty, "[null, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &min_options); + CheckVectorUnary( + "cumulative_max", ArrayFromJSON(ty, "[null, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &max_options); + + CheckVectorUnary("cumulative_min", + ChunkedArrayFromJSON(ty, {"[9, 24, 7]", "[123, 5, 74]"}), + ChunkedArrayFromJSON(ty, {"[8, 8, 7, 7, 5, 5]"}), &min_options); + CheckVectorUnary( + "cumulative_max", ChunkedArrayFromJSON(ty, {"[9, 24, 7]", "[123, 5, 74]"}), + ChunkedArrayFromJSON(ty, {"[32, 32, 32, 123, 123, 123]"}), &max_options); + + CheckVectorUnary( + "cumulative_min", ChunkedArrayFromJSON(ty, {"[9, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[8, 8, null, null, null, null]"}), &min_options); + CheckVectorUnary( + "cumulative_max", ChunkedArrayFromJSON(ty, {"[9, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[32, 32, null, null, null, null]"}), &max_options); + + CheckVectorUnary("cumulative_min", + ChunkedArrayFromJSON(ty, {"[null, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}), + &min_options); + CheckVectorUnary("cumulative_max", + ChunkedArrayFromJSON(ty, {"[null, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}), + &max_options); + } +} + TEST(TestCumulativeSum, HasStartDoSkip) { CumulativeSumOptions options(10, true); for (auto ty : NumericTypes()) { @@ -344,5 +722,92 @@ TEST(TestCumulativeSum, HasStartDoSkip) { } } +TEST(TestCumulativeProduct, HasStartDoSkip) { + CumulativeProductOptions options(5, true); + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[1, 2, 3, 4]"), + ArrayFromJSON(ty, "[5, 10, 30, 120]"), &options); + CheckVectorUnary("cumulative_product_checked", ArrayFromJSON(ty, "[1, 2, 3, 4]"), + ArrayFromJSON(ty, "[5, 10, 30, 120]"), &options); + + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[1, 2, null, 4]"), + ArrayFromJSON(ty, "[5, 10, null, 40]"), &options); + CheckVectorUnary("cumulative_product_checked", ArrayFromJSON(ty, "[1, 2, null, 4]"), + ArrayFromJSON(ty, "[5, 10, null, 40]"), &options); + + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[null, 2, null, 4]"), + ArrayFromJSON(ty, "[null, 10, null, 40]"), &options); + CheckVectorUnary("cumulative_product_checked", + ArrayFromJSON(ty, "[null, 2, null, 4]"), + ArrayFromJSON(ty, "[null, 10, null, 40]"), &options); + + CheckVectorUnary("cumulative_product", ChunkedArrayFromJSON(ty, {"[1, 2]", "[3, 4]"}), + ChunkedArrayFromJSON(ty, {"[5, 10, 30, 120]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[1, 2]", "[3, 4]"}), + ChunkedArrayFromJSON(ty, {"[5, 10, 30, 120]"}), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[1, 2]", "[null, 4]"}), + ChunkedArrayFromJSON(ty, {"[5, 10, null, 40]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[1, 2]", "[null, 4]"}), + ChunkedArrayFromJSON(ty, {"[5, 10, null, 40]"}), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[null, 2]", "[null, 4]"}), + ChunkedArrayFromJSON(ty, {"[null, 10, null, 40]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[null, 2]", "[null, 4]"}), + ChunkedArrayFromJSON(ty, {"[null, 10, null, 40]"}), &options); + } +} + +TEST(TestCumulativeMinMax, HasStartDoSkip) { + CumulativeMinOptions min_options(8, true); + CumulativeMaxOptions max_options(32, true); + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[9, 24, 7, 123, 5, 74]"), + ArrayFromJSON(ty, "[8, 8, 7, 7, 5, 5]"), &min_options); + CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[9, 24, 7, 123, 5, 74]"), + ArrayFromJSON(ty, "[32, 32, 32, 123, 123, 123]"), &max_options); + + CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[9, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[8, 8, null, 8, null, 8]"), &min_options); + CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[9, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[32, 32, null, 123, null, 123]"), &max_options); + + CheckVectorUnary("cumulative_min", + ArrayFromJSON(ty, "[null, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[null, 8, null, 8, null, 8]"), &min_options); + CheckVectorUnary("cumulative_max", + ArrayFromJSON(ty, "[null, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[null, 32, null, 123, null, 123]"), &max_options); + + CheckVectorUnary("cumulative_min", + ChunkedArrayFromJSON(ty, {"[9, 24, 7]", "[123, 5, 74]"}), + ChunkedArrayFromJSON(ty, {"[8, 8, 7, 7, 5, 5]"}), &min_options); + CheckVectorUnary( + "cumulative_max", ChunkedArrayFromJSON(ty, {"[9, 24, 7]", "[123, 5, 74]"}), + ChunkedArrayFromJSON(ty, {"[32, 32, 32, 123, 123, 123]"}), &max_options); + + CheckVectorUnary( + "cumulative_min", ChunkedArrayFromJSON(ty, {"[9, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[8, 8, null, 8, null, 8]"}), &min_options); + CheckVectorUnary( + "cumulative_max", ChunkedArrayFromJSON(ty, {"[9, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[32, 32, null, 123, null, 123]"}), &max_options); + + CheckVectorUnary("cumulative_min", + ChunkedArrayFromJSON(ty, {"[null, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[null, 8, null, 8, null, 8]"}), + &min_options); + CheckVectorUnary("cumulative_max", + ChunkedArrayFromJSON(ty, {"[null, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[null, 32, null, 123, null, 123]"}), + &max_options); + } +} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc index fe7c6fa8ad1..43857dc8f10 100644 --- a/cpp/src/arrow/compute/registry.cc +++ b/cpp/src/arrow/compute/registry.cc @@ -291,7 +291,7 @@ static std::unique_ptr CreateBuiltInRegistry() { // Vector functions RegisterVectorArraySort(registry.get()); - RegisterVectorCumulativeSum(registry.get()); + RegisterVectorCumulativeOps(registry.get()); RegisterVectorHash(registry.get()); RegisterVectorNested(registry.get()); RegisterVectorReplace(registry.get()); diff --git a/cpp/src/arrow/compute/registry_internal.h b/cpp/src/arrow/compute/registry_internal.h index 38f81e98889..1ee44ab4406 100644 --- a/cpp/src/arrow/compute/registry_internal.h +++ b/cpp/src/arrow/compute/registry_internal.h @@ -43,7 +43,7 @@ void RegisterScalarOptions(FunctionRegistry* registry); // Vector functions void RegisterVectorArraySort(FunctionRegistry* registry); -void RegisterVectorCumulativeSum(FunctionRegistry* registry); +void RegisterVectorCumulativeOps(FunctionRegistry* registry); void RegisterVectorHash(FunctionRegistry* registry); void RegisterVectorNested(FunctionRegistry* registry); void RegisterVectorReplace(FunctionRegistry* registry); diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index a354f42a4b1..276ad666b2a 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1562,13 +1562,21 @@ numeric type. By default these functions do not detect overflow. They are also available in an overflow-checking variant, suffixed ``_checked``, which returns an ``Invalid`` :class:`Status` when overflow is detected. -+------------------------+-------+-------------+-------------+--------------------------------+-------+ -| Function name | Arity | Input types | Output type | Options class | Notes | -+========================+=======+=============+=============+================================+=======+ -| cumulative_sum | Unary | Numeric | Numeric | :struct:`CumulativeSumOptions` | \(1) | -+------------------------+-------+-------------+-------------+--------------------------------+-------+ -| cumulative_sum_checked | Unary | Numeric | Numeric | :struct:`CumulativeSumOptions` | \(1) | -+------------------------+-------+-------------+-------------+--------------------------------+-------+ ++----------------------------+-------+-------------+-------------+------------------------------------+-------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++============================+=======+=============+=============+====================================+=======+ +| cumulative_sum | Unary | Numeric | Numeric | :struct:`CumulativeSumOptions` | \(1) | ++----------------------------+-------+-------------+-------------+------------------------------------+-------+ +| cumulative_sum_checked | Unary | Numeric | Numeric | :struct:`CumulativeSumOptions` | \(1) | ++----------------------------+-------+-------------+-------------+------------------------------------+-------+ +| cumulative_product | Unary | Numeric | Numeric | :struct:`CumulativeProductOptions` | \(2) | ++----------------------------+-------+-------------+-------------+------------------------------------+-------+ +| cumulative_product_checked | Unary | Numeric | Numeric | :struct:`CumulativeProductOptions` | \(2) | ++----------------------------+-------+-------------+-------------+------------------------------------+-------+ +| cumulative_min | Unary | Numeric | Numeric | :struct:`CumulativeMinOptions` | \(3) | ++----------------------------+-------+-------------+-------------+------------------------------------+-------+ +| cumulative_max | Unary | Numeric | Numeric | :struct:`CumulativeMaxOptions` | \(4) | ++----------------------------+-------+-------------+-------------+------------------------------------+-------+ * \(1) CumulativeSumOptions has two optional parameters. The first parameter :member:`CumulativeSumOptions::start` is a starting value for the running @@ -1578,6 +1586,30 @@ an ``Invalid`` :class:`Status` when overflow is detected. false (the default), the first encountered null is propagated. When set to true, each null in the input produces a corresponding null in the output. +* \(2) CumulativeProductOptions has two optional parameters. The first parameter + :member:`CumulativeProductOptions::start` is a starting value for the running + product. It has a default value of 1. Specified values of ``start`` must have + the same type as the input. The second parameter + :member:`CumulativeProductOptions::skip_nulls` is a boolean. When set to + false (the default), the first encountered null is propagated. When set to + true, each null in the input produces a corresponding null in the output. + +* \(3) CumulativeMinOptions has two optional parameters. The first parameter + :member:`CumulativeMinOptions::start` is a starting value for the running + minimum. It's default value will be maximum value of the input type. Specified + values of ``start`` must have the same type as the input. The second parameter + :member:`CumulativeMinOptions::skip_nulls` is a boolean. When set to + false (the default), the first encountered null is propagated. When set to + true, each null in the input produces a corresponding null in the output. + +* \(4) CumulativeMaxOptions has two optional parameters. The first parameter + :member:`CumulativeMaxOptions::start` is a starting value for the running + maximum. It's default value will be minimum value of the input type. Specified + values of ``start`` must have the same type as the input. The second parameter + :member:`CumulativeMaxOptions::skip_nulls` is a boolean. When set to + false (the default), the first encountered null is propagated. When set to + true, each null in the input produces a corresponding null in the output. + Associative transforms ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index 4a9208fd31b..93208a7944b 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -59,6 +59,10 @@ throws an ``ArrowInvalid`` exception when overflow is detected. cumulative_sum cumulative_sum_checked + cumulative_product + cumulative_product_checked + cumulative_min + cumulative_max Arithmetic Functions -------------------- @@ -518,6 +522,9 @@ Compute Options CountOptions CountOptions CumulativeSumOptions + CumulativeMaxOptions + CumulativeMinOptions + CumulativeProductOptions DayOfWeekOptions DictionaryEncodeOptions ElementWiseAggregateOptions diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index b9594d90e85..5fc52ea06e0 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -1824,6 +1824,90 @@ class CumulativeSumOptions(_CumulativeSumOptions): self._set_options(start, skip_nulls) +cdef class _CumulativeProductOptions(FunctionOptions): + def _set_options(self, start, skip_nulls): + if not isinstance(start, Scalar): + try: + start = lib.scalar(start) + except Exception: + _raise_invalid_function_option( + start, "`start` type for CumulativeProductOptions", TypeError) + + self.wrapped.reset(new CCumulativeProductOptions(( start).unwrap(), skip_nulls)) + + +class CumulativeProductOptions(_CumulativeProductOptions): + """ + Options for `cumulative_product` function. + + Parameters + ---------- + start : Scalar, default 1.0 + Starting value for product computation + skip_nulls : bool, default False + When false, the first encountered null is propagated. + """ + + def __init__(self, start=1.0, *, skip_nulls=False): + self._set_options(start, skip_nulls) + + +cdef class _CumulativeMinOptions(FunctionOptions): + def _set_options(self, start, skip_nulls): + if not isinstance(start, Scalar): + try: + start = lib.scalar(start) + except Exception: + _raise_invalid_function_option( + start, "`start` type for CumulativeMinOptions", TypeError) + + self.wrapped.reset(new CCumulativeMinOptions(( start).unwrap(), skip_nulls)) + + +class CumulativeMinOptions(_CumulativeMinOptions): + """ + Options for `cumulative_min` function. + + Parameters + ---------- + start : Scalar, default None + Starting value for min computation + skip_nulls : bool, default False + When false, the first encountered null is propagated. + """ + + def __init__(self, start=None, *, skip_nulls=False): + self._set_options(start, skip_nulls) + + +cdef class _CumulativeMaxOptions(FunctionOptions): + def _set_options(self, start, skip_nulls): + if not isinstance(start, Scalar): + try: + start = lib.scalar(start) + except Exception: + _raise_invalid_function_option( + start, "`start` type for CumulativeMaxOptions", TypeError) + + self.wrapped.reset(new CCumulativeMaxOptions(( start).unwrap(), skip_nulls)) + + +class CumulativeMaxOptions(_CumulativeMaxOptions): + """ + Options for `cumulative_max` function. + + Parameters + ---------- + start : Scalar, default None + Starting value for max computation + skip_nulls : bool, default False + When false, the first encountered null is propagated. + """ + + def __init__(self, start=None, *, skip_nulls=False): + self._set_options(start, skip_nulls) + + cdef class _ArraySortOptions(FunctionOptions): def _set_options(self, order, null_placement): self.wrapped.reset(new CArraySortOptions( diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index 5873571c5a0..446585a0775 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -34,6 +34,9 @@ CastOptions, CountOptions, CumulativeSumOptions, + CumulativeProductOptions, + CumulativeMinOptions, + CumulativeMaxOptions, DayOfWeekOptions, DictionaryEncodeOptions, ElementWiseAggregateOptions, diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 4cbcef84e88..70efbe35fff 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -2276,6 +2276,24 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: shared_ptr[CScalar] start c_bool skip_nulls + cdef cppclass CCumulativeProductOptions \ + "arrow::compute::CumulativeProductOptions"(CFunctionOptions): + CCumulativeProductOptions(shared_ptr[CScalar] start, c_bool skip_nulls) + shared_ptr[CScalar] start + c_bool skip_nulls + + cdef cppclass CCumulativeMinOptions \ + "arrow::compute::CumulativeMinOptions"(CFunctionOptions): + CCumulativeMinOptions(shared_ptr[CScalar] start, c_bool skip_nulls) + shared_ptr[CScalar] start + c_bool skip_nulls + + cdef cppclass CCumulativeMaxOptions \ + "arrow::compute::CumulativeMaxOptions"(CFunctionOptions): + CCumulativeMaxOptions(shared_ptr[CScalar] start, c_bool skip_nulls) + shared_ptr[CScalar] start + c_bool skip_nulls + cdef cppclass CArraySortOptions \ "arrow::compute::ArraySortOptions"(CFunctionOptions): CArraySortOptions(CSortOrder, CNullPlacement) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 6664f2f824c..95cb4da282e 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -148,6 +148,9 @@ def test_option_class_equality(): pc.PadOptions(5), pc.PartitionNthOptions(1, null_placement="at_start"), pc.CumulativeSumOptions(start=0, skip_nulls=False), + pc.CumulativeProductOptions(start=0, skip_nulls=False), + pc.CumulativeMinOptions(start=0, skip_nulls=False), + pc.CumulativeMaxOptions(start=0, skip_nulls=False), pc.QuantileOptions(), pc.RandomOptions(), pc.RankOptions(sort_keys="ascending", @@ -2605,6 +2608,204 @@ def test_cumulative_sum(start, skip_nulls): pc.cumulative_sum([1, 2, 3], start=strt) +@pytest.mark.parametrize('start', (1.25, 10.5, -10.5)) +@pytest.mark.parametrize('skip_nulls', (True, False)) +def test_cumulative_product(start, skip_nulls): + # Exact tests (e.g., integral types) + start_int = int(start) + starts = [start_int, pa.scalar(start_int, type=pa.int8()), + pa.scalar(start_int, type=pa.int64())] + for strt in starts: + arrays = [ + pa.array([1, 2, 3]), + pa.array([1, None, 3, 4]), + pa.chunked_array([[1, None], [3, 4]]) + ] + expected_arrays = [ + pa.array([1, 2, 6]), + pa.array([1, None, 3, 12]) + if skip_nulls else pa.array([1, None, None, None]), + pa.chunked_array([[1, None, 3, 12]]) + if skip_nulls else pa.chunked_array([[1, None, None, None]]) + ] + for i, arr in enumerate(arrays): + result = pc.cumulative_product( + arr, start=strt, skip_nulls=skip_nulls) + # Add `start` offset to expected array before comparing + expected = pc.multiply(expected_arrays[i], strt) + assert result.equals(expected) + + starts = [start, pa.scalar(start, type=pa.float32()), + pa.scalar(start, type=pa.float64())] + for strt in starts: + arrays = [ + pa.array([1.125, 2.25, 3.03125]), + pa.array([1, np.nan, 2, -3, 4, 5]), + pa.array([1, np.nan, None, 3, None, 5]) + ] + expected_arrays = [ + np.array([1.125, 2.53125, 7.6728515625]), + np.array([1, np.nan, np.nan, np.nan, np.nan, np.nan]), + np.array([1, np.nan, None, np.nan, None, np.nan]) + if skip_nulls else np.array([1, np.nan, None, None, None, None]) + ] + for i, arr in enumerate(arrays): + result = pc.cumulative_product( + arr, start=strt, skip_nulls=skip_nulls) + # Add `start` offset to expected array before comparing + expected = pc.multiply(expected_arrays[i], strt) + np.testing.assert_array_almost_equal(result.to_numpy( + zero_copy_only=False), expected.to_numpy(zero_copy_only=False)) + + for strt in ['a', pa.scalar('arrow'), 1.1]: + with pytest.raises(pa.ArrowInvalid): + pc.cumulative_product([1, 2, 3], start=strt) + + +@pytest.mark.parametrize('start', (1.25, 10.5, -10.5)) +@pytest.mark.parametrize('skip_nulls', (True, False)) +def test_cumulative_max(start, skip_nulls): + # Exact tests (e.g., integral types) + start_int = int(start) + starts = [start_int, pa.scalar(start_int, type=pa.int8()), + pa.scalar(start_int, type=pa.int64())] + for strt in starts: + arrays = [ + pa.array([9, 24, 7]), + pa.array([7, None, 5, 74]), + pa.chunked_array([[7, None], [5, 74]]) + ] + expected_arrays = [ + pa.array([9, 24, 24]), + pa.array([7, None, 7, 74]) + if skip_nulls else pa.array([7, None, None, None]), + pa.chunked_array([[7, None, 7, 74]]) + if skip_nulls else pa.chunked_array([[7, None, None, None]]) + ] + for i, arr in enumerate(arrays): + result = pc.cumulative_max(arr, start=strt, skip_nulls=skip_nulls) + # Add `start` offset to expected array before comparing + expected_values = [] + if isinstance(strt, pa.Scalar): + strt = strt.as_py() + + for val in expected_arrays[i]: + py_val = val.as_py() + maximum = max(py_val, strt) if py_val is not None else None + expected_values.append(maximum) + + if isinstance(expected_arrays[i], pa.Array): + expected = pa.array(expected_values) + else: + expected = pa.chunked_array([expected_values]) + assert result.equals(expected) + + starts = [start, pa.scalar(start, type=pa.float32()), + pa.scalar(start, type=pa.float64())] + for strt in starts: + arrays = [ + pa.array([9.125, 27.25, 7.03125]), + pa.array([1, np.nan, 2, -3, 4, 5]), + pa.array([1, np.nan, None, 3, None, 5]) + ] + expected_arrays = [ + np.array([9.125, 27.25, 27.25]), + np.array([1, 1, 2, 2, 4, 5]), + np.array([1, 1, None, 3, None, 5]) + if skip_nulls else np.array([1, 1, None, None, None, None]) + ] + for i, arr in enumerate(arrays): + result = pc.cumulative_max(arr, start=strt, skip_nulls=skip_nulls) + # Add `start` offset to expected array before comparing + expected_values = [] + if isinstance(strt, pa.Scalar): + strt = strt.as_py() + + for val in expected_arrays[i]: + maximum = max(val, strt) if val is not None else None + expected_values.append(maximum) + + expected = pa.array(expected_values) + np.testing.assert_array_almost_equal(result.to_numpy( + zero_copy_only=False), expected.to_numpy(zero_copy_only=False)) + + for strt in ['a', pa.scalar('arrow'), 1.1]: + with pytest.raises(pa.ArrowInvalid): + pc.cumulative_max([1, 2, 3], start=strt) + + +@pytest.mark.parametrize('start', (1.25, 10.5, -10.5)) +@pytest.mark.parametrize('skip_nulls', (True, False)) +def test_cumulative_min(start, skip_nulls): + # Exact tests (e.g., integral types) + start_int = int(start) + starts = [start_int, pa.scalar(start_int, type=pa.int8()), + pa.scalar(start_int, type=pa.int64())] + for strt in starts: + arrays = [ + pa.array([9, 24, 7]), + pa.array([7, None, 5, 74]), + pa.chunked_array([[7, None], [5, 74]]) + ] + expected_arrays = [ + pa.array([9, 9, 7]), + pa.array([7, None, 5, 5]) + if skip_nulls else pa.array([7, None, None, None]), + pa.chunked_array([[7, None, 5, 5]]) + if skip_nulls else pa.chunked_array([[7, None, None, None]]) + ] + for i, arr in enumerate(arrays): + result = pc.cumulative_min(arr, start=strt, skip_nulls=skip_nulls) + # Add `start` offset to expected array before comparing + expected_values = [] + if isinstance(strt, pa.Scalar): + strt = strt.as_py() + + for val in expected_arrays[i]: + py_val = val.as_py() + minimum = min(py_val, strt) if py_val is not None else None + expected_values.append(minimum) + + if isinstance(expected_arrays[i], pa.Array): + expected = pa.array(expected_values) + else: + expected = pa.chunked_array([expected_values]) + assert result.equals(expected) + + starts = [start, pa.scalar(start, type=pa.float32()), + pa.scalar(start, type=pa.float64())] + for strt in starts: + arrays = [ + pa.array([9.125, 27.25, 7.03125]), + pa.array([1, np.nan, 2, -3, 4, 5]), + pa.array([1, np.nan, None, 3, None, 5]) + ] + expected_arrays = [ + np.array([9.125, 9.125, 7.03125]), + np.array([1, 1, 1, -3, -3, -3]), + np.array([1, 1, None, 1, None, 1]) + if skip_nulls else np.array([1, 1, None, None, None, None]) + ] + for i, arr in enumerate(arrays): + result = pc.cumulative_min(arr, start=strt, skip_nulls=skip_nulls) + # Add `start` offset to expected array before comparing + expected_values = [] + if isinstance(strt, pa.Scalar): + strt = strt.as_py() + + for val in expected_arrays[i]: + minimum = min(val, strt) if val is not None else None + expected_values.append(minimum) + + expected = pa.array(expected_values) + np.testing.assert_array_almost_equal(result.to_numpy( + zero_copy_only=False), expected.to_numpy(zero_copy_only=False)) + + for strt in ['a', pa.scalar('arrow'), 1.1]: + with pytest.raises(pa.ArrowInvalid): + pc.cumulative_min([1, 2, 3], start=strt) + + def test_make_struct(): assert pc.make_struct(1, 'a').as_py() == {'0': 1, '1': 'a'}