From c664f8ef81386299ce9a0fa0715fb7a93d740d46 Mon Sep 17 00:00:00 2001 From: JabariBooker Date: Tue, 21 Jun 2022 19:39:20 -0400 Subject: [PATCH 01/13] Adding CumulativeProduct compute function --- cpp/src/arrow/compute/api_vector.cc | 25 ++++++++++++++++ cpp/src/arrow/compute/api_vector.h | 29 ++++++++++++++++++- .../compute/kernels/vector_cumulative_ops.cc | 25 +++++++++++++++- cpp/src/arrow/compute/registry.cc | 2 +- cpp/src/arrow/compute/registry_internal.h | 2 +- python/pyarrow/_compute.pyx | 28 ++++++++++++++++++ python/pyarrow/compute.py | 1 + python/pyarrow/includes/libarrow.pxd | 6 ++++ 8 files changed, 114 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index 4ebdecf5e78..b35c705d8c1 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -161,6 +161,10 @@ static auto kCumulativeSumOptionsType = GetFunctionOptionsType( + DataMember("start", &CumulativeProductOptions::start), + DataMember("skip_nulls", &CumulativeProductOptions::skip_nulls), + DataMember("check_overflow", &CumulativeProductOptions::check_overflow)); static auto kRankOptionsType = GetFunctionOptionsType( DataMember("sort_keys", &RankOptions::sort_keys), DataMember("null_placement", &RankOptions::null_placement), @@ -218,6 +222,18 @@ CumulativeSumOptions::CumulativeSumOptions(std::shared_ptr start, bool s check_overflow(check_overflow) {} constexpr char CumulativeSumOptions::kTypeName[]; +CumulativeProductOptions::CumulativeProductOptions(double start, bool skip_nulls, + bool check_overflow) + : CumulativeProductOptions(std::make_shared(start), skip_nulls, + check_overflow) {} +CumulativeProductOptions::CumulativeProductOptions(std::shared_ptr start, bool skip_nulls, + bool check_overflow) + : FunctionOptions(internal::kCumulativeProductOptionsType), + start(std::move(start)), + skip_nulls(skip_nulls), + check_overflow(check_overflow) {} +constexpr char CumulativeProductOptions::kTypeName[]; + RankOptions::RankOptions(std::vector sort_keys, NullPlacement null_placement, RankOptions::Tiebreaker tiebreaker) : FunctionOptions(internal::kRankOptionsType), @@ -236,6 +252,7 @@ void RegisterVectorOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kPartitionNthOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kSelectKOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeSumOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeProductOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kRankOptionsType)); } } // namespace internal @@ -383,6 +400,14 @@ Result CumulativeSum(const Datum& values, const CumulativeSumOptions& opt return CallFunction(func_name, {Datum(values)}, &options, ctx); } +Result CumulativeProduct(const Datum& values, + const CumulativeProductOptions& options, + ExecContext* ctx) { + auto func_name = + (options.check_overflow) ? "cumulative_product_checked" : "cumulative_product"; + return CallFunction(func_name, {Datum(values)}, &options, ctx); +} + // ---------------------------------------------------------------------- // Deprecated functions diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 88331b6e592..1dc9e1649cd 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -235,7 +235,28 @@ class ARROW_EXPORT CumulativeSumOptions : public FunctionOptions { static constexpr char const kTypeName[] = "CumulativeSumOptions"; static CumulativeSumOptions Defaults() { return CumulativeSumOptions(); } - /// Optional starting value for cumulative operation computation + /// Optional starting value for cumulative sum + std::shared_ptr start; + + /// If true, nulls in the input are ignored and produce a corresponding null output. + /// When false, the first null encountered is propagated through the remaining output. + bool skip_nulls = false; + + /// When true, returns an Invalid Status when overflow is detected + bool check_overflow = false; +}; + +/// \brief Options for cumulative product function +class ARROW_EXPORT CumulativeProductOptions : public FunctionOptions { + public: + explicit CumulativeProductOptions(double start = 1, bool skip_nulls = false, + bool check_overflow = false); + explicit CumulativeProductOptions(std::shared_ptr start, bool skip_nulls = false, + bool check_overflow = false); + static constexpr char const kTypeName[] = "CumulativeProductOptions"; + static CumulativeProductOptions Defaults() { return CumulativeProductOptions(); } + + /// Optional starting value for cumulative product std::shared_ptr start; /// If true, nulls in the input are ignored and produce a corresponding null output. @@ -586,6 +607,12 @@ Result CumulativeSum( const CumulativeSumOptions& options = CumulativeSumOptions::Defaults(), ExecContext* ctx = NULLPTR); +ARROW_EXPORT +Result CumulativeProduct( + const Datum& values, + const CumulativeProductOptions& options = CumulativeProductOptions::Defaults(), + ExecContext* ctx = NULLPTR); + // ---------------------------------------------------------------------- // Deprecated functions diff --git a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc index 241438c529e..df260c26b86 100644 --- a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc +++ b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc @@ -179,6 +179,24 @@ const FunctionDoc cumulative_sum_checked_doc{ "function \"cumulative_sum\"."), {"values"}, "CumulativeSumOptions"}; + +const FunctionDoc cumulative_product_doc{ + "Compute the cumulative product over a numeric input", + ("`values` must be numeric. Return an array/chunked array which is the\n" + "cumulative product computed over `values`. Results will wrap around on\n" + "integer overflow. Use function \"cumulative_product_checked\" if you want\n" + "overflow to return an error."), + {"values"}, + "CumulativeProductOptions"}; + +const FunctionDoc cumulative_product_checked_doc{ + "Compute the cumulative product over a numeric input", + ("`values` must be numeric. Return an array/chunked array which is the\n" + "cumulative product computed over `values`. This function returns an error\n" + "on overflow. For a variant that doesn't fail on overflow, use\n" + "function \"cumulative_product\"."), + {"values"}, + "CumulativeProductOptions"}; } // namespace template @@ -210,11 +228,16 @@ void MakeVectorCumulativeFunction(FunctionRegistry* registry, const std::string DCHECK_OK(registry->AddFunction(std::move(func))); } -void RegisterVectorCumulativeSum(FunctionRegistry* registry) { +void RegisterVectorCumulativeOps(FunctionRegistry* registry) { MakeVectorCumulativeFunction(registry, "cumulative_sum", cumulative_sum_doc); MakeVectorCumulativeFunction( registry, "cumulative_sum_checked", cumulative_sum_checked_doc); + + MakeVectorCumulativeFunction(registry, "cumulative_product", + cumulative_product_doc); + MakeVectorCumulativeFunction( + registry, "cumulative_product_checked", cumulative_product_checked_doc); } } // namespace internal diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc index fe7c6fa8ad1..43857dc8f10 100644 --- a/cpp/src/arrow/compute/registry.cc +++ b/cpp/src/arrow/compute/registry.cc @@ -291,7 +291,7 @@ static std::unique_ptr CreateBuiltInRegistry() { // Vector functions RegisterVectorArraySort(registry.get()); - RegisterVectorCumulativeSum(registry.get()); + RegisterVectorCumulativeOps(registry.get()); RegisterVectorHash(registry.get()); RegisterVectorNested(registry.get()); RegisterVectorReplace(registry.get()); diff --git a/cpp/src/arrow/compute/registry_internal.h b/cpp/src/arrow/compute/registry_internal.h index 38f81e98889..1ee44ab4406 100644 --- a/cpp/src/arrow/compute/registry_internal.h +++ b/cpp/src/arrow/compute/registry_internal.h @@ -43,7 +43,7 @@ void RegisterScalarOptions(FunctionRegistry* registry); // Vector functions void RegisterVectorArraySort(FunctionRegistry* registry); -void RegisterVectorCumulativeSum(FunctionRegistry* registry); +void RegisterVectorCumulativeOps(FunctionRegistry* registry); void RegisterVectorHash(FunctionRegistry* registry); void RegisterVectorNested(FunctionRegistry* registry); void RegisterVectorReplace(FunctionRegistry* registry); diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 04b57859ad4..3987b0d5a65 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -1824,6 +1824,34 @@ class CumulativeSumOptions(_CumulativeSumOptions): self._set_options(start, skip_nulls) +cdef class _CumulativeProductOptions(FunctionOptions): + def _set_options(self, start, skip_nulls): + if not isinstance(start, Scalar): + try: + start = lib.scalar(start) + except Exception: + _raise_invalid_function_option( + start, "`start` type for CumulativeProductOptions", TypeError) + + self.wrapped.reset(new CCumulativeProductOptions(( start).unwrap(), skip_nulls)) + + +class CumulativeProductOptions(_CumulativeProductOptions): + """ + Options for `cumulative_product` function. + + Parameters + ---------- + start : Scalar, default 1.0 + Starting value for product computation + skip_nulls : bool, default False + When false, the first encountered null is propagated. + """ + + def __init__(self, start=1.0, *, skip_nulls=False): + self._set_options(start, skip_nulls) + + cdef class _ArraySortOptions(FunctionOptions): def _set_options(self, order, null_placement): self.wrapped.reset(new CArraySortOptions( diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index 526f0e4f7b7..bc69d39e3f5 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -34,6 +34,7 @@ CastOptions, CountOptions, CumulativeSumOptions, + CumulativeProductOptions, DayOfWeekOptions, DictionaryEncodeOptions, ElementWiseAggregateOptions, diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 302ac99c36a..6b68dd0e5b1 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -2265,6 +2265,12 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: shared_ptr[CScalar] start c_bool skip_nulls + cdef cppclass CCumulativeProductOptions \ + "arrow::compute::CumulativeProductOptions"(CFunctionOptions): + CCumulativeProductOptions(shared_ptr[CScalar] start, c_bool skip_nulls) + shared_ptr[CScalar] start + c_bool skip_nulls + cdef cppclass CArraySortOptions \ "arrow::compute::ArraySortOptions"(CFunctionOptions): CArraySortOptions(CSortOrder, CNullPlacement) From b7605dd7f459c1527a89ac0712077c2f9cb5ff20 Mon Sep 17 00:00:00 2001 From: JabariBooker Date: Tue, 21 Jun 2022 19:56:50 -0400 Subject: [PATCH 02/13] Correcting linting errors --- cpp/src/arrow/compute/api_vector.cc | 17 +++++++++-------- cpp/src/arrow/compute/api_vector.h | 4 ++-- .../compute/kernels/vector_cumulative_ops.cc | 4 ++-- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index b35c705d8c1..9593d34629b 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -161,10 +161,11 @@ static auto kCumulativeSumOptionsType = GetFunctionOptionsType( - DataMember("start", &CumulativeProductOptions::start), - DataMember("skip_nulls", &CumulativeProductOptions::skip_nulls), - DataMember("check_overflow", &CumulativeProductOptions::check_overflow)); +static auto kCumulativeProductOptionsType = + GetFunctionOptionsType( + DataMember("start", &CumulativeProductOptions::start), + DataMember("skip_nulls", &CumulativeProductOptions::skip_nulls), + DataMember("check_overflow", &CumulativeProductOptions::check_overflow)); static auto kRankOptionsType = GetFunctionOptionsType( DataMember("sort_keys", &RankOptions::sort_keys), DataMember("null_placement", &RankOptions::null_placement), @@ -223,11 +224,11 @@ CumulativeSumOptions::CumulativeSumOptions(std::shared_ptr start, bool s constexpr char CumulativeSumOptions::kTypeName[]; CumulativeProductOptions::CumulativeProductOptions(double start, bool skip_nulls, - bool check_overflow) + bool check_overflow) : CumulativeProductOptions(std::make_shared(start), skip_nulls, - check_overflow) {} -CumulativeProductOptions::CumulativeProductOptions(std::shared_ptr start, bool skip_nulls, - bool check_overflow) + check_overflow) {} +CumulativeProductOptions::CumulativeProductOptions(std::shared_ptr start, + bool skip_nulls, bool check_overflow) : FunctionOptions(internal::kCumulativeProductOptionsType), start(std::move(start)), skip_nulls(skip_nulls), diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 1dc9e1649cd..4d6c0af19f1 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -251,8 +251,8 @@ class ARROW_EXPORT CumulativeProductOptions : public FunctionOptions { public: explicit CumulativeProductOptions(double start = 1, bool skip_nulls = false, bool check_overflow = false); - explicit CumulativeProductOptions(std::shared_ptr start, bool skip_nulls = false, - bool check_overflow = false); + explicit CumulativeProductOptions(std::shared_ptr start, + bool skip_nulls = false, bool check_overflow = false); static constexpr char const kTypeName[] = "CumulativeProductOptions"; static CumulativeProductOptions Defaults() { return CumulativeProductOptions(); } diff --git a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc index df260c26b86..45b45bec3cc 100644 --- a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc +++ b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc @@ -234,8 +234,8 @@ void RegisterVectorCumulativeOps(FunctionRegistry* registry) { MakeVectorCumulativeFunction( registry, "cumulative_sum_checked", cumulative_sum_checked_doc); - MakeVectorCumulativeFunction(registry, "cumulative_product", - cumulative_product_doc); + MakeVectorCumulativeFunction( + registry, "cumulative_product", cumulative_product_doc); MakeVectorCumulativeFunction( registry, "cumulative_product_checked", cumulative_product_checked_doc); } From e197309cdac38e9fde29f6578c5f7484ebfc73a6 Mon Sep 17 00:00:00 2001 From: JabariBooker Date: Sun, 26 Jun 2022 21:31:56 -0400 Subject: [PATCH 03/13] Added CumulativeMin and CumulativeMax compute functions --- cpp/src/arrow/compute/api_vector.cc | 49 +++++++++-- cpp/src/arrow/compute/api_vector.h | 56 +++++++++++++ .../kernels/base_arithmetic_internal.h | 49 +++++++++++ .../arrow/compute/kernels/hash_aggregate.cc | 38 +-------- .../compute/kernels/vector_cumulative_ops.cc | 82 ++++++++++++++----- python/pyarrow/_compute.pyx | 56 +++++++++++++ python/pyarrow/compute.py | 2 + python/pyarrow/includes/libarrow.pxd | 12 +++ 8 files changed, 280 insertions(+), 64 deletions(-) diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index 9593d34629b..869fca7acc1 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -166,6 +166,12 @@ static auto kCumulativeProductOptionsType = DataMember("start", &CumulativeProductOptions::start), DataMember("skip_nulls", &CumulativeProductOptions::skip_nulls), DataMember("check_overflow", &CumulativeProductOptions::check_overflow)); +static auto kCumulativeMinOptionsType = GetFunctionOptionsType( + DataMember("start", &CumulativeMinOptions::start), + DataMember("skip_nulls", &CumulativeMinOptions::skip_nulls)); +static auto kCumulativeMaxOptionsType = GetFunctionOptionsType( + DataMember("start", &CumulativeMaxOptions::start), + DataMember("skip_nulls", &CumulativeMaxOptions::skip_nulls)); static auto kRankOptionsType = GetFunctionOptionsType( DataMember("sort_keys", &RankOptions::sort_keys), DataMember("null_placement", &RankOptions::null_placement), @@ -235,6 +241,26 @@ CumulativeProductOptions::CumulativeProductOptions(std::shared_ptr start check_overflow(check_overflow) {} constexpr char CumulativeProductOptions::kTypeName[]; +CumulativeMinOptions::CumulativeMinOptions(bool skip_nulls) + : FunctionOptions(internal::kCumulativeMinOptionsType), skip_nulls(skip_nulls) {} +CumulativeMinOptions::CumulativeMinOptions(double start, bool skip_nulls) + : CumulativeMinOptions(std::make_shared(start), skip_nulls) {} +CumulativeMinOptions::CumulativeMinOptions(std::shared_ptr start, bool skip_nulls) + : FunctionOptions(internal::kCumulativeMinOptionsType), + start(std::move(start)), + skip_nulls(skip_nulls) {} +constexpr char CumulativeMinOptions::kTypeName[]; + +CumulativeMaxOptions::CumulativeMaxOptions(bool skip_nulls) + : FunctionOptions(internal::kCumulativeMaxOptionsType), skip_nulls(skip_nulls) {} +CumulativeMaxOptions::CumulativeMaxOptions(double start, bool skip_nulls) + : CumulativeMaxOptions(std::make_shared(start), skip_nulls) {} +CumulativeMaxOptions::CumulativeMaxOptions(std::shared_ptr start, bool skip_nulls) + : FunctionOptions(internal::kCumulativeMaxOptionsType), + start(std::move(start)), + skip_nulls(skip_nulls) {} +constexpr char CumulativeMaxOptions::kTypeName[]; + RankOptions::RankOptions(std::vector sort_keys, NullPlacement null_placement, RankOptions::Tiebreaker tiebreaker) : FunctionOptions(internal::kRankOptionsType), @@ -254,6 +280,8 @@ void RegisterVectorOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kSelectKOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeSumOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeProductOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeMinOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeMaxOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kRankOptionsType)); } } // namespace internal @@ -397,16 +425,27 @@ Result> DropNull(const Array& values, ExecContext* ctx) { Result CumulativeSum(const Datum& values, const CumulativeSumOptions& options, ExecContext* ctx) { - auto func_name = (options.check_overflow) ? "cumulative_sum_checked" : "cumulative_sum"; - return CallFunction(func_name, {Datum(values)}, &options, ctx); + return CallFunction( + options.check_overflow ? "cumulative_sum_checked" : "cumulative_sum", + {Datum(values)}, &options, ctx); } Result CumulativeProduct(const Datum& values, const CumulativeProductOptions& options, ExecContext* ctx) { - auto func_name = - (options.check_overflow) ? "cumulative_product_checked" : "cumulative_product"; - return CallFunction(func_name, {Datum(values)}, &options, ctx); + return CallFunction( + options.check_overflow ? "cumulative_product_checked" : "cumulative_product", + {Datum(values)}, &options, ctx); +} + +Result CumulativeMin(const Datum& values, const CumulativeMinOptions& options, + ExecContext* ctx) { + return CallFunction("cumulative_min", {Datum(values)}, &options, ctx); +} + +Result CumulativeMax(const Datum& values, const CumulativeMaxOptions& options, + ExecContext* ctx) { + return CallFunction("cumulative_max", {Datum(values)}, &options, ctx); } // ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 4d6c0af19f1..2ecd5671a0c 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -235,6 +235,8 @@ class ARROW_EXPORT CumulativeSumOptions : public FunctionOptions { static constexpr char const kTypeName[] = "CumulativeSumOptions"; static CumulativeSumOptions Defaults() { return CumulativeSumOptions(); } + const bool is_minmax = false; + /// Optional starting value for cumulative sum std::shared_ptr start; @@ -256,6 +258,8 @@ class ARROW_EXPORT CumulativeProductOptions : public FunctionOptions { static constexpr char const kTypeName[] = "CumulativeProductOptions"; static CumulativeProductOptions Defaults() { return CumulativeProductOptions(); } + const bool is_minmax = false; + /// Optional starting value for cumulative product std::shared_ptr start; @@ -267,6 +271,46 @@ class ARROW_EXPORT CumulativeProductOptions : public FunctionOptions { bool check_overflow = false; }; +/// \brief Options for cumulative min functions +class ARROW_EXPORT CumulativeMinOptions : public FunctionOptions { + public: + explicit CumulativeMinOptions(bool skip_nulls = false); + explicit CumulativeMinOptions(double start, bool skip_nulls = false); + explicit CumulativeMinOptions(std::shared_ptr start, bool skip_nulls = false); + static constexpr char const kTypeName[] = "CumulativeMinOptions"; + static CumulativeMinOptions Defaults() { return CumulativeMinOptions(); } + + const bool is_minmax = true; + const bool is_max = false; + + /// Optional starting value for cumulative min + std::shared_ptr start; + + /// If true, nulls in the input are ignored and produce a corresponding null output. + /// When false, the first null encountered is propagated through the remaining output. + bool skip_nulls = false; +}; + +/// \brief Options for cumulative max functions +class ARROW_EXPORT CumulativeMaxOptions : public FunctionOptions { + public: + explicit CumulativeMaxOptions(bool skip_nulls = false); + explicit CumulativeMaxOptions(double start, bool skip_nulls = false); + explicit CumulativeMaxOptions(std::shared_ptr start, bool skip_nulls = false); + static constexpr char const kTypeName[] = "CumulativeMaxOptions"; + static CumulativeMaxOptions Defaults() { return CumulativeMaxOptions(); } + + const bool is_minmax = true; + const bool is_max = true; + + /// Optional starting value for cumulative max + std::shared_ptr start; + + /// If true, nulls in the input are ignored and produce a corresponding null output. + /// When false, the first null encountered is propagated through the remaining output. + bool skip_nulls = false; +}; + /// @} /// \brief Filter with a boolean selection filter @@ -613,6 +657,18 @@ Result CumulativeProduct( const CumulativeProductOptions& options = CumulativeProductOptions::Defaults(), ExecContext* ctx = NULLPTR); +ARROW_EXPORT +Result CumulativeMin( + const Datum& values, + const CumulativeMinOptions& options = CumulativeMinOptions::Defaults(), + ExecContext* ctx = NULLPTR); + +ARROW_EXPORT +Result CumulativeMax( + const Datum& values, + const CumulativeMaxOptions& options = CumulativeMaxOptions::Defaults(), + ExecContext* ctx = NULLPTR); + // ---------------------------------------------------------------------- // Deprecated functions diff --git a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h index f416881ccb8..49da6117b89 100644 --- a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h +++ b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h @@ -597,6 +597,55 @@ struct Sign { } }; +struct Min { + template + static constexpr T Call(KernelContext*, Arg0 left, Arg1 right, Status*) { + return (left < right) ? left : right; + } +}; +struct Max { + template + static constexpr T Call(KernelContext*, Arg0 left, Arg1 right, Status*) { + return (left > right) ? left : right; + } +}; + +template +struct AntiExtrema { + static constexpr CType anti_min() { return std::numeric_limits::max(); } + static constexpr CType anti_max() { return std::numeric_limits::min(); } +}; + +template <> +struct AntiExtrema { + static constexpr bool anti_min() { return true; } + static constexpr bool anti_max() { return false; } +}; + +template <> +struct AntiExtrema { + static constexpr float anti_min() { return std::numeric_limits::infinity(); } + static constexpr float anti_max() { return -std::numeric_limits::infinity(); } +}; + +template <> +struct AntiExtrema { + static constexpr double anti_min() { return std::numeric_limits::infinity(); } + static constexpr double anti_max() { return -std::numeric_limits::infinity(); } +}; + +template <> +struct AntiExtrema { + static constexpr Decimal128 anti_min() { return BasicDecimal128::GetMaxSentinel(); } + static constexpr Decimal128 anti_max() { return BasicDecimal128::GetMinSentinel(); } +}; + +template <> +struct AntiExtrema { + static constexpr Decimal256 anti_min() { return BasicDecimal256::GetMaxSentinel(); } + static constexpr Decimal256 anti_max() { return BasicDecimal256::GetMinSentinel(); } +}; + } // namespace internal } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc index de632935955..aecadc3c5de 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc @@ -36,6 +36,7 @@ #include "arrow/compute/kernel.h" #include "arrow/compute/kernels/aggregate_internal.h" #include "arrow/compute/kernels/aggregate_var_std_internal.h" +#include "arrow/compute/kernels/base_arithmetic_internal.h" #include "arrow/compute/kernels/common.h" #include "arrow/compute/kernels/row_encoder.h" #include "arrow/compute/kernels/util_internal.h" @@ -1204,43 +1205,6 @@ HashAggregateKernel MakeApproximateMedianKernel(HashAggregateFunction* tdigest_f // ---------------------------------------------------------------------- // MinMax implementation - -template -struct AntiExtrema { - static constexpr CType anti_min() { return std::numeric_limits::max(); } - static constexpr CType anti_max() { return std::numeric_limits::min(); } -}; - -template <> -struct AntiExtrema { - static constexpr bool anti_min() { return true; } - static constexpr bool anti_max() { return false; } -}; - -template <> -struct AntiExtrema { - static constexpr float anti_min() { return std::numeric_limits::infinity(); } - static constexpr float anti_max() { return -std::numeric_limits::infinity(); } -}; - -template <> -struct AntiExtrema { - static constexpr double anti_min() { return std::numeric_limits::infinity(); } - static constexpr double anti_max() { return -std::numeric_limits::infinity(); } -}; - -template <> -struct AntiExtrema { - static constexpr Decimal128 anti_min() { return BasicDecimal128::GetMaxSentinel(); } - static constexpr Decimal128 anti_max() { return BasicDecimal128::GetMinSentinel(); } -}; - -template <> -struct AntiExtrema { - static constexpr Decimal256 anti_min() { return BasicDecimal256::GetMaxSentinel(); } - static constexpr Decimal256 anti_max() { return BasicDecimal256::GetMinSentinel(); } -}; - template struct GroupedMinMaxImpl final : public GroupedAggregator { using CType = typename TypeTraits::CType; diff --git a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc index 45b45bec3cc..2bbd4c3d319 100644 --- a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc +++ b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc @@ -33,9 +33,10 @@ namespace internal { namespace { -template +template struct CumulativeOptionsWrapper : public OptionsWrapper { - using State = CumulativeOptionsWrapper; + using State = CumulativeOptionsWrapper; + using CType = typename TypeTraits::CType; explicit CumulativeOptionsWrapper(OptionsType options) : OptionsWrapper(std::move(options)) {} @@ -50,6 +51,14 @@ struct CumulativeOptionsWrapper : public OptionsWrapper { const auto& start = options->start; if (!start || !start->is_valid) { + if (options->is_minmax) { + auto new_scalar = std::make_shared>( + (options->is_max) ? AntiExtrema::anti_max() + : AntiExtrema::anti_min()); + + OptionsType new_options(new_scalar, options->skip_nulls); + return ::arrow::internal::make_unique(new_options); + } return Status::Invalid("Cumulative `start` option must be non-null and valid"); } @@ -119,7 +128,7 @@ struct Accumulator { template struct CumulativeKernel { static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { - const auto& options = CumulativeOptionsWrapper::Get(ctx); + const auto& options = CumulativeOptionsWrapper::Get(ctx); Accumulator accumulator(ctx); accumulator.current_value = UnboxScalar::Unbox(*(options.start)); accumulator.skip_nulls = options.skip_nulls; @@ -144,7 +153,7 @@ struct CumulativeKernel { template struct CumulativeKernelChunked { static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { - const auto& options = CumulativeOptionsWrapper::Get(ctx); + const auto& options = CumulativeOptionsWrapper::Get(ctx); Accumulator accumulator(ctx); accumulator.current_value = UnboxScalar::Unbox(*(options.start)); accumulator.skip_nulls = options.skip_nulls; @@ -197,8 +206,40 @@ const FunctionDoc cumulative_product_checked_doc{ "function \"cumulative_product\"."), {"values"}, "CumulativeProductOptions"}; + +const FunctionDoc cumulative_min_doc{ + "Compute the cumulative min over a numeric input", + ("`values` must be numeric. Return an array/chunked array which is the\n" + "cumulative min computed over `values`."), + {"values"}, + "CumulativeMinOptions"}; + +const FunctionDoc cumulative_max_doc{ + "Compute the cumulative max over a numeric input", + ("`values` must be numeric. Return an array/chunked array which is the\n" + "cumulative max computed over `values`."), + {"values"}, + "CumulativeMaxOptions"}; } // namespace +template +void AddCumulativeVectorKernel(std::shared_ptr& func) { + VectorKernel kernel; + auto ty = TypeTraits::type_singleton(); + + kernel.can_execute_chunkwise = false; + kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE; + kernel.mem_allocation = MemAllocation::type::NO_PREALLOCATE; + kernel.signature = + KernelSignature::Make({InputType::Array(ty)}, OutputType(ValueDescr(ty))); + kernel.exec = + ArithmeticExecFromOp(ty); + kernel.exec_chunked = ArithmeticExecFromOp(ty); + kernel.init = CumulativeOptionsWrapper::Init; + DCHECK_OK(func->AddKernel(std::move(kernel))); +} + template void MakeVectorCumulativeFunction(FunctionRegistry* registry, const std::string func_name, const FunctionDoc doc) { @@ -206,24 +247,16 @@ void MakeVectorCumulativeFunction(FunctionRegistry* registry, const std::string auto func = std::make_shared(func_name, Arity::Unary(), doc, &kDefaultOptions); - std::vector> types; - types.insert(types.end(), NumericTypes().begin(), NumericTypes().end()); - - for (const auto& ty : types) { - VectorKernel kernel; - kernel.can_execute_chunkwise = false; - kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE; - kernel.mem_allocation = MemAllocation::type::NO_PREALLOCATE; - kernel.signature = - KernelSignature::Make({InputType::Array(ty)}, OutputType(ValueDescr(ty))); - kernel.exec = - ArithmeticExecFromOp(ty); - kernel.exec_chunked = - ArithmeticExecFromOp(ty); - kernel.init = CumulativeOptionsWrapper::Init; - DCHECK_OK(func->AddKernel(std::move(kernel))); - } + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); + AddCumulativeVectorKernel(func); DCHECK_OK(registry->AddFunction(std::move(func))); } @@ -238,6 +271,11 @@ void RegisterVectorCumulativeOps(FunctionRegistry* registry) { registry, "cumulative_product", cumulative_product_doc); MakeVectorCumulativeFunction( registry, "cumulative_product_checked", cumulative_product_checked_doc); + + MakeVectorCumulativeFunction(registry, "cumulative_min", + cumulative_min_doc); + MakeVectorCumulativeFunction(registry, "cumulative_max", + cumulative_max_doc); } } // namespace internal diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 3987b0d5a65..429f08a54c3 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -1852,6 +1852,62 @@ class CumulativeProductOptions(_CumulativeProductOptions): self._set_options(start, skip_nulls) +cdef class _CumulativeMinOptions(FunctionOptions): + def _set_options(self, start, skip_nulls): + if not isinstance(start, Scalar): + try: + start = lib.scalar(start) + except Exception: + _raise_invalid_function_option( + start, "`start` type for CumulativeMinOptions", TypeError) + + self.wrapped.reset(new CCumulativeMinOptions(( start).unwrap(), skip_nulls)) + + +class CumulativeMinOptions(_CumulativeMinOptions): + """ + Options for `cumulative_min` function. + + Parameters + ---------- + start : Scalar, default None + Starting value for min computation + skip_nulls : bool, default False + When false, the first encountered null is propagated. + """ + + def __init__(self, start=None, *, skip_nulls=False): + self._set_options(start, skip_nulls) + + +cdef class _CumulativeMaxOptions(FunctionOptions): + def _set_options(self, start, skip_nulls): + if not isinstance(start, Scalar): + try: + start = lib.scalar(start) + except Exception: + _raise_invalid_function_option( + start, "`start` type for CumulativeMaxOptions", TypeError) + + self.wrapped.reset(new CCumulativeMaxOptions(( start).unwrap(), skip_nulls)) + + +class CumulativeMaxOptions(_CumulativeMaxOptions): + """ + Options for `cumulative_max` function. + + Parameters + ---------- + start : Scalar, default None + Starting value for max computation + skip_nulls : bool, default False + When false, the first encountered null is propagated. + """ + + def __init__(self, start=None, *, skip_nulls=False): + self._set_options(start, skip_nulls) + + cdef class _ArraySortOptions(FunctionOptions): def _set_options(self, order, null_placement): self.wrapped.reset(new CArraySortOptions( diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index bc69d39e3f5..ae887cba7d7 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -35,6 +35,8 @@ CountOptions, CumulativeSumOptions, CumulativeProductOptions, + CumulativeMinOptions, + CumulativeMaxOptions, DayOfWeekOptions, DictionaryEncodeOptions, ElementWiseAggregateOptions, diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 6b68dd0e5b1..74ff9a2a65b 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -2271,6 +2271,18 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: shared_ptr[CScalar] start c_bool skip_nulls + cdef cppclass CCumulativeMinOptions \ + "arrow::compute::CumulativeMinOptions"(CFunctionOptions): + CCumulativeMinOptions(shared_ptr[CScalar] start, c_bool skip_nulls) + shared_ptr[CScalar] start + c_bool skip_nulls + + cdef cppclass CCumulativeMaxOptions \ + "arrow::compute::CumulativeMaxOptions"(CFunctionOptions): + CCumulativeMaxOptions(shared_ptr[CScalar] start, c_bool skip_nulls) + shared_ptr[CScalar] start + c_bool skip_nulls + cdef cppclass CArraySortOptions \ "arrow::compute::ArraySortOptions"(CFunctionOptions): CArraySortOptions(CSortOrder, CNullPlacement) From 6e451ee8bf82ad03d124657a84cf0d9ff92b043c Mon Sep 17 00:00:00 2001 From: JabariBooker Date: Sun, 26 Jun 2022 21:57:28 -0400 Subject: [PATCH 04/13] Missing variable in CumulativeSum and -Product --- cpp/src/arrow/compute/api_vector.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 2ecd5671a0c..7e45172c824 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -236,6 +236,7 @@ class ARROW_EXPORT CumulativeSumOptions : public FunctionOptions { static CumulativeSumOptions Defaults() { return CumulativeSumOptions(); } const bool is_minmax = false; + const bool is_max = false; /// Optional starting value for cumulative sum std::shared_ptr start; @@ -259,6 +260,7 @@ class ARROW_EXPORT CumulativeProductOptions : public FunctionOptions { static CumulativeProductOptions Defaults() { return CumulativeProductOptions(); } const bool is_minmax = false; + const bool is_max = false; /// Optional starting value for cumulative product std::shared_ptr start; From 10608843417dbce22facfdb0c68e10b0eac6c532 Mon Sep 17 00:00:00 2001 From: JabariBooker Date: Fri, 1 Jul 2022 16:50:16 -0400 Subject: [PATCH 05/13] Updating docs for CumulativeProduct, -Min, and -Max --- docs/source/cpp/compute.rst | 46 +++++++++++++++++++++++++----- docs/source/python/api/compute.rst | 7 +++++ 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index a354f42a4b1..276ad666b2a 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -1562,13 +1562,21 @@ numeric type. By default these functions do not detect overflow. They are also available in an overflow-checking variant, suffixed ``_checked``, which returns an ``Invalid`` :class:`Status` when overflow is detected. -+------------------------+-------+-------------+-------------+--------------------------------+-------+ -| Function name | Arity | Input types | Output type | Options class | Notes | -+========================+=======+=============+=============+================================+=======+ -| cumulative_sum | Unary | Numeric | Numeric | :struct:`CumulativeSumOptions` | \(1) | -+------------------------+-------+-------------+-------------+--------------------------------+-------+ -| cumulative_sum_checked | Unary | Numeric | Numeric | :struct:`CumulativeSumOptions` | \(1) | -+------------------------+-------+-------------+-------------+--------------------------------+-------+ ++----------------------------+-------+-------------+-------------+------------------------------------+-------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++============================+=======+=============+=============+====================================+=======+ +| cumulative_sum | Unary | Numeric | Numeric | :struct:`CumulativeSumOptions` | \(1) | ++----------------------------+-------+-------------+-------------+------------------------------------+-------+ +| cumulative_sum_checked | Unary | Numeric | Numeric | :struct:`CumulativeSumOptions` | \(1) | ++----------------------------+-------+-------------+-------------+------------------------------------+-------+ +| cumulative_product | Unary | Numeric | Numeric | :struct:`CumulativeProductOptions` | \(2) | ++----------------------------+-------+-------------+-------------+------------------------------------+-------+ +| cumulative_product_checked | Unary | Numeric | Numeric | :struct:`CumulativeProductOptions` | \(2) | ++----------------------------+-------+-------------+-------------+------------------------------------+-------+ +| cumulative_min | Unary | Numeric | Numeric | :struct:`CumulativeMinOptions` | \(3) | ++----------------------------+-------+-------------+-------------+------------------------------------+-------+ +| cumulative_max | Unary | Numeric | Numeric | :struct:`CumulativeMaxOptions` | \(4) | ++----------------------------+-------+-------------+-------------+------------------------------------+-------+ * \(1) CumulativeSumOptions has two optional parameters. The first parameter :member:`CumulativeSumOptions::start` is a starting value for the running @@ -1578,6 +1586,30 @@ an ``Invalid`` :class:`Status` when overflow is detected. false (the default), the first encountered null is propagated. When set to true, each null in the input produces a corresponding null in the output. +* \(2) CumulativeProductOptions has two optional parameters. The first parameter + :member:`CumulativeProductOptions::start` is a starting value for the running + product. It has a default value of 1. Specified values of ``start`` must have + the same type as the input. The second parameter + :member:`CumulativeProductOptions::skip_nulls` is a boolean. When set to + false (the default), the first encountered null is propagated. When set to + true, each null in the input produces a corresponding null in the output. + +* \(3) CumulativeMinOptions has two optional parameters. The first parameter + :member:`CumulativeMinOptions::start` is a starting value for the running + minimum. It's default value will be maximum value of the input type. Specified + values of ``start`` must have the same type as the input. The second parameter + :member:`CumulativeMinOptions::skip_nulls` is a boolean. When set to + false (the default), the first encountered null is propagated. When set to + true, each null in the input produces a corresponding null in the output. + +* \(4) CumulativeMaxOptions has two optional parameters. The first parameter + :member:`CumulativeMaxOptions::start` is a starting value for the running + maximum. It's default value will be minimum value of the input type. Specified + values of ``start`` must have the same type as the input. The second parameter + :member:`CumulativeMaxOptions::skip_nulls` is a boolean. When set to + false (the default), the first encountered null is propagated. When set to + true, each null in the input produces a corresponding null in the output. + Associative transforms ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index 4a9208fd31b..93208a7944b 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -59,6 +59,10 @@ throws an ``ArrowInvalid`` exception when overflow is detected. cumulative_sum cumulative_sum_checked + cumulative_product + cumulative_product_checked + cumulative_min + cumulative_max Arithmetic Functions -------------------- @@ -518,6 +522,9 @@ Compute Options CountOptions CountOptions CumulativeSumOptions + CumulativeMaxOptions + CumulativeMinOptions + CumulativeProductOptions DayOfWeekOptions DictionaryEncodeOptions ElementWiseAggregateOptions From 122dad80f2e2882363b8f204d5339b501b41745b Mon Sep 17 00:00:00 2001 From: JabariBooker Date: Tue, 5 Jul 2022 12:33:19 -0400 Subject: [PATCH 06/13] Adding tests for CumulativeProduct --- .../kernels/vector_cumulative_ops_test.cc | 200 +++++++++++++++--- 1 file changed, 165 insertions(+), 35 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc b/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc index f3fec8870fd..9c73eba8b02 100644 --- a/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc @@ -34,51 +34,32 @@ namespace arrow { namespace compute { -TEST(TestCumulativeSum, Empty) { - CumulativeSumOptions options; +void TestEmpty(std::string func, FunctionOptions* options) { for (auto ty : NumericTypes()) { auto empty_arr = ArrayFromJSON(ty, "[]"); auto empty_chunked = ChunkedArrayFromJSON(ty, {"[]"}); - CheckVectorUnary("cumulative_sum", empty_arr, empty_arr, &options); - CheckVectorUnary("cumulative_sum_checked", empty_arr, empty_arr, &options); - - CheckVectorUnary("cumulative_sum", empty_chunked, empty_chunked, &options); - CheckVectorUnary("cumulative_sum_checked", empty_chunked, empty_chunked, &options); + CheckVectorUnary(func, empty_arr, empty_arr, options); + CheckVectorUnary(func, empty_chunked, empty_chunked, options); } } -TEST(TestCumulativeSum, AllNulls) { - CumulativeSumOptions options; +void TestAllNulls(std::string func, FunctionOptions* options) { for (auto ty : NumericTypes()) { auto nulls_arr = ArrayFromJSON(ty, "[null, null, null]"); auto nulls_one_chunk = ChunkedArrayFromJSON(ty, {"[null, null, null]"}); auto nulls_three_chunks = ChunkedArrayFromJSON(ty, {"[null]", "[null]", "[null]"}); - CheckVectorUnary("cumulative_sum", nulls_arr, nulls_arr, &options); - CheckVectorUnary("cumulative_sum_checked", nulls_arr, nulls_arr, &options); - - CheckVectorUnary("cumulative_sum", nulls_one_chunk, nulls_one_chunk, &options); - CheckVectorUnary("cumulative_sum_checked", nulls_one_chunk, nulls_one_chunk, - &options); - - CheckVectorUnary("cumulative_sum", nulls_three_chunks, nulls_one_chunk, &options); - CheckVectorUnary("cumulative_sum_checked", nulls_three_chunks, nulls_one_chunk, - &options); + CheckVectorUnary(func, nulls_arr, nulls_arr, options); + CheckVectorUnary(func, nulls_one_chunk, nulls_one_chunk, options); + CheckVectorUnary(func, nulls_three_chunks, nulls_one_chunk, options); } } using testing::HasSubstr; -TEST(TestCumulativeSum, ScalarNotSupported) { - CumulativeSumOptions options; - +void TestScalarNotSupported(std::string func, FunctionOptions* options) { EXPECT_RAISES_WITH_MESSAGE_THAT( NotImplemented, HasSubstr("no kernel"), - CallFunction("cumulative_sum", {std::make_shared(5)}, &options)); - - EXPECT_RAISES_WITH_MESSAGE_THAT( - NotImplemented, HasSubstr("no kernel"), - CallFunction("cumulative_sum_checked", {std::make_shared(5)}, - &options)); + CallFunction(func, {std::make_shared(5)}, options)); } template @@ -92,17 +73,17 @@ void CheckCumulativeSumUnsignedOverflow() { BuilderType builder; std::shared_ptr max_arr; - std::shared_ptr min_arr; + std::shared_ptr overflow_arr; ASSERT_OK(builder.Append(max)); ASSERT_OK(builder.Finish(&max_arr)); builder.Reset(); ASSERT_OK(builder.Append(min)); - ASSERT_OK(builder.Finish(&min_arr)); + ASSERT_OK(builder.Finish(&overflow_arr)); EXPECT_RAISES_WITH_MESSAGE_THAT( Invalid, HasSubstr("overflow"), CallFunction("cumulative_sum_checked", {max_arr}, &pos_overflow)); - CheckVectorUnary("cumulative_sum", max_arr, min_arr, &pos_overflow); + CheckVectorUnary("cumulative_sum", max_arr, overflow_arr, &pos_overflow); } template @@ -117,20 +98,115 @@ void CheckCumulativeSumSignedOverflow() { auto min = std::numeric_limits::lowest(); BuilderType builder; - std::shared_ptr max_arr; std::shared_ptr min_arr; + std::shared_ptr overflow_arr; + ASSERT_OK(builder.Append(min)); + ASSERT_OK(builder.Finish(&min_arr)); + builder.Reset(); + ASSERT_OK(builder.Append(max)); + ASSERT_OK(builder.Finish(&overflow_arr)); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, HasSubstr("overflow"), + CallFunction("cumulative_sum_checked", {min_arr}, &neg_overflow)); + CheckVectorUnary("cumulative_sum", min_arr, overflow_arr, &neg_overflow); +} + +template +void CheckCumulativeProductUnsignedOverflow() { + using CType = typename TypeTraits::CType; + using BuilderType = typename TypeTraits::BuilderType; + + CumulativeProductOptions pos_overflow(2); + auto max = std::numeric_limits::max(); + + BuilderType builder; + std::shared_ptr max_arr; + std::shared_ptr overflow_arr; ASSERT_OK(builder.Append(max)); ASSERT_OK(builder.Finish(&max_arr)); builder.Reset(); + ASSERT_OK(builder.Append(max << 1)); + ASSERT_OK(builder.Finish(&overflow_arr)); + + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, HasSubstr("overflow"), + CallFunction("cumulative_product_checked", {max_arr}, &pos_overflow)); + CheckVectorUnary("cumulative_product", max_arr, overflow_arr, &pos_overflow); +} + +template +void CheckCumulativeProductSignedOverflow() { + using CType = typename TypeTraits::CType; + using BuilderType = typename TypeTraits::BuilderType; + + CheckCumulativeProductUnsignedOverflow(); + + CumulativeProductOptions neg_overflow(2); + auto min = std::numeric_limits::lowest(); + + BuilderType builder; + std::shared_ptr min_arr; + std::shared_ptr overflow_arr; ASSERT_OK(builder.Append(min)); ASSERT_OK(builder.Finish(&min_arr)); + builder.Reset(); + ASSERT_OK(builder.Append(0)); + ASSERT_OK(builder.Finish(&overflow_arr)); EXPECT_RAISES_WITH_MESSAGE_THAT( Invalid, HasSubstr("overflow"), - CallFunction("cumulative_sum_checked", {min_arr}, &neg_overflow)); - CheckVectorUnary("cumulative_sum", min_arr, max_arr, &neg_overflow); + CallFunction("cumulative_product_checked", {min_arr}, &neg_overflow)); + CheckVectorUnary("cumulative_product", min_arr, overflow_arr, &neg_overflow); } -TEST(TestCumulativeSum, IntegerOverflow) { +TEST(TestCumulativeOps, Empty) { + CumulativeSumOptions sum_options; + TestEmpty("cumulative_sum", &sum_options); + TestEmpty("cumulative_sum_checked", &sum_options); + + CumulativeProductOptions product_options; + TestEmpty("cumulative_product", &product_options); + TestEmpty("cumulative_product_checked", &product_options); + + CumulativeMinOptions min_options; + TestEmpty("cumulative_min", &min_options); + + CumulativeMaxOptions max_options; + TestEmpty("cumulative_max", &max_options); +} + +TEST(TestCumulativeOps, AllNulls) { + CumulativeSumOptions sum_options; + TestAllNulls("cumulative_sum", &sum_options); + TestAllNulls("cumulative_sum_checked", &sum_options); + + CumulativeProductOptions product_options; + TestAllNulls("cumulative_product", &product_options); + TestAllNulls("cumulative_product_checked", &product_options); + + CumulativeMinOptions min_options; + TestAllNulls("cumulative_min", &min_options); + + CumulativeMaxOptions max_options; + TestAllNulls("cumulative_max", &max_options); +} + +TEST(TestCumulativeOps, ScalarNotSupported) { + CumulativeSumOptions sum_options; + TestScalarNotSupported("cumulative_sum", &sum_options); + TestScalarNotSupported("cumulative_sum_checked", &sum_options); + + CumulativeProductOptions product_options; + TestScalarNotSupported("cumulative_product", &product_options); + TestScalarNotSupported("cumulative_product_checked", &product_options); + + CumulativeMinOptions min_options; + TestScalarNotSupported("cumulative_min", &min_options); + + CumulativeMaxOptions max_options; + TestScalarNotSupported("cumulative_max", &max_options); +} + +TEST(TestCumulativeOps, IntegerOverflow) { CheckCumulativeSumUnsignedOverflow(); CheckCumulativeSumUnsignedOverflow(); CheckCumulativeSumUnsignedOverflow(); @@ -139,6 +215,15 @@ TEST(TestCumulativeSum, IntegerOverflow) { CheckCumulativeSumSignedOverflow(); CheckCumulativeSumSignedOverflow(); CheckCumulativeSumSignedOverflow(); + + CheckCumulativeProductUnsignedOverflow(); + CheckCumulativeProductUnsignedOverflow(); + CheckCumulativeProductUnsignedOverflow(); + CheckCumulativeProductUnsignedOverflow(); + CheckCumulativeProductSignedOverflow(); + CheckCumulativeProductSignedOverflow(); + CheckCumulativeProductSignedOverflow(); + CheckCumulativeProductSignedOverflow(); } TEST(TestCumulativeSum, NoStartNoSkip) { @@ -186,6 +271,51 @@ TEST(TestCumulativeSum, NoStartNoSkip) { } } +TEST(TestCumulativeProduct, NoStartNoSkip) { + CumulativeProductOptions options; + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[1, 2, 3, 4, 5]"), + ArrayFromJSON(ty, "[1, 2, 6, 24, 120]"), &options); + CheckVectorUnary("cumulative_product_checked", ArrayFromJSON(ty, "[1, 2, 3, 4, 5]"), + ArrayFromJSON(ty, "[1, 2, 6, 24, 120]"), &options); + + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[1, 2, null, 4, null]"), + ArrayFromJSON(ty, "[1, 2, null, null, null]"), &options); + CheckVectorUnary("cumulative_product_checked", + ArrayFromJSON(ty, "[1, 2, null, 4, null]"), + ArrayFromJSON(ty, "[1, 2, null, null, null]"), &options); + + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[null, 2, null, 4, null]"), + ArrayFromJSON(ty, "[null, null, null, null, null]"), &options); + CheckVectorUnary("cumulative_product_checked", + ArrayFromJSON(ty, "[null, 2, null, 4, null]"), + ArrayFromJSON(ty, "[null, null, null, null, null]"), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, 6, 24, 120]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5, 6]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, 6, 24, 120]"}), &options); + + CheckVectorUnary( + "cumulative_product", ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, null, null, null]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, null, null, null]"}), + &options); + + CheckVectorUnary( + "cumulative_product", ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[null, null, null, null, null]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[null, null, null, null, null]"}), + &options); + } +} + TEST(TestCumulativeSum, NoStartDoSkip) { CumulativeSumOptions options(0, true); for (auto ty : NumericTypes()) { From f560d2daf82108390ed93c75fd5e4029b36eccb3 Mon Sep 17 00:00:00 2001 From: JabariBooker Date: Tue, 5 Jul 2022 14:11:49 -0400 Subject: [PATCH 07/13] Adding the rest of tests for CumulativeProduct --- .../kernels/vector_cumulative_ops_test.cc | 136 +++++++++++++++++- 1 file changed, 130 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc b/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc index 9c73eba8b02..84a91cbde18 100644 --- a/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc @@ -295,16 +295,15 @@ TEST(TestCumulativeProduct, NoStartNoSkip) { ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5]"}), ChunkedArrayFromJSON(ty, {"[1, 2, 6, 24, 120]"}), &options); CheckVectorUnary("cumulative_product_checked", - ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5, 6]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5]"}), ChunkedArrayFromJSON(ty, {"[1, 2, 6, 24, 120]"}), &options); - CheckVectorUnary( - "cumulative_product", ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null]"}), - ChunkedArrayFromJSON(ty, {"[1, 2, null, null, null]"}), &options); + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, null, null, null]"}), &options); CheckVectorUnary("cumulative_product_checked", ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null]"}), - ChunkedArrayFromJSON(ty, {"[1, 2, null, null, null]"}), - &options); + ChunkedArrayFromJSON(ty, {"[1, 2, null, null, null]"}), &options); CheckVectorUnary( "cumulative_product", ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null]"}), @@ -360,6 +359,49 @@ TEST(TestCumulativeSum, NoStartDoSkip) { } } +TEST(TestCumulativeProduct, NoStartDoSkip) { + CumulativeProductOptions options(1, true); + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[1, 2, 3, 4, 5]"), + ArrayFromJSON(ty, "[1, 2, 6, 24, 120]"), &options); + CheckVectorUnary("cumulative_product_checked", ArrayFromJSON(ty, "[1, 2, 3, 4, 5]"), + ArrayFromJSON(ty, "[1, 2, 6, 24, 120]"), &options); + + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[1, 2, null, 4, null]"), + ArrayFromJSON(ty, "[1, 2, null, 8, null]"), &options); + CheckVectorUnary("cumulative_product_checked", + ArrayFromJSON(ty, "[1, 2, null, 4, null]"), + ArrayFromJSON(ty, "[1, 2, null, 8, null]"), &options); + + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[null, 2, null, 4, null]"), + ArrayFromJSON(ty, "[null, 2, null, 8, null]"), &options); + CheckVectorUnary("cumulative_product_checked", + ArrayFromJSON(ty, "[null, 2, null, 4, null]"), + ArrayFromJSON(ty, "[null, 2, null, 8, null]"), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, 6, 24, 120]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[1, 2, 3]", "[4, 5]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, 6, 24, 120]"}), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, null, 8, null]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[1, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[1, 2, null, 8, null]"}), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[null, 2, null, 8, null]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[null, 2, null]", "[4, null]"}), + ChunkedArrayFromJSON(ty, {"[null, 2, null, 8, null]"}), &options); + } +} + TEST(TestCumulativeSum, HasStartNoSkip) { CumulativeSumOptions options(10); for (auto ty : NumericTypes()) { @@ -405,6 +447,47 @@ TEST(TestCumulativeSum, HasStartNoSkip) { } } +TEST(TestCumulativeProduct, HasStartNoSkip) { + CumulativeProductOptions options(5); + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[1, 2, 3, 4]"), + ArrayFromJSON(ty, "[5, 10, 30, 120]"), &options); + CheckVectorUnary("cumulative_product_checked", ArrayFromJSON(ty, "[1, 2, 3, 4]"), + ArrayFromJSON(ty, "[5, 10, 30, 120]"), &options); + + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[1, 2, null, 4]"), + ArrayFromJSON(ty, "[5, 10, null, null]"), &options); + CheckVectorUnary("cumulative_product_checked", ArrayFromJSON(ty, "[1, 2, null, 4]"), + ArrayFromJSON(ty, "[5, 10, null, null]"), &options); + + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[null, 2, null, 4]"), + ArrayFromJSON(ty, "[null, null, null, null]"), &options); + CheckVectorUnary("cumulative_product_checked", + ArrayFromJSON(ty, "[null, 2, null, 4]"), + ArrayFromJSON(ty, "[null, null, null, null]"), &options); + + CheckVectorUnary("cumulative_product", ChunkedArrayFromJSON(ty, {"[1, 2]", "[3, 4]"}), + ChunkedArrayFromJSON(ty, {"[5, 10, 30, 120]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[1, 2]", "[3, 4]"}), + ChunkedArrayFromJSON(ty, {"[5, 10, 30, 120]"}), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[1, 2]", "[null, 4]"}), + ChunkedArrayFromJSON(ty, {"[5, 10, null, null]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[1, 2]", "[null, 4]"}), + ChunkedArrayFromJSON(ty, {"[5, 10, null, null]"}), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[null, 2]", "[null, 4]"}), + ChunkedArrayFromJSON(ty, {"[null, null, null, null]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[null, 2]", "[null, 4]"}), + ChunkedArrayFromJSON(ty, {"[null, null, null, null]"}), &options); + } +} + TEST(TestCumulativeSum, HasStartDoSkip) { CumulativeSumOptions options(10, true); for (auto ty : NumericTypes()) { @@ -450,5 +533,46 @@ TEST(TestCumulativeSum, HasStartDoSkip) { } } +TEST(TestCumulativeProduct, HasStartDoSkip) { + CumulativeProductOptions options(5, true); + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[1, 2, 3, 4]"), + ArrayFromJSON(ty, "[5, 10, 30, 120]"), &options); + CheckVectorUnary("cumulative_product_checked", ArrayFromJSON(ty, "[1, 2, 3, 4]"), + ArrayFromJSON(ty, "[5, 10, 30, 120]"), &options); + + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[1, 2, null, 4]"), + ArrayFromJSON(ty, "[5, 10, null, 40]"), &options); + CheckVectorUnary("cumulative_product_checked", ArrayFromJSON(ty, "[1, 2, null, 4]"), + ArrayFromJSON(ty, "[5, 10, null, 40]"), &options); + + CheckVectorUnary("cumulative_product", ArrayFromJSON(ty, "[null, 2, null, 4]"), + ArrayFromJSON(ty, "[null, 10, null, 40]"), &options); + CheckVectorUnary("cumulative_product_checked", + ArrayFromJSON(ty, "[null, 2, null, 4]"), + ArrayFromJSON(ty, "[null, 10, null, 40]"), &options); + + CheckVectorUnary("cumulative_product", ChunkedArrayFromJSON(ty, {"[1, 2]", "[3, 4]"}), + ChunkedArrayFromJSON(ty, {"[5, 10, 30, 120]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[1, 2]", "[3, 4]"}), + ChunkedArrayFromJSON(ty, {"[5, 10, 30, 120]"}), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[1, 2]", "[null, 4]"}), + ChunkedArrayFromJSON(ty, {"[5, 10, null, 40]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[1, 2]", "[null, 4]"}), + ChunkedArrayFromJSON(ty, {"[5, 10, null, 40]"}), &options); + + CheckVectorUnary("cumulative_product", + ChunkedArrayFromJSON(ty, {"[null, 2]", "[null, 4]"}), + ChunkedArrayFromJSON(ty, {"[null, 10, null, 40]"}), &options); + CheckVectorUnary("cumulative_product_checked", + ChunkedArrayFromJSON(ty, {"[null, 2]", "[null, 4]"}), + ChunkedArrayFromJSON(ty, {"[null, 10, null, 40]"}), &options); + } +} + } // namespace compute } // namespace arrow From 7987b90ed484905652d07f1d78311c53226d911e Mon Sep 17 00:00:00 2001 From: JabariBooker Date: Tue, 5 Jul 2022 16:02:49 -0400 Subject: [PATCH 08/13] Added tests for CumulativeMin and -Max --- .../kernels/vector_cumulative_ops_test.cc | 184 ++++++++++++++++++ 1 file changed, 184 insertions(+) diff --git a/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc b/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc index 84a91cbde18..ed530b8995b 100644 --- a/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc @@ -315,6 +315,52 @@ TEST(TestCumulativeProduct, NoStartNoSkip) { } } +TEST(TestCumulativeMinMax, NoStartNoSkip) { + CumulativeMinOptions min_options; + CumulativeMaxOptions max_options; + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[9, 24, 7, 123, 5, 74]"), + ArrayFromJSON(ty, "[9, 9, 7, 7, 5, 5]"), &min_options); + CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[9, 24, 7, 123, 5, 74]"), + ArrayFromJSON(ty, "[9, 24, 24, 123, 123, 123]"), &max_options); + + CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[9, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[9, 9, null, null, null, null]"), &min_options); + CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[9, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[9, 24, null, null, null, null]"), &max_options); + + CheckVectorUnary( + "cumulative_min", ArrayFromJSON(ty, "[null, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &min_options); + CheckVectorUnary( + "cumulative_max", ArrayFromJSON(ty, "[null, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &max_options); + + CheckVectorUnary("cumulative_min", + ChunkedArrayFromJSON(ty, {"[9, 24, 7]", "[123, 5, 74]"}), + ChunkedArrayFromJSON(ty, {"[9, 9, 7, 7, 5, 5]"}), &min_options); + CheckVectorUnary( + "cumulative_max", ChunkedArrayFromJSON(ty, {"[9, 24, 7]", "[123, 5, 74]"}), + ChunkedArrayFromJSON(ty, {"[9, 24, 24, 123, 123, 123]"}), &max_options); + + CheckVectorUnary( + "cumulative_min", ChunkedArrayFromJSON(ty, {"[9, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[9, 9, null, null, null, null]"}), &min_options); + CheckVectorUnary( + "cumulative_max", ChunkedArrayFromJSON(ty, {"[9, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[9, 24, null, null, null, null]"}), &max_options); + + CheckVectorUnary("cumulative_min", + ChunkedArrayFromJSON(ty, {"[null, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}), + &min_options); + CheckVectorUnary("cumulative_max", + ChunkedArrayFromJSON(ty, {"[null, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}), + &max_options); + } +} + TEST(TestCumulativeSum, NoStartDoSkip) { CumulativeSumOptions options(0, true); for (auto ty : NumericTypes()) { @@ -402,6 +448,52 @@ TEST(TestCumulativeProduct, NoStartDoSkip) { } } +TEST(TestCumulativeMinMax, NoStartDoSkip) { + CumulativeMinOptions min_options(true); + CumulativeMaxOptions max_options(true); + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[9, 24, 7, 123, 5, 74]"), + ArrayFromJSON(ty, "[9, 9, 7, 7, 5, 5]"), &min_options); + CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[9, 24, 7, 123, 5, 74]"), + ArrayFromJSON(ty, "[9, 24, 24, 123, 123, 123]"), &max_options); + + CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[9, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[9, 9, null, 9, null, 9]"), &min_options); + CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[9, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[9, 24, null, 123, null, 123]"), &max_options); + + CheckVectorUnary("cumulative_min", + ArrayFromJSON(ty, "[null, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[null, 24, null, 24, null, 24]"), &min_options); + CheckVectorUnary("cumulative_max", + ArrayFromJSON(ty, "[null, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[null, 24, null, 123, null, 123]"), &max_options); + + CheckVectorUnary("cumulative_min", + ChunkedArrayFromJSON(ty, {"[9, 24, 7]", "[123, 5, 74]"}), + ChunkedArrayFromJSON(ty, {"[9, 9, 7, 7, 5, 5]"}), &min_options); + CheckVectorUnary( + "cumulative_max", ChunkedArrayFromJSON(ty, {"[9, 24, 7]", "[123, 5, 74]"}), + ChunkedArrayFromJSON(ty, {"[9, 24, 24, 123, 123, 123]"}), &max_options); + + CheckVectorUnary( + "cumulative_min", ChunkedArrayFromJSON(ty, {"[9, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[9, 9, null, 9, null, 9]"}), &min_options); + CheckVectorUnary( + "cumulative_max", ChunkedArrayFromJSON(ty, {"[9, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[9, 24, null, 123, null, 123]"}), &max_options); + + CheckVectorUnary("cumulative_min", + ChunkedArrayFromJSON(ty, {"[null, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[null, 24, null, 24, null, 24]"}), + &min_options); + CheckVectorUnary("cumulative_max", + ChunkedArrayFromJSON(ty, {"[null, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[null, 24, null, 123, null, 123]"}), + &max_options); + } +} + TEST(TestCumulativeSum, HasStartNoSkip) { CumulativeSumOptions options(10); for (auto ty : NumericTypes()) { @@ -488,6 +580,52 @@ TEST(TestCumulativeProduct, HasStartNoSkip) { } } +TEST(TestCumulativeMinMax, HasStartNoSkip) { + CumulativeMinOptions min_options(8, false); + CumulativeMaxOptions max_options(32, false); + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[9, 24, 7, 123, 5, 74]"), + ArrayFromJSON(ty, "[8, 8, 7, 7, 5, 5]"), &min_options); + CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[9, 24, 7, 123, 5, 74]"), + ArrayFromJSON(ty, "[32, 32, 32, 123, 123, 123]"), &max_options); + + CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[9, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[8, 8, null, null, null, null]"), &min_options); + CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[9, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[32, 32, null, null, null, null]"), &max_options); + + CheckVectorUnary( + "cumulative_min", ArrayFromJSON(ty, "[null, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &min_options); + CheckVectorUnary( + "cumulative_max", ArrayFromJSON(ty, "[null, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[null, null, null, null, null, null]"), &max_options); + + CheckVectorUnary("cumulative_min", + ChunkedArrayFromJSON(ty, {"[9, 24, 7]", "[123, 5, 74]"}), + ChunkedArrayFromJSON(ty, {"[8, 8, 7, 7, 5, 5]"}), &min_options); + CheckVectorUnary( + "cumulative_max", ChunkedArrayFromJSON(ty, {"[9, 24, 7]", "[123, 5, 74]"}), + ChunkedArrayFromJSON(ty, {"[32, 32, 32, 123, 123, 123]"}), &max_options); + + CheckVectorUnary( + "cumulative_min", ChunkedArrayFromJSON(ty, {"[9, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[8, 8, null, null, null, null]"}), &min_options); + CheckVectorUnary( + "cumulative_max", ChunkedArrayFromJSON(ty, {"[9, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[32, 32, null, null, null, null]"}), &max_options); + + CheckVectorUnary("cumulative_min", + ChunkedArrayFromJSON(ty, {"[null, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}), + &min_options); + CheckVectorUnary("cumulative_max", + ChunkedArrayFromJSON(ty, {"[null, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[null, null, null, null, null, null]"}), + &max_options); + } +} + TEST(TestCumulativeSum, HasStartDoSkip) { CumulativeSumOptions options(10, true); for (auto ty : NumericTypes()) { @@ -574,5 +712,51 @@ TEST(TestCumulativeProduct, HasStartDoSkip) { } } +TEST(TestCumulativeMinMax, HasStartDoSkip) { + CumulativeMinOptions min_options(8, true); + CumulativeMaxOptions max_options(32, true); + for (auto ty : NumericTypes()) { + CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[9, 24, 7, 123, 5, 74]"), + ArrayFromJSON(ty, "[8, 8, 7, 7, 5, 5]"), &min_options); + CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[9, 24, 7, 123, 5, 74]"), + ArrayFromJSON(ty, "[32, 32, 32, 123, 123, 123]"), &max_options); + + CheckVectorUnary("cumulative_min", ArrayFromJSON(ty, "[9, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[8, 8, null, 8, null, 8]"), &min_options); + CheckVectorUnary("cumulative_max", ArrayFromJSON(ty, "[9, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[32, 32, null, 123, null, 123]"), &max_options); + + CheckVectorUnary("cumulative_min", + ArrayFromJSON(ty, "[null, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[null, 8, null, 8, null, 8]"), &min_options); + CheckVectorUnary("cumulative_max", + ArrayFromJSON(ty, "[null, 24, null, 123, null, 74]"), + ArrayFromJSON(ty, "[null, 32, null, 123, null, 123]"), &max_options); + + CheckVectorUnary("cumulative_min", + ChunkedArrayFromJSON(ty, {"[9, 24, 7]", "[123, 5, 74]"}), + ChunkedArrayFromJSON(ty, {"[8, 8, 7, 7, 5, 5]"}), &min_options); + CheckVectorUnary( + "cumulative_max", ChunkedArrayFromJSON(ty, {"[9, 24, 7]", "[123, 5, 74]"}), + ChunkedArrayFromJSON(ty, {"[32, 32, 32, 123, 123, 123]"}), &max_options); + + CheckVectorUnary( + "cumulative_min", ChunkedArrayFromJSON(ty, {"[9, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[8, 8, null, 8, null, 8]"}), &min_options); + CheckVectorUnary( + "cumulative_max", ChunkedArrayFromJSON(ty, {"[9, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[32, 32, null, 123, null, 123]"}), &max_options); + + CheckVectorUnary("cumulative_min", + ChunkedArrayFromJSON(ty, {"[null, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[null, 8, null, 8, null, 8]"}), + &min_options); + CheckVectorUnary("cumulative_max", + ChunkedArrayFromJSON(ty, {"[null, 24, null]", "[123, null, 74]"}), + ChunkedArrayFromJSON(ty, {"[null, 32, null, 123, null, 123]"}), + &max_options); + } +} + } // namespace compute } // namespace arrow From 29176e15437dc26e4a840afd1556698e63f9c98f Mon Sep 17 00:00:00 2001 From: JabariBooker Date: Tue, 5 Jul 2022 18:03:45 -0400 Subject: [PATCH 09/13] Added pytests for CumulativeProduct, -Min, and -Max --- python/pyarrow/tests/test_compute.py | 156 +++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 67857ed6ec8..1e3c6994dbc 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -2585,6 +2585,162 @@ def test_cumulative_sum(start, skip_nulls): pc.cumulative_sum([1, 2, 3], start=strt) +@pytest.mark.parametrize('start', (1.25, 10.5, -10.5)) +@pytest.mark.parametrize('skip_nulls', (True, False)) +def test_cumulative_product(start, skip_nulls): + # Exact tests (e.g., integral types) + start_int = int(start) + starts = [start_int, pa.scalar(start_int, type=pa.int8()), + pa.scalar(start_int, type=pa.int64())] + for strt in starts: + arrays = [ + pa.array([1, 2, 3]), + pa.array([1, None, 3, 4]), + pa.chunked_array([[1, None], [3, 4]]) + ] + expected_arrays = [ + pa.array([1, 2, 6]), + pa.array([1, None, 3, 12]) + if skip_nulls else pa.array([1, None, None, None]), + pa.chunked_array([[1, None, 3, 12]]) + if skip_nulls else pa.chunked_array([[1, None, None, None]]) + ] + for i, arr in enumerate(arrays): + result = pc.cumulative_product(arr, start=strt, skip_nulls=skip_nulls) + # Add `start` offset to expected array before comparing + expected = pc.multiply(expected_arrays[i], strt) + assert result.equals(expected) + + starts = [start, pa.scalar(start, type=pa.float32()), + pa.scalar(start, type=pa.float64())] + for strt in starts: + arrays = [ + pa.array([1.125, 2.25, 3.03125]), + pa.array([1, np.nan, 2, -3, 4, 5]), + pa.array([1, np.nan, None, 3, None, 5]) + ] + expected_arrays = [ + np.array([1.125, 2.53125, 7.6728515625]), + np.array([1, np.nan, np.nan, np.nan, np.nan, np.nan]), + np.array([1, np.nan, None, np.nan, None, np.nan]) + if skip_nulls else np.array([1, np.nan, None, None, None, None]) + ] + for i, arr in enumerate(arrays): + result = pc.cumulative_product(arr, start=strt, skip_nulls=skip_nulls) + # Add `start` offset to expected array before comparing + expected = pc.multiply(expected_arrays[i], strt) + np.testing.assert_array_almost_equal(result.to_numpy( + zero_copy_only=False), expected.to_numpy(zero_copy_only=False)) + + for strt in ['a', pa.scalar('arrow'), 1.1]: + with pytest.raises(pa.ArrowInvalid): + pc.cumulative_product([1, 2, 3], start=strt) + + +@pytest.mark.parametrize('start', (1.25, 10.5, -10.5)) +@pytest.mark.parametrize('skip_nulls', (True, False)) +def test_cumulative_max(start, skip_nulls): + # Exact tests (e.g., integral types) + start_int = int(start) + starts = [start_int, pa.scalar(start_int, type=pa.int8()), + pa.scalar(start_int, type=pa.int64())] + for strt in starts: + arrays = [ + pa.array([9, 24, 7]), + pa.array([7, None, 5, 74]), + pa.chunked_array([[7, None], [5, 74]]) + ] + expected_arrays = [ + pa.array([9, 24, 24]), + pa.array([7, None, 7, 74]) + if skip_nulls else pa.array([7, None, None, None]), + pa.chunked_array([[7, None, 7, 74]]) + if skip_nulls else pa.chunked_array([[7, None, None, None]]) + ] + for i, arr in enumerate(arrays): + result = pc.cumulative_max(arr, start=strt, skip_nulls=skip_nulls) + # Add `start` offset to expected array before comparing + expected = pc.max(pa.array([expected_arrays[i], strt])) + assert result.equals(expected) + + starts = [start, pa.scalar(start, type=pa.float32()), + pa.scalar(start, type=pa.float64())] + for strt in starts: + arrays = [ + pa.array([9.125, 27.25, 7.03125]), + pa.array([1, np.nan, 2, -3, 4, 5]), + pa.array([1, np.nan, None, 3, None, 5]) + ] + expected_arrays = [ + np.array([9.125, 27.25, 27.25]), + np.array([1, np.nan, np.nan, np.nan, np.nan, np.nan]), + np.array([1, np.nan, None, np.nan, None, np.nan]) + if skip_nulls else np.array([1, np.nan, None, None, None, None]) + ] + for i, arr in enumerate(arrays): + result = pc.cumulative_max(arr, start=strt, skip_nulls=skip_nulls) + # Add `start` offset to expected array before comparing + expected = pc.max(pa.array([expected_arrays[i], strt])) + np.testing.assert_array_almost_equal(result.to_numpy( + zero_copy_only=False), expected.to_numpy(zero_copy_only=False)) + + for strt in ['a', pa.scalar('arrow'), 1.1]: + with pytest.raises(pa.ArrowInvalid): + pc.cumulative_max([1, 2, 3], start=strt) + + +@pytest.mark.parametrize('start', (1.25, 10.5, -10.5)) +@pytest.mark.parametrize('skip_nulls', (True, False)) +def test_cumulative_min(start, skip_nulls): + # Exact tests (e.g., integral types) + start_int = int(start) + starts = [start_int, pa.scalar(start_int, type=pa.int8()), + pa.scalar(start_int, type=pa.int64())] + for strt in starts: + arrays = [ + pa.array([9, 24, 7]), + pa.array([7, None, 5, 74]), + pa.chunked_array([[7, None], [5, 74]]) + ] + expected_arrays = [ + pa.array([9, 9, 7]), + pa.array([7, None, 5, 5]) + if skip_nulls else pa.array([7, None, None, None]), + pa.chunked_array([[7, None, 5, 5]]) + if skip_nulls else pa.chunked_array([[7, None, None, None]]) + ] + for i, arr in enumerate(arrays): + result = pc.cumulative_min(arr, start=strt, skip_nulls=skip_nulls) + # Add `start` offset to expected array before comparing + expected = pc.min(pa.array([expected_arrays[i], strt])) + assert result.equals(expected) + + starts = [start, pa.scalar(start, type=pa.float32()), + pa.scalar(start, type=pa.float64())] + for strt in starts: + arrays = [ + pa.array([9.125, 27.25, 7.03125]), + pa.array([1, np.nan, 2, -3, 4, 5]), + pa.array([1, np.nan, None, 3, None, 5]) + ] + expected_arrays = [ + np.array([9.125, 9.125, 7.03125]), + np.array([1, np.nan, np.nan, np.nan, np.nan, np.nan]), + np.array([1, np.nan, None, np.nan, None, np.nan]) + if skip_nulls else np.array([1, np.nan, None, None, None, None]) + ] + for i, arr in enumerate(arrays): + result = pc.cumulative_min(arr, start=strt, skip_nulls=skip_nulls) + # Add `start` offset to expected array before comparing + expected = pc.min(pa.array([expected_arrays[i], strt])) + np.testing.assert_array_almost_equal(result.to_numpy( + zero_copy_only=False), expected.to_numpy(zero_copy_only=False)) + + for strt in ['a', pa.scalar('arrow'), 1.1]: + with pytest.raises(pa.ArrowInvalid): + pc.cumulative_min([1, 2, 3], start=strt) + + def test_make_struct(): assert pc.make_struct(1, 'a').as_py() == {'0': 1, '1': 'a'} From 1036779430bc1efb475137e85dfc46665eb08630 Mon Sep 17 00:00:00 2001 From: JabariBooker Date: Tue, 5 Jul 2022 18:12:45 -0400 Subject: [PATCH 10/13] Forgot linting issue --- python/pyarrow/tests/test_compute.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 1e3c6994dbc..0c6f7d6ae03 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -2606,7 +2606,8 @@ def test_cumulative_product(start, skip_nulls): if skip_nulls else pa.chunked_array([[1, None, None, None]]) ] for i, arr in enumerate(arrays): - result = pc.cumulative_product(arr, start=strt, skip_nulls=skip_nulls) + result = pc.cumulative_product( + arr, start=strt, skip_nulls=skip_nulls) # Add `start` offset to expected array before comparing expected = pc.multiply(expected_arrays[i], strt) assert result.equals(expected) @@ -2626,7 +2627,8 @@ def test_cumulative_product(start, skip_nulls): if skip_nulls else np.array([1, np.nan, None, None, None, None]) ] for i, arr in enumerate(arrays): - result = pc.cumulative_product(arr, start=strt, skip_nulls=skip_nulls) + result = pc.cumulative_product( + arr, start=strt, skip_nulls=skip_nulls) # Add `start` offset to expected array before comparing expected = pc.multiply(expected_arrays[i], strt) np.testing.assert_array_almost_equal(result.to_numpy( From 2e6527d7a707258f819d8be715cb3a0017218b07 Mon Sep 17 00:00:00 2001 From: JabariBooker Date: Wed, 27 Jul 2022 22:45:42 -0400 Subject: [PATCH 11/13] Completed C++ and Python unit tests --- .../compute/kernels/vector_cumulative_ops.cc | 2 +- .../kernels/vector_cumulative_ops_test.cc | 75 ++++++++++++++++--- python/pyarrow/tests/test_compute.py | 64 +++++++++++++--- 3 files changed, 119 insertions(+), 22 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc index 267bbe99a24..b77bd174bdd 100644 --- a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc +++ b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc @@ -224,7 +224,7 @@ void AddCumulativeVectorKernel(std::shared_ptr& func) { kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE; kernel.mem_allocation = MemAllocation::type::NO_PREALLOCATE; kernel.signature = - KernelSignature::Make({InputType::Array(ty)}, OutputType(ValueDescr(ty))); + KernelSignature::Make({InputType(ty)}, OutputType(ty)); kernel.exec = ArithmeticExecFromOp(ty); kernel.exec_chunked = ArithmeticExecFromOp Date: Wed, 27 Jul 2022 22:52:38 -0400 Subject: [PATCH 12/13] Correcting linting --- cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc | 3 +-- cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc index b77bd174bdd..4450999ab09 100644 --- a/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc +++ b/cpp/src/arrow/compute/kernels/vector_cumulative_ops.cc @@ -223,8 +223,7 @@ void AddCumulativeVectorKernel(std::shared_ptr& func) { kernel.can_execute_chunkwise = false; kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE; kernel.mem_allocation = MemAllocation::type::NO_PREALLOCATE; - kernel.signature = - KernelSignature::Make({InputType(ty)}, OutputType(ty)); + kernel.signature = KernelSignature::Make({InputType(ty)}, OutputType(ty)); kernel.exec = ArithmeticExecFromOp(ty); kernel.exec_chunked = ArithmeticExecFromOp Date: Thu, 28 Jul 2022 11:56:26 -0400 Subject: [PATCH 13/13] Removing unused math.isnan import --- python/pyarrow/tests/test_compute.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index def8fe8a49b..95cb4da282e 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -18,7 +18,6 @@ from datetime import datetime from functools import lru_cache, partial import inspect -from math import isnan import os import pickle import pytest