diff --git a/cpp/src/arrow/compute/api_aggregate.cc b/cpp/src/arrow/compute/api_aggregate.cc index be05c3c11d0..1b00c366bfd 100644 --- a/cpp/src/arrow/compute/api_aggregate.cc +++ b/cpp/src/arrow/compute/api_aggregate.cc @@ -155,12 +155,14 @@ Result MinMax(const Datum& value, const ScalarAggregateOptions& options, return CallFunction("min_max", {value}, &options, ctx); } -Result Any(const Datum& value, ExecContext* ctx) { - return CallFunction("any", {value}, ctx); +Result Any(const Datum& value, const ScalarAggregateOptions& options, + ExecContext* ctx) { + return CallFunction("any", {value}, &options, ctx); } -Result All(const Datum& value, ExecContext* ctx) { - return CallFunction("all", {value}, ctx); +Result All(const Datum& value, const ScalarAggregateOptions& options, + ExecContext* ctx) { + return CallFunction("all", {value}, &options, ctx); } Result Mode(const Datum& value, const ModeOptions& options, ExecContext* ctx) { diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index 7b6e2ef96de..7a6c44bd923 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -205,30 +205,44 @@ Result MinMax( /// \brief Test whether any element in a boolean array evaluates to true. /// /// This function returns true if any of the elements in the array evaluates -/// to true and false otherwise. Null values are skipped. +/// to true and false otherwise. Null values are ignored by default. +/// If null values are taken into account by setting ScalarAggregateOptions +/// parameter skip_nulls = false then Kleene logic is used. +/// See KleeneOr for more details on Kleene logic. /// /// \param[in] value input datum, expecting a boolean array +/// \param[in] options see ScalarAggregateOptions for more information /// \param[in] ctx the function execution context, optional /// \return resulting datum as a BooleanScalar /// /// \since 3.0.0 /// \note API not yet finalized ARROW_EXPORT -Result Any(const Datum& value, ExecContext* ctx = NULLPTR); +Result Any( + const Datum& value, + const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(), + ExecContext* ctx = NULLPTR); /// \brief Test whether all elements in a boolean array evaluate to true. /// /// This function returns true if all of the elements in the array evaluate -/// to true and false otherwise. Null values are skipped. +/// to true and false otherwise. Null values are ignored by default. +/// If null values are taken into account by setting ScalarAggregateOptions +/// parameter skip_nulls = false then Kleene logic is used. +/// See KleeneAnd for more details on Kleene logic. /// /// \param[in] value input datum, expecting a boolean array +/// \param[in] options see ScalarAggregateOptions for more information /// \param[in] ctx the function execution context, optional /// \return resulting datum as a BooleanScalar /// \since 3.0.0 /// \note API not yet finalized ARROW_EXPORT -Result All(const Datum& value, ExecContext* ctx = NULLPTR); +Result All( + const Datum& value, + const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults(), + ExecContext* ctx = NULLPTR); /// \brief Calculate the modal (most common) value of a numeric array /// diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc index 6a844817686..5e0454c9c4d 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc @@ -142,13 +142,15 @@ Result> MinMaxInit(KernelContext* ctx, // Any implementation struct BooleanAnyImpl : public ScalarAggregator { + explicit BooleanAnyImpl(ScalarAggregateOptions options) : options(std::move(options)) {} + Status Consume(KernelContext*, const ExecBatch& batch) override { // short-circuit if seen a True already if (this->any == true) { return Status::OK(); } - const auto& data = *batch[0].array(); + this->has_nulls = data.GetNullCount() > 0; arrow::internal::OptionalBinaryBitBlockCounter counter( data.buffers[0], data.offset, data.buffers[1], data.offset, data.length); int64_t position = 0; @@ -166,32 +168,48 @@ struct BooleanAnyImpl : public ScalarAggregator { Status MergeFrom(KernelContext*, KernelState&& src) override { const auto& other = checked_cast(src); this->any |= other.any; + this->has_nulls |= other.has_nulls; return Status::OK(); } - Status Finalize(KernelContext*, Datum* out) override { - out->value = std::make_shared(this->any); + Status Finalize(KernelContext* ctx, Datum* out) override { + if (!options.skip_nulls && !this->any && this->has_nulls) { + out->value = std::make_shared(); + } else { + out->value = std::make_shared(this->any); + } return Status::OK(); } bool any = false; + bool has_nulls = false; + ScalarAggregateOptions options; }; Result> AnyInit(KernelContext*, const KernelInitArgs& args) { - return ::arrow::internal::make_unique(); + const ScalarAggregateOptions options = + static_cast(*args.options); + return ::arrow::internal::make_unique( + static_cast(*args.options)); } // ---------------------------------------------------------------------- // All implementation struct BooleanAllImpl : public ScalarAggregator { + explicit BooleanAllImpl(ScalarAggregateOptions options) : options(std::move(options)) {} + Status Consume(KernelContext*, const ExecBatch& batch) override { // short-circuit if seen a false already if (this->all == false) { return Status::OK(); } - + // short-circuit if seen a null already + if (!options.skip_nulls && this->has_nulls) { + return Status::OK(); + } const auto& data = *batch[0].array(); + this->has_nulls = data.GetNullCount() > 0; arrow::internal::OptionalBinaryBitBlockCounter counter( data.buffers[1], data.offset, data.buffers[0], data.offset, data.length); int64_t position = 0; @@ -210,19 +228,27 @@ struct BooleanAllImpl : public ScalarAggregator { Status MergeFrom(KernelContext*, KernelState&& src) override { const auto& other = checked_cast(src); this->all &= other.all; + this->has_nulls |= other.has_nulls; return Status::OK(); } Status Finalize(KernelContext*, Datum* out) override { - out->value = std::make_shared(this->all); + if (!options.skip_nulls && this->all && this->has_nulls) { + out->value = std::make_shared(); + } else { + out->value = std::make_shared(this->all); + } return Status::OK(); } bool all = true; + bool has_nulls = false; + ScalarAggregateOptions options; }; Result> AllInit(KernelContext*, const KernelInitArgs& args) { - return ::arrow::internal::make_unique(); + return ::arrow::internal::make_unique( + static_cast(*args.options)); } // ---------------------------------------------------------------------- @@ -407,12 +433,22 @@ const FunctionDoc min_max_doc{"Compute the minimum and maximum values of a numer "ScalarAggregateOptions"}; const FunctionDoc any_doc{"Test whether any element in a boolean array evaluates to true", - ("Null values are ignored."), - {"array"}}; + ("Null values are ignored by default.\n" + "If null values are taken into account by setting " + "ScalarAggregateOptions parameter skip_nulls = false then " + "Kleene logic is used.\n" + "See KleeneOr for more details on Kleene logic."), + {"array"}, + "ScalarAggregateOptions"}; const FunctionDoc all_doc{"Test whether all elements in a boolean array evaluate to true", - ("Null values are ignored."), - {"array"}}; + ("Null values are ignored by default.\n" + "If null values are taken into account by setting " + "ScalarAggregateOptions parameter skip_nulls = false then " + "Kleene logic is used.\n" + "See KleeneAnd for more details on Kleene logic."), + {"array"}, + "ScalarAggregateOptions"}; const FunctionDoc index_doc{"Find the index of the first occurrence of a given value", ("The result is always computed as an int64_t, regardless\n" @@ -496,12 +532,14 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunction(std::move(func))); // any - func = std::make_shared("any", Arity::Unary(), &any_doc); + func = std::make_shared("any", Arity::Unary(), &any_doc, + &default_scalar_aggregate_options); aggregate::AddBasicAggKernels(aggregate::AnyInit, {boolean()}, boolean(), func.get()); DCHECK_OK(registry->AddFunction(std::move(func))); // all - func = std::make_shared("all", Arity::Unary(), &all_doc); + func = std::make_shared("all", Arity::Unary(), &all_doc, + &default_scalar_aggregate_options); aggregate::AddBasicAggKernels(aggregate::AllInit, {boolean()}, boolean(), func.get()); DCHECK_OK(registry->AddFunction(std::move(func))); diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc index 4bce02a990b..7318539df7f 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_test.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc @@ -942,21 +942,26 @@ TYPED_TEST(TestRandomNumericMinMaxKernel, RandomArrayMinMax) { class TestPrimitiveAnyKernel : public ::testing::Test { public: - void AssertAnyIs(const Datum& array, bool expected) { - ASSERT_OK_AND_ASSIGN(Datum out, Any(array)); + void AssertAnyIs(const Datum& array, const std::shared_ptr& expected, + const ScalarAggregateOptions& options) { + ASSERT_OK_AND_ASSIGN(Datum out, Any(array, options, nullptr)); const BooleanScalar& out_any = out.scalar_as(); - const auto expected_any = static_cast(expected); - ASSERT_EQ(out_any, expected_any); + ASSERT_EQ(out_any, *expected); } - void AssertAnyIs(const std::string& json, bool expected) { + void AssertAnyIs( + const std::string& json, const std::shared_ptr& expected, + const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults()) { auto array = ArrayFromJSON(type_singleton(), json); - AssertAnyIs(array, expected); + AssertAnyIs(array, expected, options); } - void AssertAnyIs(const std::vector& json, bool expected) { + void AssertAnyIs( + const std::vector& json, + const std::shared_ptr& expected, + const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults()) { auto array = ChunkedArrayFromJSON(type_singleton(), json); - AssertAnyIs(array, expected); + AssertAnyIs(array, expected, options); } std::shared_ptr type_singleton() { @@ -967,26 +972,47 @@ class TestPrimitiveAnyKernel : public ::testing::Test { class TestAnyKernel : public TestPrimitiveAnyKernel {}; TEST_F(TestAnyKernel, Basics) { + auto true_value = std::make_shared(true); + auto false_value = std::make_shared(false); + auto null_value = std::make_shared(); + null_value->is_valid = false; + std::vector chunked_input0 = {"[]", "[true]"}; std::vector chunked_input1 = {"[true, true, null]", "[true, null]"}; std::vector chunked_input2 = {"[false, false, false]", "[false]"}; std::vector chunked_input3 = {"[false, null]", "[null, false]"}; std::vector chunked_input4 = {"[true, null]", "[null, false]"}; - this->AssertAnyIs("[]", false); - this->AssertAnyIs("[false]", false); - this->AssertAnyIs("[true, false]", true); - this->AssertAnyIs("[null, null, null]", false); - this->AssertAnyIs("[false, false, false]", false); - this->AssertAnyIs("[false, false, false, null]", false); - this->AssertAnyIs("[true, null, true, true]", true); - this->AssertAnyIs("[false, null, false, true]", true); - this->AssertAnyIs("[true, null, false, true]", true); - this->AssertAnyIs(chunked_input0, true); - this->AssertAnyIs(chunked_input1, true); - this->AssertAnyIs(chunked_input2, false); - this->AssertAnyIs(chunked_input3, false); - this->AssertAnyIs(chunked_input4, true); + this->AssertAnyIs("[]", false_value); + this->AssertAnyIs("[false]", false_value); + this->AssertAnyIs("[true, false]", true_value); + this->AssertAnyIs("[null, null, null]", false_value); + this->AssertAnyIs("[false, false, false]", false_value); + this->AssertAnyIs("[false, false, false, null]", false_value); + this->AssertAnyIs("[true, null, true, true]", true_value); + this->AssertAnyIs("[false, null, false, true]", true_value); + this->AssertAnyIs("[true, null, false, true]", true_value); + this->AssertAnyIs(chunked_input0, true_value); + this->AssertAnyIs(chunked_input1, true_value); + this->AssertAnyIs(chunked_input2, false_value); + this->AssertAnyIs(chunked_input3, false_value); + this->AssertAnyIs(chunked_input4, true_value); + + const ScalarAggregateOptions& keep_nulls = ScalarAggregateOptions(/*skip_nulls=*/false); + this->AssertAnyIs("[]", false_value, keep_nulls); + this->AssertAnyIs("[false]", false_value, keep_nulls); + this->AssertAnyIs("[true, false]", true_value, keep_nulls); + this->AssertAnyIs("[null, null, null]", null_value, keep_nulls); + this->AssertAnyIs("[false, false, false]", false_value, keep_nulls); + this->AssertAnyIs("[false, false, false, null]", null_value, keep_nulls); + this->AssertAnyIs("[true, null, true, true]", true_value, keep_nulls); + this->AssertAnyIs("[false, null, false, true]", true_value, keep_nulls); + this->AssertAnyIs("[true, null, false, true]", true_value, keep_nulls); + this->AssertAnyIs(chunked_input0, true_value, keep_nulls); + this->AssertAnyIs(chunked_input1, true_value, keep_nulls); + this->AssertAnyIs(chunked_input2, false_value, keep_nulls); + this->AssertAnyIs(chunked_input3, null_value, keep_nulls); + this->AssertAnyIs(chunked_input4, true_value, keep_nulls); } // @@ -995,21 +1021,26 @@ TEST_F(TestAnyKernel, Basics) { class TestPrimitiveAllKernel : public ::testing::Test { public: - void AssertAllIs(const Datum& array, bool expected) { - ASSERT_OK_AND_ASSIGN(Datum out, All(array)); + void AssertAllIs(const Datum& array, const std::shared_ptr& expected, + const ScalarAggregateOptions& options) { + ASSERT_OK_AND_ASSIGN(Datum out, All(array, options, nullptr)); const BooleanScalar& out_all = out.scalar_as(); - const auto expected_all = static_cast(expected); - ASSERT_EQ(out_all, expected_all); + ASSERT_EQ(out_all, *expected); } - void AssertAllIs(const std::string& json, bool expected) { + void AssertAllIs( + const std::string& json, const std::shared_ptr& expected, + const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults()) { auto array = ArrayFromJSON(type_singleton(), json); - AssertAllIs(array, expected); + AssertAllIs(array, expected, options); } - void AssertAllIs(const std::vector& json, bool expected) { + void AssertAllIs( + const std::vector& json, + const std::shared_ptr& expected, + const ScalarAggregateOptions& options = ScalarAggregateOptions::Defaults()) { auto array = ChunkedArrayFromJSON(type_singleton(), json); - AssertAllIs(array, expected); + AssertAllIs(array, expected, options); } std::shared_ptr type_singleton() { @@ -1020,6 +1051,11 @@ class TestPrimitiveAllKernel : public ::testing::Test { class TestAllKernel : public TestPrimitiveAllKernel {}; TEST_F(TestAllKernel, Basics) { + auto true_value = std::make_shared(true); + auto false_value = std::make_shared(false); + auto null_value = std::make_shared(); + null_value->is_valid = false; + std::vector chunked_input0 = {"[]", "[true]"}; std::vector chunked_input1 = {"[true, true, null]", "[true, null]"}; std::vector chunked_input2 = {"[false, false, false]", "[false]"}; @@ -1027,21 +1063,38 @@ TEST_F(TestAllKernel, Basics) { std::vector chunked_input4 = {"[true, null]", "[null, false]"}; std::vector chunked_input5 = {"[false, null]", "[null, true]"}; - this->AssertAllIs("[]", true); - this->AssertAllIs("[false]", false); - this->AssertAllIs("[true, false]", false); - this->AssertAllIs("[null, null, null]", true); - this->AssertAllIs("[false, false, false]", false); - this->AssertAllIs("[false, false, false, null]", false); - this->AssertAllIs("[true, null, true, true]", true); - this->AssertAllIs("[false, null, false, true]", false); - this->AssertAllIs("[true, null, false, true]", false); - this->AssertAllIs(chunked_input0, true); - this->AssertAllIs(chunked_input1, true); - this->AssertAllIs(chunked_input2, false); - this->AssertAllIs(chunked_input3, false); - this->AssertAllIs(chunked_input4, false); - this->AssertAllIs(chunked_input5, false); + this->AssertAllIs("[]", true_value); + this->AssertAllIs("[false]", false_value); + this->AssertAllIs("[true, false]", false_value); + this->AssertAllIs("[null, null, null]", true_value); + this->AssertAllIs("[false, false, false]", false_value); + this->AssertAllIs("[false, false, false, null]", false_value); + this->AssertAllIs("[true, null, true, true]", true_value); + this->AssertAllIs("[false, null, false, true]", false_value); + this->AssertAllIs("[true, null, false, true]", false_value); + this->AssertAllIs(chunked_input0, true_value); + this->AssertAllIs(chunked_input1, true_value); + this->AssertAllIs(chunked_input2, false_value); + this->AssertAllIs(chunked_input3, false_value); + this->AssertAllIs(chunked_input4, false_value); + this->AssertAllIs(chunked_input5, false_value); + + const ScalarAggregateOptions keep_nulls = ScalarAggregateOptions(/*skip_nulls=*/false); + this->AssertAllIs("[]", true_value, keep_nulls); + this->AssertAllIs("[false]", false_value, keep_nulls); + this->AssertAllIs("[true, false]", false_value, keep_nulls); + this->AssertAllIs("[null, null, null]", null_value, keep_nulls); + this->AssertAllIs("[false, false, false]", false_value, keep_nulls); + this->AssertAllIs("[false, false, false, null]", false_value, keep_nulls); + this->AssertAllIs("[true, null, true, true]", null_value, keep_nulls); + this->AssertAllIs("[false, null, false, true]", false_value, keep_nulls); + this->AssertAllIs("[true, null, false, true]", false_value, keep_nulls); + this->AssertAllIs(chunked_input0, true_value, keep_nulls); + this->AssertAllIs(chunked_input1, null_value, keep_nulls); + this->AssertAllIs(chunked_input2, false_value, keep_nulls); + this->AssertAllIs(chunked_input3, false_value, keep_nulls); + this->AssertAllIs(chunked_input4, false_value, keep_nulls); + this->AssertAllIs(chunked_input5, false_value, keep_nulls); } // diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index fc6c8b7c7e1..94ecd5ac45a 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -186,9 +186,9 @@ Aggregations +---------------+-------+-------------+----------------+----------------------------------+-------+ | Function name | Arity | Input types | Output type | Options class | Notes | +===============+=======+=============+================+==================================+=======+ -| all | Unary | Boolean | Scalar Boolean | | | +| all | Unary | Boolean | Scalar Boolean | :struct:`ScalarAggregateOptions` | \(1) | +---------------+-------+-------------+----------------+----------------------------------+-------+ -| any | Unary | Boolean | Scalar Boolean | | | +| any | Unary | Boolean | Scalar Boolean | :struct:`ScalarAggregateOptions` | \(1) | +---------------+-------+-------------+----------------+----------------------------------+-------+ | count | Unary | Any | Scalar Int64 | :struct:`ScalarAggregateOptions` | | +---------------+-------+-------------+----------------+----------------------------------+-------+ @@ -196,15 +196,15 @@ Aggregations +---------------+-------+-------------+----------------+----------------------------------+-------+ | mean | Unary | Numeric | Scalar Float64 | :struct:`ScalarAggregateOptions` | | +---------------+-------+-------------+----------------+----------------------------------+-------+ -| min_max | Unary | Numeric | Scalar Struct | :struct:`ScalarAggregateOptions` | \(1) | +| min_max | Unary | Numeric | Scalar Struct | :struct:`ScalarAggregateOptions` | \(2) | +---------------+-------+-------------+----------------+----------------------------------+-------+ -| mode | Unary | Numeric | Struct | :struct:`ModeOptions` | \(2) | +| mode | Unary | Numeric | Struct | :struct:`ModeOptions` | \(3) | +---------------+-------+-------------+----------------+----------------------------------+-------+ -| quantile | Unary | Numeric | Scalar Numeric | :struct:`QuantileOptions` | \(3) | +| quantile | Unary | Numeric | Scalar Numeric | :struct:`QuantileOptions` | \(4) | +---------------+-------+-------------+----------------+----------------------------------+-------+ | stddev | Unary | Numeric | Scalar Float64 | :struct:`VarianceOptions` | | +---------------+-------+-------------+----------------+----------------------------------+-------+ -| sum | Unary | Numeric | Scalar Numeric | :struct:`ScalarAggregateOptions` | \(4) | +| sum | Unary | Numeric | Scalar Numeric | :struct:`ScalarAggregateOptions` | \(5) | +---------------+-------+-------------+----------------+----------------------------------+-------+ | tdigest | Unary | Numeric | Scalar Float64 | :struct:`TDigestOptions` | | +---------------+-------+-------------+----------------+----------------------------------+-------+ @@ -213,18 +213,21 @@ Aggregations Notes: -* \(1) Output is a ``{"min": input type, "max": input type}`` Struct. +* \(1) If null values are taken into account by setting ScalarAggregateOptions + parameter skip_nulls = false then `Kleene logic`_ logic is applied. -* \(2) Output is an array of ``{"mode": input type, "count": Int64}`` Struct. +* \(2) Output is a ``{"min": input type, "max": input type}`` Struct. + +* \(3) Output is an array of ``{"mode": input type, "count": Int64}`` Struct. It contains the *N* most common elements in the input, in descending order, where *N* is given in :member:`ModeOptions::n`. If two values have the same count, the smallest one comes first. Note that the output can have less than *N* elements if the input has less than *N* distinct values. -* \(3) Output is Float64 or input type, depending on QuantileOptions. +* \(4) Output is Float64 or input type, depending on QuantileOptions. -* \(4) Output is Int64, UInt64 or Float64, depending on the input type. +* \(5) Output is Int64, UInt64 or Float64, depending on the input type. Element-wise ("scalar") functions --------------------------------- diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 37040ec86b5..b65970745ec 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -548,30 +548,40 @@ def test_min_max(): def test_any(): # ARROW-1846 + + options = pc.ScalarAggregateOptions(skip_nulls=False) a = pa.array([False, None, True]) assert pc.any(a).as_py() is True + assert pc.any(a, options=options).as_py() is True a = pa.array([False, None, False]) assert pc.any(a).as_py() is False + assert pc.any(a, options=options).as_py() is None def test_all(): # ARROW-10301 + options = pc.ScalarAggregateOptions(skip_nulls=False) a = pa.array([], type='bool') assert pc.all(a).as_py() is True + assert pc.all(a, options=options).as_py() is True a = pa.array([False, True]) assert pc.all(a).as_py() is False + assert pc.all(a, options=options).as_py() is False a = pa.array([True, None]) assert pc.all(a).as_py() is True + assert pc.all(a, options=options).as_py() is None a = pa.chunked_array([[True], [True, None]]) assert pc.all(a).as_py() is True + assert pc.all(a, options=options).as_py() is None a = pa.chunked_array([[True], [False]]) assert pc.all(a).as_py() is False + assert pc.all(a, options=options).as_py() is False def test_is_valid(): diff --git a/r/R/compute.R b/r/R/compute.R index 5a00e884980..4277ad8d6df 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -203,31 +203,12 @@ unique.ArrowDatum <- function(x, incomparables = FALSE, ...) { #' @export any.ArrowDatum <- function(..., na.rm = FALSE) { - - a <- collect_arrays_from_dots(list(...)) - result <- call_function("any", a) - - if (!na.rm && a$null_count > 0 && !as.vector(result)) { - # Three-valued logic: with na.rm = FALSE, any(c(TRUE, NA)) returns TRUE but any(c(FALSE, NA)) returns NA - # TODO: C++ library should take na.rm for any/all (like ARROW-9054) - Scalar$create(NA) - } else { - result - } + scalar_aggregate("any", ..., na.rm = na.rm) } #' @export all.ArrowDatum <- function(..., na.rm = FALSE) { - - a <- collect_arrays_from_dots(list(...)) - result <- call_function("all", a) - - if (!na.rm && a$null_count > 0 && as.vector(result)) { - # See comment above in any() about three-valued logic - Scalar$create(NA) - } else { - result - } + scalar_aggregate("all", ..., na.rm = na.rm) } #' `match` and `%in%` for Arrow objects diff --git a/r/src/compute.cpp b/r/src/compute.cpp index cfa895ecb1e..6bb55cbe208 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -172,7 +172,7 @@ std::shared_ptr make_compute_options( } if (func_name == "min_max" || func_name == "sum" || func_name == "mean" || - func_name == "count") { + func_name == "count" || func_name == "any" || func_name == "all") { using Options = arrow::compute::ScalarAggregateOptions; auto out = std::make_shared(Options::Defaults()); out->min_count = cpp11::as_cpp(options["na.min_count"]); diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R index 41418014bea..428f799c97b 100644 --- a/r/tests/testthat/test-compute-aggregate.R +++ b/r/tests/testthat/test-compute-aggregate.R @@ -394,12 +394,14 @@ test_that("any.Array and any.ChunkedArray", { data <- c(1:10, NA, NA) expect_vector_equal(any(input > 5), data) + expect_vector_equal(any(input > 5, na.rm = TRUE), data) expect_vector_equal(any(input < 1), data) expect_vector_equal(any(input < 1, na.rm = TRUE), data) data_logical <- c(TRUE, FALSE, TRUE, NA, FALSE) expect_vector_equal(any(input), data_logical) + expect_vector_equal(any(input, na.rm = FALSE), data_logical) expect_vector_equal(any(input, na.rm = TRUE), data_logical) }) @@ -409,6 +411,8 @@ test_that("all.Array and all.ChunkedArray", { data <- c(1:10, NA, NA) expect_vector_equal(all(input > 5), data) + expect_vector_equal(all(input > 5, na.rm = TRUE), data) + expect_vector_equal(all(input < 11), data) expect_vector_equal(all(input < 11, na.rm = TRUE), data)