Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cpp/src/arrow/compute/api_aggregate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ Result<Datum> MinMax(const Datum& value, const MinMaxOptions& options, ExecConte
return CallFunction("min_max", {value}, &options, ctx);
}

// Convenience wrapper: dispatches to the function registered under the name
// "any", passing `value` as its only argument.
Result<Datum> Any(const Datum& value, ExecContext* ctx) {
  Result<Datum> result = CallFunction("any", {value}, ctx);
  return result;
}

// Convenience wrapper: dispatches to the function registered under the name
// "mode", passing `value` as its only argument and forwarding `options`.
Result<Datum> Mode(const Datum& value, const ModeOptions& options, ExecContext* ctx) {
  Result<Datum> result = CallFunction("mode", {value}, &options, ctx);
  return result;
}
Expand Down
16 changes: 15 additions & 1 deletion cpp/src/arrow/compute/api_aggregate.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,21 @@ Result<Datum> MinMax(const Datum& value,
const MinMaxOptions& options = MinMaxOptions::Defaults(),
ExecContext* ctx = NULLPTR);

/// \brief Calculate the modal (most common) values of a numeric array
/// \brief Test whether any element in a boolean array evaluates to true.
///
/// This function returns true if any of the elements in the array evaluates
/// to true and false otherwise. Null values are skipped, so an empty array or
/// an array containing only nulls yields false.
///
/// \param[in] value input datum, expecting a boolean array
/// \param[in] ctx the function execution context, optional
/// \return resulting datum as a BooleanScalar
///
/// \since 3.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> Any(const Datum& value, ExecContext* ctx = NULLPTR);

/// \brief Calculate the modal (most common) value of a numeric array
///
/// This function returns top-n most common values and number of times they occur as
/// an array of `struct<mode: T, count: int64>`, where T is the input type.
Expand Down
49 changes: 49 additions & 0 deletions cpp/src/arrow/compute/kernels/aggregate_basic.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,45 @@ std::unique_ptr<KernelState> MinMaxInit(KernelContext* ctx, const KernelInitArgs
return visitor.Create();
}

// ----------------------------------------------------------------------
// Any implementation

struct BooleanAnyImpl : public ScalarAggregator {
void Consume(KernelContext*, const ExecBatch& batch) override {
// short-circuit if seen a True already
if (this->any == true) {
return;
}

const auto& data = *batch[0].array();
arrow::internal::OptionalBinaryBitBlockCounter counter(
data.buffers[0], data.offset, data.buffers[1], data.offset, data.length);
int64_t position = 0;
while (position < data.length) {
const auto block = counter.NextAndBlock();
if (block.popcount > 0) {
this->any = true;
break;
}
position += block.length;
}
}

void MergeFrom(KernelContext*, KernelState&& src) override {
const auto& other = checked_cast<const BooleanAnyImpl&>(src);
this->any |= other.any;
}

void Finalize(KernelContext*, Datum* out) override {
out->value = std::make_shared<BooleanScalar>(this->any);
}
bool any = false;
};

// Kernel init hook for "any": creates a fresh BooleanAnyImpl state.
// Neither the context nor `args` is consulted.
std::unique_ptr<KernelState> AnyInit(KernelContext*, const KernelInitArgs& args) {
  std::unique_ptr<KernelState> state =
      ::arrow::internal::make_unique<BooleanAnyImpl>();
  return state;
}

void AddBasicAggKernels(KernelInit init,
const std::vector<std::shared_ptr<DataType>>& types,
std::shared_ptr<DataType> out_ty, ScalarAggregateFunction* func,
Expand Down Expand Up @@ -198,6 +237,11 @@ const FunctionDoc min_max_doc{"Compute the minimum and maximum values of a numer
{"array"},
"MinMaxOptions"};

// Documentation attached to the "any" function: summary, description and the
// name of its single argument.
const FunctionDoc any_doc{"Test whether any element in a boolean array evaluates to true.",
                          "Null values are ignored.",
                          {"array"}};

} // namespace

void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
Expand Down Expand Up @@ -268,6 +312,11 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
#endif

DCHECK_OK(registry->AddFunction(std::move(func)));

// any
func = std::make_shared<ScalarAggregateFunction>("any", Arity::Unary(), &any_doc);
aggregate::AddBasicAggKernels(aggregate::AnyInit, {boolean()}, boolean(), func.get());
DCHECK_OK(registry->AddFunction(std::move(func)));
}

} // namespace internal
Expand Down
53 changes: 53 additions & 0 deletions cpp/src/arrow/compute/kernels/aggregate_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,59 @@ TYPED_TEST(TestRandomNumericMinMaxKernel, RandomArrayMinMax) {
}
}

//
// Any
//

// Fixture with helpers that run the Any() aggregate on boolean input and
// compare the resulting scalar against an expected bool.
class TestPrimitiveAnyKernel : public ::testing::Test {
 public:
  // Core check: Any(array) must succeed and equal BooleanScalar(expected).
  void AssertAnyIs(const Datum& array, bool expected) {
    ASSERT_OK_AND_ASSIGN(Datum out, Any(array));
    const BooleanScalar expected_any(expected);
    const BooleanScalar& out_any = out.scalar_as<BooleanScalar>();
    ASSERT_EQ(out_any, expected_any);
  }

  // Same check, with the input given as the JSON text of one boolean array.
  void AssertAnyIs(const std::string& json, bool expected) {
    auto parsed = ArrayFromJSON(type_singleton(), json);
    AssertAnyIs(parsed, expected);
  }

  // Same check, with the input given as one JSON text per chunk.
  void AssertAnyIs(const std::vector<std::string>& json, bool expected) {
    auto parsed = ChunkedArrayFromJSON(type_singleton(), json);
    AssertAnyIs(parsed, expected);
  }

  // Data type used when parsing the JSON inputs (boolean).
  std::shared_ptr<DataType> type_singleton() {
    return TypeTraits<BooleanType>::type_singleton();
  }
};

// Concrete fixture for the TEST_F cases; adds nothing to TestPrimitiveAnyKernel.
class TestAnyKernel : public TestPrimitiveAnyKernel {};

// Exercises Any() on single arrays and on chunked arrays: empty input,
// all-false, all-null, and mixes of true/false/null.
TEST_F(TestAnyKernel, Basics) {
// Chunked inputs: each string is the JSON for one chunk.
std::vector<std::string> chunked_input0 = {"[]", "[true]"};
std::vector<std::string> chunked_input1 = {"[true, true, null]", "[true, null]"};
std::vector<std::string> chunked_input2 = {"[false, false, false]", "[false]"};
std::vector<std::string> chunked_input3 = {"[false, null]", "[null, false]"};
std::vector<std::string> chunked_input4 = {"[true, null]", "[null, false]"};

// Single-array cases; an empty array is expected to yield false.
this->AssertAnyIs("[]", false);
this->AssertAnyIs("[false]", false);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please also test with an empty array.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

this->AssertAnyIs("[true, false]", true);
// Nulls alone never make the result true.
this->AssertAnyIs("[null, null, null]", false);
this->AssertAnyIs("[false, false, false]", false);
this->AssertAnyIs("[false, false, false, null]", false);
this->AssertAnyIs("[true, null, true, true]", true);
this->AssertAnyIs("[false, null, false, true]", true);
this->AssertAnyIs("[true, null, false, true]", true);
// Chunked cases: the result must account for every chunk.
this->AssertAnyIs(chunked_input0, true);
this->AssertAnyIs(chunked_input1, true);
this->AssertAnyIs(chunked_input2, false);
this->AssertAnyIs(chunked_input3, false);
this->AssertAnyIs(chunked_input4, true);
}

//
// Mode
//
Expand Down
3 changes: 3 additions & 0 deletions docs/source/cpp/compute.rst
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ Aggregations
+--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
| Function name            | Arity      | Input types        | Output type           | Options class                              |
+==========================+============+====================+=======================+============================================+
| any                      | Unary      | Boolean            | Scalar Boolean        |                                            |
+--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
| count                    | Unary      | Any                | Scalar Int64          | :struct:`CountOptions`                     |
+--------------------------+------------+--------------------+-----------------------+--------------------------------------------+
| mean                     | Unary      | Numeric            | Scalar Float64        |                                            |
Expand Down
1 change: 1 addition & 0 deletions docs/source/python/api/compute.rst
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ logic variants are provided (suffixed ``_kleene``). See User Guide for details.

and_
and_kleene
any
invert
or_
or_kleene
Expand Down
9 changes: 9 additions & 0 deletions python/pyarrow/tests/test_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,15 @@ def test_min_max():
s = pc.min_max()


def test_any():
    """pc.any is True only when a non-null True is present (ARROW-1846)."""
    # ARROW-1846
    with_true = pa.array([False, None, True])
    assert pc.any(with_true).as_py() is True

    without_true = pa.array([False, None, False])
    assert pc.any(without_true).as_py() is False


def test_is_valid():
# An example generated function wrapper without options
data = [4, 5, None]
Expand Down