diff --git a/cpp/src/arrow/array/array_primitive.cc b/cpp/src/arrow/array/array_primitive.cc
index 0f70e3c280e..519a7f21f43 100644
--- a/cpp/src/arrow/array/array_primitive.cc
+++ b/cpp/src/arrow/array/array_primitive.cc
@@ -22,6 +22,7 @@
 
 #include "arrow/array/array_base.h"
 #include "arrow/type.h"
+#include "arrow/util/bit_block_counter.h"
 #include "arrow/util/logging.h"
 
 namespace arrow {
@@ -49,6 +50,41 @@ BooleanArray::BooleanArray(int64_t length, const std::shared_ptr& data,
                            int64_t offset)
     : PrimitiveArray(boolean(), length, data, null_bitmap, null_count, offset) {}
 
+int64_t BooleanArray::false_count() const {
+  // Every valid slot is either true or false, so the false values are what
+  // is left after removing the nulls and the true values.
+  return this->length() - this->null_count() - this->true_count();
+}
+
+int64_t BooleanArray::true_count() const {
+  // buffers[1] is read as the value bitmap and buffers[0] as the validity
+  // bitmap. Take the masked path only when a validity bitmap is actually
+  // present, so that a non-zero null_count without one cannot dereference a
+  // null buffer.
+  // NOTE(review): presumably null_count may hold a non-zero "not yet computed"
+  // sentinel for arrays without a validity bitmap -- confirm against ArrayData.
+  if (data_->null_count.load() != 0 && data_->buffers[0] != nullptr) {
+    internal::BinaryBitBlockCounter bit_counter(data_->buffers[0]->data(), data_->offset,
+                                                data_->buffers[1]->data(), data_->offset,
+                                                data_->length);
+    // Sum the per-block popcounts reported by NextAndWord().
+    int64_t count = 0;
+    while (true) {
+      internal::BitBlockCount block = bit_counter.NextAndWord();
+      if (block.length == 0) {
+        // The counter reported an empty block: stop.
+        break;
+      }
+      count += block.popcount;
+    }
+    return count;
+  } else {
+    // Nothing to mask out: count the set bits of the value bitmap.
+    return internal::CountSetBits(data_->buffers[1]->data(), data_->offset,
+                                  data_->length);
+  }
+}
+
 // ----------------------------------------------------------------------
 // Day time interval
 
diff --git a/cpp/src/arrow/array/array_primitive.h b/cpp/src/arrow/array/array_primitive.h
index e58f5f4c8b6..c58fee77cef 100644
--- a/cpp/src/arrow/array/array_primitive.h
+++ b/cpp/src/arrow/array/array_primitive.h
@@ -84,6 +84,14 @@ class ARROW_EXPORT BooleanArray : public PrimitiveArray {
 
   bool GetView(int64_t i) const { return Value(i); }
 
+  /// \brief Return the number of false (0) values among the valid
+  /// values. Result is not cached.
+  int64_t false_count() const;
+
+  /// \brief Return the number of true (1) values among the valid
+  /// values. Result is not cached.
+  int64_t true_count() const;
+
  protected:
   using PrimitiveArray::PrimitiveArray;
 };
diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc
index 21c49093585..24f180f73c5 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -656,6 +656,39 @@ void TestPrimitiveBuilder::Check(const std::unique_ptr
   ASSERT_EQ(0, builder->null_count());
 }
 
+TEST(TestBooleanArray, TrueCountFalseCount) {
+  random::RandomArrayGenerator rng(/*seed=*/0);
+
+  const int64_t length = 10000;
+  auto arr = rng.Boolean(length, /*true_probability=*/0.5, /*null_probability=*/0.1);
+  // Also cover an array generated with a null probability of zero.
+  auto arr_no_nulls =
+      rng.Boolean(length, /*true_probability=*/0.5, /*null_probability=*/0.0);
+
+  // Compare the reported counts against a slot-by-slot tally of the valid values.
+  auto CheckArray = [&](const BooleanArray& values) {
+    int64_t expected_false = 0;
+    int64_t expected_true = 0;
+    for (int64_t i = 0; i < values.length(); ++i) {
+      if (values.IsValid(i)) {
+        if (values.Value(i)) {
+          ++expected_true;
+        } else {
+          ++expected_false;
+        }
+      }
+    }
+    ASSERT_EQ(values.true_count(), expected_true);
+    ASSERT_EQ(values.false_count(), expected_false);
+  };
+
+  CheckArray(checked_cast<const BooleanArray&>(*arr));
+  CheckArray(checked_cast<const BooleanArray&>(*arr->Slice(5)));
+  CheckArray(checked_cast<const BooleanArray&>(*arr->Slice(0, 0)));
+  CheckArray(checked_cast<const BooleanArray&>(*arr_no_nulls));
+  CheckArray(checked_cast<const BooleanArray&>(*arr_no_nulls->Slice(5)));
+}
+
 TEST(TestPrimitiveAdHoc, TestType) {
   Int8Builder i8(default_memory_pool());
   ASSERT_TRUE(i8.type()->Equals(int8()));
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 94e30c8fe6b..8be7ebac7f4 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1158,6 +1158,19 @@ cdef class BooleanArray(Array):
     """
     Concrete class for Arrow arrays of boolean data type.
     """
+    @property
+    def false_count(self):
+        """
+        Number of false values among the valid (non-null) values.
+        """
+        return (<CBooleanArray*> self.ap).false_count()
+
+    @property
+    def true_count(self):
+        """
+        Number of true values among the valid (non-null) values.
+        """
+        return (<CBooleanArray*> self.ap).true_count()
 
 
 cdef class NumericArray(Array):
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index d8084f0c401..757d8a70fdb 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -445,6 +445,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
 
     cdef cppclass CBooleanArray" arrow::BooleanArray"(CArray):
         c_bool Value(int i)
+        int64_t false_count()
+        int64_t true_count()
 
     cdef cppclass CUInt8Array" arrow::UInt8Array"(CArray):
         uint8_t Value(int i)
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 8bfb072702a..883261e2031 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1905,6 +1905,18 @@ def test_array_from_strided_bool():
     assert result.equals(expected)
 
 
+def test_boolean_true_count_false_count():
+    # ARROW-9145
+    arr = pa.array([True, True, None, False, None, True] * 1000)
+    assert arr.true_count == 3000
+    assert arr.false_count == 1000
+
+    # An array built without any nulls.
+    no_nulls = pa.array([True, False, True])
+    assert no_nulls.true_count == 2
+    assert no_nulls.false_count == 1
+
+
 def test_buffers_primitive():
     a = pa.array([1, 2, None, 4], type=pa.int16())
     buffers = a.buffers()