diff --git a/cpp/src/arrow/array/array_primitive.h b/cpp/src/arrow/array/array_primitive.h index 000934ea45c..e58f5f4c8b6 100644 --- a/cpp/src/arrow/array/array_primitive.h +++ b/cpp/src/arrow/array/array_primitive.h @@ -26,6 +26,7 @@ #include "arrow/array/array_base.h" #include "arrow/array/data.h" #include "arrow/type.h" +#include "arrow/type_fwd.h" // IWYU pragma: export #include "arrow/type_traits.h" #include "arrow/util/bit_util.h" #include "arrow/util/macros.h" diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index bd31cc6d6a9..c968cdd1a43 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -41,6 +41,7 @@ endif() add_arrow_test(utility-test SOURCES align_util_test.cc + bit_block_counter_test.cc bit_util_test.cc checked_cast_test.cc compression_test.cc @@ -68,6 +69,7 @@ add_arrow_test(threading-utility-test task_group_test thread_pool_test) +add_arrow_benchmark(bit_block_counter_benchmark) add_arrow_benchmark(bit_util_benchmark) add_arrow_benchmark(compression_benchmark) add_arrow_benchmark(decimal_benchmark) diff --git a/cpp/src/arrow/util/bit_block_counter.cc b/cpp/src/arrow/util/bit_block_counter.cc index a596c07ef85..ca5c159350c 100644 --- a/cpp/src/arrow/util/bit_block_counter.cc +++ b/cpp/src/arrow/util/bit_block_counter.cc @@ -17,59 +17,143 @@ #include "arrow/util/bit_block_counter.h" +#include #include #include #include "arrow/util/bit_util.h" -#include "arrow/util/bitmap_ops.h" #include "arrow/util/ubsan.h" namespace arrow { namespace internal { -BitBlockCounter::Block BitBlockCounter::NextBlock() { - auto load_word = [](const uint8_t* bytes) -> uint64_t { - return BitUtil::ToLittleEndian(util::SafeLoadAs(bytes)); - }; - auto shift_word = [](uint64_t current, uint64_t next, int64_t shift) -> uint64_t { - return (current >> shift) | (next << (64 - shift)); - }; +static constexpr int64_t kWordBits = 64; +static constexpr int64_t kFourWordsBits = 256; - // When the offset is > 0, we need there to be a word beyond the last aligned - // word in the bitmap for the bit shifting logic. - const int64_t bits_required_to_scan_words = offset_ == 0 ? 256 : 256 + (64 - offset_); - if (bits_remaining_ < bits_required_to_scan_words) { - // End of the bitmap, leave it to the caller to decide how to best check - // these bits, no need to do redundant computation here. - const int16_t run_length = static_cast(bits_remaining_); - bits_remaining_ -= run_length; - return {run_length, static_cast(CountSetBits(bitmap_, offset_, run_length))}; +static inline uint64_t LoadWord(const uint8_t* bytes) { + return BitUtil::ToLittleEndian(util::SafeLoadAs(bytes)); +} + +static inline uint64_t ShiftWord(uint64_t current, uint64_t next, int64_t shift) { + if (shift == 0) { + return current; + } + return (current >> shift) | (next << (64 - shift)); +} + +BitBlockCount BitBlockCounter::GetBlockSlow(int64_t block_size) { + const int16_t run_length = static_cast(std::min(bits_remaining_, block_size)); + int16_t popcount = static_cast(CountSetBits(bitmap_, offset_, run_length)); + bits_remaining_ -= run_length; + // This code path should trigger _at most_ 2 times. In the "two times" + // case, the first time the run length will be a multiple of 8 by construction + bitmap_ += run_length / 8; + return {run_length, popcount}; +} + +BitBlockCount BitBlockCounter::NextWord() { + if (!bits_remaining_) { + return {0, 0}; + } + int64_t popcount = 0; + if (offset_ == 0) { + if (bits_remaining_ < kWordBits) { + return GetBlockSlow(kWordBits); + } + popcount = BitUtil::PopCount(LoadWord(bitmap_)); + } else { + // When the offset is > 0, we need there to be a word beyond the last + // aligned word in the bitmap for the bit shifting logic. + if (bits_remaining_ < 2 * kWordBits - offset_) { + return GetBlockSlow(kWordBits); + } + popcount = + BitUtil::PopCount(ShiftWord(LoadWord(bitmap_), LoadWord(bitmap_ + 8), offset_)); } + bitmap_ += kWordBits / 8; + bits_remaining_ -= kWordBits; + return {64, static_cast(popcount)}; +} +BitBlockCount BitBlockCounter::NextFourWords() { + if (!bits_remaining_) { + return {0, 0}; + } int64_t total_popcount = 0; if (offset_ == 0) { - total_popcount += BitUtil::PopCount(load_word(bitmap_)); - total_popcount += BitUtil::PopCount(load_word(bitmap_ + 8)); - total_popcount += BitUtil::PopCount(load_word(bitmap_ + 16)); - total_popcount += BitUtil::PopCount(load_word(bitmap_ + 24)); + if (bits_remaining_ < kFourWordsBits) { + return GetBlockSlow(kFourWordsBits); + } + total_popcount += BitUtil::PopCount(LoadWord(bitmap_)); + total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 8)); + total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 16)); + total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 24)); } else { - auto current = load_word(bitmap_); - auto next = load_word(bitmap_ + 8); - total_popcount += BitUtil::PopCount(shift_word(current, next, offset_)); + // When the offset is > 0, we need there to be a word beyond the last + // aligned word in the bitmap for the bit shifting logic. + if (bits_remaining_ < 5 * kFourWordsBits - offset_) { + return GetBlockSlow(kFourWordsBits); + } + auto current = LoadWord(bitmap_); + auto next = LoadWord(bitmap_ + 8); + total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_)); current = next; - next = load_word(bitmap_ + 16); - total_popcount += BitUtil::PopCount(shift_word(current, next, offset_)); + next = LoadWord(bitmap_ + 16); + total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_)); current = next; - next = load_word(bitmap_ + 24); - total_popcount += BitUtil::PopCount(shift_word(current, next, offset_)); + next = LoadWord(bitmap_ + 24); + total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_)); current = next; - next = load_word(bitmap_ + 32); - total_popcount += BitUtil::PopCount(shift_word(current, next, offset_)); + next = LoadWord(bitmap_ + 32); + total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_)); } - bitmap_ += BitUtil::BytesForBits(kTargetBlockLength); - bits_remaining_ -= 256; + bitmap_ += BitUtil::BytesForBits(kFourWordsBits); + bits_remaining_ -= kFourWordsBits; return {256, static_cast(total_popcount)}; } +BitBlockCount BinaryBitBlockCounter::NextAndWord() { + if (!bits_remaining_) { + return {0, 0}; + } + + // When the offset is > 0, we need there to be a word beyond the last aligned + // word in the bitmap for the bit shifting logic. + const int64_t bits_required_to_use_words = + std::max(left_offset_ == 0 ? 64 : 64 + (64 - left_offset_), + right_offset_ == 0 ? 64 : 64 + (64 - right_offset_)); + if (bits_remaining_ < bits_required_to_use_words) { + const int16_t run_length = static_cast(std::min(bits_remaining_, kWordBits)); + int16_t popcount = 0; + for (int64_t i = 0; i < run_length; ++i) { + if (BitUtil::GetBit(left_bitmap_, left_offset_ + i) && + BitUtil::GetBit(right_bitmap_, right_offset_ + i)) { + ++popcount; + } + } + // This code path should trigger _at most_ 2 times. In the "two times" + // case, the first time the run length will be a multiple of 8. + left_bitmap_ += run_length / 8; + right_bitmap_ += run_length / 8; + bits_remaining_ -= run_length; + return {run_length, popcount}; + } + + int64_t popcount = 0; + if (left_offset_ == 0 && right_offset_ == 0) { + popcount = BitUtil::PopCount(LoadWord(left_bitmap_) & LoadWord(right_bitmap_)); + } else { + auto left_word = + ShiftWord(LoadWord(left_bitmap_), LoadWord(left_bitmap_ + 8), left_offset_); + auto right_word = + ShiftWord(LoadWord(right_bitmap_), LoadWord(right_bitmap_ + 8), right_offset_); + popcount = BitUtil::PopCount(left_word & right_word); + } + left_bitmap_ += kWordBits / 8; + right_bitmap_ += kWordBits / 8; + bits_remaining_ -= kWordBits; + return {64, static_cast(popcount)}; +} + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/bit_block_counter.h b/cpp/src/arrow/util/bit_block_counter.h index 92e2b9d3aa4..3e01ddafdff 100644 --- a/cpp/src/arrow/util/bit_block_counter.h +++ b/cpp/src/arrow/util/bit_block_counter.h @@ -19,37 +19,83 @@ #include +#include "arrow/util/bit_util.h" +#include "arrow/util/bitmap_ops.h" +#include "arrow/util/ubsan.h" #include "arrow/util/visibility.h" namespace arrow { namespace internal { -/// \brief A class that scans through a true/false bitmap to yield blocks of up -/// to 256 bits at a time along with their popcount. This is used to accelerate -/// processing of mostly-not-null array data. +/// \brief Return value from bit block counters: the total number of bits and +/// the number of set bits. +struct BitBlockCount { + int16_t length; + int16_t popcount; +}; + +/// \brief A class that scans through a true/false bitmap to compute popcounts +/// 64 or 256 bits at a time. This is used to accelerate processing of +/// mostly-not-null array data. class ARROW_EXPORT BitBlockCounter { public: - struct Block { - int16_t length; - int16_t popcount; - }; - - static constexpr int16_t kTargetBlockLength = 256; - BitBlockCounter(const uint8_t* bitmap, int64_t start_offset, int64_t length) : bitmap_(bitmap + start_offset / 8), bits_remaining_(length), offset_(start_offset % 8) {} - /// \brief Return the next run of available bits, up to 256. The returned - /// pair contains the size of run and the number of true values - Block NextBlock(); + /// \brief Return the next run of available bits, usually 256. The returned + /// pair contains the size of run and the number of true values. The last + /// block will have a length less than 256 if the bitmap length is not a + /// multiple of 256, and will return 0-length blocks in subsequent + /// invocations. + BitBlockCount NextFourWords(); + + /// \brief Return the next run of available bits, usually 64. The returned + /// pair contains the size of run and the number of true values. The last + /// block will have a length less than 64 if the bitmap length is not a + /// multiple of 64, and will return 0-length blocks in subsequent + /// invocations. + BitBlockCount NextWord(); private: + /// \brief Return block with the requested size when doing word-wise + /// computation is not possible due to inadequate bits remaining. + BitBlockCount GetBlockSlow(int64_t block_size); + const uint8_t* bitmap_; int64_t bits_remaining_; int64_t offset_; }; +/// \brief A class that computes popcounts on the result of bitwise operations +/// between two bitmaps, 64 bits at a time. A 64-bit word is loaded from each +/// bitmap, then the popcount is computed on e.g. the bitwise-and of the two +/// words. +class ARROW_EXPORT BinaryBitBlockCounter { + public: + BinaryBitBlockCounter(const uint8_t* left_bitmap, int64_t left_offset, + const uint8_t* right_bitmap, int64_t right_offset, int64_t length) + : left_bitmap_(left_bitmap + left_offset / 8), + left_offset_(left_offset % 8), + right_bitmap_(right_bitmap + right_offset / 8), + right_offset_(right_offset % 8), + bits_remaining_(length) {} + + /// \brief Return the popcount of the bitwise-and of the next run of + /// available bits, up to 64. The returned pair contains the size of run and + /// the number of true values. The last block will have a length less than 64 + /// if the bitmap length is not a multiple of 64, and will return 0-length + /// blocks in subsequent invocations. + BitBlockCount NextAndWord(); + + private: + const uint8_t* left_bitmap_; + int64_t left_offset_; + const uint8_t* right_bitmap_; + int64_t right_offset_; + int64_t bits_remaining_; +}; + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/bit_block_counter_benchmark.cc b/cpp/src/arrow/util/bit_block_counter_benchmark.cc new file mode 100644 index 00000000000..429993442a3 --- /dev/null +++ b/cpp/src/arrow/util/bit_block_counter_benchmark.cc @@ -0,0 +1,266 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "benchmark/benchmark.h" + +#include +#include +#include + +#include "arrow/array/array_base.h" +#include "arrow/array/array_primitive.h" +#include "arrow/testing/random.h" +#include "arrow/util/bit_block_counter.h" +#include "arrow/util/bit_util.h" +#include "arrow/util/bitmap_reader.h" + +namespace arrow { +namespace internal { + +struct UnaryBitBlockBenchmark { + benchmark::State& state; + int64_t offset; + int64_t bitmap_length; + std::shared_ptr arr; + int64_t expected; + + explicit UnaryBitBlockBenchmark(benchmark::State& state, int64_t offset = 0) + : state(state), offset(offset), bitmap_length(1 << 20) { + random::RandomArrayGenerator rng(/*seed=*/0); + // State parameter is the average number of total values for each null + // value. So 100 means that 1 out of 100 on average are null. + double null_probability = 1. / static_cast(state.range(0)); + arr = rng.Int8(bitmap_length, 0, 100, null_probability); + + // Compute the expected result + this->expected = 0; + const auto& int8_arr = static_cast(*arr); + for (int64_t i = this->offset; i < bitmap_length; ++i) { + if (int8_arr.IsValid(i)) { + this->expected += int8_arr.Value(i); + } + } + } + + template + void BenchBitBlockCounter(NextBlockFunc&& next_block) { + const auto& int8_arr = static_cast(*arr); + const uint8_t* bitmap = arr->null_bitmap_data(); + for (auto _ : state) { + BitBlockCounter scanner(bitmap, this->offset, bitmap_length - this->offset); + int64_t result = 0; + int64_t position = this->offset; + while (true) { + BitBlockCount block = next_block(&scanner); + if (block.length == 0) { + break; + } + if (block.length == block.popcount) { + // All not-null + for (int64_t i = 0; i < block.length; ++i) { + result += int8_arr.Value(position + i); + } + } else if (block.popcount > 0) { + // Some but not all not-null + for (int64_t i = 0; i < block.length; ++i) { + if (BitUtil::GetBit(bitmap, position + i)) { + result += int8_arr.Value(position + i); + } + } + } + position += block.length; + } + // Sanity check + if (result != expected) { + std::abort(); + } + } + state.SetItemsProcessed(state.iterations() * bitmap_length); + } + + void BenchBitmapReader() { + const auto& int8_arr = static_cast(*arr); + for (auto _ : state) { + internal::BitmapReader bit_reader(arr->null_bitmap_data(), this->offset, + bitmap_length - this->offset); + int64_t result = 0; + for (int64_t i = this->offset; i < bitmap_length; ++i) { + if (bit_reader.IsSet()) { + result += int8_arr.Value(i); + } + bit_reader.Next(); + } + // Sanity check + if (result != expected) { + std::abort(); + } + } + state.SetItemsProcessed(state.iterations() * bitmap_length); + } +}; + +struct BinaryBitBlockBenchmark { + benchmark::State& state; + int64_t offset; + int64_t bitmap_length; + std::shared_ptr left; + std::shared_ptr right; + int64_t expected; + const Int8Array* left_int8; + const Int8Array* right_int8; + + explicit BinaryBitBlockBenchmark(benchmark::State& state, int64_t offset = 0) + : state(state), offset(offset), bitmap_length(1 << 20) { + random::RandomArrayGenerator rng(/*seed=*/0); + + // State parameter is the average number of total values for each null + // value. So 100 means that 1 out of 100 on average are null. + double null_probability = 1. / static_cast(state.range(0)); + left = rng.Int8(bitmap_length, 0, 100, null_probability); + right = rng.Int8(bitmap_length, 0, 50, null_probability); + left_int8 = static_cast(left.get()); + right_int8 = static_cast(right.get()); + + // Compute the expected result + expected = 0; + for (int64_t i = this->offset; i < bitmap_length; ++i) { + if (left_int8->IsValid(i) && right_int8->IsValid(i)) { + expected += left_int8->Value(i) + right_int8->Value(i); + } + } + } + + void BenchBitBlockCounter() { + const uint8_t* left_bitmap = left->null_bitmap_data(); + const uint8_t* right_bitmap = right->null_bitmap_data(); + for (auto _ : state) { + BinaryBitBlockCounter scanner(left_bitmap, this->offset, right_bitmap, this->offset, + bitmap_length - this->offset); + int64_t result = 0; + int64_t position = this->offset; + while (true) { + BitBlockCount block = scanner.NextAndWord(); + if (block.length == 0) { + break; + } + if (block.length == block.popcount) { + // All not-null + for (int64_t i = 0; i < block.length; ++i) { + result += left_int8->Value(position + i) + right_int8->Value(position + i); + } + } else if (block.popcount > 0) { + // Some but not all not-null + for (int64_t i = 0; i < block.length; ++i) { + if (BitUtil::GetBit(left_bitmap, position + i) && + BitUtil::GetBit(right_bitmap, position + i)) { + result += left_int8->Value(position + i) + right_int8->Value(position + i); + } + } + } + position += block.length; + } + // Sanity check + if (result != expected) { + std::abort(); + } + } + state.SetItemsProcessed(state.iterations() * bitmap_length); + } + + void BenchBitmapReader() { + for (auto _ : state) { + internal::BitmapReader left_reader(left->null_bitmap_data(), this->offset, + bitmap_length - this->offset); + internal::BitmapReader right_reader(right->null_bitmap_data(), this->offset, + bitmap_length - this->offset); + int64_t result = 0; + for (int64_t i = this->offset; i < bitmap_length; ++i) { + if (left_reader.IsSet() && right_reader.IsSet()) { + result += left_int8->Value(i) + right_int8->Value(i); + } + left_reader.Next(); + right_reader.Next(); + } + // Sanity check + if (result != expected) { + std::abort(); + } + } + state.SetItemsProcessed(state.iterations() * bitmap_length); + } +}; + +static void BitBlockCounterSum(benchmark::State& state) { + UnaryBitBlockBenchmark(state, /*offset=*/0) + .BenchBitBlockCounter([](BitBlockCounter* counter) { return counter->NextWord(); }); +} + +static void BitBlockCounterSumWithOffset(benchmark::State& state) { + UnaryBitBlockBenchmark(state, /*offset=*/4) + .BenchBitBlockCounter([](BitBlockCounter* counter) { return counter->NextWord(); }); +} + +static void BitBlockCounterFourWordsSum(benchmark::State& state) { + UnaryBitBlockBenchmark(state, /*offset=*/0) + .BenchBitBlockCounter( + [](BitBlockCounter* counter) { return counter->NextFourWords(); }); +} + +static void BitBlockCounterFourWordsSumWithOffset(benchmark::State& state) { + UnaryBitBlockBenchmark(state, /*offset=*/4) + .BenchBitBlockCounter( + [](BitBlockCounter* counter) { return counter->NextFourWords(); }); +} + +static void BitmapReaderSum(benchmark::State& state) { + UnaryBitBlockBenchmark(state, /*offset=*/0).BenchBitmapReader(); +} + +static void BitmapReaderSumWithOffset(benchmark::State& state) { + UnaryBitBlockBenchmark(state, /*offset=*/4).BenchBitmapReader(); +} + +static void BinaryBitBlockCounterSum(benchmark::State& state) { + BinaryBitBlockBenchmark(state, /*offset=*/0).BenchBitBlockCounter(); +} + +static void BinaryBitBlockCounterSumWithOffset(benchmark::State& state) { + BinaryBitBlockBenchmark(state, /*offset=*/4).BenchBitBlockCounter(); +} + +static void BinaryBitmapReaderSum(benchmark::State& state) { + BinaryBitBlockBenchmark(state, /*offset=*/0).BenchBitmapReader(); +} + +static void BinaryBitmapReaderSumWithOffset(benchmark::State& state) { + BinaryBitBlockBenchmark(state, /*offset=*/4).BenchBitmapReader(); +} + +// Range value: average number of total values per null +BENCHMARK(BitBlockCounterSum)->Range(2, 1 << 16); +BENCHMARK(BitBlockCounterSumWithOffset)->Range(2, 1 << 16); +BENCHMARK(BitBlockCounterFourWordsSum)->Range(2, 1 << 16); +BENCHMARK(BitBlockCounterFourWordsSumWithOffset)->Range(2, 1 << 16); +BENCHMARK(BitmapReaderSum)->Range(2, 1 << 16); +BENCHMARK(BitmapReaderSumWithOffset)->Range(2, 1 << 16); +BENCHMARK(BinaryBitBlockCounterSum)->Range(2, 1 << 16); +BENCHMARK(BinaryBitBlockCounterSumWithOffset)->Range(2, 1 << 16); +BENCHMARK(BinaryBitmapReaderSum)->Range(2, 1 << 16); +BENCHMARK(BinaryBitmapReaderSumWithOffset)->Range(2, 1 << 16); + +} // namespace internal +} // namespace arrow diff --git a/cpp/src/arrow/util/bit_block_counter_test.cc b/cpp/src/arrow/util/bit_block_counter_test.cc new file mode 100644 index 00000000000..d08e1f82c9e --- /dev/null +++ b/cpp/src/arrow/util/bit_block_counter_test.cc @@ -0,0 +1,250 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include + +#include + +#include "arrow/buffer.h" +#include "arrow/memory_pool.h" +#include "arrow/result.h" +#include "arrow/testing/gtest_common.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/util.h" +#include "arrow/util/bit_block_counter.h" +#include "arrow/util/bit_util.h" + +namespace arrow { +namespace internal { + +class TestBitBlockCounter : public ::testing::Test { + public: + void Create(int64_t nbytes, int64_t offset, int64_t length) { + ASSERT_OK_AND_ASSIGN(buf_, AllocateBuffer(nbytes)); + // Start with data zeroed out + std::memset(buf_->mutable_data(), 0, nbytes); + counter_.reset(new BitBlockCounter(buf_->data(), offset, length)); + } + + protected: + std::shared_ptr buf_; + std::unique_ptr counter_; +}; + +static constexpr int64_t kWordSize = 64; + +TEST_F(TestBitBlockCounter, OneWordBasics) { + const int64_t nbytes = 1024; + + Create(nbytes, 0, nbytes * 8); + + int64_t bits_scanned = 0; + for (int64_t i = 0; i < nbytes / 8; ++i) { + BitBlockCount block = counter_->NextWord(); + ASSERT_EQ(block.length, kWordSize); + ASSERT_EQ(block.popcount, 0); + bits_scanned += block.length; + } + ASSERT_EQ(bits_scanned, 1024 * 8); + + auto block = counter_->NextWord(); + ASSERT_EQ(block.length, 0); + ASSERT_EQ(block.popcount, 0); +} + +TEST_F(TestBitBlockCounter, FourWordsBasics) { + const int64_t nbytes = 1024; + + Create(nbytes, 0, nbytes * 8); + + int64_t bits_scanned = 0; + for (int64_t i = 0; i < nbytes / 32; ++i) { + BitBlockCount block = counter_->NextFourWords(); + ASSERT_EQ(block.length, 4 * kWordSize); + ASSERT_EQ(block.popcount, 0); + bits_scanned += block.length; + } + ASSERT_EQ(bits_scanned, 1024 * 8); + + auto block = counter_->NextFourWords(); + ASSERT_EQ(block.length, 0); + ASSERT_EQ(block.popcount, 0); +} + +TEST_F(TestBitBlockCounter, OneWordWithOffsets) { + auto CheckWithOffset = [&](int64_t offset) { + const int64_t nwords = 4; + + const int64_t total_bytes = nwords * 8 + 1; + // Trim a bit from the end of the bitmap so we can check the remainder bits + // behavior + Create(total_bytes, offset, nwords * kWordSize - offset - 1); + + // Start with data all set + std::memset(buf_->mutable_data(), 0xFF, total_bytes); + + BitBlockCount block = counter_->NextWord(); + ASSERT_EQ(kWordSize, block.length); + ASSERT_EQ(block.popcount, 64); + + // Add a false value to the next word + BitUtil::SetBitTo(buf_->mutable_data(), kWordSize + offset, false); + block = counter_->NextWord(); + ASSERT_EQ(block.length, 64); + ASSERT_EQ(block.popcount, 63); + + // Set the next word to all false + BitUtil::SetBitsTo(buf_->mutable_data(), 2 * kWordSize + offset, kWordSize, false); + + block = counter_->NextWord(); + ASSERT_EQ(block.length, 64); + ASSERT_EQ(block.popcount, 0); + + block = counter_->NextWord(); + ASSERT_EQ(block.length, kWordSize - offset - 1); + ASSERT_EQ(block.length, block.popcount); + + // We can keep calling NextWord safely + block = counter_->NextWord(); + ASSERT_EQ(block.length, 0); + ASSERT_EQ(block.popcount, 0); + }; + + for (int64_t offset_i = 0; offset_i < 8; ++offset_i) { + CheckWithOffset(offset_i); + } +} + +TEST_F(TestBitBlockCounter, FourWordsWithOffsets) { + auto CheckWithOffset = [&](int64_t offset) { + const int64_t nwords = 17; + + const int64_t total_bytes = nwords * 8 + 1; + // Trim a bit from the end of the bitmap so we can check the remainder bits + // behavior + Create(total_bytes, offset, nwords * kWordSize - offset - 1); + + // Start with data all set + std::memset(buf_->mutable_data(), 0xFF, total_bytes); + + BitBlockCount block = counter_->NextFourWords(); + ASSERT_EQ(block.length, 4 * kWordSize); + ASSERT_EQ(block.popcount, block.length); + + // Add some false values to the next 3 shifted words + BitUtil::SetBitTo(buf_->mutable_data(), 4 * kWordSize + offset, false); + BitUtil::SetBitTo(buf_->mutable_data(), 5 * kWordSize + offset, false); + BitUtil::SetBitTo(buf_->mutable_data(), 6 * kWordSize + offset, false); + block = counter_->NextFourWords(); + + ASSERT_EQ(block.length, 4 * kWordSize); + ASSERT_EQ(block.popcount, 253); + + // Set the next two words to all false + BitUtil::SetBitsTo(buf_->mutable_data(), 8 * kWordSize + offset, 2 * kWordSize, + false); + + // Block is half set + block = counter_->NextFourWords(); + ASSERT_EQ(block.length, 4 * kWordSize); + ASSERT_EQ(block.popcount, 128); + + // Last full block whether offset or no + block = counter_->NextFourWords(); + ASSERT_EQ(block.length, 4 * kWordSize); + ASSERT_EQ(block.length, block.popcount); + + // Partial block + block = counter_->NextFourWords(); + ASSERT_EQ(block.length, kWordSize - offset - 1); + ASSERT_EQ(block.length, block.popcount); + + // We can keep calling NextFourWords safely + block = counter_->NextFourWords(); + ASSERT_EQ(block.length, 0); + ASSERT_EQ(block.popcount, 0); + }; + + for (int64_t offset_i = 0; offset_i < 8; ++offset_i) { + CheckWithOffset(offset_i); + } +} + +TEST_F(TestBitBlockCounter, FourWordsRandomData) { + const int64_t nbytes = 1024; + auto buffer = *AllocateBuffer(nbytes); + random_bytes(nbytes, 0, buffer->mutable_data()); + + auto CheckWithOffset = [&](int64_t offset) { + BitBlockCounter counter(buffer->data(), offset, nbytes * 8 - offset); + for (int64_t i = 0; i < nbytes / 32; ++i) { + BitBlockCount block = counter.NextFourWords(); + ASSERT_EQ(block.popcount, + CountSetBits(buffer->data(), i * 256 + offset, block.length)); + } + }; + for (int64_t offset_i = 0; offset_i < 8; ++offset_i) { + CheckWithOffset(offset_i); + } +} + +TEST(TestBinaryBitBlockCounter, NextAndWord) { + const int64_t nbytes = 1024; + auto left = *AllocateBuffer(nbytes); + auto right = *AllocateBuffer(nbytes); + random_bytes(nbytes, 0, left->mutable_data()); + random_bytes(nbytes, 0, right->mutable_data()); + + auto CheckWithOffsets = [&](int left_offset, int right_offset) { + int64_t overlap_length = nbytes * 8 - std::max(left_offset, right_offset); + BinaryBitBlockCounter counter(left->data(), left_offset, right->data(), right_offset, + overlap_length); + int64_t position = 0; + while (true) { + BitBlockCount block = counter.NextAndWord(); + if (block.length == 0) { + break; + } + int expected_popcount = 0; + for (int j = 0; j < block.length; ++j) { + expected_popcount += + static_cast(BitUtil::GetBit(left->data(), position + left_offset + j) && + BitUtil::GetBit(right->data(), position + right_offset + j)); + } + ASSERT_EQ(block.popcount, expected_popcount); + position += block.length; + } + // We made it through all the data + ASSERT_EQ(position, overlap_length); + + BitBlockCount block = counter.NextAndWord(); + ASSERT_EQ(block.length, 0); + ASSERT_EQ(block.popcount, 0); + }; + + for (int left_i = 0; left_i < 8; ++left_i) { + for (int right_i = 0; right_i < 8; ++right_i) { + CheckWithOffsets(left_i, right_i); + } + } +} + +} // namespace internal +} // namespace arrow diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index ab8f91ef6fd..a409bf72057 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -17,19 +17,20 @@ #include "benchmark/benchmark.h" -#include #include #include -#include +#include +#include +#include +#include +#include #include "arrow/array/array_base.h" +#include "arrow/array/array_primitive.h" #include "arrow/buffer.h" -#include "arrow/builder.h" -#include "arrow/memory_pool.h" -#include "arrow/testing/gtest_util.h" +#include "arrow/result.h" #include "arrow/testing/random.h" #include "arrow/testing/util.h" -#include "arrow/util/bit_block_counter.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap.h" #include "arrow/util/bitmap_generate.h" @@ -39,7 +40,6 @@ #include "arrow/util/bitmap_writer.h" namespace arrow { - namespace BitUtil { #ifdef ARROW_WITH_BENCHMARKS_REFERENCE @@ -396,114 +396,6 @@ static void BitmapEquals(benchmark::State& state) { state.SetBytesProcessed(state.iterations() * buffer_size); } -template -static void BitBlockCounterSumNotNull(benchmark::State& state) { - using internal::BitBlockCounter; - - random::RandomArrayGenerator rng(/*seed=*/0); - - const int64_t bitmap_length = 1 << 20; - - // State parameter is the average number of total values for each false - // value. So 100 means that 1 out of 100 on average are false. - double true_probability = 1. - 1. / state.range(0); - auto arr = rng.Int8(bitmap_length, 0, 100, true_probability); - - const uint8_t* bitmap = arr->null_bitmap_data(); - - // Compute the expected result - int64_t expected = 0; - const auto& int8_arr = static_cast(*arr); - for (int64_t i = Offset; i < bitmap_length; ++i) { - if (int8_arr.IsValid(i)) { - expected += int8_arr.Value(i); - } - } - for (auto _ : state) { - BitBlockCounter scanner(bitmap, Offset, bitmap_length - Offset); - int64_t result = 0; - int64_t position = Offset; - while (true) { - BitBlockCounter::Block block = scanner.NextBlock(); - if (block.length == 0) { - break; - } - if (block.length == block.popcount) { - // All not-null - for (int64_t i = 0; i < block.length; ++i) { - result += int8_arr.Value(position + i); - } - } else if (block.popcount > 0) { - // Some but not all not-null - for (int64_t i = 0; i < block.length; ++i) { - if (BitUtil::GetBit(bitmap, position + i)) { - result += int8_arr.Value(position + i); - } - } - } - position += block.length; - } - // Sanity check - if (result != expected) { - std::abort(); - } - } - state.SetItemsProcessed(state.iterations() * bitmap_length); -} - -template -static void BitmapReaderSumNotNull(benchmark::State& state) { - random::RandomArrayGenerator rng(/*seed=*/0); - - const int64_t bitmap_length = 1 << 20; - - // State parameter is the average number of total values for each false - // value. So 100 means that 1 out of 100 on average are false. - double true_probability = 1. - 1. / state.range(0); - auto arr = rng.Int8(bitmap_length, 0, 100, true_probability); - - const uint8_t* bitmap = arr->null_bitmap_data(); - // Compute the expected result - int64_t expected = 0; - const auto& int8_arr = static_cast(*arr); - for (int64_t i = Offset; i < bitmap_length; ++i) { - if (int8_arr.IsValid(i)) { - expected += int8_arr.Value(i); - } - } - for (auto _ : state) { - internal::BitmapReader bit_reader(bitmap, Offset, bitmap_length - Offset); - int64_t result = 0; - for (int64_t i = Offset; i < bitmap_length; ++i) { - if (bit_reader.IsSet()) { - result += int8_arr.Value(i); - } - bit_reader.Next(); - } - // Sanity check - if (result != expected) { - std::abort(); - } - } - state.SetItemsProcessed(state.iterations() * bitmap_length); -} - -static void BitBlockCounterSumNotNull(benchmark::State& state) { - BitBlockCounterSumNotNull<0>(state); -} - -static void BitBlockCounterSumNotNullWithOffset(benchmark::State& state) { - BitBlockCounterSumNotNull<4>(state); -} - -static void BitmapReaderSumNotNull(benchmark::State& state) { - BitmapReaderSumNotNull<0>(state); -} - -static void BitmapReaderSumNotNullWithOffset(benchmark::State& state) { - BitmapReaderSumNotNull<4>(state); -} - static void BitmapEqualsWithoutOffset(benchmark::State& state) { BitmapEquals<0>(state); } static void BitmapEqualsWithOffset(benchmark::State& state) { BitmapEquals<4>(state); } @@ -541,12 +433,6 @@ BENCHMARK(CopyBitmapWithOffsetBoth)->Arg(kBufferSize); BENCHMARK(BitmapEqualsWithoutOffset)->Arg(kBufferSize); BENCHMARK(BitmapEqualsWithOffset)->Arg(kBufferSize); -// Range value: average number of total values per null -BENCHMARK(BitBlockCounterSumNotNull)->Range(8, 1 << 16); -BENCHMARK(BitBlockCounterSumNotNullWithOffset)->Range(8, 1 << 16); -BENCHMARK(BitmapReaderSumNotNull)->Range(8, 1 << 16); -BENCHMARK(BitmapReaderSumNotNullWithOffset)->Range(8, 1 << 16); - #define AND_BENCHMARK_RANGES \ { \ {kBufferSize * 4, kBufferSize * 16}, { 0, 2 } \ diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 2eff46f0bfb..3772e9f77c6 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -33,7 +33,6 @@ #include "arrow/memory_pool.h" #include "arrow/testing/gtest_common.h" #include "arrow/testing/gtest_util.h" -#include "arrow/util/bit_block_counter.h" #include "arrow/util/bit_stream_utils.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap.h" @@ -1557,106 +1556,5 @@ TEST(Bitmap, VisitWordsAnd) { } } -class TestBitBlockCounter : public ::testing::Test { - public: - void Create(int64_t nbytes, int64_t offset, int64_t length) { - ASSERT_OK_AND_ASSIGN(buf_, AllocateBuffer(nbytes)); - // Start with data zeroed out - std::memset(buf_->mutable_data(), 0, nbytes); - scanner_.reset(new BitBlockCounter(buf_->data(), offset, length)); - } - - protected: - std::shared_ptr buf_; - std::unique_ptr scanner_; -}; - -static constexpr int64_t kWordSize = 64; - -TEST_F(TestBitBlockCounter, Basics) { - const int64_t nbytes = 1024; - - Create(nbytes, 0, nbytes * 8); - - int64_t bits_scanned = 0; - for (int64_t i = 0; i < nbytes / 32; ++i) { - BitBlockCounter::Block block = scanner_->NextBlock(); - ASSERT_EQ(block.length, 4 * kWordSize); - ASSERT_EQ(block.popcount, 0); - bits_scanned += block.length; - } - ASSERT_EQ(bits_scanned, 1024 * 8); - - auto block = scanner_->NextBlock(); - ASSERT_EQ(block.length, 0); - ASSERT_EQ(block.popcount, 0); -} - -TEST_F(TestBitBlockCounter, Offsets) { - auto CheckWithOffset = [&](int64_t offset) { - const int64_t nwords = 15; - - const int64_t total_bytes = nwords * 8 + 1; - // Trim a bit from the end of the bitmap so we can check the remainder bits - // behavior - Create(total_bytes, offset, nwords * kWordSize - offset - 1); - - // Start with data all set - std::memset(buf_->mutable_data(), 0xFF, total_bytes); - - BitBlockCounter::Block block = scanner_->NextBlock(); - ASSERT_EQ(4 * kWordSize, block.length); - ASSERT_EQ(block.popcount, 256); - - // Add some false values to the next 3 shifted words - BitUtil::SetBitTo(buf_->mutable_data(), 4 * kWordSize + offset, false); - BitUtil::SetBitTo(buf_->mutable_data(), 5 * kWordSize + offset, false); - BitUtil::SetBitTo(buf_->mutable_data(), 6 * kWordSize + offset, false); - block = scanner_->NextBlock(); - - ASSERT_EQ(block.length, 256); - ASSERT_EQ(block.popcount, 253); - - BitUtil::SetBitsTo(buf_->mutable_data(), 8 * kWordSize + offset, 2 * kWordSize, - false); - - block = scanner_->NextBlock(); - ASSERT_EQ(block.length, 256); - ASSERT_EQ(block.popcount, 128); - - // Last block - block = scanner_->NextBlock(); - ASSERT_EQ(block.length, 3 * kWordSize - offset - 1); - ASSERT_EQ(block.length, block.popcount); - - // We can keep calling NextBlock safely - block = scanner_->NextBlock(); - ASSERT_EQ(block.length, 0); - ASSERT_EQ(block.popcount, 0); - }; - - for (int64_t offset_i = 0; offset_i < 7; ++offset_i) { - CheckWithOffset(offset_i); - } -} - -TEST_F(TestBitBlockCounter, RandomData) { - const int64_t nbytes = 1024; - auto buffer = *AllocateBuffer(nbytes); - random_bytes(nbytes, 0, buffer->mutable_data()); - - auto CheckWithOffset = [&](int64_t offset) { - BitBlockCounter scanner(buffer->data(), offset, nbytes * 8 - offset); - for (int64_t i = 0; i < nbytes / 32; ++i) { - BitBlockCounter::Block block = scanner.NextBlock(); - ASSERT_EQ(block.popcount, - CountSetBits(buffer->data(), i * 256 + offset, block.length)); - } - }; - for (int64_t offset_i = 0; offset_i < 7; ++offset_i) { - CheckWithOffset(offset_i); - } -} - } // namespace internal } // namespace arrow