Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/arrow/array/array_primitive.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "arrow/array/array_base.h"
#include "arrow/array/data.h"
#include "arrow/type.h"
#include "arrow/type_fwd.h" // IWYU pragma: export
#include "arrow/type_traits.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/macros.h"
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ endif()
add_arrow_test(utility-test
SOURCES
align_util_test.cc
bit_block_counter_test.cc
bit_util_test.cc
checked_cast_test.cc
compression_test.cc
Expand Down Expand Up @@ -68,6 +69,7 @@ add_arrow_test(threading-utility-test
task_group_test
thread_pool_test)

add_arrow_benchmark(bit_block_counter_benchmark)
add_arrow_benchmark(bit_util_benchmark)
add_arrow_benchmark(compression_benchmark)
add_arrow_benchmark(decimal_benchmark)
Expand Down
148 changes: 116 additions & 32 deletions cpp/src/arrow/util/bit_block_counter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,59 +17,143 @@

#include "arrow/util/bit_block_counter.h"

#include <algorithm>
#include <cstdint>
#include <type_traits>

#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/ubsan.h"

namespace arrow {
namespace internal {

BitBlockCounter::Block BitBlockCounter::NextBlock() {
auto load_word = [](const uint8_t* bytes) -> uint64_t {
return BitUtil::ToLittleEndian(util::SafeLoadAs<uint64_t>(bytes));
};
auto shift_word = [](uint64_t current, uint64_t next, int64_t shift) -> uint64_t {
return (current >> shift) | (next << (64 - shift));
};
// Number of bits in one machine word processed by the counters in this file.
static constexpr int64_t kWordBits = 64;
// Number of bits in a four-word block (4 * 64 = 256).
static constexpr int64_t kFourWordsBits = 4 * kWordBits;

// When the offset is > 0, we need there to be a word beyond the last aligned
// word in the bitmap for the bit shifting logic.
const int64_t bits_required_to_scan_words = offset_ == 0 ? 256 : 256 + (64 - offset_);
if (bits_remaining_ < bits_required_to_scan_words) {
// End of the bitmap, leave it to the caller to decide how to best check
// these bits, no need to do redundant computation here.
const int16_t run_length = static_cast<int16_t>(bits_remaining_);
bits_remaining_ -= run_length;
return {run_length, static_cast<int16_t>(CountSetBits(bitmap_, offset_, run_length))};
// Load a uint64_t starting at `bytes` with util::SafeLoadAs and pass it through
// BitUtil::ToLittleEndian, so callers always see the same bit numbering.
static inline uint64_t LoadWord(const uint8_t* bytes) {
  const uint64_t raw_word = util::SafeLoadAs<uint64_t>(bytes);
  return BitUtil::ToLittleEndian(raw_word);
}

// Splice two adjacent words into one: drop the lowest `shift` bits of
// `current` and fill the vacated high bits with the lowest `shift` bits of
// `next`.
//
// A shift of zero returns `current` unchanged; it is handled separately
// because `next << 64` would be an out-of-range shift.
static inline uint64_t ShiftWord(uint64_t current, uint64_t next, int64_t shift) {
  if (shift != 0) {
    const uint64_t low_part = current >> shift;
    const uint64_t high_part = next << (64 - shift);
    return low_part | high_part;
  }
  return current;
}

// Fallback used when too few bits remain for the word-wise code paths.
//
// Counts the set bits in the next min(bits_remaining_, block_size) bits with
// CountSetBits, consumes them, and returns {bits consumed, set bits}.
BitBlockCount BitBlockCounter::GetBlockSlow(int64_t block_size) {
  const int64_t bits_to_scan = std::min(bits_remaining_, block_size);
  const int16_t length = static_cast<int16_t>(bits_to_scan);
  const int16_t set_bits = static_cast<int16_t>(CountSetBits(bitmap_, offset_, length));

  // bitmap_ only moves by whole bytes. That loses nothing: either a full
  // block was consumed (the block sizes used in this file, 64 and 256, are
  // multiples of 8), or this call drained the bitmap and bits_remaining_ is
  // now zero.
  bitmap_ += length / 8;
  bits_remaining_ -= length;
  return {length, set_bits};
}

// Return the length and popcount of the next block of (normally) 64 bits.
//
// Returns {0, 0} once the bitmap is exhausted. When too few bits remain for
// a word-wise load, the block is produced by GetBlockSlow instead.
BitBlockCount BitBlockCounter::NextWord() {
  if (bits_remaining_ == 0) {
    return {0, 0};
  }

  // An aligned bitmap needs one full word. With a non-zero bit offset, a
  // second word is also loaded so the two can be spliced together, which
  // requires 2 * 64 - offset_ bits to remain.
  const int64_t bits_needed = (offset_ == 0) ? kWordBits : 2 * kWordBits - offset_;
  if (bits_remaining_ < bits_needed) {
    return GetBlockSlow(kWordBits);
  }

  const uint64_t word =
      (offset_ == 0) ? LoadWord(bitmap_)
                     : ShiftWord(LoadWord(bitmap_), LoadWord(bitmap_ + 8), offset_);
  const int64_t set_bits = BitUtil::PopCount(word);

  bitmap_ += kWordBits / 8;
  bits_remaining_ -= kWordBits;
  return {64, static_cast<int16_t>(set_bits)};
}

// Return the length and popcount of the next block of (normally) 256 bits.
//
// Returns {0, 0} once the bitmap is exhausted. When too few bits remain for
// the word-wise loads below, the block is produced by GetBlockSlow instead,
// so the final block may be shorter than 256 bits.
BitBlockCount BitBlockCounter::NextFourWords() {
  if (!bits_remaining_) {
    return {0, 0};
  }
  int64_t total_popcount = 0;
  if (offset_ == 0) {
    // Aligned case: four full words must be available.
    if (bits_remaining_ < kFourWordsBits) {
      return GetBlockSlow(kFourWordsBits);
    }
    total_popcount += BitUtil::PopCount(LoadWord(bitmap_));
    total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 8));
    total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 16));
    total_popcount += BitUtil::PopCount(LoadWord(bitmap_ + 24));
  } else {
    // When the offset is > 0, we need there to be a word beyond the last
    // aligned word in the bitmap for the bit shifting logic. Five words
    // (40 bytes) are loaded starting at bitmap_, and the first offset_ bits
    // of the first byte precede the remaining bits, so 5 * 64 - offset_ bits
    // must remain.
    if (bits_remaining_ < 5 * kWordBits - offset_) {
      return GetBlockSlow(kFourWordsBits);
    }
    // Each output word is spliced from two neighbouring loaded words.
    auto current = LoadWord(bitmap_);
    auto next = LoadWord(bitmap_ + 8);
    total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_));
    current = next;
    next = LoadWord(bitmap_ + 16);
    total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_));
    current = next;
    next = LoadWord(bitmap_ + 24);
    total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_));
    current = next;
    next = LoadWord(bitmap_ + 32);
    total_popcount += BitUtil::PopCount(ShiftWord(current, next, offset_));
  }
  // Consume exactly one four-word block (32 bytes / 256 bits).
  bitmap_ += BitUtil::BytesForBits(kFourWordsBits);
  bits_remaining_ -= kFourWordsBits;
  return {256, static_cast<int16_t>(total_popcount)};
}

// Return the length of the next block of (normally) 64 bits together with the
// popcount of the bitwise AND of the left and right bitmaps over that block.
//
// Returns {0, 0} once the bitmaps are exhausted. When too few bits remain for
// word-wise loads, the bits are tested one at a time instead.
BitBlockCount BinaryBitBlockCounter::NextAndWord() {
  if (bits_remaining_ == 0) {
    return {0, 0};
  }

  // An aligned bitmap needs one full word. With a non-zero bit offset, a
  // second word is also loaded for the splice, which requires
  // 2 * 64 - offset bits. Both bitmaps must satisfy their requirement.
  const auto bits_needed = [](int64_t bit_offset) -> int64_t {
    return bit_offset == 0 ? kWordBits : 2 * kWordBits - bit_offset;
  };
  const int64_t word_path_threshold =
      std::max(bits_needed(left_offset_), bits_needed(right_offset_));

  if (bits_remaining_ >= word_path_threshold) {
    uint64_t anded_word = 0;
    if (left_offset_ == 0 && right_offset_ == 0) {
      // Both aligned: a single load per side, nothing past the current word
      // is touched.
      anded_word = LoadWord(left_bitmap_) & LoadWord(right_bitmap_);
    } else {
      const uint64_t left_word =
          ShiftWord(LoadWord(left_bitmap_), LoadWord(left_bitmap_ + 8), left_offset_);
      const uint64_t right_word =
          ShiftWord(LoadWord(right_bitmap_), LoadWord(right_bitmap_ + 8), right_offset_);
      anded_word = left_word & right_word;
    }
    const int64_t set_bits = BitUtil::PopCount(anded_word);
    left_bitmap_ += kWordBits / 8;
    right_bitmap_ += kWordBits / 8;
    bits_remaining_ -= kWordBits;
    return {64, static_cast<int16_t>(set_bits)};
  }

  // Near the end of the bitmaps: count positions where both bits are set,
  // one bit at a time.
  const int16_t run_length = static_cast<int16_t>(std::min(bits_remaining_, kWordBits));
  int16_t set_bits = 0;
  for (int64_t bit = 0; bit < run_length; ++bit) {
    if (BitUtil::GetBit(left_bitmap_, left_offset_ + bit) &&
        BitUtil::GetBit(right_bitmap_, right_offset_ + bit)) {
      ++set_bits;
    }
  }
  // The pointers only move by whole bytes. Either a full 64-bit word was
  // consumed, or this call drained the bitmaps and bits_remaining_ is now
  // zero.
  left_bitmap_ += run_length / 8;
  right_bitmap_ += run_length / 8;
  bits_remaining_ -= run_length;
  return {run_length, set_bits};
}

} // namespace internal
} // namespace arrow
72 changes: 59 additions & 13 deletions cpp/src/arrow/util/bit_block_counter.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,37 +19,83 @@

#include <cstdint>

#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/ubsan.h"
#include "arrow/util/visibility.h"

namespace arrow {
namespace internal {

/// \brief A class that scans through a true/false bitmap to yield blocks of up
/// to 256 bits at a time along with their popcount. This is used to accelerate
/// processing of mostly-not-null array data.
/// \brief Return value from bit block counters: the total number of bits and
/// the number of set bits.
///
/// The counters in this file return {0, 0} once their bitmap is exhausted.
struct BitBlockCount {
/// Number of bits covered by this block.
int16_t length;
/// Number of set bits among those `length` bits.
int16_t popcount;
};

/// \brief A class that scans through a true/false bitmap to compute popcounts
/// 64 or 256 bits at a time. This is used to accelerate processing of
/// mostly-not-null array data.
///
/// Each Next* call consumes the bits it reports, so successive calls walk
/// forward through the bitmap.
class ARROW_EXPORT BitBlockCounter {
public:
/// \brief Length and popcount of one scanned block.
/// NOTE(review): has the same fields as BitBlockCount -- confirm whether this
/// nested type is still needed.
struct Block {
int16_t length;
int16_t popcount;
};

/// \brief Target number of bits per block for NextBlock().
static constexpr int16_t kTargetBlockLength = 256;

/// \brief Create a counter over `length` bits of `bitmap`, starting at bit
/// `start_offset`.
///
/// The stored pointer is advanced by start_offset / 8 whole bytes, and
/// start_offset % 8 is kept as the bit offset into the first byte.
BitBlockCounter(const uint8_t* bitmap, int64_t start_offset, int64_t length)
: bitmap_(bitmap + start_offset / 8),
bits_remaining_(length),
offset_(start_offset % 8) {}

/// \brief Return the next run of available bits, up to 256. The returned
/// pair contains the size of run and the number of true values
/// NOTE(review): appears to be the predecessor of NextFourWords() -- confirm
/// whether it is still defined and used.
Block NextBlock();
/// \brief Return the next run of available bits, usually 256. The returned
/// pair contains the size of run and the number of true values. The last
/// block will have a length less than 256 if the bitmap length is not a
/// multiple of 256, and will return 0-length blocks in subsequent
/// invocations.
BitBlockCount NextFourWords();

/// \brief Return the next run of available bits, usually 64. The returned
/// pair contains the size of run and the number of true values. The last
/// block will have a length less than 64 if the bitmap length is not a
/// multiple of 64, and will return 0-length blocks in subsequent
/// invocations.
BitBlockCount NextWord();

private:
/// \brief Return block with the requested size when doing word-wise
/// computation is not possible due to inadequate bits remaining.
BitBlockCount GetBlockSlow(int64_t block_size);

// Byte containing the next unread bit.
const uint8_t* bitmap_;
// Number of bits not yet reported by a Next* call.
int64_t bits_remaining_;
// Bit offset (start_offset % 8) of the bitmap's first bit within its byte.
int64_t offset_;
};

/// \brief A class that computes popcounts on the result of bitwise operations
/// between two bitmaps, 64 bits at a time. A 64-bit word is loaded from each
/// bitmap, then the popcount is computed on e.g. the bitwise-and of the two
/// words.
///
/// The two bitmaps may start at different bit offsets; both are scanned over
/// the same number of bits.
class ARROW_EXPORT BinaryBitBlockCounter {
public:
/// \brief Create a counter over `length` bits of each bitmap, starting at
/// bit `left_offset` of `left_bitmap` and bit `right_offset` of
/// `right_bitmap`.
///
/// Each stored pointer is advanced by offset / 8 whole bytes, and
/// offset % 8 is kept as that side's bit offset into its first byte.
BinaryBitBlockCounter(const uint8_t* left_bitmap, int64_t left_offset,
const uint8_t* right_bitmap, int64_t right_offset, int64_t length)
: left_bitmap_(left_bitmap + left_offset / 8),
left_offset_(left_offset % 8),
right_bitmap_(right_bitmap + right_offset / 8),
right_offset_(right_offset % 8),
bits_remaining_(length) {}

/// \brief Return the popcount of the bitwise-and of the next run of
/// available bits, up to 64. The returned pair contains the size of run and
/// the number of true values. The last block will have a length less than 64
/// if the bitmap length is not a multiple of 64, and will return 0-length
/// blocks in subsequent invocations.
BitBlockCount NextAndWord();

private:
// Byte containing the next unread bit of the left bitmap.
const uint8_t* left_bitmap_;
// Bit offset (left_offset % 8) of the left bitmap's first bit within its byte.
int64_t left_offset_;
// Byte containing the next unread bit of the right bitmap.
const uint8_t* right_bitmap_;
// Bit offset (right_offset % 8) of the right bitmap's first bit within its byte.
int64_t right_offset_;
// Number of bits not yet reported by NextAndWord(); shared by both bitmaps.
int64_t bits_remaining_;
};

} // namespace internal
} // namespace arrow
Loading