Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions cpp/src/arrow/util/bit_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,50 @@ void FillBitsFromBytes(const std::vector<uint8_t>& bytes, uint8_t* bits) {

} // namespace

// Set (bits_are_set == true) or clear (bits_are_set == false) the `length`
// bits of the bitmap `bits` that begin at bit index `start_offset`.
//
// The range is handled in up to three pieces: a partially covered first byte,
// a run of fully covered bytes written with memset, and a partially covered
// last byte. In the partial bytes, bits selected by the keep-mask retain
// their current value and all other bits take the fill pattern.
void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set) {
  if (length == 0) return;

  // One past the last bit index of the range.
  const int64_t bit_stop = start_offset + length;
  // 0xFF when setting bits, 0x00 when clearing them.
  const uint8_t pattern = bits_are_set ? uint8_t{0xFF} : uint8_t{0x00};

  const int64_t head_index = start_offset / 8;
  const int64_t tail_index = bit_stop / 8;
  const int64_t stop_in_byte = bit_stop % 8;

  const uint8_t keep_head = kPrecedingBitmask[start_offset % 8];
  const uint8_t keep_tail = kTrailingBitmask[stop_in_byte];

  // Combine a byte with the fill pattern: bits set in `keep` are preserved,
  // the remaining bits are taken from `pattern`.
  const auto blend = [pattern](uint8_t current, uint8_t keep) {
    return static_cast<uint8_t>((current & keep) | (pattern & ~keep));
  };

  if (tail_index == head_index) {
    // The whole range lives inside a single byte.
    uint8_t keep = keep_head;
    if (stop_in_byte != 0) {
      keep = static_cast<uint8_t>(keep_head | keep_tail);
    }
    bits[head_index] = blend(bits[head_index], keep);
    return;
  }

  // Trailing bits of the first byte.
  bits[head_index] = blend(bits[head_index], keep_head);

  // Bytes strictly between the first byte and the byte containing bit_stop.
  const int64_t whole_bytes = tail_index - head_index - 1;
  if (whole_bytes > 0) {
    std::memset(bits + head_index + 1, pattern, static_cast<size_t>(whole_bytes));
  }

  // Leading bits of the last byte; skipped when the range ends exactly on a
  // byte boundary, because then that byte is not part of the range.
  if (stop_in_byte != 0) {
    bits[tail_index] = blend(bits[tail_index], keep_tail);
  }
}

Result<std::shared_ptr<Buffer>> BytesToBits(const std::vector<uint8_t>& bytes,
MemoryPool* pool) {
int64_t bit_length = BytesForBits(bytes.size());
Expand Down
42 changes: 2 additions & 40 deletions cpp/src/arrow/util/bit_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -460,46 +460,8 @@ static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) {
}

/// \brief set or clear a range of bits quickly
static inline void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length,
                             bool bits_are_set) {
  // An empty range leaves the bitmap untouched.
  if (length == 0) {
    return;
  }

  // Bit index one past the end of the range.
  const int64_t range_end = start_offset + length;
  // All ones when setting, all zeros when clearing.
  const uint8_t fill_value =
      bits_are_set ? static_cast<uint8_t>(0xFF) : static_cast<uint8_t>(0x00);

  const int64_t first_byte = start_offset / 8;
  const int64_t last_byte = range_end / 8;
  const bool ends_on_byte_boundary = (range_end % 8 == 0);

  // Masks of the bits to preserve in the first and last byte respectively.
  const auto head_keep = kPrecedingBitmask[start_offset % 8];
  const auto tail_keep = kTrailingBitmask[range_end % 8];

  if (last_byte == first_byte) {
    // Range is contained in one byte: preserve bits on both sides of it.
    const auto keep = ends_on_byte_boundary
                          ? head_keep
                          : static_cast<uint8_t>(head_keep | tail_keep);
    bits[first_byte] =
        static_cast<uint8_t>((bits[first_byte] & keep) | (fill_value & ~keep));
    return;
  }

  // First byte: overwrite its trailing bits, preserve the rest.
  bits[first_byte] =
      static_cast<uint8_t>((bits[first_byte] & head_keep) | (fill_value & ~head_keep));

  // Fully covered bytes between the first and the last byte.
  const int64_t interior_bytes = last_byte - first_byte - 1;
  if (interior_bytes > 0) {
    std::memset(bits + first_byte + 1, fill_value, static_cast<size_t>(interior_bytes));
  }

  // Last byte: overwrite its leading bits, unless the range stops exactly at
  // a byte boundary and therefore does not reach into that byte.
  if (!ends_on_byte_boundary) {
    bits[last_byte] =
        static_cast<uint8_t>((bits[last_byte] & tail_keep) | (fill_value & ~tail_keep));
  }
}
/// \param[in,out] bits bitmap whose bytes are modified in place
/// \param[in] start_offset index, in bits, of the first bit of the range
/// \param[in] length number of bits in the range; zero leaves the bitmap untouched
/// \param[in] bits_are_set true to set every bit of the range, false to clear it
ARROW_EXPORT
void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set);

/// \brief Convert vector of bytes to bitmap buffer
ARROW_EXPORT
Expand Down
11 changes: 11 additions & 0 deletions cpp/src/arrow/util/bit_util_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,16 @@ static void VisitBitsUnrolled(benchmark::State& state) {
BenchmarkVisitBits<VisitBitsUnrolledFunctor>(state, state.range(0));
}

// Benchmark BitUtil::SetBitsTo: each iteration sets every bit of a buffer of
// state.range(0) bytes obtained from CreateRandomBuffer, starting at bit 0.
// Throughput is reported in bytes of bitmap covered.
static void SetBitsTo(benchmark::State& state) {
  const int64_t buffer_size = state.range(0);
  const int64_t num_bits = buffer_size * 8;
  std::shared_ptr<Buffer> buffer = CreateRandomBuffer(buffer_size);

  for (auto _ : state) {
    BitUtil::SetBitsTo(buffer->mutable_data(), /*offset=*/0, num_bits, true);
  }

  state.SetBytesProcessed(state.iterations() * buffer_size);
}

constexpr int64_t kBufferSize = 1024 * 8;

template <int64_t Offset = 0>
Expand Down Expand Up @@ -364,6 +374,7 @@ BENCHMARK(ReferenceNaiveBitmapReader)->Arg(kBufferSize);
BENCHMARK(BitmapReader)->Arg(kBufferSize);
BENCHMARK(VisitBits)->Arg(kBufferSize);
BENCHMARK(VisitBitsUnrolled)->Arg(kBufferSize);
// Each Arg is the buffer size in bytes read by the SetBitsTo benchmark via
// state.range(0): 2, 16, 1024 and 131072 bytes.
BENCHMARK(SetBitsTo)->Arg(2)->Arg(1 << 4)->Arg(1 << 10)->Arg(1 << 17);

#ifdef ARROW_WITH_BENCHMARKS_REFERENCE
static void ReferenceNaiveBitmapWriter(benchmark::State& state) {
Expand Down