Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/concatenate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ static Status ConcatenateBitmaps(const std::vector<Bitmap>& bitmaps, MemoryPool*
BitUtil::SetBitsTo(dst, bitmap_offset, bitmap.range.length, true);
} else {
internal::CopyBitmap(bitmap.data, bitmap.range.offset, bitmap.range.length, dst,
bitmap_offset, false);
bitmap_offset);
}
bitmap_offset += bitmap.range.length;
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/util/bit_util_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ static void CopyBitmap(benchmark::State& state) { // NOLINT non-const reference
auto copy = *AllocateEmptyBitmap(length);

for (auto _ : state) {
internal::CopyBitmap(src, OffsetSrc, length, copy->mutable_data(), OffsetDest, false);
internal::CopyBitmap(src, OffsetSrc, length, copy->mutable_data(), OffsetDest);
}

state.SetBytesProcessed(state.iterations() * buffer_size);
Expand Down Expand Up @@ -386,7 +386,7 @@ static void BitmapEquals(benchmark::State& state) {
const int64_t length = bits_size - offset;

auto copy = *AllocateEmptyBitmap(length + offset);
internal::CopyBitmap(src, 0, length, copy->mutable_data(), offset, false);
internal::CopyBitmap(src, 0, length, copy->mutable_data(), offset);

for (auto _ : state) {
auto is_same = internal::BitmapEquals(src, 0, copy->data(), offset, length);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/util/bit_util_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1073,7 +1073,7 @@ TEST(BitUtilTests, TestBitmapEquals) {
for (int64_t offset_dst : offsets) {
const auto bit_length = num_bits - offset_src;

internal::CopyBitmap(src, offset_src, bit_length, dst, offset_dst, false);
internal::CopyBitmap(src, offset_src, bit_length, dst, offset_dst);
ASSERT_TRUE(internal::BitmapEquals(src, offset_src, dst, offset_dst, bit_length));

// test negative cases by flip some bit at head and tail
Expand Down
69 changes: 32 additions & 37 deletions cpp/src/arrow/util/bitmap_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -282,14 +282,13 @@ class BitmapWordWriter {

} // namespace

template <bool invert_bits, bool restore_trailing_bits>
enum class TransferMode : bool { Copy, Invert };

template <TransferMode mode>
void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length,
int64_t dest_offset, uint8_t* dest) {
int64_t byte_offset = offset / 8;
int64_t bit_offset = offset % 8;
int64_t dest_byte_offset = dest_offset / 8;
int64_t dest_bit_offset = dest_offset % 8;
int64_t num_bytes = BitUtil::BytesForBits(length);

if (bit_offset || dest_bit_offset) {
auto reader = internal::BitmapWordReader<uint64_t>(data, offset, length);
Expand All @@ -298,52 +297,52 @@ void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length,
auto nwords = reader.words();
while (nwords--) {
auto word = reader.NextWord();
writer.PutNextWord(invert_bits ? ~word : word);
writer.PutNextWord(mode == TransferMode::Invert ? ~word : word);
}
auto nbytes = reader.trailing_bytes();
while (nbytes--) {
int valid_bits;
auto byte = reader.NextTrailingByte(valid_bits);
writer.PutNextTrailingByte(invert_bits ? ~byte : byte, valid_bits);
writer.PutNextTrailingByte(mode == TransferMode::Invert ? ~byte : byte, valid_bits);
}
} else {
// Shift dest by its byte offset
dest += dest_byte_offset;
} else if (length) {
int64_t num_bytes = BitUtil::BytesForBits(length);

// Shift by its byte offset
data += offset / 8;
dest += dest_offset / 8;

// Take care of the trailing bits in the last byte
// E.g., if trailing_bits = 5, last byte should be
// - low 3 bits: new bits from last byte of data buffer
// - high 5 bits: old bits from last byte of dest buffer
int64_t trailing_bits = num_bytes * 8 - length;
uint8_t trail = 0;
if (trailing_bits && restore_trailing_bits) {
trail = dest[num_bytes - 1];
}
uint8_t trail_mask = (1U << (8 - trailing_bits)) - 1;
uint8_t last_data;

if (invert_bits) {
for (int64_t i = 0; i < num_bytes; i++) {
dest[i] = static_cast<uint8_t>(~(data[byte_offset + i]));
if (mode == TransferMode::Invert) {
for (int64_t i = 0; i < num_bytes - 1; i++) {
dest[i] = static_cast<uint8_t>(~(data[i]));
}
last_data = ~data[num_bytes - 1];
} else {
std::memcpy(dest, data + byte_offset, static_cast<size_t>(num_bytes));
std::memcpy(dest, data, static_cast<size_t>(num_bytes - 1));
last_data = data[num_bytes - 1];
}

if (restore_trailing_bits) {
for (int i = 0; i < trailing_bits; i++) {
if (BitUtil::GetBit(&trail, i + 8 - trailing_bits)) {
BitUtil::SetBit(dest, length + i);
} else {
BitUtil::ClearBit(dest, length + i);
}
}
}
// Set last byte
dest[num_bytes - 1] &= ~trail_mask;
dest[num_bytes - 1] |= last_data & trail_mask;
}
}

template <bool invert_bits>
template <TransferMode mode>
Result<std::shared_ptr<Buffer>> TransferBitmap(MemoryPool* pool, const uint8_t* data,
int64_t offset, int64_t length) {
ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateEmptyBitmap(length, pool));
uint8_t* dest = buffer->mutable_data();

TransferBitmap<invert_bits, false>(data, offset, length, 0, dest);
TransferBitmap<mode>(data, offset, length, 0, dest);

// As we have freshly allocated this bitmap, we should take care of zeroing the
// remaining bits.
Expand All @@ -357,28 +356,24 @@ Result<std::shared_ptr<Buffer>> TransferBitmap(MemoryPool* pool, const uint8_t*
}

void CopyBitmap(const uint8_t* data, int64_t offset, int64_t length, uint8_t* dest,
int64_t dest_offset, bool restore_trailing_bits) {
if (restore_trailing_bits) {
TransferBitmap<false, true>(data, offset, length, dest_offset, dest);
} else {
TransferBitmap<false, false>(data, offset, length, dest_offset, dest);
}
int64_t dest_offset) {
TransferBitmap<TransferMode::Copy>(data, offset, length, dest_offset, dest);
}

void InvertBitmap(const uint8_t* data, int64_t offset, int64_t length, uint8_t* dest,
int64_t dest_offset) {
TransferBitmap<true, true>(data, offset, length, dest_offset, dest);
TransferBitmap<TransferMode::Invert>(data, offset, length, dest_offset, dest);
}

Result<std::shared_ptr<Buffer>> CopyBitmap(MemoryPool* pool, const uint8_t* data,
int64_t offset, int64_t length) {
return TransferBitmap<false>(pool, data, offset, length);
return TransferBitmap<TransferMode::Copy>(pool, data, offset, length);
}

Result<std::shared_ptr<Buffer>> InvertBitmap(MemoryPool* pool, const uint8_t* data,
int64_t offset, int64_t length,
std::shared_ptr<Buffer>* out) {
return TransferBitmap<true>(pool, data, offset, length);
return TransferBitmap<TransferMode::Invert>(pool, data, offset, length);
}

bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right,
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/arrow/util/bitmap_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,11 @@ Result<std::shared_ptr<Buffer>> CopyBitmap(MemoryPool* pool, const uint8_t* bitm
/// \param[in] offset bit offset into the source data
/// \param[in] length number of bits to copy
/// \param[in] dest_offset bit offset into the destination
/// \param[in] restore_trailing_bits don't clobber bits outside the destination range
/// \param[out] dest the destination buffer, must have at least space for
/// (offset + length) bits
ARROW_EXPORT
void CopyBitmap(const uint8_t* bitmap, int64_t offset, int64_t length, uint8_t* dest,
int64_t dest_offset, bool restore_trailing_bits = true);
int64_t dest_offset);

/// Invert a bit range of an existing bitmap into an existing bitmap
///
Expand Down