diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc index e1f6d71dc55..42bdee0914b 100644 --- a/cpp/src/arrow/array/concatenate.cc +++ b/cpp/src/arrow/array/concatenate.cc @@ -79,7 +79,7 @@ static Status ConcatenateBitmaps(const std::vector& bitmaps, MemoryPool* BitUtil::SetBitsTo(dst, bitmap_offset, bitmap.range.length, true); } else { internal::CopyBitmap(bitmap.data, bitmap.range.offset, bitmap.range.length, dst, - bitmap_offset, false); + bitmap_offset); } bitmap_offset += bitmap.range.length; } diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index a409bf72057..10047d23390 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -355,7 +355,7 @@ static void CopyBitmap(benchmark::State& state) { // NOLINT non-const reference auto copy = *AllocateEmptyBitmap(length); for (auto _ : state) { - internal::CopyBitmap(src, OffsetSrc, length, copy->mutable_data(), OffsetDest, false); + internal::CopyBitmap(src, OffsetSrc, length, copy->mutable_data(), OffsetDest); } state.SetBytesProcessed(state.iterations() * buffer_size); @@ -386,7 +386,7 @@ static void BitmapEquals(benchmark::State& state) { const int64_t length = bits_size - offset; auto copy = *AllocateEmptyBitmap(length + offset); - internal::CopyBitmap(src, 0, length, copy->mutable_data(), offset, false); + internal::CopyBitmap(src, 0, length, copy->mutable_data(), offset); for (auto _ : state) { auto is_same = internal::BitmapEquals(src, 0, copy->data(), offset, length); diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 3772e9f77c6..28fa54f29d4 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1073,7 +1073,7 @@ TEST(BitUtilTests, TestBitmapEquals) { for (int64_t offset_dst : offsets) { const auto bit_length = num_bits - offset_src; - internal::CopyBitmap(src, offset_src, bit_length, dst, offset_dst, false); + internal::CopyBitmap(src, offset_src, bit_length, dst, offset_dst); ASSERT_TRUE(internal::BitmapEquals(src, offset_src, dst, offset_dst, bit_length)); // test negative cases by flip some bit at head and tail diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc index adc95cba8df..c3a02f23e4a 100644 --- a/cpp/src/arrow/util/bitmap_ops.cc +++ b/cpp/src/arrow/util/bitmap_ops.cc @@ -282,14 +282,13 @@ class BitmapWordWriter { } // namespace -template +enum class TransferMode : bool { Copy, Invert }; + +template void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length, int64_t dest_offset, uint8_t* dest) { - int64_t byte_offset = offset / 8; int64_t bit_offset = offset % 8; - int64_t dest_byte_offset = dest_offset / 8; int64_t dest_bit_offset = dest_offset % 8; - int64_t num_bytes = BitUtil::BytesForBits(length); if (bit_offset || dest_bit_offset) { auto reader = internal::BitmapWordReader(data, offset, length); @@ -298,52 +297,52 @@ void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length, auto nwords = reader.words(); while (nwords--) { auto word = reader.NextWord(); - writer.PutNextWord(invert_bits ? ~word : word); + writer.PutNextWord(mode == TransferMode::Invert ? ~word : word); } auto nbytes = reader.trailing_bytes(); while (nbytes--) { int valid_bits; auto byte = reader.NextTrailingByte(valid_bits); - writer.PutNextTrailingByte(invert_bits ? ~byte : byte, valid_bits); + writer.PutNextTrailingByte(mode == TransferMode::Invert ? ~byte : byte, valid_bits); } - } else { - // Shift dest by its byte offset - dest += dest_byte_offset; + } else if (length) { + int64_t num_bytes = BitUtil::BytesForBits(length); + + // Shift by its byte offset + data += offset / 8; + dest += dest_offset / 8; // Take care of the trailing bits in the last byte + // E.g., if trailing_bits = 5, last byte should be + // - low 3 bits: new bits from last byte of data buffer + // - high 5 bits: old bits from last byte of dest buffer int64_t trailing_bits = num_bytes * 8 - length; - uint8_t trail = 0; - if (trailing_bits && restore_trailing_bits) { - trail = dest[num_bytes - 1]; - } + uint8_t trail_mask = (1U << (8 - trailing_bits)) - 1; + uint8_t last_data; - if (invert_bits) { - for (int64_t i = 0; i < num_bytes; i++) { - dest[i] = static_cast(~(data[byte_offset + i])); + if (mode == TransferMode::Invert) { + for (int64_t i = 0; i < num_bytes - 1; i++) { + dest[i] = static_cast(~(data[i])); } + last_data = ~data[num_bytes - 1]; } else { - std::memcpy(dest, data + byte_offset, static_cast(num_bytes)); + std::memcpy(dest, data, static_cast(num_bytes - 1)); + last_data = data[num_bytes - 1]; } - if (restore_trailing_bits) { - for (int i = 0; i < trailing_bits; i++) { - if (BitUtil::GetBit(&trail, i + 8 - trailing_bits)) { - BitUtil::SetBit(dest, length + i); - } else { - BitUtil::ClearBit(dest, length + i); - } - } - } + // Set last byte + dest[num_bytes - 1] &= ~trail_mask; + dest[num_bytes - 1] |= last_data & trail_mask; } } -template +template Result> TransferBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t length) { ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateEmptyBitmap(length, pool)); uint8_t* dest = buffer->mutable_data(); - TransferBitmap(data, offset, length, 0, dest); + TransferBitmap(data, offset, length, 0, dest); // As we have freshly allocated this bitmap, we should take care of zeroing the // remaining bits. @@ -357,28 +356,24 @@ Result> TransferBitmap(MemoryPool* pool, const uint8_t* } void CopyBitmap(const uint8_t* data, int64_t offset, int64_t length, uint8_t* dest, - int64_t dest_offset, bool restore_trailing_bits) { - if (restore_trailing_bits) { - TransferBitmap(data, offset, length, dest_offset, dest); - } else { - TransferBitmap(data, offset, length, dest_offset, dest); - } + int64_t dest_offset) { + TransferBitmap(data, offset, length, dest_offset, dest); } void InvertBitmap(const uint8_t* data, int64_t offset, int64_t length, uint8_t* dest, int64_t dest_offset) { - TransferBitmap(data, offset, length, dest_offset, dest); + TransferBitmap(data, offset, length, dest_offset, dest); } Result> CopyBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t length) { - return TransferBitmap(pool, data, offset, length); + return TransferBitmap(pool, data, offset, length); } Result> InvertBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t length, std::shared_ptr* out) { - return TransferBitmap(pool, data, offset, length); + return TransferBitmap(pool, data, offset, length); } bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right, diff --git a/cpp/src/arrow/util/bitmap_ops.h b/cpp/src/arrow/util/bitmap_ops.h index a5b54c6189b..d7a2db61d78 100644 --- a/cpp/src/arrow/util/bitmap_ops.h +++ b/cpp/src/arrow/util/bitmap_ops.h @@ -48,12 +48,11 @@ Result> CopyBitmap(MemoryPool* pool, const uint8_t* bitm /// \param[in] offset bit offset into the source data /// \param[in] length number of bits to copy /// \param[in] dest_offset bit offset into the destination -/// \param[in] restore_trailing_bits don't clobber bits outside the destination range /// \param[out] dest the destination buffer, must have at least space for /// (offset + length) bits ARROW_EXPORT void CopyBitmap(const uint8_t* bitmap, int64_t offset, int64_t length, uint8_t* dest, - int64_t dest_offset, bool restore_trailing_bits = true); + int64_t dest_offset); /// Invert a bit range of an existing bitmap into an existing bitmap ///