From 3d64e5e5fc35a41ed408ed9a87f270a71d16feb9 Mon Sep 17 00:00:00 2001 From: Yibo Cai Date: Sat, 6 Jun 2020 14:43:01 +0000 Subject: [PATCH 1/3] ARROW-8974: [C++] Simplify TransferBitmap This patch removes "restore_trailing_bits" template parameter of TransferBitmap class. Trailing bits are now always not clobbered, which is no harm. It also refines trailing bits processing to keep the performance influence trivial. Besides, this patch replaces "invert_bits" boolean parameter with enum to allow explicit naming. --- cpp/src/arrow/array/concatenate.cc | 2 +- cpp/src/arrow/util/bit_util_benchmark.cc | 4 +- cpp/src/arrow/util/bit_util_test.cc | 2 +- cpp/src/arrow/util/bitmap_ops.cc | 69 +++++++++++------------- cpp/src/arrow/util/bitmap_ops.h | 3 +- 5 files changed, 37 insertions(+), 43 deletions(-) diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc index e1f6d71dc55..42bdee0914b 100644 --- a/cpp/src/arrow/array/concatenate.cc +++ b/cpp/src/arrow/array/concatenate.cc @@ -79,7 +79,7 @@ static Status ConcatenateBitmaps(const std::vector& bitmaps, MemoryPool* BitUtil::SetBitsTo(dst, bitmap_offset, bitmap.range.length, true); } else { internal::CopyBitmap(bitmap.data, bitmap.range.offset, bitmap.range.length, dst, - bitmap_offset, false); + bitmap_offset); } bitmap_offset += bitmap.range.length; } diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index a409bf72057..10047d23390 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -355,7 +355,7 @@ static void CopyBitmap(benchmark::State& state) { // NOLINT non-const reference auto copy = *AllocateEmptyBitmap(length); for (auto _ : state) { - internal::CopyBitmap(src, OffsetSrc, length, copy->mutable_data(), OffsetDest, false); + internal::CopyBitmap(src, OffsetSrc, length, copy->mutable_data(), OffsetDest); } state.SetBytesProcessed(state.iterations() * buffer_size); @@ -386,7 +386,7 @@ static void BitmapEquals(benchmark::State& state) { const int64_t length = bits_size - offset; auto copy = *AllocateEmptyBitmap(length + offset); - internal::CopyBitmap(src, 0, length, copy->mutable_data(), offset, false); + internal::CopyBitmap(src, 0, length, copy->mutable_data(), offset); for (auto _ : state) { auto is_same = internal::BitmapEquals(src, 0, copy->data(), offset, length); diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 3772e9f77c6..28fa54f29d4 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1073,7 +1073,7 @@ TEST(BitUtilTests, TestBitmapEquals) { for (int64_t offset_dst : offsets) { const auto bit_length = num_bits - offset_src; - internal::CopyBitmap(src, offset_src, bit_length, dst, offset_dst, false); + internal::CopyBitmap(src, offset_src, bit_length, dst, offset_dst); ASSERT_TRUE(internal::BitmapEquals(src, offset_src, dst, offset_dst, bit_length)); // test negative cases by flip some bit at head and tail diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc index adc95cba8df..c3a02f23e4a 100644 --- a/cpp/src/arrow/util/bitmap_ops.cc +++ b/cpp/src/arrow/util/bitmap_ops.cc @@ -282,14 +282,13 @@ class BitmapWordWriter { } // namespace -template +enum class TransferMode : bool { Copy, Invert }; + +template void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length, int64_t dest_offset, uint8_t* dest) { - int64_t byte_offset = offset / 8; int64_t bit_offset = offset % 8; - int64_t dest_byte_offset = dest_offset / 8; int64_t dest_bit_offset = dest_offset % 8; - int64_t num_bytes = BitUtil::BytesForBits(length); if (bit_offset || dest_bit_offset) { auto reader = internal::BitmapWordReader(data, offset, length); @@ -298,52 +297,52 @@ void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length, auto nwords = reader.words(); while (nwords--) { auto word = reader.NextWord(); - writer.PutNextWord(invert_bits ? ~word : word); + writer.PutNextWord(mode == TransferMode::Invert ? ~word : word); } auto nbytes = reader.trailing_bytes(); while (nbytes--) { int valid_bits; auto byte = reader.NextTrailingByte(valid_bits); - writer.PutNextTrailingByte(invert_bits ? ~byte : byte, valid_bits); + writer.PutNextTrailingByte(mode == TransferMode::Invert ? ~byte : byte, valid_bits); } - } else { - // Shift dest by its byte offset - dest += dest_byte_offset; + } else if (length) { + int64_t num_bytes = BitUtil::BytesForBits(length); + + // Shift by its byte offset + data += offset / 8; + dest += dest_offset / 8; // Take care of the trailing bits in the last byte + // E.g., if trailing_bits = 5, last byte should be + // - low 3 bits: new bits from last byte of data buffer + // - high 5 bits: old bits from last byte of dest buffer int64_t trailing_bits = num_bytes * 8 - length; - uint8_t trail = 0; - if (trailing_bits && restore_trailing_bits) { - trail = dest[num_bytes - 1]; - } + uint8_t trail_mask = (1U << (8 - trailing_bits)) - 1; + uint8_t last_data; - if (invert_bits) { - for (int64_t i = 0; i < num_bytes; i++) { - dest[i] = static_cast(~(data[byte_offset + i])); + if (mode == TransferMode::Invert) { + for (int64_t i = 0; i < num_bytes - 1; i++) { + dest[i] = static_cast(~(data[i])); } + last_data = ~data[num_bytes - 1]; } else { - std::memcpy(dest, data + byte_offset, static_cast(num_bytes)); + std::memcpy(dest, data, static_cast(num_bytes - 1)); + last_data = data[num_bytes - 1]; } - if (restore_trailing_bits) { - for (int i = 0; i < trailing_bits; i++) { - if (BitUtil::GetBit(&trail, i + 8 - trailing_bits)) { - BitUtil::SetBit(dest, length + i); - } else { - BitUtil::ClearBit(dest, length + i); - } - } - } + // Set last byte + dest[num_bytes - 1] &= ~trail_mask; + dest[num_bytes - 1] |= last_data & trail_mask; } } -template +template Result> TransferBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t length) { ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateEmptyBitmap(length, pool)); uint8_t* dest = buffer->mutable_data(); - TransferBitmap(data, offset, length, 0, dest); + TransferBitmap(data, offset, length, 0, dest); // As we have freshly allocated this bitmap, we should take care of zeroing the // remaining bits. @@ -357,28 +356,24 @@ Result> TransferBitmap(MemoryPool* pool, const uint8_t* } void CopyBitmap(const uint8_t* data, int64_t offset, int64_t length, uint8_t* dest, - int64_t dest_offset, bool restore_trailing_bits) { - if (restore_trailing_bits) { - TransferBitmap(data, offset, length, dest_offset, dest); - } else { - TransferBitmap(data, offset, length, dest_offset, dest); - } + int64_t dest_offset) { + TransferBitmap(data, offset, length, dest_offset, dest); } void InvertBitmap(const uint8_t* data, int64_t offset, int64_t length, uint8_t* dest, int64_t dest_offset) { - TransferBitmap(data, offset, length, dest_offset, dest); + TransferBitmap(data, offset, length, dest_offset, dest); } Result> CopyBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t length) { - return TransferBitmap(pool, data, offset, length); + return TransferBitmap(pool, data, offset, length); } Result> InvertBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset, int64_t length, std::shared_ptr* out) { - return TransferBitmap(pool, data, offset, length); + return TransferBitmap(pool, data, offset, length); } bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right, diff --git a/cpp/src/arrow/util/bitmap_ops.h b/cpp/src/arrow/util/bitmap_ops.h index a5b54c6189b..d7a2db61d78 100644 --- a/cpp/src/arrow/util/bitmap_ops.h +++ b/cpp/src/arrow/util/bitmap_ops.h @@ -48,12 +48,11 @@ Result> CopyBitmap(MemoryPool* pool, const uint8_t* bitm /// \param[in] offset bit offset into the source data /// \param[in] length number of bits to copy /// \param[in] dest_offset bit offset into the destination -/// \param[in] restore_trailing_bits don't clobber bits outside the destination range /// \param[out] dest the destination buffer, must have at least space for /// (offset + length) bits ARROW_EXPORT void CopyBitmap(const uint8_t* bitmap, int64_t offset, int64_t length, uint8_t* dest, - int64_t dest_offset, bool restore_trailing_bits = true); + int64_t dest_offset); /// Invert a bit range of an existing bitmap into an existing bitmap /// From 3a4bd9c1b2168e0622172785fdb5afebf19db402 Mon Sep 17 00:00:00 2001 From: Yibo Cai Date: Tue, 9 Jun 2020 02:09:01 +0000 Subject: [PATCH 2/3] refine variable naming --- cpp/src/arrow/util/bitmap_ops.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc index c3a02f23e4a..35c55936df6 100644 --- a/cpp/src/arrow/util/bitmap_ops.cc +++ b/cpp/src/arrow/util/bitmap_ops.cc @@ -317,7 +317,7 @@ void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length, // - low 3 bits: new bits from last byte of data buffer // - high 5 bits: old bits from last byte of dest buffer int64_t trailing_bits = num_bytes * 8 - length; - uint8_t trail_mask = (1U << (8 - trailing_bits)) - 1; + uint8_t kTrailingBitmask = (1U << (8 - trailing_bits)) - 1; uint8_t last_data; if (mode == TransferMode::Invert) { @@ -331,8 +331,8 @@ void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length, } // Set last byte - dest[num_bytes - 1] &= ~trail_mask; - dest[num_bytes - 1] |= last_data & trail_mask; + dest[num_bytes - 1] &= ~kTrailingBitmask; + dest[num_bytes - 1] |= last_data & kTrailingBitmask; } } From 30afbe93ad210ec12f26ea24a9a15c775bfc3a57 Mon Sep 17 00:00:00 2001 From: Yibo Cai Date: Wed, 10 Jun 2020 01:09:38 +0000 Subject: [PATCH 3/3] Revert "refine variable naming" This reverts commit 3a385542bea5954f2443f79d34a6a83c85e45f05. --- cpp/src/arrow/util/bitmap_ops.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc index 35c55936df6..c3a02f23e4a 100644 --- a/cpp/src/arrow/util/bitmap_ops.cc +++ b/cpp/src/arrow/util/bitmap_ops.cc @@ -317,7 +317,7 @@ void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length, // - low 3 bits: new bits from last byte of data buffer // - high 5 bits: old bits from last byte of dest buffer int64_t trailing_bits = num_bytes * 8 - length; - uint8_t kTrailingBitmask = (1U << (8 - trailing_bits)) - 1; + uint8_t trail_mask = (1U << (8 - trailing_bits)) - 1; uint8_t last_data; if (mode == TransferMode::Invert) { @@ -331,8 +331,8 @@ void TransferBitmap(const uint8_t* data, int64_t offset, int64_t length, } // Set last byte - dest[num_bytes - 1] &= ~kTrailingBitmask; - dest[num_bytes - 1] |= last_data & kTrailingBitmask; + dest[num_bytes - 1] &= ~trail_mask; + dest[num_bytes - 1] |= last_data & trail_mask; } }