diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 1744a626313..989893e16f2 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -175,6 +175,7 @@ set(ARROW_SRCS testing/util.cc util/basic_decimal.cc util/bit_block_counter.cc + util/bit_run_reader.cc util/bit_util.cc util/bitmap.cc util/bitmap_builders.cc diff --git a/cpp/src/arrow/array/array_primitive.cc b/cpp/src/arrow/array/array_primitive.cc index 519a7f21f43..a1aff933af4 100644 --- a/cpp/src/arrow/array/array_primitive.cc +++ b/cpp/src/arrow/array/array_primitive.cc @@ -23,6 +23,7 @@ #include "arrow/array/array_base.h" #include "arrow/type.h" #include "arrow/util/bit_block_counter.h" +#include "arrow/util/bitmap_ops.h" #include "arrow/util/logging.h" namespace arrow { diff --git a/cpp/src/arrow/util/bit_block_counter.cc b/cpp/src/arrow/util/bit_block_counter.cc index a550eccec1a..381a92eff51 100644 --- a/cpp/src/arrow/util/bit_block_counter.cc +++ b/cpp/src/arrow/util/bit_block_counter.cc @@ -23,6 +23,7 @@ #include "arrow/buffer.h" #include "arrow/util/bit_util.h" +#include "arrow/util/bitmap_ops.h" #include "arrow/util/ubsan.h" namespace arrow { diff --git a/cpp/src/arrow/util/bit_block_counter.h b/cpp/src/arrow/util/bit_block_counter.h index 3ee777fbabc..9ebcc5e9a90 100644 --- a/cpp/src/arrow/util/bit_block_counter.h +++ b/cpp/src/arrow/util/bit_block_counter.h @@ -22,9 +22,6 @@ #include #include -#include "arrow/util/bit_util.h" -#include "arrow/util/bitmap_ops.h" -#include "arrow/util/ubsan.h" #include "arrow/util/visibility.h" namespace arrow { diff --git a/cpp/src/arrow/util/bit_block_counter_test.cc b/cpp/src/arrow/util/bit_block_counter_test.cc index f1a0f35d386..924222c3810 100644 --- a/cpp/src/arrow/util/bit_block_counter_test.cc +++ b/cpp/src/arrow/util/bit_block_counter_test.cc @@ -30,6 +30,7 @@ #include "arrow/testing/util.h" #include "arrow/util/bit_block_counter.h" #include "arrow/util/bit_util.h" +#include "arrow/util/bitmap_ops.h" namespace arrow { namespace internal { diff --git a/cpp/src/arrow/util/bit_run_reader.cc b/cpp/src/arrow/util/bit_run_reader.cc new file mode 100644 index 00000000000..da5ba649dd8 --- /dev/null +++ b/cpp/src/arrow/util/bit_run_reader.cc @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/util/bit_run_reader.h" + +#include + +#include "arrow/util/bit_util.h" + +namespace arrow { +namespace internal { + +#if ARROW_LITTLE_ENDIAN + +BitRunReader::BitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t length) + : bitmap_(bitmap + (start_offset / 8)), + position_(start_offset % 8), + length_(position_ + length) { + if (ARROW_PREDICT_FALSE(length == 0)) { + word_ = 0; + return; + } + + // On the initial load if there is an offset we need to account for this when + // loading bytes. Every other call to LoadWord() should only occur when + // position_ is a multiple of 64. + current_run_bit_set_ = !BitUtil::GetBit(bitmap, start_offset); + int64_t bits_remaining = length + position_; + + LoadWord(bits_remaining); + + // Prepare for inversion in NextRun. + // Clear out any preceding bits. + word_ = word_ & ~BitUtil::LeastSignficantBitMask(position_); +} + +#endif + +} // namespace internal +} // namespace arrow diff --git a/cpp/src/arrow/util/bit_run_reader.h b/cpp/src/arrow/util/bit_run_reader.h new file mode 100644 index 00000000000..eaa85bf3d7c --- /dev/null +++ b/cpp/src/arrow/util/bit_run_reader.h @@ -0,0 +1,166 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "arrow/util/bit_util.h" +#include "arrow/util/bitmap_reader.h" +#include "arrow/util/macros.h" +#include "arrow/util/visibility.h" + +namespace arrow { +namespace internal { + +struct BitRun { + int64_t length; + // Whether bits are set at this point. + bool set; + + std::string ToString() const { + return std::string("{Length: ") + std::to_string(length) + + ", set=" + std::to_string(set) + "}"; + } +}; + +static inline bool operator==(const BitRun& lhs, const BitRun& rhs) { + return lhs.length == rhs.length && lhs.set == rhs.set; +} + +class BitRunReaderLinear { + public: + BitRunReaderLinear(const uint8_t* bitmap, int64_t start_offset, int64_t length) + : reader_(bitmap, start_offset, length) {} + + BitRun NextRun() { + BitRun rl = {/*length=*/0, reader_.IsSet()}; + // Advance while the values are equal and not at the end of list. + while (reader_.position() < reader_.length() && reader_.IsSet() == rl.set) { + rl.length++; + reader_.Next(); + } + return rl; + } + + private: + BitmapReader reader_; +}; + +#if ARROW_LITTLE_ENDIAN +/// A convenience class for counting the number of continguous set/unset bits +/// in a bitmap. +class ARROW_EXPORT BitRunReader { + public: + /// \brief Constructs new BitRunReader. + /// + /// \param[in] bitmap source data + /// \param[in] start_offset bit offset into the source data + /// \param[in] length number of bits to copy + BitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t length); + + /// Returns a new BitRun containing the number of contiguous + /// bits with the same value. length == 0 indicates the + /// end of the bitmap. + BitRun NextRun() { + if (ARROW_PREDICT_FALSE(position_ >= length_)) { + return {/*length=*/0, false}; + } + // This implementation relies on a efficient implementations of + // CountTrailingZeros and assumes that runs are more often then + // not. The logic is to incrementally find the next bit change + // from the current position. This is done by zeroing all + // bits in word_ up to position_ and using the TrailingZeroCount + // to find the index of the next set bit. + + // The runs alternate on each call, so flip the bit. + current_run_bit_set_ = !current_run_bit_set_; + + int64_t start_position = position_; + int64_t start_bit_offset = start_position & 63; + // Invert the word for proper use of CountTrailingZeros and + // clear bits so CountTrailingZeros can do it magic. + word_ = ~word_ & ~BitUtil::LeastSignficantBitMask(start_bit_offset); + + // Go forward until the next change from unset to set. + int64_t new_bits = BitUtil::CountTrailingZeros(word_) - start_bit_offset; + position_ += new_bits; + + if (ARROW_PREDICT_FALSE(BitUtil::IsMultipleOf64(position_)) && + ARROW_PREDICT_TRUE(position_ < length_)) { + // Continue extending position while we can advance an entire word. + // (updates position_ accordingly). + AdvanceUntilChange(); + } + + return {/*length=*/position_ - start_position, current_run_bit_set_}; + } + + private: + void AdvanceUntilChange() { + int64_t new_bits = 0; + do { + // Advance the position of the bitmap for loading. + bitmap_ += sizeof(uint64_t); + LoadNextWord(); + new_bits = BitUtil::CountTrailingZeros(word_); + // Continue calculating run length. + position_ += new_bits; + } while (ARROW_PREDICT_FALSE(BitUtil::IsMultipleOf64(position_)) && + ARROW_PREDICT_TRUE(position_ < length_) && new_bits > 0); + } + + void LoadNextWord() { return LoadWord(length_ - position_); } + + // Helper method for Loading the next word. + void LoadWord(int64_t bits_remaining) { + word_ = 0; + // we need at least an extra byte in this case. + if (ARROW_PREDICT_TRUE(bits_remaining >= 64)) { + std::memcpy(&word_, bitmap_, 8); + } else { + int64_t bytes_to_load = BitUtil::BytesForBits(bits_remaining); + auto word_ptr = reinterpret_cast(&word_); + std::memcpy(word_ptr, bitmap_, bytes_to_load); + // Ensure stoppage at last bit in bitmap by reversing the next higher + // order bit. + BitUtil::SetBitTo(word_ptr, bits_remaining, + !BitUtil::GetBit(word_ptr, bits_remaining - 1)); + } + + // Two cases: + // 1. For unset, CountTrailingZeros works natually so we don't + // invert the word. + // 2. Otherwise invert so we can use CountTrailingZeros. + if (current_run_bit_set_) { + word_ = ~word_; + } + } + const uint8_t* bitmap_; + int64_t position_; + int64_t length_; + uint64_t word_; + bool current_run_bit_set_; +}; +#else +using BitRunReader = BitRunReaderLinear; +#endif + +} // namespace internal +} // namespace arrow diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 1ca293bd5fc..3fe24b46412 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -139,6 +139,12 @@ constexpr bool IsMultipleOf64(int64_t n) { return (n & 63) == 0; } constexpr bool IsMultipleOf8(int64_t n) { return (n & 7) == 0; } +// Returns a mask for the bit_index lower order bits. +// Only valid for bit_index in the range [0, 64). +constexpr uint64_t LeastSignficantBitMask(int64_t bit_index) { + return (static_cast(1) << bit_index) - 1; +} + // Returns 'value' rounded up to the nearest multiple of 'factor' constexpr int64_t RoundUp(int64_t value, int64_t factor) { return CeilDiv(value, factor) * factor; diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index 10047d23390..63783da77ef 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -28,9 +28,13 @@ #include "arrow/array/array_base.h" #include "arrow/array/array_primitive.h" #include "arrow/buffer.h" +#include "arrow/builder.h" +#include "arrow/memory_pool.h" #include "arrow/result.h" +#include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/testing/util.h" +#include "arrow/util/bit_run_reader.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap.h" #include "arrow/util/bitmap_generate.h" @@ -195,6 +199,44 @@ static void BenchmarkBitmapReader(benchmark::State& state, int64_t nbytes) { state.SetBytesProcessed(2LL * state.iterations() * nbytes); } +template +static void BenchmarkBitRunReader(benchmark::State& state, int64_t set_percentage) { + ::arrow::random::RandomArrayGenerator rag(/*seed=*/23); + constexpr int64_t kNumBits = 4096; + double set_probability = + static_cast(set_percentage == -1 ? 0 : set_percentage) / 100.0; + std::shared_ptr buffer = + rag.Boolean(kNumBits, set_probability)->data()->buffers[1]; + + const uint8_t* bitmap = buffer->data(); + if (set_percentage == -1) { + internal::BitmapWriter writer(buffer->mutable_data(), /*start_offset=*/0, + /*length=*/kNumBits); + for (int x = 0; x < kNumBits; x++) { + if (x % 2 == 0) { + writer.Set(); + } else { + writer.Clear(); + } + writer.Next(); + } + } + + for (auto _ : state) { + { + BitRunReaderType reader(bitmap, 0, kNumBits); + int64_t set_total = 0; + internal::BitRun br; + do { + br = reader.NextRun(); + set_total += br.set ? br.length : 0; + } while (br.length != 0); + benchmark::DoNotOptimize(set_total); + } + } + state.SetBytesProcessed(state.iterations() * (kNumBits / 8)); +} + template static void BenchmarkVisitBits(benchmark::State& state, int64_t nbytes) { std::shared_ptr buffer = CreateRandomBuffer(nbytes); @@ -277,6 +319,14 @@ static void BitmapReader(benchmark::State& state) { BenchmarkBitmapReader(state, state.range(0)); } +static void BitRunReader(benchmark::State& state) { + BenchmarkBitRunReader(state, state.range(0)); +} + +static void BitRunReaderLinear(benchmark::State& state) { + BenchmarkBitRunReader(state, state.range(0)); +} + static void BitmapWriter(benchmark::State& state) { BenchmarkBitmapWriter(state, state.range(0)); } @@ -409,6 +459,24 @@ BENCHMARK(ReferenceNaiveBitmapReader)->Arg(kBufferSize); #endif BENCHMARK(BitmapReader)->Arg(kBufferSize); +BENCHMARK(BitRunReader) + ->Arg(-1) + ->Arg(0) + ->Arg(10) + ->Arg(25) + ->Arg(50) + ->Arg(60) + ->Arg(75) + ->Arg(99); +BENCHMARK(BitRunReaderLinear) + ->Arg(-1) + ->Arg(0) + ->Arg(10) + ->Arg(25) + ->Arg(50) + ->Arg(60) + ->Arg(75) + ->Arg(99); BENCHMARK(VisitBits)->Arg(kBufferSize); BENCHMARK(VisitBitsUnrolled)->Arg(kBufferSize); BENCHMARK(SetBitsTo)->Arg(2)->Arg(1 << 4)->Arg(1 << 10)->Arg(1 << 17); diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 28fa54f29d4..adb1e0f7ef4 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -20,29 +20,37 @@ #include #include #include -#include #include #include +#include #include +#include #include #include #include +#include "arrow/array/array_base.h" +#include "arrow/array/data.h" #include "arrow/buffer.h" -#include "arrow/memory_pool.h" +#include "arrow/result.h" +#include "arrow/status.h" #include "arrow/testing/gtest_common.h" +#include "arrow/testing/gtest_compat.h" #include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" +#include "arrow/testing/util.h" +#include "arrow/type_fwd.h" +#include "arrow/util/bit_run_reader.h" #include "arrow/util/bit_stream_utils.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap.h" -#include "arrow/util/bitmap_builders.h" +#include "arrow/util/bitmap_generate.h" #include "arrow/util/bitmap_ops.h" #include "arrow/util/bitmap_reader.h" #include "arrow/util/bitmap_visit.h" #include "arrow/util/bitmap_writer.h" #include "arrow/util/bitset_stack.h" -#include "arrow/util/cpu_info.h" namespace arrow { @@ -193,6 +201,204 @@ TEST(BitmapReader, DoesNotReadOutOfBounds) { internal::BitmapReader r3(nullptr, 0, 0); } +namespace internal { +void PrintTo(const internal::BitRun& run, std::ostream* os) { + *os << run.ToString(); // whatever needed to print bar to os +} +} // namespace internal + +TEST(BitRunReader, ZeroLength) { + internal::BitRunReader reader(nullptr, /*start_offset=*/0, /*length=*/0); + + EXPECT_EQ(reader.NextRun().length, 0); +} + +TEST(BitRunReader, NormalOperation) { + std::vector bm_vector = {1, 0, 1}; // size: 3 + bm_vector.insert(bm_vector.end(), /*n=*/5, /*val=*/0); // size: 8 + bm_vector.insert(bm_vector.end(), /*n=*/7, /*val=*/1); // size: 15 + bm_vector.insert(bm_vector.end(), /*n=*/3, /*val=*/0); // size: 18 + bm_vector.insert(bm_vector.end(), /*n=*/25, /*val=*/1); // size: 43 + bm_vector.insert(bm_vector.end(), /*n=*/21, /*val=*/0); // size: 64 + bm_vector.insert(bm_vector.end(), /*n=*/26, /*val=*/1); // size: 90 + bm_vector.insert(bm_vector.end(), /*n=*/130, /*val=*/0); // size: 220 + bm_vector.insert(bm_vector.end(), /*n=*/65, /*val=*/1); // size: 285 + std::shared_ptr bitmap; + int64_t length; + BitmapFromVector(bm_vector, /*bit_offset=*/0, &bitmap, &length); + + internal::BitRunReader reader(bitmap->data(), /*start_offset=*/0, /*length=*/length); + std::vector results; + internal::BitRun rl; + do { + rl = reader.NextRun(); + results.push_back(rl); + } while (rl.length != 0); + EXPECT_EQ(results.back().length, 0); + results.pop_back(); + EXPECT_THAT(results, ElementsAreArray( + std::vector{{/*length=*/1, /*set=*/true}, + {/*length=*/1, /*set=*/false}, + {/*length=*/1, /*set=*/true}, + {/*length=*/5, /*set=*/false}, + {/*length=*/7, /*set=*/true}, + {/*length=*/3, /*set=*/false}, + {/*length=*/25, /*set=*/true}, + {/*length=*/21, /*set=*/false}, + {/*length=*/26, /*set=*/true}, + {/*length=*/130, /*set=*/false}, + {/*length=*/65, /*set=*/true}})); +} + +TEST(BitRunReader, AllFirstByteCombos) { + for (int offset = 0; offset < 8; offset++) { + for (int64_t x = 0; x < (1 << 8) - 1; x++) { + int64_t bits = BitUtil::ToLittleEndian(x); + internal::BitRunReader reader(reinterpret_cast(&bits), + /*start_offset=*/offset, + /*length=*/8 - offset); + std::vector results; + internal::BitRun rl; + do { + rl = reader.NextRun(); + results.push_back(rl); + } while (rl.length != 0); + EXPECT_EQ(results.back().length, 0); + results.pop_back(); + int64_t sum = 0; + for (const auto& result : results) { + sum += result.length; + } + ASSERT_EQ(sum, 8 - offset); + } + } +} + +TEST(BitRunReader, TruncatedAtWord) { + std::vector bm_vector; + bm_vector.insert(bm_vector.end(), /*n=*/7, /*val=*/1); + bm_vector.insert(bm_vector.end(), /*n=*/58, /*val=*/0); + + std::shared_ptr bitmap; + int64_t length; + BitmapFromVector(bm_vector, /*bit_offset=*/0, &bitmap, &length); + + internal::BitRunReader reader(bitmap->data(), /*start_offset=*/1, + /*length=*/63); + std::vector results; + internal::BitRun rl; + do { + rl = reader.NextRun(); + results.push_back(rl); + } while (rl.length != 0); + EXPECT_EQ(results.back().length, 0); + results.pop_back(); + EXPECT_THAT(results, + ElementsAreArray(std::vector{ + {/*length=*/6, /*set=*/true}, {/*length=*/57, /*set=*/false}})); +} + +TEST(BitRunReader, ScalarComparison) { + ::arrow::random::RandomArrayGenerator rag(/*seed=*/23); + constexpr int64_t kNumBits = 1000000; + std::shared_ptr buffer = + rag.Boolean(kNumBits, /*set_probability=*/.4)->data()->buffers[1]; + + const uint8_t* bitmap = buffer->data(); + + internal::BitRunReader reader(bitmap, 0, kNumBits); + internal::BitRunReaderLinear scalar_reader(bitmap, 0, kNumBits); + internal::BitRun br, brs; + int64_t br_bits = 0; + int64_t brs_bits = 0; + do { + br = reader.NextRun(); + brs = scalar_reader.NextRun(); + br_bits += br.length; + brs_bits += brs.length; + EXPECT_EQ(br.length, brs.length); + if (br.length > 0) { + EXPECT_EQ(br, brs) << internal::Bitmap(bitmap, 0, kNumBits).ToString() << br_bits + << " " << brs_bits; + } + } while (brs.length != 0); + EXPECT_EQ(br_bits, brs_bits); +} + +TEST(BitRunReader, TruncatedWithinWordMultipleOf8Bits) { + std::vector bm_vector; + bm_vector.insert(bm_vector.end(), /*n=*/7, /*val=*/1); + bm_vector.insert(bm_vector.end(), /*n=*/5, /*val=*/0); + + std::shared_ptr bitmap; + int64_t length; + BitmapFromVector(bm_vector, /*bit_offset=*/0, &bitmap, &length); + + internal::BitRunReader reader(bitmap->data(), /*start_offset=*/1, + /*length=*/7); + std::vector results; + internal::BitRun rl; + do { + rl = reader.NextRun(); + results.push_back(rl); + } while (rl.length != 0); + EXPECT_EQ(results.back().length, 0); + results.pop_back(); + EXPECT_THAT(results, ElementsAreArray(std::vector{ + {/*length=*/6, /*set=*/true}, {/*length=*/1, /*set=*/false}})); +} + +TEST(BitRunReader, TruncatedWithinWord) { + std::vector bm_vector; + bm_vector.insert(bm_vector.end(), /*n=*/37 + 40, /*val=*/0); + bm_vector.insert(bm_vector.end(), /*n=*/23, /*val=*/1); + + std::shared_ptr bitmap; + int64_t length; + BitmapFromVector(bm_vector, /*bit_offset=*/0, &bitmap, &length); + + constexpr int64_t kOffset = 37; + internal::BitRunReader reader(bitmap->data(), /*start_offset=*/kOffset, + /*length=*/53); + std::vector results; + internal::BitRun rl; + do { + rl = reader.NextRun(); + results.push_back(rl); + } while (rl.length != 0); + EXPECT_EQ(results.back().length, 0); + results.pop_back(); + EXPECT_THAT(results, + ElementsAreArray(std::vector{ + {/*length=*/40, /*set=*/false}, {/*length=*/13, /*set=*/true}})); +} + +TEST(BitRunReader, TruncatedMultipleWords) { + std::vector bm_vector = {1, 0, 1}; // size: 3 + bm_vector.insert(bm_vector.end(), /*n=*/5, /*val=*/0); // size: 8 + bm_vector.insert(bm_vector.end(), /*n=*/30, /*val=*/1); // size: 38 + bm_vector.insert(bm_vector.end(), /*n=*/95, /*val=*/0); // size: 133 + std::shared_ptr bitmap; + int64_t length; + BitmapFromVector(bm_vector, /*bit_offset=*/0, &bitmap, &length); + + constexpr int64_t kOffset = 5; + internal::BitRunReader reader(bitmap->data(), /*start_offset=*/kOffset, + /*length=*/length - (kOffset + 3)); + std::vector results; + internal::BitRun rl; + do { + rl = reader.NextRun(); + results.push_back(rl); + } while (rl.length != 0); + EXPECT_EQ(results.back().length, 0); + results.pop_back(); + EXPECT_THAT(results, ElementsAreArray(std::vector{ + {/*length=*/3, /*set=*/false}, + {/*length=*/30, /*set=*/true}, + {/*length=*/92, /*set=*/false}})); +} + TEST(BitmapWriter, NormalOperation) { for (const auto fill_byte_int : {0x00, 0xff}) { const uint8_t fill_byte = static_cast(fill_byte_int); diff --git a/cpp/src/arrow/util/bitmap_builders.h b/cpp/src/arrow/util/bitmap_builders.h index d11e8bc874a..5bd2ad44140 100644 --- a/cpp/src/arrow/util/bitmap_builders.h +++ b/cpp/src/arrow/util/bitmap_builders.h @@ -19,6 +19,7 @@ #include #include +#include #include "arrow/result.h" #include "arrow/type_fwd.h" diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc index c3a02f23e4a..32f21d1e08c 100644 --- a/cpp/src/arrow/util/bitmap_ops.cc +++ b/cpp/src/arrow/util/bitmap_ops.cc @@ -17,12 +17,10 @@ #include "arrow/util/bitmap_ops.h" -#include #include #include #include #include -#include #include "arrow/buffer.h" #include "arrow/result.h" diff --git a/cpp/src/arrow/util/bitmap_ops.h b/cpp/src/arrow/util/bitmap_ops.h index d7a2db61d78..7d2a3ea40fd 100644 --- a/cpp/src/arrow/util/bitmap_ops.h +++ b/cpp/src/arrow/util/bitmap_ops.h @@ -21,10 +21,13 @@ #include #include "arrow/result.h" -#include "arrow/type_fwd.h" #include "arrow/util/visibility.h" namespace arrow { + +class Buffer; +class MemoryPool; + namespace internal { // ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/bitmap_reader.h b/cpp/src/arrow/util/bitmap_reader.h index 838781db8fc..5579d95210b 100644 --- a/cpp/src/arrow/util/bitmap_reader.h +++ b/cpp/src/arrow/util/bitmap_reader.h @@ -57,6 +57,8 @@ class BitmapReader { int64_t position() const { return position_; } + int64_t length() const { return length_; } + private: const uint8_t* bitmap_; int64_t position_; diff --git a/cpp/src/arrow/util/rle_encoding.h b/cpp/src/arrow/util/rle_encoding.h index 0eab5b3a9ff..68d29930666 100644 --- a/cpp/src/arrow/util/rle_encoding.h +++ b/cpp/src/arrow/util/rle_encoding.h @@ -22,11 +22,13 @@ #include #include +#include #include +#include "arrow/util/bit_block_counter.h" +#include "arrow/util/bit_run_reader.h" #include "arrow/util/bit_stream_utils.h" #include "arrow/util/bit_util.h" -#include "arrow/util/bitmap_reader.h" #include "arrow/util/macros.h" namespace arrow { @@ -149,6 +151,11 @@ class RleDecoder { /// are no more. template bool NextCounts(); + + /// Utility methods for retrieving spaced values. + template + int GetSpaced(Converter converter, int batch_size, int null_count, + const uint8_t* valid_bits, int64_t valid_bits_offset, T* out); }; /// Class to incrementally build the rle data. This class does not allocate any memory. @@ -301,7 +308,7 @@ inline int RleDecoder::GetBatch(T* values, int batch_size) { while (values_read < batch_size) { int remaining = batch_size - values_read; - if (repeat_count_ > 0) { + if (repeat_count_ > 0) { // Repeated value case. int repeat_batch = std::min(remaining, repeat_count_); std::fill(out, out + repeat_batch, static_cast(current_value_)); @@ -326,98 +333,220 @@ inline int RleDecoder::GetBatch(T* values, int batch_size) { return values_read; } -template -inline int RleDecoder::GetBatchSpaced(int batch_size, int null_count, - const uint8_t* valid_bits, - int64_t valid_bits_offset, T* out) { +template +inline int RleDecoder::GetSpaced(Converter converter, int batch_size, int null_count, + const uint8_t* valid_bits, int64_t valid_bits_offset, + T* out) { + if (ARROW_PREDICT_FALSE(null_count == batch_size)) { + converter.FillZero(out, out + batch_size); + return batch_size; + } + DCHECK_GE(bit_width_, 0); int values_read = 0; - int remaining_nulls = null_count; - T zero = {}; - - arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, batch_size); + int values_remaining = batch_size - null_count; + // Assume no bits to start. + arrow::internal::BitRunReader bit_reader(valid_bits, valid_bits_offset, + /*length=*/batch_size); + arrow::internal::BitRun valid_run = bit_reader.NextRun(); while (values_read < batch_size) { - DCHECK_LT(bit_reader.position(), batch_size); - bool is_valid = bit_reader.IsSet(); - bit_reader.Next(); + if (ARROW_PREDICT_FALSE(valid_run.length == 0)) { + valid_run = bit_reader.NextRun(); + } - if (is_valid) { + DCHECK_GT(batch_size, 0); + DCHECK_GT(valid_run.length, 0); + + if (valid_run.set) { if ((repeat_count_ == 0) && (literal_count_ == 0)) { - if (!NextCounts()) return values_read; + if (!NextCounts()) return values_read; + DCHECK((repeat_count_ > 0) ^ (literal_count_ > 0)); } - if (repeat_count_ > 0) { - // The current index is already valid, we don't need to check that again - int repeat_batch = 1; - repeat_count_--; + if (repeat_count_ > 0) { + int repeat_batch = 0; + // Consume the entire repeat counts incrementing repeat_batch to + // be the total of nulls + values consumed, we only need to + // get the total count because we can fill in the same value for + // nulls and non-nulls. This proves to be a big efficiency win. while (repeat_count_ > 0 && (values_read + repeat_batch) < batch_size) { - DCHECK_LT(bit_reader.position(), batch_size); - if (bit_reader.IsSet()) { - repeat_count_--; + DCHECK_GT(valid_run.length, 0); + if (valid_run.set) { + int update_size = std::min(static_cast(valid_run.length), repeat_count_); + repeat_count_ -= update_size; + repeat_batch += update_size; + valid_run.length -= update_size; + values_remaining -= update_size; } else { - remaining_nulls--; + // We can consume all nulls here because we would do so on + // the next loop anyways. + repeat_batch += static_cast(valid_run.length); + valid_run.length = 0; } - repeat_batch++; - - bit_reader.Next(); + if (valid_run.length == 0) { + valid_run = bit_reader.NextRun(); + } + } + RunType current_value = static_cast(current_value_); + if (ARROW_PREDICT_FALSE(!converter.IsValid(current_value))) { + return values_read; } - std::fill(out, out + repeat_batch, static_cast(current_value_)); + converter.Fill(out, out + repeat_batch, current_value); out += repeat_batch; values_read += repeat_batch; } else if (literal_count_ > 0) { - int literal_batch = - std::min(batch_size - values_read - remaining_nulls, literal_count_); + int literal_batch = std::min(values_remaining, literal_count_); + DCHECK_GT(literal_batch, 0); // Decode the literals constexpr int kBufferSize = 1024; - T indices[kBufferSize]; + RunType indices[kBufferSize]; literal_batch = std::min(literal_batch, kBufferSize); - int actual_read = bit_reader_.GetBatch(bit_width_, &indices[0], literal_batch); - DCHECK_EQ(actual_read, literal_batch); - + int actual_read = bit_reader_.GetBatch(bit_width_, indices, literal_batch); + if (ARROW_PREDICT_FALSE(actual_read != literal_batch)) { + return values_read; + } + if (!converter.IsValid(indices, /*length=*/actual_read)) { + return values_read; + } int skipped = 0; - int literals_read = 1; - *out++ = indices[0]; - - // Read the first bitset to the end + int literals_read = 0; while (literals_read < literal_batch) { - DCHECK_LT(bit_reader.position(), batch_size); - if (bit_reader.IsSet()) { - *out = indices[literals_read]; - literals_read++; + if (valid_run.set) { + int update_size = std::min(literal_batch - literals_read, + static_cast(valid_run.length)); + converter.Copy(out, indices + literals_read, update_size); + literals_read += update_size; + out += update_size; + valid_run.length -= update_size; } else { - *out = zero; - skipped++; + converter.FillZero(out, out + valid_run.length); + out += valid_run.length; + skipped += static_cast(valid_run.length); + valid_run.length = 0; + } + if (valid_run.length == 0) { + valid_run = bit_reader.NextRun(); } - ++out; - bit_reader.Next(); } literal_count_ -= literal_batch; + values_remaining -= literal_batch; values_read += literal_batch + skipped; - remaining_nulls -= skipped; } } else { - *out = zero; - ++out; - values_read++; - remaining_nulls--; + converter.FillZero(out, out + valid_run.length); + out += valid_run.length; + values_read += static_cast(valid_run.length); + valid_run.length = 0; } } - + DCHECK_EQ(valid_run.length, 0); + DCHECK_EQ(values_remaining, 0); return values_read; } +// Converter for GetSpaced that handles runs that get returned +// directly as output. +template +struct PlainRleConverter { + T kZero = {}; + inline bool IsValid(const T& values) const { return true; } + inline bool IsValid(const T* values, int32_t length) const { return true; } + inline void Fill(T* begin, T* end, const T& run_value) const { + std::fill(begin, end, run_value); + } + inline void FillZero(T* begin, T* end) { std::fill(begin, end, kZero); } + inline void Copy(T* out, const T* values, int length) const { + std::memcpy(out, values, length * sizeof(T)); + } +}; + +template +inline int RleDecoder::GetBatchSpaced(int batch_size, int null_count, + const uint8_t* valid_bits, + int64_t valid_bits_offset, T* out) { + if (null_count == 0) { + return GetBatch(out, batch_size); + } + + PlainRleConverter converter; + arrow::internal::BitBlockCounter block_counter(valid_bits, valid_bits_offset, + batch_size); + + int total_processed = 0; + int processed = 0; + arrow::internal::BitBlockCount block; + + do { + block = block_counter.NextFourWords(); + if (block.length == 0) { + break; + } + if (block.AllSet()) { + processed = GetBatch(out, block.length); + } else if (block.NoneSet()) { + converter.FillZero(out, out + block.length); + processed = block.length; + } else { + processed = GetSpaced>( + converter, block.length, block.length - block.popcount, valid_bits, + valid_bits_offset, out); + } + total_processed += processed; + out += block.length; + valid_bits_offset += block.length; + } while (processed == block.length); + return total_processed; +} + static inline bool IndexInRange(int32_t idx, int32_t dictionary_length) { return idx >= 0 && idx < dictionary_length; } +// Converter for GetSpaced that handles runs of returned dictionary +// indices. +template +struct DictionaryConverter { + T kZero = {}; + const T* dictionary; + int32_t dictionary_length; + + inline bool IsValid(int32_t value) { return IndexInRange(value, dictionary_length); } + + inline bool IsValid(const int32_t* values, int32_t length) const { + using IndexType = int32_t; + IndexType min_index = std::numeric_limits::max(); + IndexType max_index = std::numeric_limits::min(); + for (int x = 0; x < length; x++) { + min_index = std::min(values[x], min_index); + max_index = std::max(values[x], max_index); + } + + return IndexInRange(min_index, dictionary_length) && + IndexInRange(max_index, dictionary_length); + } + inline void Fill(T* begin, T* end, const int32_t& run_value) const { + std::fill(begin, end, dictionary[run_value]); + } + inline void FillZero(T* begin, T* end) { std::fill(begin, end, kZero); } + + inline void Copy(T* out, const int32_t* values, int length) const { + for (int x = 0; x < length; x++) { + out[x] = dictionary[values[x]]; + } + } +}; + template inline int RleDecoder::GetBatchWithDict(const T* dictionary, int32_t dictionary_length, T* values, int batch_size) { // Per https://github.com/apache/parquet-format/blob/master/Encodings.md, // the maximum dictionary index width in Parquet is 32 bits. using IndexType = int32_t; + DictionaryConverter converter; + converter.dictionary = dictionary; + converter.dictionary_length = dictionary_length; DCHECK_GE(bit_width_, 0); int values_read = 0; @@ -452,14 +581,10 @@ inline int RleDecoder::GetBatchWithDict(const T* dictionary, int32_t dictionary_ if (ARROW_PREDICT_FALSE(actual_read != literal_batch)) { return values_read; } - - for (int i = 0; i < literal_batch; ++i) { - IndexType index = indices[i]; - if (ARROW_PREDICT_FALSE(!IndexInRange(index, dictionary_length))) { - return values_read; - } - out[i] = dictionary[index]; + if (ARROW_PREDICT_FALSE(!converter.IsValid(indices, /*length=*/literal_batch))) { + return values_read; } + converter.Copy(out, indices, literal_batch); /* Upkeep counters */ literal_count_ -= literal_batch; @@ -479,98 +604,39 @@ inline int RleDecoder::GetBatchWithDictSpaced(const T* dictionary, int batch_size, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset) { + if (null_count == 0) { + return GetBatchWithDict(dictionary, dictionary_length, out, batch_size); + } + arrow::internal::BitBlockCounter block_counter(valid_bits, valid_bits_offset, + batch_size); using IndexType = int32_t; - - DCHECK_GE(bit_width_, 0); - int values_read = 0; - int remaining_nulls = null_count; - T zero = {}; - - arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, batch_size); - - while (values_read < batch_size) { - DCHECK_LT(bit_reader.position(), batch_size); - bool is_valid = bit_reader.IsSet(); - bit_reader.Next(); - - if (is_valid) { - if ((repeat_count_ == 0) && (literal_count_ == 0)) { - if (!NextCounts()) return values_read; - } - if (repeat_count_ > 0) { - auto idx = static_cast(current_value_); - if (ARROW_PREDICT_FALSE(!IndexInRange(idx, dictionary_length))) { - return values_read; - } - T value = dictionary[idx]; - // The current index is already valid, we don't need to check that again - int repeat_batch = 1; - repeat_count_--; - - while (repeat_count_ > 0 && (values_read + repeat_batch) < batch_size) { - DCHECK_LT(bit_reader.position(), batch_size); - if (bit_reader.IsSet()) { - repeat_count_--; - } else { - remaining_nulls--; - } - repeat_batch++; - - bit_reader.Next(); - } - std::fill(out, out + repeat_batch, value); - out += repeat_batch; - values_read += repeat_batch; - } else if (literal_count_ > 0) { - int literal_batch = - std::min(batch_size - values_read - remaining_nulls, literal_count_); - - // Decode the literals - constexpr int kBufferSize = 1024; - IndexType indices[kBufferSize]; - literal_batch = std::min(literal_batch, kBufferSize); - int actual_read = bit_reader_.GetBatch(bit_width_, &indices[0], literal_batch); - if (actual_read != literal_batch) return values_read; - - int skipped = 0; - int literals_read = 1; - - IndexType first_idx = indices[0]; - if (ARROW_PREDICT_FALSE(!IndexInRange(first_idx, dictionary_length))) { - return values_read; - } - *out++ = dictionary[first_idx]; - - // Read the first bitset to the end - while (literals_read < literal_batch) { - DCHECK_LT(bit_reader.position(), batch_size); - if (bit_reader.IsSet()) { - IndexType idx = indices[literals_read]; - if (ARROW_PREDICT_FALSE(!IndexInRange(idx, dictionary_length))) { - return values_read; - } - *out = dictionary[idx]; - literals_read++; - } else { - *out = zero; - skipped++; - } - ++out; - bit_reader.Next(); - } - literal_count_ -= literal_batch; - values_read += literal_batch + skipped; - remaining_nulls -= skipped; - } + DictionaryConverter converter; + converter.dictionary = dictionary; + converter.dictionary_length = dictionary_length; + + int total_processed = 0; + int processed = 0; + arrow::internal::BitBlockCount block; + do { + block = block_counter.NextFourWords(); + if (block.length == 0) { + break; + } + if (block.AllSet()) { + processed = GetBatchWithDict(dictionary, dictionary_length, out, block.length); + } else if (block.NoneSet()) { + converter.FillZero(out, out + block.length); + processed = block.length; } else { - *out = zero; - ++out; - values_read++; - remaining_nulls--; + processed = GetSpaced>( + converter, block.length, block.length - block.popcount, valid_bits, + valid_bits_offset, out); } - } - - return values_read; + total_processed += processed; + out += block.length; + valid_bits_offset += block.length; + } while (processed == block.length); + return total_processed; } template @@ -593,7 +659,7 @@ bool RleDecoder::NextCounts() { return false; } repeat_count_ = count; - T value = 0; + T value = {}; if (!bit_reader_.GetAligned(static_cast(BitUtil::CeilDiv(bit_width_, 8)), &value)) { return false; diff --git a/cpp/src/parquet/arrow/path_internal.cc b/cpp/src/parquet/arrow/path_internal.cc index 8ada325e0e5..8d2790aa0d4 100644 --- a/cpp/src/parquet/arrow/path_internal.cc +++ b/cpp/src/parquet/arrow/path_internal.cc @@ -98,6 +98,7 @@ #include "arrow/memory_pool.h" #include "arrow/type.h" #include "arrow/type_traits.h" +#include "arrow/util/bit_run_reader.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_visit.h" #include "arrow/util/logging.h" @@ -464,8 +465,8 @@ class NullableNode { void SetRepLevelIfNull(int16_t rep_level) { rep_level_if_null_ = rep_level; } - ::arrow::internal::BitmapReader MakeReader(const ElementRange& range) { - return ::arrow::internal::BitmapReader(null_bitmap_, entry_offset_ + range.start, + ::arrow::internal::BitRunReader MakeReader(const ElementRange& range) { + return ::arrow::internal::BitRunReader(null_bitmap_, entry_offset_ + range.start, range.Size()); } @@ -478,25 +479,20 @@ class NullableNode { valid_bits_reader_ = MakeReader(*range); } child_range->start = range->start; - while (!range->Empty() && !valid_bits_reader_.IsSet()) { - ++range->start; - valid_bits_reader_.Next(); - } - int64_t null_count = range->start - child_range->start; - if (null_count > 0) { - RETURN_IF_ERROR(FillRepLevels(null_count, rep_level_if_null_, context)); - RETURN_IF_ERROR(context->AppendDefLevels(null_count, def_level_if_null_)); + ::arrow::internal::BitRun run = valid_bits_reader_.NextRun(); + if (!run.set) { + range->start += run.length; + RETURN_IF_ERROR(FillRepLevels(run.length, rep_level_if_null_, context)); + RETURN_IF_ERROR(context->AppendDefLevels(run.length, def_level_if_null_)); + run = valid_bits_reader_.NextRun(); } if (range->Empty()) { new_range_ = true; return kDone; } child_range->end = child_range->start = range->start; + child_range->end += run.length; - while (child_range->end != range->end && valid_bits_reader_.IsSet()) { - ++child_range->end; - valid_bits_reader_.Next(); - } DCHECK(!child_range->Empty()); range->start += child_range->Size(); new_range_ = false; @@ -505,7 +501,7 @@ class NullableNode { const uint8_t* null_bitmap_; int64_t entry_offset_; - ::arrow::internal::BitmapReader valid_bits_reader_; + ::arrow::internal::BitRunReader valid_bits_reader_; int16_t def_level_if_null_; int16_t rep_level_if_null_;